Compare commits

..

1 Commits

Author SHA1 Message Date
Jakob Borg
6065b6cb93 Don't consider prereleases for -upgrade (fixes #436) 2014-07-24 12:59:51 +02:00
1386 changed files with 76350 additions and 245996 deletions

View File

@@ -1,20 +0,0 @@
comment: false
coverage:
range: "40...100"
precision: 1
status:
patch:
default:
informational: true
project:
default:
informational: true
github_checks:
annotations: false
ignore:
- "**.pb.go"
- "**_mocked.go"
- "**/mocks/*"

View File

@@ -1,12 +0,0 @@
version = 1
exclude_patterns = ["**/*.pb.go"]
test_patterns = ["**/*_test.go"]
[[analyzers]]
name = "go"
enabled = true
[analyzers.meta]
import_paths = ["github.com/syncthing/syncthing"]
build_tags = ["noassets"]

8
.gitattributes vendored
View File

@@ -1,8 +0,0 @@
# Text files use LF line endings in this repository
* text=auto
# Except the dependencies, which we leave alone
vendor/** -text=auto
# Diffs on these files are meaningless
*.svg -diff

11
.github/FUNDING.yml vendored
View File

@@ -1,11 +0,0 @@
github: syncthing
custom: "https://syncthing.net/donations/"
# patreon: # Replace with a single Patreon username
# open_collective: # Replace with a single Open Collective username
# ko_fi: # Replace with a single Ko-fi username
# tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
# community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
# liberapay: # Replace with a single Liberapay username
# issuehunt: # Replace with a single IssueHunt username
# otechie: # Replace with a single Otechie username

View File

@@ -1,29 +0,0 @@
name: Feature request
description: File a new feature request
labels: ["enhancement", "needs-triage"]
type: Feature
body:
- type: textarea
id: feature
attributes:
label: Feature description
description: Please describe the behavior you'd like to see.
validations:
required: true
- type: textarea
id: problem-usecase
attributes:
label: Problem or use case
description: Please explain which problem this would solve, or what the use case is for the feature. Keep in mind that it's more likely to be implemented if it's generally useful for a larger number of users.
validations:
required: true
- type: textarea
id: alternatives
attributes:
label: Alternatives or workarounds
description: Please describe any alternatives or workarounds you have considered and, possibly, rejected.
validations:
required: true

View File

@@ -1,52 +0,0 @@
name: Bug report
description: If you're actually looking for support instead, see "I need help / I have a question".
labels: ["bug", "needs-triage"]
type: Bug
body:
- type: markdown
attributes:
value: |
:no_entry_sign: If you want to report a security issue, please see [our Security Policy](https://syncthing.net/security/) and do not report the issue here.
:interrobang: If you are not sure if there is a bug, but something isn't working right and you need help, please [use the forum](https://forum.syncthing.net/).
- type: textarea
id: what-happened
attributes:
label: What happened?
description: Also tell us, what did you expect to happen, and any steps we might use to reproduce the problem.
placeholder: Tell us what you see!
validations:
required: true
- type: input
id: version
attributes:
label: Syncthing version
description: What version of Syncthing are you running?
placeholder: v1.27.4
validations:
required: true
- type: input
id: platform
attributes:
label: Platform & operating system
description: On what platform(s) are you seeing the problem?
placeholder: Linux arm64
validations:
required: true
- type: input
id: browser
attributes:
label: Browser version
description: If the problem is related to the GUI, describe your browser and version.
placeholder: Safari 17.3.1
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output or crash backtrace. This will be automatically formatted into code, so no need for backticks.
render: shell

View File

@@ -1,8 +0,0 @@
blank_issues_enabled: false
contact_links:
- name: I need help / I have a question
url: https://forum.syncthing.net/
about: Ask questions, get support, and discuss with other community members.
- name: Android issues
url: https://github.com/syncthing/syncthing-android/issues/
about: The Android app has its own issue tracker.

View File

@@ -1,27 +0,0 @@
### Purpose
Describe the purpose of this change. If there is an existing issue that is
resolved by this pull request, ensure that the commit subject is on the form
`Some short description (fixes #1234)` where 1234 is the issue number.
### Testing
Describe what testing has been done, and how the reviewer can test the change
if new tests are not included.
### Screenshots
If this is a GUI change, include screenshots of the change. If not, please
feel free to just delete this section.
### Documentation
If this is a user visible change (including API and protocol changes), add a link here
to the corresponding pull request on https://github.com/syncthing/docs or describe
the documentation changes necessary.
## Authorship
Your name and email will be added automatically to the AUTHORS file
based on the commit metadata.

10
.github/SECURITY.md vendored
View File

@@ -1,10 +0,0 @@
## Reporting a Vulnerability
If you believe that you've found a Syncthing-related security vulnerability,
please report it by sending email to the address security@syncthing.net. The
[PGP key for security@syncthing.net
(B683AD7B76CAB013)](https://syncthing.net/security-key.txt) can be used to
send encrypted mail or to verify responses received from that address.
You can read more about Syncthing security at
https://syncthing.net/security/.

View File

@@ -1,13 +0,0 @@
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: monthly
open-pull-requests-limit: 10
- package-ecosystem: "gomod"
directory: "/"
schedule:
interval: monthly
open-pull-requests-limit: 10

23
.github/labeler.yml vendored
View File

@@ -1,23 +0,0 @@
version: 1
labels:
- label: enhancement
title: ^feat\b
- label: bug
title: ^fix\b
- label: documentation
title: ^docs\b
- label: chore
title: ^chore\b
- label: chore
title: ^refactor\b
- label: build
title: ^build\b
- label: dependencies
title: ^build\(deps\)\b

52
.github/regsync.yml vendored
View File

@@ -1,52 +0,0 @@
version: 1
creds:
- registry: docker.io
user: "{{env \"DOCKERHUB_USERNAME\"}}"
pass: "{{env \"DOCKERHUB_TOKEN\"}}"
defaults:
ratelimit:
min: 100
retry: 1m
parallel: 4
sync:
- source: ghcr.io/syncthing/syncthing
target: docker.io/syncthing/syncthing
type: repository
tags:
allow:
- latest
- rc
- edge
- \d+
- \d+\.\d+
- \d+\.\d+\.\d+
- \d+\.\d+\.\d+-rc\.\d+
- source: ghcr.io/syncthing/relaysrv
target: docker.io/syncthing/relaysrv
type: repository
tags:
allow:
- latest
- rc
- edge
- \d+
- \d+\.\d+
- \d+\.\d+\.\d+
- \d+\.\d+\.\d+-rc\.\d+
- source: ghcr.io/syncthing/discosrv
target: docker.io/syncthing/discosrv
type: repository
tags:
allow:
- latest
- rc
- edge
- \d+
- \d+\.\d+
- \d+\.\d+\.\d+
- \d+\.\d+\.\d+-rc\.\d+

17
.github/release.yml vendored
View File

@@ -1,17 +0,0 @@
changelog:
exclude:
labels:
- dependencies
categories:
- title: Fixes
labels:
- bug
- title: Features
labels:
- enhancement
- title: Other
labels:
- '*'

View File

@@ -1,85 +0,0 @@
name: Build Infrastructure Images
on:
push:
branches:
- infrastructure
- infra-*
env:
GO_VERSION: "~1.25.0"
CGO_ENABLED: "0"
BUILD_USER: docker
BUILD_HOST: github.syncthing.net
permissions:
contents: read
packages: write
jobs:
docker-syncthing:
name: Build and push Docker images
if: github.repository == 'syncthing/syncthing'
runs-on: ubuntu-latest
environment: docker
strategy:
matrix:
pkg:
- stcrashreceiver
- strelaypoolsrv
- stupgrades
- ursrv
steps:
- uses: actions/checkout@v5
with:
fetch-depth: 0
- uses: actions/setup-go@v6
with:
go-version: ${{ env.GO_VERSION }}
check-latest: true
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Login to GHCR
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build binaries
run: |
for arch in arm64 amd64; do
go run build.go -goos linux -goarch "$arch" build ${{ matrix.pkg }}
mv ${{ matrix.pkg }} ${{ matrix.pkg }}-linux-"$arch"
done
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Set Docker tags (all branches)
run: |
tags=docker.io/syncthing/${{ matrix.pkg }}:${{ github.sha }},ghcr.io/syncthing/infra/${{ matrix.pkg }}:${{ github.sha }}
echo "TAGS=$tags" >> $GITHUB_ENV
- name: Set Docker tags (latest)
if: github.ref == 'refs/heads/infrastructure'
run: |
tags=docker.io/syncthing/${{ matrix.pkg }}:latest,ghcr.io/syncthing/infra/${{ matrix.pkg }}:latest,${{ env.TAGS }}
echo "TAGS=$tags" >> $GITHUB_ENV
- name: Build and push
uses: docker/build-push-action@v5
with:
context: .
file: ./Dockerfile.${{ matrix.pkg }}
platforms: linux/amd64,linux/arm64
push: true
tags: ${{ env.TAGS }}
labels: |
org.opencontainers.image.revision=${{ github.sha }}

View File

@@ -1,18 +0,0 @@
name: Build Syncthing (Nightly)
on:
schedule:
# Run nightly build at 05:00 UTC
- cron: '00 05 * * *'
workflow_dispatch:
permissions:
contents: write
packages: write
jobs:
build-syncthing:
uses: ./.github/workflows/build-syncthing.yaml
# if we only want nightlies to run for specific users:
# if: contains(fromJSON('["syncthing", "calmh"]'), github.repository_owner)
secrets: inherit

View File

File diff suppressed because it is too large Load Diff

View File

@@ -1,18 +0,0 @@
name: Mirrors
on: [push, delete]
jobs:
codeberg:
name: Mirror to Codeberg
if: github.repository_owner == 'syncthing'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- uses: yesolutions/mirror-action@master
with:
REMOTE: ssh://git@codeberg.org/${{ github.repository }}.git
GIT_SSH_PRIVATE_KEY: ${{ secrets.CODEBERG_PUSH_KEY }}
GIT_SSH_NO_VERIFY_HOST: "true"

View File

@@ -1,20 +0,0 @@
name: Org membership recommendations
on:
workflow_dispatch:
schedule:
- cron: '0 0 1 * *'
jobs:
run-recommendation:
runs-on: ubuntu-latest
name: Check for a recommendation
steps:
- uses: docker://ghcr.io/calmh/github-org-members:latest
env:
GITHUB_ORGANISATION: syncthing
GITHUB_TOKEN: ${{ secrets.GOM_GITHUB_TOKEN }}
GOM_IGNORE_USERS: ${{ secrets.GOM_IGNORE_USERS }}
GOM_ALSO_REPOS: ${{ secrets.GOM_ALSO_REPOS }}

View File

@@ -1,27 +0,0 @@
name: PR metadata
on:
pull_request_target:
types:
- opened
- reopened
- edited
- synchronize
permissions:
contents: read
pull-requests: write
jobs:
#
# Set labels on PRs, which are then used to categorise release notes
#
labels:
name: Set labels
runs-on: ubuntu-latest
steps:
- uses: srvaroa/labeler@v1
env:
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"

View File

@@ -1,60 +0,0 @@
name: Release Syncthing
on:
push:
branches:
- release
- release-rc*
permissions:
contents: write
jobs:
create-release-tag:
name: Create release tag
runs-on: ubuntu-latest
environment: release
steps:
- uses: actions/checkout@v5
with:
fetch-depth: 0
ref: ${{ github.ref }} # https://github.com/actions/checkout/issues/882
token: ${{ secrets.ACTIONS_GITHUB_TOKEN }}
- uses: actions/setup-go@v6
with:
go-version: stable
- name: Determine version to release
run: |
if [[ "$GITHUB_REF_NAME" == "release" ]] ; then
next=$(go run ./script/next-version.go)
else
next=$(go run ./script/next-version.go --pre)
fi
echo "NEXT=$next" >> $GITHUB_ENV
echo "Next version is $next"
prev=$(git describe --exclude "*-*" --abbrev=0)
echo "PREV=$prev" >> $GITHUB_ENV
echo "Previous version is $prev"
- name: Determine release notes
run: |
go run ./script/relnotes.go --new-ver "$NEXT" --branch "$GITHUB_REF_NAME" --prev-ver "$PREV" > notes.md
env:
GITHUB_TOKEN: ${{ secrets.ACTIONS_GITHUB_TOKEN }}
- name: Create and push tag
run: |
git config --global user.name 'Syncthing Release Automation'
git config --global user.email 'release@syncthing.net'
git tag -a -F notes.md --cleanup=whitespace "$NEXT"
git push origin "$NEXT"
- name: Trigger the build
uses: benc-uk/workflow-dispatch@v1
with:
workflow: build-syncthing.yaml
ref: refs/tags/${{ env.NEXT }}
token: ${{ secrets.ACTIONS_GITHUB_TOKEN }}

View File

@@ -1,22 +0,0 @@
name: Trigger nightly build & release
on:
workflow_dispatch:
schedule:
# Run nightly build at 01:00 UTC
- cron: '00 01 * * *'
jobs:
trigger-nightly:
if: github.repository_owner == 'syncthing'
runs-on: ubuntu-latest
name: Push to release-nightly to trigger build
steps:
- uses: actions/checkout@v5
with:
token: ${{ secrets.ACTIONS_GITHUB_TOKEN }}
fetch-depth: 0
- run: |
git push origin main:release-nightly

View File

@@ -1,28 +0,0 @@
name: Update translations and documentation
on:
workflow_dispatch:
schedule:
- cron: '42 3 * * 1'
jobs:
update_transifex_docs:
runs-on: ubuntu-latest
name: Update translations and documentation
steps:
- uses: actions/checkout@v5
with:
fetch-depth: 0
token: ${{ secrets.ACTIONS_GITHUB_TOKEN }}
- uses: actions/setup-go@v6
with:
go-version: stable
- run: |
set -euo pipefail
git config --global user.name 'Syncthing Release Automation'
git config --global user.email 'release@syncthing.net'
bash build.sh translate
bash build.sh prerelease
git push
env:
WEBLATE_TOKEN: ${{ secrets.WEBLATE_TOKEN }}

22
.gitignore vendored
View File

@@ -1,20 +1,12 @@
/syncthing
/stdiscosrv
syncthing
syncthing.exe
stcli
stcli.exe
*.tar.gz
*.zip
*.asc
*.deb
*.exe
*.sublime*
discosrv
stpidx
.jshintrc
coverage.out
files/pidx
bin
perfstats*.csv
coverage.xml
syncthing.sig
RELEASE
deb
*.bz2
/repos
/proto/scripts/protoc-gen-gosyncthing
/compat.json

View File

@@ -1,96 +0,0 @@
version: "2"
linters:
default: all
disable:
- cyclop
- depguard
- err113
- exhaustive
- exhaustruct
- forbidigo
- funcorder
- funlen
- gochecknoglobals
- gochecknoinits
- gocognit
- goconst
- gocyclo
- godot
- godox
- gomoddirectives
- inamedparam
- interfacebloat
- ireturn
- lll
- maintidx
- mnd
- musttag
- nestif
- nlreturn
- noinlineerr
- nonamedreturns
- paralleltest
- prealloc
- predeclared
- protogetter
- recvcheck
- revive
- tagalign
- tagliatelle
- testpackage
- usetesting # go 1.24
- varnamelen
- whitespace
- wrapcheck
- wsl
- wsl_v5
exclusions:
generated: lax
presets:
- comments
- common-false-positives
- legacy
- std-error-handling
paths:
- internal/gen
- internal/db/olddb
- cmd/dev
- repos
- third_party$
- builtin$
- examples$
- _test\.go$
rules:
# relax the slog rules for debug lines, for now
- linters: [sloglint]
source: Debug
# contexts are irrelevant for SQLite
- linters: [noctx]
text: database/sql
# Rollback errors can be ignored
- linters: [errcheck]
source: Rollback
# Embedded fields named in selectors may add clarity
- linters: [staticcheck]
text: QF1008
# Don't necessarily rewrite !(foo || bar) to !foo && !bar
- linters: [staticcheck]
text: QF1001
settings:
sloglint:
context: "scope"
static-msg: true
msg-style: capitalized
key-naming-case: camel
formatters:
enable:
- gofumpt
exclusions:
generated: lax
paths:
- internal/gen
- cmd/dev
- repos
- third_party$
- builtin$
- examples$

View File

@@ -1,111 +0,0 @@
# This is the policy-bot configuration for this repository. It controls
# which approvals are required for any given pull request. The format is
# described at https://github.com/palantir/policy-bot. The syntax of the
# policy can be verified by the bot:
# curl https://pb.syncthing.net/api/validate -X PUT -T .policy.yml
# The policy below is what is required for any pull request.
policy:
approval:
- subject is conventional commit
- or:
- project metadata requires maintainer approval
- a maintainer claims responsibility
- or:
- is approved by a syncthing contributor
- is a translation or dependency update by a contributor
- is a trivial change by a contributor
- a maintainer claims responsibility
# Additionally, maintainers can disapprove of a PR
disapproval:
requires:
teams:
- syncthing/maintainers
# The rules for the policy are described below.
approval_rules:
# All commits (PRs before squashing) should have a valid conventional
# commit type subject.
- name: subject is conventional commit
requires:
conditions:
title:
matches:
- '^(feat|fix|docs|chore|refactor|build): [a-z].+'
- '^(feat|fix|docs|chore|refactor|build)\(\w+(, \w+)*\): [a-z].+'
# Changes to important project metadata and documentation, including this
# policy, require signoff by a maintainer
- name: project metadata requires maintainer approval
if:
changed_files:
paths:
- ^[^/]+\.md
- ^\.policy\.yml
- ^LICENSE
requires:
count: 1
teams:
- syncthing/maintainers
options:
ignore_update_merges: true
allow_non_author_contributor: true
# Regular pull requests require approval by an active contributor
- name: is approved by a syncthing contributor
requires:
count: 1
teams:
- syncthing/contributors
options:
ignore_update_merges: true
allow_non_author_contributor: true
# Changes to some files (translations, dependencies, compatibility) do not
# require approval if they were proposed by a contributor and have a
# matching commit subject
- name: is a translation or dependency update by a contributor
if:
only_changed_files:
paths:
- ^gui/default/assets/lang/
- ^go\.mod$
- ^go\.sum$
- ^compat\.yaml$
title:
matches:
- '^chore\(gui\):'
- '^build\(deps\):'
- '^build\(compat\):'
has_author_in:
teams:
- syncthing/contributors
# If the change is small and the label "trivial" is added, we accept that
# on trust. These PRs can be audited after the fact as appropriate.
# Features are not trivial.
- name: is a trivial change by a contributor
if:
modified_lines:
total: "< 25"
title:
not_matches:
- '^feat'
has_labels:
- trivial
has_author_in:
teams:
- syncthing/contributors
# A member of the maintainers group can take responsibility by adding the
# appropriate label.
- name: a maintainer claims responsibility
if:
has_labels:
- maintainer-responsibility
has_author_in:
teams:
- syncthing/maintainers

20
.travis.yml Normal file
View File

@@ -0,0 +1,20 @@
language: go
go:
- tip
install:
- export PATH=$PATH:$HOME/gopath/bin
- ./build.sh setup
- go get code.google.com/p/go.tools/cmd/cover
- go get github.com/mattn/goveralls
script:
- ./build.sh test-cov
after_success:
- goveralls -coverprofile=coverage.out -service=travis-ci -package=calmh/syncthing -repotoken="$COVERALS_TOKEN"
env:
global:
secure: "zEV2h2XtKHNLVdXJjM4LA/VjMfLVydm6goF+ARit+nOSGxGoH7f7jIdzJzhxgh7shKG93q61eLO1Tug+WBMYB2EpBuYnTB5AIMYhCDwNI8C4uBV6c3brHfcrie7MASNao8TID2QScASKNFFWvjv/i1Ccn5ztxdcQuhSsNjGZp8A="

View File

@@ -1,4 +0,0 @@
line_ending: lf
formatter:
type: basic
retain_line_breaks: true

322
AUTHORS
View File

@@ -1,322 +0,0 @@
# This is the official list of Syncthing authors for copyright purposes.
#
# THIS FILE IS MOSTLY AUTO GENERATED. IF YOU'VE MADE A COMMIT TO THE
# REPOSITORY YOU WILL BE ADDED HERE AUTOMATICALLY WITHOUT THE NEED FOR
# ANY MANUAL ACTION.
#
# That said, you are welcome to correct your name or add a nickname / GitHub
# user name as appropriate. The format is:
#
# Name Name Name (nickname) <email1@example.com> <email2@example.com>
#
# The in-GUI authors list is periodically automatically updated from the
# contents of this file.
#
Jakob Borg (calmh) <jakob@nym.se> <jakob@kastelo.net> <jborg@coreweave.com>
Audrius Butkevicius (AudriusButkevicius) <audrius.butkevicius@gmail.com> <github@audrius.rocks>
Simon Frei (imsodin) <freisim93@gmail.com>
Tomasz Wilczyński <5626656+tomasz1986@users.noreply.github.com> <twilczynski@naver.com>
Alexander Graf (alex2108) <register-github@alex-graf.de>
Alexandre Viau (aviau) <alexandre@alexandreviau.net> <aviau@debian.org>
Anderson Mesquita (andersonvom) <andersonvom@gmail.com>
André Colomb (acolomb) <src@andre.colomb.de> <github.com@andre.colomb.de>
Antony Male (canton7) <antony.male@gmail.com>
Ben Schulz (uok) <ueomkail@gmail.com> <uok@users.noreply.github.com>
bt90 <btom1990@googlemail.com>
Caleb Callaway (cqcallaw) <enlightened.despot@gmail.com>
Daniel Harte (norgeous) <daniel@harte.me> <daniel@danielharte.co.uk> <norgeous@users.noreply.github.com>
Emil Lundberg <emil@emlun.se>
Eric P <eric@kastelo.net>
Evgeny Kuznetsov <evgeny@kuznetsov.md>
greatroar <61184462+greatroar@users.noreply.github.com>
Lars K.W. Gohlke (lkwg82) <lkwg82@gmx.de>
Lode Hoste (Zillode) <zillode@zillode.be>
Marcus B Spencer <marcus@marcusspencer.xyz> <marcus@marcusspencer.us>
Michael Ploujnikov (plouj) <ploujj@gmail.com>
Ross Smith II (rasa) <ross@smithii.com>
Stefan Tatschner (rumpelsepp) <stefan@sevenbyte.org> <rumpelsepp@sevenbyte.org> <stefan@rumpelsepp.org>
Tommy van der Vorst <tommy-github@pixelspark.nl> <tommy@pixelspark.nl>
Wulf Weich (wweich) <wweich@users.noreply.github.com> <wweich@gmx.de> <wulf@weich-kr.de>
Adam Piggott (ProactiveServices) <aD@simplypeachy.co.uk> <simplypeachy@users.noreply.github.com> <ProactiveServices@users.noreply.github.com> <adam@proactiveservices.co.uk>
Adel Qalieh (adelq) <aqalieh95@gmail.com> <adelq@users.noreply.github.com>
Aleksey Vasenev <margtu-fivt@ya.ru>
Alessandro G. (alessandro.g89) <alessandro.g89@gmail.com>
Alex Ionescu <github@ionescu.sh>
Alex Lindeman <139387+aelindeman@users.noreply.github.com>
Alex Xu <alex.hello71@gmail.com>
Alexander Seiler <seileralex@gmail.com>
Alexandre Alves <alexandrealvesdb.contact@gmail.com>
Aman Gupta <aman@tmm1.net>
Andreas Sommer <andreas.sommer87@googlemail.com>
andresvia <andres.via@gmail.com>
Andrew Rabert (nvllsvm) <ar@nullsum.net> <6550543+nvllsvm@users.noreply.github.com>
Andrey D (scienmind) <scintertech@cryptolab.net> <scienmind@users.noreply.github.com>
andyleap <andyleap@gmail.com>
Anjan Momi <anjan@momi.ca>
Anthony Goeckner <agoeckner@users.noreply.github.com>
Antoine Lamielle (0x010C) <antoine.lamielle@0x010c.fr> <gh@0x010c.fr>
Anur <anurnomeru@163.com>
Aranjedeath <Aranjedeath@users.noreply.github.com>
ardevd <ardevd@users.noreply.github.com>
Arkadiusz Tymiński <gevleeog@gmail.com>
Aroun <login@b-vo.fr>
Arthur Axel fREW Schmidt (frioux) <frew@afoolishmanifesto.com> <frioux@gmail.com>
Artur Zubilewicz <AkaZecik@users.noreply.github.com>
Ashish Bhate <bhate.ashish@gmail.com>
Aurélien Rainone <476650+arl@users.noreply.github.com>
BAHADIR YILMAZ <bahadiryilmaz32@gmail.com>
Bart De Vries (mogwa1) <devriesb@gmail.com>
Beat Reichenbach <44111292+beatreichenbach@users.noreply.github.com>
Ben Shepherd (benshep) <bjashepherd@gmail.com>
Ben Sidhom (bsidhom) <bsidhom@gmail.com>
Benedikt Heine (bebehei) <bebe@bebehei.de>
Benno Fünfstück <benno.fuenfstueck@gmail.com>
Benny Ng (tpng) <benny.tpng@gmail.com>
boomsquared <54829195+boomsquared@users.noreply.github.com>
Boqin Qin <bobbqqin@bupt.edu.cn>
Boris Rybalkin <ribalkin@gmail.com>
Brendan Long (brendanlong) <self@brendanlong.com>
Catfriend1 <16361913+Catfriend1@users.noreply.github.com>
Cathryne Linenweaver (Cathryne) <cathryne.linenweaver@gmail.com> <Cathryne@users.noreply.github.com> <katrinleinweber@MAC.local>
Cedric Staniewski (xduugu) <cedric@gmx.ca>
Chih-Hsuan Yen <yan12125@gmail.com> <1937689+yan12125@users.noreply.github.com>
Choongkyu <choongkyu.kim+gh@gmail.com> <vapidlyrapid+gh@gmail.com>
Chris Howie (cdhowie) <me@chrishowie.com>
Chris Joel (cdata) <chris@scriptolo.gy>
Christian Kujau <ckujau@users.noreply.github.com>
Christian Prescott <me@christianprescott.com>
chucic <chucic@seznam.cz>
cjc7373 <niuchangcun@gmail.com>
Colin Kennedy (moshen) <moshen.colin@gmail.com>
Cromefire_ <tim.l@nghorst.net> <26320625+cromefire@users.noreply.github.com>
Cyprien Devillez <cypx@users.noreply.github.com>
d-volution <49024624+d-volution@users.noreply.github.com>
Dan <benda.daniel@gmail.com>
Daniel Barczyk <46358936+DanielBarczyk@users.noreply.github.com>
Daniel Bergmann (brgmnn) <dan.arne.bergmann@gmail.com> <brgmnn@users.noreply.github.com>
Daniel Martí (mvdan) <mvdan@mvdan.cc>
Daniel Padrta <64928366+danpadcz@users.noreply.github.com>
Daniil Gentili <daniil@daniil.it>
Darshil Chanpura (dtchanpura) <dtchanpura@gmail.com> <dcprime314@gmail.com>
dashangcun <907225865@qq.com>
David Rimmer (dinosore) <dinosore@dbrsoftware.co.uk>
DeflateAwning <11021263+DeflateAwning@users.noreply.github.com>
Denis A. (dva) <denisva@gmail.com>
Dennis Wilson (snnd) <dw@risu.io>
derekriemer <derek.riemer@colorado.edu>
DerRockWolf <50499906+DerRockWolf@users.noreply.github.com>
desbma <desbma@users.noreply.github.com>
Devon G. Redekopp <devon@redekopp.com>
digital <didev@dinid.net>
Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com>
Dmitry Saveliev (dsaveliev) <d.e.saveliev@gmail.com>
domain <32405309+szu17dmy@users.noreply.github.com>
Domenic Horner <domenic@tgxn.net>
Dominik Heidler (asdil12) <dominik@heidler.eu>
Elias Jarlebring (jarlebring) <jarlebring@gmail.com>
Elliot Huffman <thelich2@gmail.com>
Emil Hessman (ceh) <emil@hessman.se>
Eng Zer Jun <engzerjun@gmail.com>
entity0xfe <109791748+entity0xfe@users.noreply.github.com> <entity0xfe@my.domain>
Eric Lesiuta <elesiuta@gmail.com>
Erik Meitner (WSGCSysadmin) <e.meitner@willystreet.coop>
Evan Spensley <94762716+0evan@users.noreply.github.com>
Federico Castagnini (facastagnini) <federico.castagnini@gmail.com>
Felix <53702818+f-eliks@users.noreply.github.com>
Felix Ableitner (Nutomic) <me@nutomic.com>
Felix Lampe <mail@flampe.de>
Felix Unterpaintner (bigbear2nd) <bigbear2nd@gmail.com>
Francois-Xavier Gsell (zukoo) <fxgsell@gmail.com>
Frank Isemann (fti7) <frank@isemann.name>
Gahl Saraf <saraf.gahl@gmail.com> <gahl@raftt.io>
georgespatton <georgespatton@users.noreply.github.com>
ghjklw <malo@jaffre.info>
Gilli Sigurdsson (gillisig) <gilli@vx.is>
Gleb Sinyavskiy <zhulik.gleb@gmail.com>
Graham Miln (grahammiln) <graham.miln@dssw.co.uk> <graham.miln@miln.eu>
Greg <gco@jazzhaiku.com>
guangwu <guoguangwu@magic-shield.com>
gudvinr <gudvinr@gmail.com>
Gusted <postmaster@gusted.xyz> <williamzijl7@hotmail.com>
Han Boetes <han@boetes.org>
HansK-p <42314815+HansK-p@users.noreply.github.com>
Harrison Jones (harrisonhjones) <harrisonhjones@users.noreply.github.com>
Hazem Krimi <me@hazemkrimi.tech>
Heiko Zuerker (Smiley73) <heiko@zuerker.org>
Hireworks <129852174+hireworksltd@users.noreply.github.com>
Hugo Locurcio <hugo.locurcio@hugo.pro>
Iain Barnett <iainspeed@gmail.com>
Ian Johnson (anonymouse64) <ian.johnson@canonical.com> <person.uwsome@gmail.com>
ignacy123 <ignacy.buczek@onet.pl>
Iskander Sharipov (Alex) <quasilyte@gmail.com>
Jaakko Hannikainen (jgke) <jgke@jgke.fi>
Jack Croft <jccroft1@users.noreply.github.com>
Jacob <jyundt@gmail.com>
Jake Peterson (acogdev) <jake@acogdev.com>
James O'Beirne <wild-github@au92.org>
James Patterson (jpjp) <jamespatterson@operamail.com> <jpjp@users.noreply.github.com>
Jaroslav Lichtblau <svetlemodry@users.noreply.github.com>
Jaroslav Malec (dzarda) <dzardacz@gmail.com>
Jaspitta <ste.scarpitta@gmail.com>
Jaya Chithra (jayachithra) <s.k.jayachithra@gmail.com>
Jaya Kumar <jaya.kumar@ict.nl>
Jeffery To <jeffery.to@gmail.com>
jelle van der Waa <jelle@vdwaa.nl>
Jens Diemer (jedie) <github.com@jensdiemer.de> <git@jensdiemer.de>
Jochen Voss (seehuhn) <voss@seehuhn.de>
Johan Vromans (sciurius) <jvromans@squirrel.nl>
John Rinehart (fuzzybear3965) <johnrichardrinehart@gmail.com>
Jonas Thelemann <e-mail@jonas-thelemann.de>
Jonathan <artback@protonmail.com> <jonagn@gmail.com>
Jose Manuel Delicado (jmdaweb) <jmdaweb@hotmail.com> <jmdaweb@users.noreply.github.com>
jtagcat <git-514635f7@jtag.cat> <git-12dbd862@jtag.cat>
Julian Lehrhuber <jul13579@users.noreply.github.com>
Jörg Thalheim <Mic92@users.noreply.github.com>
Jędrzej Kula <kula.jedrek@gmail.com>
Kapil Sareen <kapilsareen584@gmail.com>
Karol Różycki (krozycki) <rozycki.karol@gmail.com>
Kebin Liu <lkebin@gmail.com>
Keith Harrison <keithh@protonmail.com>
Kelong Cong (kc1212) <kc04bc@gmx.com> <kc1212@users.noreply.github.com>
Ken'ichi Kamada (kamadak) <kamada@nanohz.org>
Kevin Allen (ironmig) <kma1660@gmail.com>
Kevin Bushiri (keevBush) <keevbush@gmail.com> <36192217+keevBush@users.noreply.github.com>
Kevin White, Jr. (kwhite17) <kevinwhite1710@gmail.com>
klemens <ka7@github.com>
Kurt Fitzner (Kudalufi) <kurt@va1der.ca> <kurt.fitzner@gmail.com>
kylosus <33132401+kylosus@users.noreply.github.com>
Lars Lehtonen <lars.lehtonen@gmail.com>
Laurent Etiemble (letiemble) <laurent.etiemble@gmail.com> <laurent.etiemble@monobjc.net>
Leo Arias (elopio) <yo@elopio.net>
Liu Siyuan (liusy182) <liusy182@gmail.com> <liusy182@hotmail.com>
Lord Landon Agahnim (LordLandon) <lordlandon@gmail.com>
LSmithx2 <42276854+lsmithx2@users.noreply.github.com>
Lukas Lihotzki <lukas@lihotzki.de>
Luke Hamburg <1992842+luckman212@users.noreply.github.com>
luzpaz <luzpaz@users.noreply.github.com>
Majed Abdulaziz (majedev) <majed.alhajry@gmail.com>
Marc Laporte (marclaporte) <marc@marclaporte.com> <marc@laporte.name>
Marcel Meyer <mm.marcelmeyer@gmail.com>
Marcin Dziadus (marcindziadus) <dziadus.marcin@gmail.com>
Marcus Legendre <marcus.legendre@gmail.com>
Mario Majila <mariustshipichik@gmail.com>
Mark Pulford (mpx) <mark@kyne.com.au>
Martchus <martchus@gmx.net>
Mateusz Naściszewski (mateon1) <matin1111@wp.pl>
Mateusz Ż <thedead4fun@live.com>
mathias4833 <67101597+mathias4833@users.noreply.github.com>
Matic Potočnik <hairyfotr@gmail.com>
Matt Burke (burkemw3) <mburke@amplify.com> <burkemw3@gmail.com>
Matt Robenolt <matt@ydekproductions.com>
Matteo Ruina <matteo.ruina@gmail.com>
Maurizio Tomasi <ziotom78@gmail.com>
Max <github@germancoding.com>
Max Schulze (kralo) <max.schulze@online.de> <kralo@users.noreply.github.com>
MaximAL <almaximal@ya.ru>
Maximilian <maxi.rostock@outlook.de> <public@complexvector.space>
Michael Jephcote (Rewt0r) <rewt0r@gmx.com> <Rewt0r@users.noreply.github.com>
Michael Rienstra <mrienstra@gmail.com>
MichaIng <micha@dietpi.com>
Migelo <miha@filetki.si>
Mike Boone <mike@boonedocks.net>
MikeLund <MikeLund@users.noreply.github.com>
MikolajTwarog <43782609+MikolajTwarog@users.noreply.github.com>
Mingxuan Lin <gdlmx@users.noreply.github.com>
mv1005 <49659413+mv1005@users.noreply.github.com>
Nate Morrison (nrm21) <natemorrison@gmail.com>
nf <nf@wh3rd.net>
Nicholas Rishel (PrototypeNM1) <rishel.nick@gmail.com> <PrototypeNM1@users.noreply.github.com>
Nick Busey <NickBusey@users.noreply.github.com>
Nico Stapelbroek <3368018+nstapelbroek@users.noreply.github.com>
Nicolas Braud-Santoni <nicolas@braud-santoni.eu>
Nicolas Perraut <n.perraut@gmail.com>
Niels Peter Roest (Niller303) <nielsproest@hotmail.com> <seje.niels@hotmail.com>
Nils Jakobi (thunderstorm99) <jakobi.nils@gmail.com>
NinoM4ster <ninom4ster@gmail.com>
Nitroretro <43112364+Nitroretro@users.noreply.github.com>
NoLooseEnds <jon.koslung@gmail.com>
Oliver Freyermuth <o.freyermuth@googlemail.com>
orangekame3 <miya.org.0309@gmail.com>
otbutz <tbutz@optitool.de>
overkill <22098433+0verk1ll@users.noreply.github.com>
Oyebanji Jacob Mayowa <oyebanji05@gmail.com>
Pablo <pbaeyens31+github@gmail.com>
Pascal Jungblut (pascalj) <github@pascalj.com> <mail@pascal-jungblut.com>
Paul Brit <paulbrit44@gmail.com>
Paul Donald <newtwen+github@gmail.com>
Pawel Palenica (qepasa) <pawelpalenica11@gmail.com>
perewa <cavalcante.ten@gmail.com>
Peter Badida <KeyWeeUsr@users.noreply.github.com>
Peter Dave Hello <hsu@peterdavehello.org>
Peter Hoeg (peterhoeg) <peter@speartail.com>
Peter Marquardt (wwwutz) <wwwutz@gmail.com> <wwwutz@googlemail.com>
Phani Rithvij <phanirithvij2000@gmail.com>
Phil Davis <phil.davis@inf.org>
Philippe Schommers (filoozoom) <philippe@schommers.be>
Phill Luby (pluby) <phill.luby@newredo.com>
Piotr Bejda (piobpl) <piotrb10@gmail.com>
polyfloyd <polyfloyd@users.noreply.github.com>
pullmerge <166967364+pullmerge@users.noreply.github.com>
Quentin Hibon <qh.public@yahoo.com>
Rahmi Pruitt <rjpruitt16@gmail.com>
red_led <red-led@users.noreply.github.com>
Robert Carosi (nov1n) <robert@carosi.nl>
Roberto Santalla <roobre@users.noreply.github.com>
Robin Schoonover <robin@cornhooves.org>
Roman Zaynetdinov (zaynetro) <romanznet@gmail.com>
rubenbe <github-com-00ff86@vandamme.email>
Ruslan Yevdokymov <38809160+ruslanye@users.noreply.github.com>
Ryan Qian <i@bitbili.net>
Ryan Sullivan (KayoticSully) <kayoticsully@gmail.com>
Sacheendra Talluri (sacheendra) <sacheendra.t@gmail.com>
Scott Klupfel (kluppy) <kluppy@going2blue.com>
sec65 <106604020+sec65@users.noreply.github.com>
Sergey Mishin (ralder) <ralder@yandex.ru>
Sertonix <83883937+Sertonix@users.noreply.github.com>
Severin von Wnuck-Lipinski <ss7@live.de>
Shaarad Dalvi <60266155+shaaraddalvi@users.noreply.github.com> <shdalv@microsoft.com>
Simon Mwepu <simonmwepu@gmail.com>
Simon Pickup <simon@pickupinfinity.com>
Sly_tom_cat <slytomcat@mail.ru>
Sonu Kumar Saw <31889738+dev-saw99@users.noreply.github.com>
Stefan Kuntz (Stefan-Code) <stefan.github@gmail.com> <Stefan.github@gmail.com>
Steven Eckhoff <steven.eckhoff.opensource@gmail.com>
Suhas Gundimeda (snugghash) <suhas.gundimeda@gmail.com> <snugghash@gmail.com>
Sven Bachmann <dev@mcbachmann.de>
Sébastien WENSKE <sebastien@wenske.fr>
Taylor Khan (nelsonkhan) <nelsonkhan@gmail.com>
Terrance <git@terrance.allofti.me>
TheCreeper <TheCreeper@users.noreply.github.com>
Thomas <9749173+uhthomas@users.noreply.github.com>
Thomas Hipp <thomashipp@gmail.com>
Tim Abell (timabell) <tim@timwise.co.uk>
Tim Howes (timhowes) <timhowes@berkeley.edu>
Tobias Frölich <40638719+tobifroe@users.noreply.github.com>
Tobias Klauser <tobias.klauser@gmail.com>
Tobias Nygren (tnn2) <tnn@nygren.pp.se>
Tobias Tom (tobiastom) <t.tom@succont.de>
Tom Jakubowski <tom@crystae.net>
Tully Robinson (tojrobinson) <tully@tojr.org>
Tyler Brazier (tylerbrazier) <tyler@tylerbrazier.com>
Tyler Kropp <kropptyler@gmail.com>
Unrud (Unrud) <unrud@openaliasbox.org> <Unrud@users.noreply.github.com>
vapatel2 <149737089+vapatel2@users.noreply.github.com>
Veeti Paananen (veeti) <veeti.paananen@rojekti.fi>
Victor Buinsky (buinsky) <vix_booja@tut.by>
Vik <63919734+ViktorOn@users.noreply.github.com>
Vil Brekin (Vilbrekin) <vilbrekin@gmail.com>
villekalliomaki <53118179+villekalliomaki@users.noreply.github.com>
Vladimir Rusinov <vrusinov@google.com> <vladimir.rusinov@gmail.com>
wangguoliang <liangcszzu@163.com>
WangXi <xib1102@icloud.com>
Will Rouesnel <wrouesnel@wrouesnel.com>
William A. Kennington III (wkennington) <william@wkennington.com>
wouter bolsterlee <wouter@bolsterl.ee>
xarx00 <xarx00@users.noreply.github.com>
Xavier O. (damajor) <damajor@gmail.com>
xjtdy888 (xjtdy888) <xjtdy888@163.com> <xjtdy888@gmail.com>
Yannic A. (eipiminus1) <eipiminusone+github@gmail.com> <eipiminus1@users.noreply.github.com>
yparitcher <y@paritcher.com>
佛跳墙 <daoquan@qq.com>
落心 <luoxin.ttt@gmail.com>

View File

@@ -1,73 +0,0 @@
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, gender identity and expression, level of experience,
education, socio-economic status, nationality, personal appearance, race,
religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at security@syncthing.net. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org

View File

@@ -1,208 +1,55 @@
## Reporting Bugs
Please file bugs in the [GitHub Issue
Tracker](https://github.com/syncthing/syncthing/issues). Include at
least the following:
- What happened
- What did you expect to happen instead of what *did* happen, if it's
not crazy obvious
- What operating system, operating system version and version of
Syncthing you are running
- The same for other connected devices, where relevant
- Screenshot if the issue concerns something visible in the GUI
- Console log entries, where possible and relevant
If you're not sure whether something is relevant, erring on the side of
too much information will never get you yelled at. :)
## Contributing Translations
All translations are done via
[Weblate](https://hosted.weblate.org/projects/syncthing/). If you wish
to contribute to a translation, just head over there and sign up.
Before every release, the language resources are updated from the
latest info on Weblate.
Note that the previously used service at
[Transifex](https://www.transifex.com/projects/p/syncthing/) is being
retired and we kindly ask you to sign up on Weblate for continued
involvement.
## Contributing Documentation
Updates to the [documentation site](https://docs.syncthing.net/) can be
made as pull requests on the [documentation
repository](https://github.com/syncthing/docs).
## Contributing Code
Every contribution is welcome. If you want to contribute but are unsure
where to start, any open issues are fair game! Here's a short rundown of
what you need to keep in mind:
- Don't worry. You are not expected to get everything right on the first
attempt, we'll guide you through it.
- Make sure there is an
[issue](https://github.com/syncthing/syncthing/issues) that describes the
change you want to do. If the thing you want to do does not have an issue
yet, please file one before starting work on it.
- Fork the repository and make your changes in a new branch. Once it's ready
for review, create a pull request.
### Authorship
All code authors are listed in the AUTHORS file. When your first pull
request is accepted your details are added to the AUTHORS file and the list
of authors in the GUI. Commits must be made with the same name and email as
listed in the AUTHORS file. To accomplish this, ensure that your git
configuration is set correctly prior to making your first commit:
$ git config --global user.name "Jane Doe"
$ git config --global user.email janedoe@example.com
You must be reachable on the given email address. If you do not wish to use
your real name for whatever reason, using a nickname or pseudonym is
perfectly acceptable.
### The Developer Certificate of Origin (DCO)
The Syncthing project requires the Developer Certificate of Origin (DCO)
sign-off on pull requests (PRs). This means that all commit messages must
contain a signature line to indicate that the developer accepts the DCO.
The DCO is a lightweight way for contributors to certify that they wrote (or
otherwise have the right to submit) the code and changes they are
contributing to the project. Here is the full [text of the
DCO](https://developercertificate.org):
---
By making a contribution to this project, I certify that:
1. The contribution was created in whole or in part by me and I have the
right to submit it under the open source license indicated in the file;
or
2. The contribution is based upon previous work that, to the best of my
knowledge, is covered under an appropriate open source license and I have
the right under that license to submit that work with modifications,
whether created in whole or in part by me, under the same open source
license (unless I am permitted to submit under a different license), as
indicated in the file; or
3. The contribution was provided directly to me by some other person who
certified (1), (2) or (3) and I have not modified it.
4. I understand and agree that this project and the contribution are public
and that a record of the contribution (including all personal information
I submit with it, including my sign-off) is maintained indefinitely and
may be redistributed consistent with this project or the open source
license(s) involved.
---
Contributors indicate that they adhere to these requirements by adding
a `Signed-off-by` line to their commit messages. For example:
This is my commit message
Signed-off-by: Random J Developer <random@developer.example.org>
The name and email address in this line must match those of the committing
author, and be the same as what you want in the AUTHORS file as per above.
### Coding Style
#### General
- All text files use Unix line endings. The git settings already present in
the repository attempt to enforce this.
- When making changes, follow the brace and parenthesis style of the
surrounding code.
#### Go Specific
- Follow the conventions laid out in [Effective
Go](https://go.dev/doc/effective_go) as much as makes sense. The review
guidelines in [Go Code Review
Comments](https://github.com/golang/go/wiki/CodeReviewComments) should
generally be followed.
- Each commit should be `go fmt` clean.
- Imports are grouped per `goimports` standard; that is, standard
library first, then third party libraries after a blank line.
### Commits
- Commit messages (and pull request titles) should follow the [conventional
commits](https://www.conventionalcommits.org/en/v1.0.0/) specification and
be in lower case.
- We use a scope description in the commit message subject. This is the
component of Syncthing that the commit affects. For example, `gui`,
`protocol`, `scanner`, `upnp`, etc -- typically, the part after
`internal/`, `lib/` or `cmd/` in the package path. If the commit doesn't
affect a specific component, such as for changes to the build system or
documentation, the scope should be omitted. The same goes for changes that
affect many components which would be cumbersome to list.
- Commits that resolve an existing issue must include the issue number
as `(fixes #123)` at the end of the commit message subject. A correctly
formatted commit message subject looks like this:
feat(dialer): add env var to disable proxy fallback (fixes #3006)
- If the commit message subject doesn't say it all, one or more paragraphs of
describing text should be added to the commit message. This should explain
why the change is made and what it accomplishes.
- When drafting a pull request, please feel free to add commits with
corrections and merge from `main` when necessary. This provides a clear time
line with changes and simplifies review. Do not, in general, rebase your
commits, as this makes review harder.
- Pull requests are merged to `main` using squash merge. The "stream of
consciousness" set of commits described in the previous point will be reduced
to a single commit at merge time. The pull request title and description will
be used as the commit message.
### Tests
Yes please, do add tests when adding features or fixing bugs. Also, when a
pull request is filed a number of automatic tests are run on the code. This
includes:
- That the code actually builds and the test suite passes.
- That the code is correctly formatted (`go fmt`).
- That the commits are based on a reasonably recent `main`.
- That the output from `go lint` and `go vet` is clean. (This checks for a
number of potential problems the compiler doesn't catch.)
Please do contribute! If you want to contribute but are unsure where to
start, the [Contributions Needed
topic](http://discourse.syncthing.net/t/contributions-needed/49)
lists areas in need of attention.
## Licensing
All contributions are made available under the same license as the already
existing material being contributed to. For most of the project and unless
otherwise stated this means MPLv2, but there are exceptions:
All contributions are made under the same MIT License as the rest of the
project, except documentation which is licensed under the Creative
Commons Attribution 4.0 International License. You retain the copyright
to code you have written.
- Certain commands (under cmd/...) may have a separate license, indicated by
the presence of a LICENSE file in the corresponding directory.
When accepting your first contribution, the maintainer of the project
will ensure that you are added to the CONTRIBUTORS file.
- The documentation (man/...) is licensed under the Creative Commons
Attribution 4.0 International License.
## Building
Regardless of the license in effect, you retain the copyright to your
contribution.
[See the
documentation](http://discourse.syncthing.net/t/building-syncthing/44)
## Branches
- `master` is the main branch containing good code that will end up in
the next release. You should base your work on it. It won't ever be
rebased or force-pushed to.
- `vx.y` branches exist to make patch releases on otherwise obsolete
minor releases. Should only contain fixes cherry picked from master.
Don't base any work on them.
- Other branches are probably topic branches and may be subject to
rebasing. Don't base any work on them unless you specifically know
otherwise.
## Tags
All releases are tagged semver style as `vx.y.z`. Release tags are
signed by GPG key BCE524C7.
## Tests
Yes please!
## Style
`go fmt`
## Documentation
[Over here!](http://discourse.syncthing.net/category/documentation)
## License
MIT

9
CONTRIBUTORS Normal file
View File

@@ -0,0 +1,9 @@
Aaron Bieber <qbit@deftly.net>
Andrew Dunham <andrew@du.nham.ca>
Arthur Axel fREW Schmidt <frew@afoolishmanifesto.com>
Brandon Philips <brandon@ifup.org>
James Patterson <jamespatterson@operamail.com>
Jens Diemer <github.com@jensdiemer.de>
Philippe Schommers <philippe@schommers.be>
Ryan Sullivan <kayoticsully@gmail.com>
Veeti Paananen <veeti.paananen@rojekti.fi>

View File

@@ -1,57 +0,0 @@
ARG GOVERSION=latest
#
# Maybe build Syncthing. This is a bit ugly as we can't make an entire
# section of the Dockerfile conditional, so we end up always pulling the
# golang image as builder. Then we check if the executable we need already
# exists (pre-built) otherwise we build it.
#
FROM golang:$GOVERSION AS builder
ARG BUILD_USER
ARG BUILD_HOST
ARG TARGETARCH
WORKDIR /src
COPY . .
ENV CGO_ENABLED=0
RUN if [ ! -f syncthing-linux-$TARGETARCH ] ; then \
go run build.go -no-upgrade build syncthing ; \
mv syncthing syncthing-linux-$TARGETARCH ; \
fi
#
# The rest of the Dockerfile uses the binary from the builder, prebuilt or
# not.
#
FROM alpine
ARG TARGETARCH
LABEL org.opencontainers.image.authors="The Syncthing Project" \
org.opencontainers.image.url="https://syncthing.net" \
org.opencontainers.image.documentation="https://docs.syncthing.net" \
org.opencontainers.image.source="https://github.com/syncthing/syncthing" \
org.opencontainers.image.vendor="The Syncthing Project" \
org.opencontainers.image.licenses="MPL-2.0" \
org.opencontainers.image.title="Syncthing"
EXPOSE 8384 22000/tcp 22000/udp 21027/udp
VOLUME ["/var/syncthing"]
RUN apk add --no-cache ca-certificates curl libcap su-exec tzdata
COPY --from=builder /src/syncthing-linux-$TARGETARCH /bin/syncthing
COPY --from=builder /src/script/docker-entrypoint.sh /bin/entrypoint.sh
ENV PUID=1000 PGID=1000 HOME=/var/syncthing
HEALTHCHECK --interval=1m --timeout=10s \
CMD curl -fkLsS -m 2 127.0.0.1:8384/rest/noauth/health | grep -o --color=never OK || exit 1
ENV STGUIADDRESS=0.0.0.0:8384
ENV STHOMEDIR=/var/syncthing/config
RUN chmod 755 /bin/entrypoint.sh
ENTRYPOINT ["/bin/entrypoint.sh", "/bin/syncthing"]

View File

@@ -1,17 +0,0 @@
ARG GOVERSION=latest
FROM golang:$GOVERSION
LABEL org.opencontainers.image.authors="The Syncthing Project" \
org.opencontainers.image.url="https://syncthing.net" \
org.opencontainers.image.documentation="https://docs.syncthing.net" \
org.opencontainers.image.source="https://github.com/syncthing/syncthing" \
org.opencontainers.image.vendor="The Syncthing Project" \
org.opencontainers.image.licenses="MPL-2.0" \
org.opencontainers.image.title="Syncthing Builder"
# FPM to build Debian packages
RUN apt-get update && apt-get install -y --no-install-recommends \
locales rubygems ruby-dev build-essential git \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* \
&& gem install fpm

View File

@@ -1,16 +0,0 @@
FROM alpine
ARG TARGETARCH
LABEL org.opencontainers.image.authors="The Syncthing Project" \
org.opencontainers.image.url="https://syncthing.net" \
org.opencontainers.image.documentation="https://docs.syncthing.net" \
org.opencontainers.image.source="https://github.com/syncthing/syncthing" \
org.opencontainers.image.vendor="The Syncthing Project" \
org.opencontainers.image.licenses="MPL-2.0" \
org.opencontainers.image.title="Syncthing Crash Receiver"
EXPOSE 8080
COPY stcrashreceiver-linux-${TARGETARCH} /bin/stcrashreceiver
ENTRYPOINT [ "/bin/stcrashreceiver" ]

View File

@@ -1,42 +0,0 @@
ARG GOVERSION=latest
FROM golang:$GOVERSION AS builder
ARG BUILD_USER
ARG BUILD_HOST
ARG TARGETARCH
WORKDIR /src
COPY . .
ENV CGO_ENABLED=0
RUN if [ ! -f stdiscosrv-linux-$TARGETARCH ] ; then \
go run build.go -no-upgrade build stdiscosrv ; \
mv stdiscosrv stdiscosrv-linux-$TARGETARCH ; \
fi
FROM alpine
ARG TARGETARCH
LABEL org.opencontainers.image.authors="The Syncthing Project" \
org.opencontainers.image.url="https://syncthing.net" \
org.opencontainers.image.documentation="https://docs.syncthing.net" \
org.opencontainers.image.source="https://github.com/syncthing/syncthing" \
org.opencontainers.image.vendor="The Syncthing Project" \
org.opencontainers.image.licenses="MPL-2.0" \
org.opencontainers.image.title="Syncthing Discovery Server"
EXPOSE 19200 8443
VOLUME ["/var/stdiscosrv"]
RUN apk add --no-cache ca-certificates su-exec
COPY --from=builder /src/stdiscosrv-linux-$TARGETARCH /bin/stdiscosrv
COPY --from=builder /src/script/docker-entrypoint.sh /bin/entrypoint.sh
ENV PUID=1000 PGID=1000 HOME=/var/stdiscosrv
HEALTHCHECK --interval=1m --timeout=10s \
CMD nc -z localhost 8443 || exit 1
WORKDIR /var/stdiscosrv
ENTRYPOINT ["/bin/entrypoint.sh", "/bin/stdiscosrv"]

View File

@@ -1,16 +0,0 @@
FROM alpine
ARG TARGETARCH
LABEL org.opencontainers.image.authors="The Syncthing Project" \
org.opencontainers.image.url="https://syncthing.net" \
org.opencontainers.image.documentation="https://docs.syncthing.net" \
org.opencontainers.image.source="https://github.com/syncthing/syncthing" \
org.opencontainers.image.vendor="The Syncthing Project" \
org.opencontainers.image.licenses="MPL-2.0" \
org.opencontainers.image.title="Syncthing Relay Pool Server"
EXPOSE 8080
COPY strelaypoolsrv-linux-${TARGETARCH} /bin/strelaypoolsrv
ENTRYPOINT ["/bin/strelaypoolsrv", "-listen", ":8080"]

View File

@@ -1,42 +0,0 @@
ARG GOVERSION=latest
FROM golang:$GOVERSION AS builder
ARG BUILD_USER
ARG BUILD_HOST
ARG TARGETARCH
WORKDIR /src
COPY . .
ENV CGO_ENABLED=0
RUN if [ ! -f strelaysrv-linux-$TARGETARCH ] ; then \
go run build.go -no-upgrade build strelaysrv ; \
mv strelaysrv strelaysrv-linux-$TARGETARCH ; \
fi
FROM alpine
ARG TARGETARCH
LABEL org.opencontainers.image.authors="The Syncthing Project" \
org.opencontainers.image.url="https://syncthing.net" \
org.opencontainers.image.documentation="https://docs.syncthing.net" \
org.opencontainers.image.source="https://github.com/syncthing/syncthing" \
org.opencontainers.image.vendor="The Syncthing Project" \
org.opencontainers.image.licenses="MPL-2.0" \
org.opencontainers.image.title="Syncthing Relay Server"
EXPOSE 22067 22070
VOLUME ["/var/strelaysrv"]
RUN apk add --no-cache ca-certificates su-exec
COPY --from=builder /src/strelaysrv-linux-$TARGETARCH /bin/strelaysrv
COPY --from=builder /src/script/docker-entrypoint.sh /bin/entrypoint.sh
ENV PUID=1000 PGID=1000 HOME=/var/strelaysrv
HEALTHCHECK --interval=1m --timeout=10s \
CMD nc -z localhost 22067 || exit 1
WORKDIR /var/strelaysrv
ENTRYPOINT ["/bin/entrypoint.sh", "/bin/strelaysrv"]

View File

@@ -1,16 +0,0 @@
FROM alpine
ARG TARGETARCH
LABEL org.opencontainers.image.authors="The Syncthing Project" \
org.opencontainers.image.url="https://syncthing.net" \
org.opencontainers.image.documentation="https://docs.syncthing.net" \
org.opencontainers.image.source="https://github.com/syncthing/syncthing" \
org.opencontainers.image.vendor="The Syncthing Project" \
org.opencontainers.image.licenses="MPL-2.0" \
org.opencontainers.image.title="Syncthing Upgrades"
EXPOSE 8080
COPY stupgrades-linux-${TARGETARCH} /bin/stupgrades
ENTRYPOINT [ "/bin/stupgrades" ]

View File

@@ -1,16 +0,0 @@
FROM alpine
ARG TARGETARCH
LABEL org.opencontainers.image.authors="The Syncthing Project" \
org.opencontainers.image.url="https://syncthing.net" \
org.opencontainers.image.documentation="https://docs.syncthing.net" \
org.opencontainers.image.source="https://github.com/syncthing/syncthing" \
org.opencontainers.image.vendor="The Syncthing Project" \
org.opencontainers.image.licenses="MPL-2.0" \
org.opencontainers.image.title="Syncthing Usage Reporting Server"
EXPOSE 8080
COPY ursrv-linux-${TARGETARCH} /bin/ursrv
ENTRYPOINT [ "/bin/ursrv" ]

View File

@@ -1,83 +0,0 @@
# The Syncthing Goals
Syncthing is a **continuous file synchronization program**. It synchronizes
files between two or more computers. We strive to fulfill the goals below.
The goals are listed in order of importance, the most important one being
the first.
> "Syncing files" here is precise. It means we specifically exclude things
> that are not files - calendar items, instant messages, and so on. If those
> are in fact stored as files on disk, they can of course be synced as
> files.
Syncthing should be:
### 1. Safe From Data Loss
Protecting the user's data is paramount. We take every reasonable precaution
to avoid corrupting the user's files.
> This is the overriding goal, without which synchronizing files becomes
> pointless. This means that we do not make unsafe trade offs for the sake
> of performance or, in some cases, even usability.
### 2. Secure Against Attackers
Again, protecting the user's data is paramount. Regardless of our other
goals, we must never allow the user's data to be susceptible to eavesdropping
or modification by unauthorized parties.
> This should be understood in context. It is not necessarily reasonable to
> expect Syncthing to be resistant against well equipped state level
> attackers. We will, however, do our best. Note also that this is different
> from anonymity which is not, currently, a goal.
### 3. Easy to Use
Syncthing should be approachable, understandable, and inclusive.
> Complex concepts and maths form the base of Syncthing's functionality.
> This should nonetheless be abstracted or hidden to a degree where
> Syncthing is usable by the general public.
### 4. Automatic
User interaction should be required only when absolutely necessary.
> Specifically this means that changes to files are picked up without
> prompting, conflicts are resolved without prompting and connections are
> maintained without prompting. We only prompt the user when it is required
> to fulfill one of the (overriding) Secure, Safe or Easy goals.
### 5. Universally Available
Syncthing should run on every common computer. We are mindful that the
latest technology is not always available to every individual.
> Computers include desktops, laptops, servers, virtual machines, small
> general purpose computers such as Raspberry Pis and, *where possible*,
> tablets and phones. NAS appliances, toasters, cars, firearms, thermostats,
> and so on may include computing capabilities but it is not our goal for
> Syncthing to run smoothly on these devices.
### 6. For Individuals
Syncthing is primarily about empowering the individual user with safe,
secure, and easy to use file synchronization.
> We acknowledge that it's also useful in an enterprise setting and include
> functionality to support that. If this is in conflict with the
> requirements of the individual, those will however take priority.
### 7. Everything Else
There are many things we care about that don't make it on to the list. It is
fine to optimize for these values as well, as long as they are not in
conflict with the stated goals above.
> For example, performance is a thing we care about. We just don't care more
> about it than safety, security, etc. Maintainability of the code base and
> providing entertainment value for the maintainers are also things that
> matter. It is understood that there are aspects of Syncthing that are
> suboptimal or even in opposition with the goals above. However, we
> continuously strive to align Syncthing more and more with these goals.

65
Godeps/Godeps.json generated Normal file
View File

@@ -0,0 +1,65 @@
{
"ImportPath": "github.com/calmh/syncthing",
"GoVersion": "go1.3",
"Packages": [
"./cmd/syncthing",
"./cmd/assets",
"./discover/cmd/discosrv"
],
"Deps": [
{
"ImportPath": "bitbucket.org/kardianos/osext",
"Comment": "null-9",
"Rev": "364fb577de68fb646c4cb39cc0e09c887ee16376"
},
{
"ImportPath": "code.google.com/p/go.crypto/bcrypt",
"Comment": "null-185",
"Rev": "6478cc9340cbbe6c04511280c5007722269108e9"
},
{
"ImportPath": "code.google.com/p/go.crypto/blowfish",
"Comment": "null-185",
"Rev": "6478cc9340cbbe6c04511280c5007722269108e9"
},
{
"ImportPath": "code.google.com/p/go.text/transform",
"Comment": "null-81",
"Rev": "9cbe983aed9b0dfc73954433fead5e00866342ac"
},
{
"ImportPath": "code.google.com/p/go.text/unicode/norm",
"Comment": "null-81",
"Rev": "9cbe983aed9b0dfc73954433fead5e00866342ac"
},
{
"ImportPath": "github.com/codegangsta/inject",
"Rev": "9aea7a2fa5b79ef7fc00f63a575e72df33b4e886"
},
{
"ImportPath": "github.com/codegangsta/martini",
"Comment": "v0.1-142-g8659df7",
"Rev": "8659df7a51aebe6c6120268cd5a8b4c34fa8441a"
},
{
"ImportPath": "github.com/golang/groupcache/lru",
"Rev": "d781998583680cda80cf61e0b37dd0cd8da2eb52"
},
{
"ImportPath": "github.com/juju/ratelimit",
"Rev": "cbaa435c80a9716e086f25d409344b26c4039358"
},
{
"ImportPath": "github.com/vitrun/qart/coding",
"Rev": "ccb109cf25f0cd24474da73b9fee4e7a3e8a8ce0"
},
{
"ImportPath": "github.com/vitrun/qart/gf256",
"Rev": "ccb109cf25f0cd24474da73b9fee4e7a3e8a8ce0"
},
{
"ImportPath": "github.com/vitrun/qart/qr",
"Rev": "ccb109cf25f0cd24474da73b9fee4e7a3e8a8ce0"
}
]
}

5
Godeps/Readme generated Normal file
View File

@@ -0,0 +1,5 @@
This directory tree is generated automatically by godep.
Please do not edit.
See https://github.com/tools/godep for more information.

2
Godeps/_workspace/.gitignore generated vendored Normal file
View File

@@ -0,0 +1,2 @@
/pkg
/bin

View File

@@ -0,0 +1,20 @@
Copyright (c) 2012 Daniel Theophanes
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source
distribution.

View File

@@ -0,0 +1,32 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Extensions to the standard "os" package.
package osext
import "path/filepath"
// Executable returns an absolute path that can be used to
// re-invoke the current program.
// It may not be valid after the current program exits.
func Executable() (string, error) {
p, err := executable()
return filepath.Clean(p), err
}
// Returns same path as Executable, returns just the folder
// path. Excludes the executable name.
func ExecutableFolder() (string, error) {
p, err := Executable()
if err != nil {
return "", err
}
folder, _ := filepath.Split(p)
return folder, nil
}
// Depricated. Same as Executable().
func GetExePath() (exePath string, err error) {
return Executable()
}

View File

@@ -0,0 +1,16 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package osext
import "syscall"
func executable() (string, error) {
f, err := Open("/proc/" + itoa(Getpid()) + "/text")
if err != nil {
return "", err
}
defer f.Close()
return syscall.Fd2path(int(f.Fd()))
}

View File

@@ -0,0 +1,25 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build linux netbsd openbsd
package osext
import (
"errors"
"os"
"runtime"
)
func executable() (string, error) {
switch runtime.GOOS {
case "linux":
return os.Readlink("/proc/self/exe")
case "netbsd":
return os.Readlink("/proc/curproc/exe")
case "openbsd":
return os.Readlink("/proc/curproc/file")
}
return "", errors.New("ExecPath not implemented for " + runtime.GOOS)
}

View File

@@ -0,0 +1,82 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build darwin freebsd
package osext
import (
"os"
"path/filepath"
"runtime"
"syscall"
"unsafe"
)
var startUpcwd, getwdError = os.Getwd()
func executable() (string, error) {
var mib [4]int32
switch runtime.GOOS {
case "freebsd":
mib = [4]int32{1 /* CTL_KERN */, 14 /* KERN_PROC */, 12 /* KERN_PROC_PATHNAME */, -1}
case "darwin":
mib = [4]int32{1 /* CTL_KERN */, 38 /* KERN_PROCARGS */, int32(os.Getpid()), -1}
}
n := uintptr(0)
// get length
_, _, err := syscall.Syscall6(syscall.SYS___SYSCTL, uintptr(unsafe.Pointer(&mib[0])), 4, 0, uintptr(unsafe.Pointer(&n)), 0, 0)
if err != 0 {
return "", err
}
if n == 0 { // shouldn't happen
return "", nil
}
buf := make([]byte, n)
_, _, err = syscall.Syscall6(syscall.SYS___SYSCTL, uintptr(unsafe.Pointer(&mib[0])), 4, uintptr(unsafe.Pointer(&buf[0])), uintptr(unsafe.Pointer(&n)), 0, 0)
if err != 0 {
return "", err
}
if n == 0 { // shouldn't happen
return "", nil
}
for i, v := range buf {
if v == 0 {
buf = buf[:i]
break
}
}
var strpath string
if buf[0] != '/' {
var e error
if strpath, e = getAbs(buf); e != nil {
return strpath, e
}
} else {
strpath = string(buf)
}
// darwin KERN_PROCARGS may return the path to a symlink rather than the
// actual executable
if runtime.GOOS == "darwin" {
if strpath, err := filepath.EvalSymlinks(strpath); err != nil {
return strpath, err
}
}
return strpath, nil
}
func getAbs(buf []byte) (string, error) {
if getwdError != nil {
return string(buf), getwdError
} else {
if buf[0] == '.' {
buf = buf[1:]
}
if startUpcwd[len(startUpcwd)-1] != '/' && buf[0] != '/' {
return startUpcwd + "/" + string(buf), nil
}
return startUpcwd + string(buf), nil
}
}

View File

@@ -0,0 +1,79 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build darwin linux freebsd netbsd windows
package osext
import (
"fmt"
"os"
oexec "os/exec"
"path/filepath"
"runtime"
"testing"
)
const execPath_EnvVar = "OSTEST_OUTPUT_EXECPATH"
func TestExecPath(t *testing.T) {
ep, err := Executable()
if err != nil {
t.Fatalf("ExecPath failed: %v", err)
}
// we want fn to be of the form "dir/prog"
dir := filepath.Dir(filepath.Dir(ep))
fn, err := filepath.Rel(dir, ep)
if err != nil {
t.Fatalf("filepath.Rel: %v", err)
}
cmd := &oexec.Cmd{}
// make child start with a relative program path
cmd.Dir = dir
cmd.Path = fn
// forge argv[0] for child, so that we can verify we could correctly
// get real path of the executable without influenced by argv[0].
cmd.Args = []string{"-", "-test.run=XXXX"}
cmd.Env = []string{fmt.Sprintf("%s=1", execPath_EnvVar)}
out, err := cmd.CombinedOutput()
if err != nil {
t.Fatalf("exec(self) failed: %v", err)
}
outs := string(out)
if !filepath.IsAbs(outs) {
t.Fatalf("Child returned %q, want an absolute path", out)
}
if !sameFile(outs, ep) {
t.Fatalf("Child returned %q, not the same file as %q", out, ep)
}
}
func sameFile(fn1, fn2 string) bool {
fi1, err := os.Stat(fn1)
if err != nil {
return false
}
fi2, err := os.Stat(fn2)
if err != nil {
return false
}
return os.SameFile(fi1, fi2)
}
func init() {
if e := os.Getenv(execPath_EnvVar); e != "" {
// first chdir to another path
dir := "/"
if runtime.GOOS == "windows" {
dir = filepath.VolumeName(".")
}
os.Chdir(dir)
if ep, err := Executable(); err != nil {
fmt.Fprint(os.Stderr, "ERROR: ", err)
} else {
fmt.Fprint(os.Stderr, ep)
}
os.Exit(0)
}
}

View File

@@ -0,0 +1,34 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package osext
import (
"syscall"
"unicode/utf16"
"unsafe"
)
var (
kernel = syscall.MustLoadDLL("kernel32.dll")
getModuleFileNameProc = kernel.MustFindProc("GetModuleFileNameW")
)
// GetModuleFileName() with hModule = NULL
func executable() (exePath string, err error) {
return getModuleFileName()
}
func getModuleFileName() (string, error) {
var n uint32
b := make([]uint16, syscall.MAX_PATH)
size := uint32(len(b))
r0, _, e1 := getModuleFileNameProc.Call(0, uintptr(unsafe.Pointer(&b[0])), uintptr(size))
n = uint32(r0)
if n == 0 {
return "", e1
}
return string(utf16.Decode(b[0:n])), nil
}

View File

@@ -0,0 +1,35 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bcrypt
import "encoding/base64"
const alphabet = "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
var bcEncoding = base64.NewEncoding(alphabet)
func base64Encode(src []byte) []byte {
n := bcEncoding.EncodedLen(len(src))
dst := make([]byte, n)
bcEncoding.Encode(dst, src)
for dst[n-1] == '=' {
n--
}
return dst[:n]
}
func base64Decode(src []byte) ([]byte, error) {
numOfEquals := 4 - (len(src) % 4)
for i := 0; i < numOfEquals; i++ {
src = append(src, '=')
}
dst := make([]byte, bcEncoding.DecodedLen(len(src)))
n, err := bcEncoding.Decode(dst, src)
if err != nil {
return nil, err
}
return dst[:n], nil
}

View File

@@ -0,0 +1,294 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package bcrypt implements Provos and Mazières's bcrypt adaptive hashing
// algorithm. See http://www.usenix.org/event/usenix99/provos/provos.pdf
package bcrypt
// The code is a port of Provos and Mazières's C implementation.
import (
"code.google.com/p/go.crypto/blowfish"
"crypto/rand"
"crypto/subtle"
"errors"
"fmt"
"io"
"strconv"
)
const (
MinCost int = 4 // the minimum allowable cost as passed in to GenerateFromPassword
MaxCost int = 31 // the maximum allowable cost as passed in to GenerateFromPassword
DefaultCost int = 10 // the cost that will actually be set if a cost below MinCost is passed into GenerateFromPassword
)
// The error returned from CompareHashAndPassword when a password and hash do
// not match.
var ErrMismatchedHashAndPassword = errors.New("crypto/bcrypt: hashedPassword is not the hash of the given password")
// The error returned from CompareHashAndPassword when a hash is too short to
// be a bcrypt hash.
var ErrHashTooShort = errors.New("crypto/bcrypt: hashedSecret too short to be a bcrypted password")
// The error returned from CompareHashAndPassword when a hash was created with
// a bcrypt algorithm newer than this implementation.
type HashVersionTooNewError byte
func (hv HashVersionTooNewError) Error() string {
return fmt.Sprintf("crypto/bcrypt: bcrypt algorithm version '%c' requested is newer than current version '%c'", byte(hv), majorVersion)
}
// The error returned from CompareHashAndPassword when a hash starts with something other than '$'
type InvalidHashPrefixError byte
func (ih InvalidHashPrefixError) Error() string {
return fmt.Sprintf("crypto/bcrypt: bcrypt hashes must start with '$', but hashedSecret started with '%c'", byte(ih))
}
type InvalidCostError int
func (ic InvalidCostError) Error() string {
return fmt.Sprintf("crypto/bcrypt: cost %d is outside allowed range (%d,%d)", int(ic), int(MinCost), int(MaxCost))
}
const (
majorVersion = '2'
minorVersion = 'a'
maxSaltSize = 16
maxCryptedHashSize = 23
encodedSaltSize = 22
encodedHashSize = 31
minHashSize = 59
)
// magicCipherData is an IV for the 64 Blowfish encryption calls in
// bcrypt(). It's the string "OrpheanBeholderScryDoubt" in big-endian bytes.
var magicCipherData = []byte{
0x4f, 0x72, 0x70, 0x68,
0x65, 0x61, 0x6e, 0x42,
0x65, 0x68, 0x6f, 0x6c,
0x64, 0x65, 0x72, 0x53,
0x63, 0x72, 0x79, 0x44,
0x6f, 0x75, 0x62, 0x74,
}
type hashed struct {
hash []byte
salt []byte
cost int // allowed range is MinCost to MaxCost
major byte
minor byte
}
// GenerateFromPassword returns the bcrypt hash of the password at the given
// cost. If the cost given is less than MinCost, the cost will be set to
// DefaultCost, instead. Use CompareHashAndPassword, as defined in this package,
// to compare the returned hashed password with its cleartext version.
func GenerateFromPassword(password []byte, cost int) ([]byte, error) {
p, err := newFromPassword(password, cost)
if err != nil {
return nil, err
}
return p.Hash(), nil
}
// CompareHashAndPassword compares a bcrypt hashed password with its possible
// plaintext equivalent. Returns nil on success, or an error on failure.
func CompareHashAndPassword(hashedPassword, password []byte) error {
p, err := newFromHash(hashedPassword)
if err != nil {
return err
}
otherHash, err := bcrypt(password, p.cost, p.salt)
if err != nil {
return err
}
otherP := &hashed{otherHash, p.salt, p.cost, p.major, p.minor}
if subtle.ConstantTimeCompare(p.Hash(), otherP.Hash()) == 1 {
return nil
}
return ErrMismatchedHashAndPassword
}
// Cost returns the hashing cost used to create the given hashed
// password. When, in the future, the hashing cost of a password system needs
// to be increased in order to adjust for greater computational power, this
// function allows one to establish which passwords need to be updated.
func Cost(hashedPassword []byte) (int, error) {
p, err := newFromHash(hashedPassword)
if err != nil {
return 0, err
}
return p.cost, nil
}
func newFromPassword(password []byte, cost int) (*hashed, error) {
if cost < MinCost {
cost = DefaultCost
}
p := new(hashed)
p.major = majorVersion
p.minor = minorVersion
err := checkCost(cost)
if err != nil {
return nil, err
}
p.cost = cost
unencodedSalt := make([]byte, maxSaltSize)
_, err = io.ReadFull(rand.Reader, unencodedSalt)
if err != nil {
return nil, err
}
p.salt = base64Encode(unencodedSalt)
hash, err := bcrypt(password, p.cost, p.salt)
if err != nil {
return nil, err
}
p.hash = hash
return p, err
}
func newFromHash(hashedSecret []byte) (*hashed, error) {
if len(hashedSecret) < minHashSize {
return nil, ErrHashTooShort
}
p := new(hashed)
n, err := p.decodeVersion(hashedSecret)
if err != nil {
return nil, err
}
hashedSecret = hashedSecret[n:]
n, err = p.decodeCost(hashedSecret)
if err != nil {
return nil, err
}
hashedSecret = hashedSecret[n:]
// The "+2" is here because we'll have to append at most 2 '=' to the salt
// when base64 decoding it in expensiveBlowfishSetup().
p.salt = make([]byte, encodedSaltSize, encodedSaltSize+2)
copy(p.salt, hashedSecret[:encodedSaltSize])
hashedSecret = hashedSecret[encodedSaltSize:]
p.hash = make([]byte, len(hashedSecret))
copy(p.hash, hashedSecret)
return p, nil
}
func bcrypt(password []byte, cost int, salt []byte) ([]byte, error) {
cipherData := make([]byte, len(magicCipherData))
copy(cipherData, magicCipherData)
c, err := expensiveBlowfishSetup(password, uint32(cost), salt)
if err != nil {
return nil, err
}
for i := 0; i < 24; i += 8 {
for j := 0; j < 64; j++ {
c.Encrypt(cipherData[i:i+8], cipherData[i:i+8])
}
}
// Bug compatibility with C bcrypt implementations. We only encode 23 of
// the 24 bytes encrypted.
hsh := base64Encode(cipherData[:maxCryptedHashSize])
return hsh, nil
}
func expensiveBlowfishSetup(key []byte, cost uint32, salt []byte) (*blowfish.Cipher, error) {
csalt, err := base64Decode(salt)
if err != nil {
return nil, err
}
// Bug compatibility with C bcrypt implementations. They use the trailing
// NULL in the key string during expansion.
ckey := append(key, 0)
c, err := blowfish.NewSaltedCipher(ckey, csalt)
if err != nil {
return nil, err
}
var i, rounds uint64
rounds = 1 << cost
for i = 0; i < rounds; i++ {
blowfish.ExpandKey(ckey, c)
blowfish.ExpandKey(csalt, c)
}
return c, nil
}
func (p *hashed) Hash() []byte {
arr := make([]byte, 60)
arr[0] = '$'
arr[1] = p.major
n := 2
if p.minor != 0 {
arr[2] = p.minor
n = 3
}
arr[n] = '$'
n += 1
copy(arr[n:], []byte(fmt.Sprintf("%02d", p.cost)))
n += 2
arr[n] = '$'
n += 1
copy(arr[n:], p.salt)
n += encodedSaltSize
copy(arr[n:], p.hash)
n += encodedHashSize
return arr[:n]
}
func (p *hashed) decodeVersion(sbytes []byte) (int, error) {
if sbytes[0] != '$' {
return -1, InvalidHashPrefixError(sbytes[0])
}
if sbytes[1] > majorVersion {
return -1, HashVersionTooNewError(sbytes[1])
}
p.major = sbytes[1]
n := 3
if sbytes[2] != '$' {
p.minor = sbytes[2]
n++
}
return n, nil
}
// sbytes should begin where decodeVersion left off.
func (p *hashed) decodeCost(sbytes []byte) (int, error) {
cost, err := strconv.Atoi(string(sbytes[0:2]))
if err != nil {
return -1, err
}
err = checkCost(cost)
if err != nil {
return -1, err
}
p.cost = cost
return 3, nil
}
func (p *hashed) String() string {
return fmt.Sprintf("&{hash: %#v, salt: %#v, cost: %d, major: %c, minor: %c}", string(p.hash), p.salt, p.cost, p.major, p.minor)
}
func checkCost(cost int) error {
if cost < MinCost || cost > MaxCost {
return InvalidCostError(cost)
}
return nil
}

View File

@@ -0,0 +1,217 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bcrypt
import (
"bytes"
"fmt"
"testing"
)
func TestBcryptingIsEasy(t *testing.T) {
pass := []byte("mypassword")
hp, err := GenerateFromPassword(pass, 0)
if err != nil {
t.Fatalf("GenerateFromPassword error: %s", err)
}
if CompareHashAndPassword(hp, pass) != nil {
t.Errorf("%v should hash %s correctly", hp, pass)
}
notPass := "notthepass"
err = CompareHashAndPassword(hp, []byte(notPass))
if err != ErrMismatchedHashAndPassword {
t.Errorf("%v and %s should be mismatched", hp, notPass)
}
}
func TestBcryptingIsCorrect(t *testing.T) {
pass := []byte("allmine")
salt := []byte("XajjQvNhvvRt5GSeFk1xFe")
expectedHash := []byte("$2a$10$XajjQvNhvvRt5GSeFk1xFeyqRrsxkhBkUiQeg0dt.wU1qD4aFDcga")
hash, err := bcrypt(pass, 10, salt)
if err != nil {
t.Fatalf("bcrypt blew up: %v", err)
}
if !bytes.HasSuffix(expectedHash, hash) {
t.Errorf("%v should be the suffix of %v", hash, expectedHash)
}
h, err := newFromHash(expectedHash)
if err != nil {
t.Errorf("Unable to parse %s: %v", string(expectedHash), err)
}
// This is not the safe way to compare these hashes. We do this only for
// testing clarity. Use bcrypt.CompareHashAndPassword()
if err == nil && !bytes.Equal(expectedHash, h.Hash()) {
t.Errorf("Parsed hash %v should equal %v", h.Hash(), expectedHash)
}
}
func TestTooLongPasswordsWork(t *testing.T) {
salt := []byte("XajjQvNhvvRt5GSeFk1xFe")
// One byte over the usual 56 byte limit that blowfish has
tooLongPass := []byte("012345678901234567890123456789012345678901234567890123456")
tooLongExpected := []byte("$2a$10$XajjQvNhvvRt5GSeFk1xFe5l47dONXg781AmZtd869sO8zfsHuw7C")
hash, err := bcrypt(tooLongPass, 10, salt)
if err != nil {
t.Fatalf("bcrypt blew up on long password: %v", err)
}
if !bytes.HasSuffix(tooLongExpected, hash) {
t.Errorf("%v should be the suffix of %v", hash, tooLongExpected)
}
}
type InvalidHashTest struct {
err error
hash []byte
}
var invalidTests = []InvalidHashTest{
{ErrHashTooShort, []byte("$2a$10$fooo")},
{ErrHashTooShort, []byte("$2a")},
{HashVersionTooNewError('3'), []byte("$3a$10$sssssssssssssssssssssshhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh")},
{InvalidHashPrefixError('%'), []byte("%2a$10$sssssssssssssssssssssshhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh")},
{InvalidCostError(32), []byte("$2a$32$sssssssssssssssssssssshhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh")},
}
func TestInvalidHashErrors(t *testing.T) {
check := func(name string, expected, err error) {
if err == nil {
t.Errorf("%s: Should have returned an error", name)
}
if err != nil && err != expected {
t.Errorf("%s gave err %v but should have given %v", name, err, expected)
}
}
for _, iht := range invalidTests {
_, err := newFromHash(iht.hash)
check("newFromHash", iht.err, err)
err = CompareHashAndPassword(iht.hash, []byte("anything"))
check("CompareHashAndPassword", iht.err, err)
}
}
func TestUnpaddedBase64Encoding(t *testing.T) {
original := []byte{101, 201, 101, 75, 19, 227, 199, 20, 239, 236, 133, 32, 30, 109, 243, 30}
encodedOriginal := []byte("XajjQvNhvvRt5GSeFk1xFe")
encoded := base64Encode(original)
if !bytes.Equal(encodedOriginal, encoded) {
t.Errorf("Encoded %v should have equaled %v", encoded, encodedOriginal)
}
decoded, err := base64Decode(encodedOriginal)
if err != nil {
t.Fatalf("base64Decode blew up: %s", err)
}
if !bytes.Equal(decoded, original) {
t.Errorf("Decoded %v should have equaled %v", decoded, original)
}
}
func TestCost(t *testing.T) {
suffix := "XajjQvNhvvRt5GSeFk1xFe5l47dONXg781AmZtd869sO8zfsHuw7C"
for _, vers := range []string{"2a", "2"} {
for _, cost := range []int{4, 10} {
s := fmt.Sprintf("$%s$%02d$%s", vers, cost, suffix)
h := []byte(s)
actual, err := Cost(h)
if err != nil {
t.Errorf("Cost, error: %s", err)
continue
}
if actual != cost {
t.Errorf("Cost, expected: %d, actual: %d", cost, actual)
}
}
}
_, err := Cost([]byte("$a$a$" + suffix))
if err == nil {
t.Errorf("Cost, malformed but no error returned")
}
}
func TestCostValidationInHash(t *testing.T) {
if testing.Short() {
return
}
pass := []byte("mypassword")
for c := 0; c < MinCost; c++ {
p, _ := newFromPassword(pass, c)
if p.cost != DefaultCost {
t.Errorf("newFromPassword should default costs below %d to %d, but was %d", MinCost, DefaultCost, p.cost)
}
}
p, _ := newFromPassword(pass, 14)
if p.cost != 14 {
t.Errorf("newFromPassword should default cost to 14, but was %d", p.cost)
}
hp, _ := newFromHash(p.Hash())
if p.cost != hp.cost {
t.Errorf("newFromHash should maintain the cost at %d, but was %d", p.cost, hp.cost)
}
_, err := newFromPassword(pass, 32)
if err == nil {
t.Fatalf("newFromPassword: should return a cost error")
}
if err != InvalidCostError(32) {
t.Errorf("newFromPassword: should return cost error, got %#v", err)
}
}
func TestCostReturnsWithLeadingZeroes(t *testing.T) {
hp, _ := newFromPassword([]byte("abcdefgh"), 7)
cost := hp.Hash()[4:7]
expected := []byte("07$")
if !bytes.Equal(expected, cost) {
t.Errorf("single digit costs in hash should have leading zeros: was %v instead of %v", cost, expected)
}
}
func TestMinorNotRequired(t *testing.T) {
noMinorHash := []byte("$2$10$XajjQvNhvvRt5GSeFk1xFeyqRrsxkhBkUiQeg0dt.wU1qD4aFDcga")
h, err := newFromHash(noMinorHash)
if err != nil {
t.Fatalf("No minor hash blew up: %s", err)
}
if h.minor != 0 {
t.Errorf("Should leave minor version at 0, but was %d", h.minor)
}
if !bytes.Equal(noMinorHash, h.Hash()) {
t.Errorf("Should generate hash %v, but created %v", noMinorHash, h.Hash())
}
}
func BenchmarkEqual(b *testing.B) {
b.StopTimer()
passwd := []byte("somepasswordyoulike")
hash, _ := GenerateFromPassword(passwd, 10)
b.StartTimer()
for i := 0; i < b.N; i++ {
CompareHashAndPassword(hash, passwd)
}
}
func BenchmarkGeneration(b *testing.B) {
b.StopTimer()
passwd := []byte("mylongpassword1234")
b.StartTimer()
for i := 0; i < b.N; i++ {
GenerateFromPassword(passwd, 10)
}
}

View File

@@ -0,0 +1,190 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blowfish
// ExpandKey performs a key expansion on the given *Cipher. Specifically, it
// performs the Blowfish algorithm's key schedule which sets up the *Cipher's
// pi and substitution tables for calls to Encrypt. This is used, primarily,
// by the bcrypt package to reuse the Blowfish key schedule during its
// set up. It's unlikely that you need to use this directly.
func ExpandKey(key []byte, c *Cipher) {
j := 0
for i := 0; i < 18; i++ {
var d uint32
for k := 0; k < 4; k++ {
d = d<<8 | uint32(key[j])
j++
if j >= len(key) {
j = 0
}
}
c.p[i] ^= d
}
var l, r uint32
for i := 0; i < 18; i += 2 {
l, r = encryptBlock(l, r, c)
c.p[i], c.p[i+1] = l, r
}
for i := 0; i < 256; i += 2 {
l, r = encryptBlock(l, r, c)
c.s0[i], c.s0[i+1] = l, r
}
for i := 0; i < 256; i += 2 {
l, r = encryptBlock(l, r, c)
c.s1[i], c.s1[i+1] = l, r
}
for i := 0; i < 256; i += 2 {
l, r = encryptBlock(l, r, c)
c.s2[i], c.s2[i+1] = l, r
}
for i := 0; i < 256; i += 2 {
l, r = encryptBlock(l, r, c)
c.s3[i], c.s3[i+1] = l, r
}
}
// This is similar to ExpandKey, but folds the salt during the key
// schedule. While ExpandKey is essentially expandKeyWithSalt with an all-zero
// salt passed in, reusing ExpandKey turns out to be a place of inefficiency
// and specializing it here is useful.
func expandKeyWithSalt(key []byte, salt []byte, c *Cipher) {
j := 0
for i := 0; i < 18; i++ {
var d uint32
for k := 0; k < 4; k++ {
d = d<<8 | uint32(key[j])
j++
if j >= len(key) {
j = 0
}
}
c.p[i] ^= d
}
j = 0
var expandedSalt [4]uint32
for i := range expandedSalt {
var d uint32
for k := 0; k < 4; k++ {
d = d<<8 | uint32(salt[j])
j++
if j >= len(salt) {
j = 0
}
}
expandedSalt[i] = d
}
var l, r uint32
for i := 0; i < 18; i += 2 {
l ^= expandedSalt[i&2]
r ^= expandedSalt[(i&2)+1]
l, r = encryptBlock(l, r, c)
c.p[i], c.p[i+1] = l, r
}
for i := 0; i < 256; i += 4 {
l ^= expandedSalt[2]
r ^= expandedSalt[3]
l, r = encryptBlock(l, r, c)
c.s0[i], c.s0[i+1] = l, r
l ^= expandedSalt[0]
r ^= expandedSalt[1]
l, r = encryptBlock(l, r, c)
c.s0[i+2], c.s0[i+3] = l, r
}
for i := 0; i < 256; i += 4 {
l ^= expandedSalt[2]
r ^= expandedSalt[3]
l, r = encryptBlock(l, r, c)
c.s1[i], c.s1[i+1] = l, r
l ^= expandedSalt[0]
r ^= expandedSalt[1]
l, r = encryptBlock(l, r, c)
c.s1[i+2], c.s1[i+3] = l, r
}
for i := 0; i < 256; i += 4 {
l ^= expandedSalt[2]
r ^= expandedSalt[3]
l, r = encryptBlock(l, r, c)
c.s2[i], c.s2[i+1] = l, r
l ^= expandedSalt[0]
r ^= expandedSalt[1]
l, r = encryptBlock(l, r, c)
c.s2[i+2], c.s2[i+3] = l, r
}
for i := 0; i < 256; i += 4 {
l ^= expandedSalt[2]
r ^= expandedSalt[3]
l, r = encryptBlock(l, r, c)
c.s3[i], c.s3[i+1] = l, r
l ^= expandedSalt[0]
r ^= expandedSalt[1]
l, r = encryptBlock(l, r, c)
c.s3[i+2], c.s3[i+3] = l, r
}
}
func encryptBlock(l, r uint32, c *Cipher) (uint32, uint32) {
xl, xr := l, r
xl ^= c.p[0]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[1]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[2]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[3]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[4]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[5]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[6]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[7]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[8]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[9]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[10]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[11]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[12]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[13]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[14]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[15]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[16]
xr ^= c.p[17]
return xr, xl
}
func decryptBlock(l, r uint32, c *Cipher) (uint32, uint32) {
xl, xr := l, r
xl ^= c.p[17]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[16]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[15]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[14]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[13]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[12]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[11]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[10]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[9]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[8]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[7]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[6]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[5]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[4]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[3]
xr ^= ((c.s0[byte(xl>>24)] + c.s1[byte(xl>>16)]) ^ c.s2[byte(xl>>8)]) + c.s3[byte(xl)] ^ c.p[2]
xl ^= ((c.s0[byte(xr>>24)] + c.s1[byte(xr>>16)]) ^ c.s2[byte(xr>>8)]) + c.s3[byte(xr)] ^ c.p[1]
xr ^= c.p[0]
return xr, xl
}
func zero(x []uint32) {
for i := range x {
x[i] = 0
}
}

View File

@@ -0,0 +1,210 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package blowfish
import (
"testing"
)
type CryptTest struct {
key []byte
in []byte
out []byte
}
// Test vector values are from http://www.schneier.com/code/vectors.txt.
var encryptTests = []CryptTest{
{
[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
[]byte{0x4E, 0xF9, 0x97, 0x45, 0x61, 0x98, 0xDD, 0x78}},
{
[]byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
[]byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
[]byte{0x51, 0x86, 0x6F, 0xD5, 0xB8, 0x5E, 0xCB, 0x8A}},
{
[]byte{0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
[]byte{0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
[]byte{0x7D, 0x85, 0x6F, 0x9A, 0x61, 0x30, 0x63, 0xF2}},
{
[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11},
[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11},
[]byte{0x24, 0x66, 0xDD, 0x87, 0x8B, 0x96, 0x3C, 0x9D}},
{
[]byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11},
[]byte{0x61, 0xF9, 0xC3, 0x80, 0x22, 0x81, 0xB0, 0x96}},
{
[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11},
[]byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
[]byte{0x7D, 0x0C, 0xC6, 0x30, 0xAF, 0xDA, 0x1E, 0xC7}},
{
[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
[]byte{0x4E, 0xF9, 0x97, 0x45, 0x61, 0x98, 0xDD, 0x78}},
{
[]byte{0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10},
[]byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
[]byte{0x0A, 0xCE, 0xAB, 0x0F, 0xC6, 0xA0, 0xA2, 0x8D}},
{
[]byte{0x7C, 0xA1, 0x10, 0x45, 0x4A, 0x1A, 0x6E, 0x57},
[]byte{0x01, 0xA1, 0xD6, 0xD0, 0x39, 0x77, 0x67, 0x42},
[]byte{0x59, 0xC6, 0x82, 0x45, 0xEB, 0x05, 0x28, 0x2B}},
{
[]byte{0x01, 0x31, 0xD9, 0x61, 0x9D, 0xC1, 0x37, 0x6E},
[]byte{0x5C, 0xD5, 0x4C, 0xA8, 0x3D, 0xEF, 0x57, 0xDA},
[]byte{0xB1, 0xB8, 0xCC, 0x0B, 0x25, 0x0F, 0x09, 0xA0}},
{
[]byte{0x07, 0xA1, 0x13, 0x3E, 0x4A, 0x0B, 0x26, 0x86},
[]byte{0x02, 0x48, 0xD4, 0x38, 0x06, 0xF6, 0x71, 0x72},
[]byte{0x17, 0x30, 0xE5, 0x77, 0x8B, 0xEA, 0x1D, 0xA4}},
{
[]byte{0x38, 0x49, 0x67, 0x4C, 0x26, 0x02, 0x31, 0x9E},
[]byte{0x51, 0x45, 0x4B, 0x58, 0x2D, 0xDF, 0x44, 0x0A},
[]byte{0xA2, 0x5E, 0x78, 0x56, 0xCF, 0x26, 0x51, 0xEB}},
{
[]byte{0x04, 0xB9, 0x15, 0xBA, 0x43, 0xFE, 0xB5, 0xB6},
[]byte{0x42, 0xFD, 0x44, 0x30, 0x59, 0x57, 0x7F, 0xA2},
[]byte{0x35, 0x38, 0x82, 0xB1, 0x09, 0xCE, 0x8F, 0x1A}},
{
[]byte{0x01, 0x13, 0xB9, 0x70, 0xFD, 0x34, 0xF2, 0xCE},
[]byte{0x05, 0x9B, 0x5E, 0x08, 0x51, 0xCF, 0x14, 0x3A},
[]byte{0x48, 0xF4, 0xD0, 0x88, 0x4C, 0x37, 0x99, 0x18}},
{
[]byte{0x01, 0x70, 0xF1, 0x75, 0x46, 0x8F, 0xB5, 0xE6},
[]byte{0x07, 0x56, 0xD8, 0xE0, 0x77, 0x47, 0x61, 0xD2},
[]byte{0x43, 0x21, 0x93, 0xB7, 0x89, 0x51, 0xFC, 0x98}},
{
[]byte{0x43, 0x29, 0x7F, 0xAD, 0x38, 0xE3, 0x73, 0xFE},
[]byte{0x76, 0x25, 0x14, 0xB8, 0x29, 0xBF, 0x48, 0x6A},
[]byte{0x13, 0xF0, 0x41, 0x54, 0xD6, 0x9D, 0x1A, 0xE5}},
{
[]byte{0x07, 0xA7, 0x13, 0x70, 0x45, 0xDA, 0x2A, 0x16},
[]byte{0x3B, 0xDD, 0x11, 0x90, 0x49, 0x37, 0x28, 0x02},
[]byte{0x2E, 0xED, 0xDA, 0x93, 0xFF, 0xD3, 0x9C, 0x79}},
{
[]byte{0x04, 0x68, 0x91, 0x04, 0xC2, 0xFD, 0x3B, 0x2F},
[]byte{0x26, 0x95, 0x5F, 0x68, 0x35, 0xAF, 0x60, 0x9A},
[]byte{0xD8, 0x87, 0xE0, 0x39, 0x3C, 0x2D, 0xA6, 0xE3}},
{
[]byte{0x37, 0xD0, 0x6B, 0xB5, 0x16, 0xCB, 0x75, 0x46},
[]byte{0x16, 0x4D, 0x5E, 0x40, 0x4F, 0x27, 0x52, 0x32},
[]byte{0x5F, 0x99, 0xD0, 0x4F, 0x5B, 0x16, 0x39, 0x69}},
{
[]byte{0x1F, 0x08, 0x26, 0x0D, 0x1A, 0xC2, 0x46, 0x5E},
[]byte{0x6B, 0x05, 0x6E, 0x18, 0x75, 0x9F, 0x5C, 0xCA},
[]byte{0x4A, 0x05, 0x7A, 0x3B, 0x24, 0xD3, 0x97, 0x7B}},
{
[]byte{0x58, 0x40, 0x23, 0x64, 0x1A, 0xBA, 0x61, 0x76},
[]byte{0x00, 0x4B, 0xD6, 0xEF, 0x09, 0x17, 0x60, 0x62},
[]byte{0x45, 0x20, 0x31, 0xC1, 0xE4, 0xFA, 0xDA, 0x8E}},
{
[]byte{0x02, 0x58, 0x16, 0x16, 0x46, 0x29, 0xB0, 0x07},
[]byte{0x48, 0x0D, 0x39, 0x00, 0x6E, 0xE7, 0x62, 0xF2},
[]byte{0x75, 0x55, 0xAE, 0x39, 0xF5, 0x9B, 0x87, 0xBD}},
{
[]byte{0x49, 0x79, 0x3E, 0xBC, 0x79, 0xB3, 0x25, 0x8F},
[]byte{0x43, 0x75, 0x40, 0xC8, 0x69, 0x8F, 0x3C, 0xFA},
[]byte{0x53, 0xC5, 0x5F, 0x9C, 0xB4, 0x9F, 0xC0, 0x19}},
{
[]byte{0x4F, 0xB0, 0x5E, 0x15, 0x15, 0xAB, 0x73, 0xA7},
[]byte{0x07, 0x2D, 0x43, 0xA0, 0x77, 0x07, 0x52, 0x92},
[]byte{0x7A, 0x8E, 0x7B, 0xFA, 0x93, 0x7E, 0x89, 0xA3}},
{
[]byte{0x49, 0xE9, 0x5D, 0x6D, 0x4C, 0xA2, 0x29, 0xBF},
[]byte{0x02, 0xFE, 0x55, 0x77, 0x81, 0x17, 0xF1, 0x2A},
[]byte{0xCF, 0x9C, 0x5D, 0x7A, 0x49, 0x86, 0xAD, 0xB5}},
{
[]byte{0x01, 0x83, 0x10, 0xDC, 0x40, 0x9B, 0x26, 0xD6},
[]byte{0x1D, 0x9D, 0x5C, 0x50, 0x18, 0xF7, 0x28, 0xC2},
[]byte{0xD1, 0xAB, 0xB2, 0x90, 0x65, 0x8B, 0xC7, 0x78}},
{
[]byte{0x1C, 0x58, 0x7F, 0x1C, 0x13, 0x92, 0x4F, 0xEF},
[]byte{0x30, 0x55, 0x32, 0x28, 0x6D, 0x6F, 0x29, 0x5A},
[]byte{0x55, 0xCB, 0x37, 0x74, 0xD1, 0x3E, 0xF2, 0x01}},
{
[]byte{0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01},
[]byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
[]byte{0xFA, 0x34, 0xEC, 0x48, 0x47, 0xB2, 0x68, 0xB2}},
{
[]byte{0x1F, 0x1F, 0x1F, 0x1F, 0x0E, 0x0E, 0x0E, 0x0E},
[]byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
[]byte{0xA7, 0x90, 0x79, 0x51, 0x08, 0xEA, 0x3C, 0xAE}},
{
[]byte{0xE0, 0xFE, 0xE0, 0xFE, 0xF1, 0xFE, 0xF1, 0xFE},
[]byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
[]byte{0xC3, 0x9E, 0x07, 0x2D, 0x9F, 0xAC, 0x63, 0x1D}},
{
[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
[]byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
[]byte{0x01, 0x49, 0x33, 0xE0, 0xCD, 0xAF, 0xF6, 0xE4}},
{
[]byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
[]byte{0xF2, 0x1E, 0x9A, 0x77, 0xB7, 0x1C, 0x49, 0xBC}},
{
[]byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
[]byte{0x24, 0x59, 0x46, 0x88, 0x57, 0x54, 0x36, 0x9A}},
{
[]byte{0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10},
[]byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
[]byte{0x6B, 0x5C, 0x5A, 0x9C, 0x5D, 0x9E, 0x0A, 0x5A}},
}
func TestCipherEncrypt(t *testing.T) {
for i, tt := range encryptTests {
c, err := NewCipher(tt.key)
if err != nil {
t.Errorf("NewCipher(%d bytes) = %s", len(tt.key), err)
continue
}
ct := make([]byte, len(tt.out))
c.Encrypt(ct, tt.in)
for j, v := range ct {
if v != tt.out[j] {
t.Errorf("Cipher.Encrypt, test vector #%d: cipher-text[%d] = %#x, expected %#x", i, j, v, tt.out[j])
break
}
}
}
}
func TestCipherDecrypt(t *testing.T) {
for i, tt := range encryptTests {
c, err := NewCipher(tt.key)
if err != nil {
t.Errorf("NewCipher(%d bytes) = %s", len(tt.key), err)
continue
}
pt := make([]byte, len(tt.in))
c.Decrypt(pt, tt.out)
for j, v := range pt {
if v != tt.in[j] {
t.Errorf("Cipher.Decrypt, test vector #%d: plain-text[%d] = %#x, expected %#x", i, j, v, tt.in[j])
break
}
}
}
}
func TestSaltedCipherKeyLength(t *testing.T) {
var key []byte
for i := 0; i < 4; i++ {
_, err := NewSaltedCipher(key, []byte{'a'})
if err != KeySizeError(i) {
t.Errorf("NewSaltedCipher with short key, gave error %#v, expected %#v", err, KeySizeError(i))
}
key = append(key, 'a')
}
// A 57-byte key. One over the typical blowfish restriction.
key = []byte("012345678901234567890123456789012345678901234567890123456")
_, err := NewSaltedCipher(key, []byte{'a'})
if err != nil {
t.Errorf("NewSaltedCipher with long key, gave error %#v", err)
}
}

View File

@@ -0,0 +1,90 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package blowfish implements Bruce Schneier's Blowfish encryption algorithm.
package blowfish
// The code is a port of Bruce Schneier's C implementation.
// See http://www.schneier.com/blowfish.html.
import "strconv"
// The Blowfish block size in bytes.
const BlockSize = 8
// A Cipher is an instance of Blowfish encryption using a particular key.
type Cipher struct {
p [18]uint32
s0, s1, s2, s3 [256]uint32
}
type KeySizeError int
func (k KeySizeError) Error() string {
return "crypto/blowfish: invalid key size " + strconv.Itoa(int(k))
}
// NewCipher creates and returns a Cipher.
// The key argument should be the Blowfish key, 4 to 56 bytes.
func NewCipher(key []byte) (*Cipher, error) {
var result Cipher
k := len(key)
if k < 4 || k > 56 {
return nil, KeySizeError(k)
}
initCipher(key, &result)
ExpandKey(key, &result)
return &result, nil
}
// NewSaltedCipher creates a returns a Cipher that folds a salt into its key
// schedule. For most purposes, NewCipher, instead of NewSaltedCipher, is
// sufficient and desirable. For bcrypt compatiblity, the key can be over 56
// bytes. Only the first 16 bytes of salt are used.
func NewSaltedCipher(key, salt []byte) (*Cipher, error) {
var result Cipher
k := len(key)
if k < 4 {
return nil, KeySizeError(k)
}
initCipher(key, &result)
expandKeyWithSalt(key, salt, &result)
return &result, nil
}
// BlockSize returns the Blowfish block size, 8 bytes.
// It is necessary to satisfy the Block interface in the
// package "crypto/cipher".
func (c *Cipher) BlockSize() int { return BlockSize }
// Encrypt encrypts the 8-byte buffer src using the key k
// and stores the result in dst.
// Note that for amounts of data larger than a block,
// it is not safe to just call Encrypt on successive blocks;
// instead, use an encryption mode like CBC (see crypto/cipher/cbc.go).
func (c *Cipher) Encrypt(dst, src []byte) {
l := uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
r := uint32(src[4])<<24 | uint32(src[5])<<16 | uint32(src[6])<<8 | uint32(src[7])
l, r = encryptBlock(l, r, c)
dst[0], dst[1], dst[2], dst[3] = byte(l>>24), byte(l>>16), byte(l>>8), byte(l)
dst[4], dst[5], dst[6], dst[7] = byte(r>>24), byte(r>>16), byte(r>>8), byte(r)
}
// Decrypt decrypts the 8-byte buffer src using the key k
// and stores the result in dst.
func (c *Cipher) Decrypt(dst, src []byte) {
l := uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
r := uint32(src[4])<<24 | uint32(src[5])<<16 | uint32(src[6])<<8 | uint32(src[7])
l, r = decryptBlock(l, r, c)
dst[0], dst[1], dst[2], dst[3] = byte(l>>24), byte(l>>16), byte(l>>8), byte(l)
dst[4], dst[5], dst[6], dst[7] = byte(r>>24), byte(r>>16), byte(r>>8), byte(r)
}
func initCipher(key []byte, c *Cipher) {
copy(c.p[0:], p[0:])
copy(c.s0[0:], s0[0:])
copy(c.s1[0:], s1[0:])
copy(c.s2[0:], s2[0:])
copy(c.s3[0:], s3[0:])
}

View File

@@ -0,0 +1,199 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// The startup permutation array and substitution boxes.
// They are the hexadecimal digits of PI; see:
// http://www.schneier.com/code/constants.txt.
package blowfish
var s0 = [256]uint32{
0xd1310ba6, 0x98dfb5ac, 0x2ffd72db, 0xd01adfb7, 0xb8e1afed, 0x6a267e96,
0xba7c9045, 0xf12c7f99, 0x24a19947, 0xb3916cf7, 0x0801f2e2, 0x858efc16,
0x636920d8, 0x71574e69, 0xa458fea3, 0xf4933d7e, 0x0d95748f, 0x728eb658,
0x718bcd58, 0x82154aee, 0x7b54a41d, 0xc25a59b5, 0x9c30d539, 0x2af26013,
0xc5d1b023, 0x286085f0, 0xca417918, 0xb8db38ef, 0x8e79dcb0, 0x603a180e,
0x6c9e0e8b, 0xb01e8a3e, 0xd71577c1, 0xbd314b27, 0x78af2fda, 0x55605c60,
0xe65525f3, 0xaa55ab94, 0x57489862, 0x63e81440, 0x55ca396a, 0x2aab10b6,
0xb4cc5c34, 0x1141e8ce, 0xa15486af, 0x7c72e993, 0xb3ee1411, 0x636fbc2a,
0x2ba9c55d, 0x741831f6, 0xce5c3e16, 0x9b87931e, 0xafd6ba33, 0x6c24cf5c,
0x7a325381, 0x28958677, 0x3b8f4898, 0x6b4bb9af, 0xc4bfe81b, 0x66282193,
0x61d809cc, 0xfb21a991, 0x487cac60, 0x5dec8032, 0xef845d5d, 0xe98575b1,
0xdc262302, 0xeb651b88, 0x23893e81, 0xd396acc5, 0x0f6d6ff3, 0x83f44239,
0x2e0b4482, 0xa4842004, 0x69c8f04a, 0x9e1f9b5e, 0x21c66842, 0xf6e96c9a,
0x670c9c61, 0xabd388f0, 0x6a51a0d2, 0xd8542f68, 0x960fa728, 0xab5133a3,
0x6eef0b6c, 0x137a3be4, 0xba3bf050, 0x7efb2a98, 0xa1f1651d, 0x39af0176,
0x66ca593e, 0x82430e88, 0x8cee8619, 0x456f9fb4, 0x7d84a5c3, 0x3b8b5ebe,
0xe06f75d8, 0x85c12073, 0x401a449f, 0x56c16aa6, 0x4ed3aa62, 0x363f7706,
0x1bfedf72, 0x429b023d, 0x37d0d724, 0xd00a1248, 0xdb0fead3, 0x49f1c09b,
0x075372c9, 0x80991b7b, 0x25d479d8, 0xf6e8def7, 0xe3fe501a, 0xb6794c3b,
0x976ce0bd, 0x04c006ba, 0xc1a94fb6, 0x409f60c4, 0x5e5c9ec2, 0x196a2463,
0x68fb6faf, 0x3e6c53b5, 0x1339b2eb, 0x3b52ec6f, 0x6dfc511f, 0x9b30952c,
0xcc814544, 0xaf5ebd09, 0xbee3d004, 0xde334afd, 0x660f2807, 0x192e4bb3,
0xc0cba857, 0x45c8740f, 0xd20b5f39, 0xb9d3fbdb, 0x5579c0bd, 0x1a60320a,
0xd6a100c6, 0x402c7279, 0x679f25fe, 0xfb1fa3cc, 0x8ea5e9f8, 0xdb3222f8,
0x3c7516df, 0xfd616b15, 0x2f501ec8, 0xad0552ab, 0x323db5fa, 0xfd238760,
0x53317b48, 0x3e00df82, 0x9e5c57bb, 0xca6f8ca0, 0x1a87562e, 0xdf1769db,
0xd542a8f6, 0x287effc3, 0xac6732c6, 0x8c4f5573, 0x695b27b0, 0xbbca58c8,
0xe1ffa35d, 0xb8f011a0, 0x10fa3d98, 0xfd2183b8, 0x4afcb56c, 0x2dd1d35b,
0x9a53e479, 0xb6f84565, 0xd28e49bc, 0x4bfb9790, 0xe1ddf2da, 0xa4cb7e33,
0x62fb1341, 0xcee4c6e8, 0xef20cada, 0x36774c01, 0xd07e9efe, 0x2bf11fb4,
0x95dbda4d, 0xae909198, 0xeaad8e71, 0x6b93d5a0, 0xd08ed1d0, 0xafc725e0,
0x8e3c5b2f, 0x8e7594b7, 0x8ff6e2fb, 0xf2122b64, 0x8888b812, 0x900df01c,
0x4fad5ea0, 0x688fc31c, 0xd1cff191, 0xb3a8c1ad, 0x2f2f2218, 0xbe0e1777,
0xea752dfe, 0x8b021fa1, 0xe5a0cc0f, 0xb56f74e8, 0x18acf3d6, 0xce89e299,
0xb4a84fe0, 0xfd13e0b7, 0x7cc43b81, 0xd2ada8d9, 0x165fa266, 0x80957705,
0x93cc7314, 0x211a1477, 0xe6ad2065, 0x77b5fa86, 0xc75442f5, 0xfb9d35cf,
0xebcdaf0c, 0x7b3e89a0, 0xd6411bd3, 0xae1e7e49, 0x00250e2d, 0x2071b35e,
0x226800bb, 0x57b8e0af, 0x2464369b, 0xf009b91e, 0x5563911d, 0x59dfa6aa,
0x78c14389, 0xd95a537f, 0x207d5ba2, 0x02e5b9c5, 0x83260376, 0x6295cfa9,
0x11c81968, 0x4e734a41, 0xb3472dca, 0x7b14a94a, 0x1b510052, 0x9a532915,
0xd60f573f, 0xbc9bc6e4, 0x2b60a476, 0x81e67400, 0x08ba6fb5, 0x571be91f,
0xf296ec6b, 0x2a0dd915, 0xb6636521, 0xe7b9f9b6, 0xff34052e, 0xc5855664,
0x53b02d5d, 0xa99f8fa1, 0x08ba4799, 0x6e85076a,
}
var s1 = [256]uint32{
0x4b7a70e9, 0xb5b32944, 0xdb75092e, 0xc4192623, 0xad6ea6b0, 0x49a7df7d,
0x9cee60b8, 0x8fedb266, 0xecaa8c71, 0x699a17ff, 0x5664526c, 0xc2b19ee1,
0x193602a5, 0x75094c29, 0xa0591340, 0xe4183a3e, 0x3f54989a, 0x5b429d65,
0x6b8fe4d6, 0x99f73fd6, 0xa1d29c07, 0xefe830f5, 0x4d2d38e6, 0xf0255dc1,
0x4cdd2086, 0x8470eb26, 0x6382e9c6, 0x021ecc5e, 0x09686b3f, 0x3ebaefc9,
0x3c971814, 0x6b6a70a1, 0x687f3584, 0x52a0e286, 0xb79c5305, 0xaa500737,
0x3e07841c, 0x7fdeae5c, 0x8e7d44ec, 0x5716f2b8, 0xb03ada37, 0xf0500c0d,
0xf01c1f04, 0x0200b3ff, 0xae0cf51a, 0x3cb574b2, 0x25837a58, 0xdc0921bd,
0xd19113f9, 0x7ca92ff6, 0x94324773, 0x22f54701, 0x3ae5e581, 0x37c2dadc,
0xc8b57634, 0x9af3dda7, 0xa9446146, 0x0fd0030e, 0xecc8c73e, 0xa4751e41,
0xe238cd99, 0x3bea0e2f, 0x3280bba1, 0x183eb331, 0x4e548b38, 0x4f6db908,
0x6f420d03, 0xf60a04bf, 0x2cb81290, 0x24977c79, 0x5679b072, 0xbcaf89af,
0xde9a771f, 0xd9930810, 0xb38bae12, 0xdccf3f2e, 0x5512721f, 0x2e6b7124,
0x501adde6, 0x9f84cd87, 0x7a584718, 0x7408da17, 0xbc9f9abc, 0xe94b7d8c,
0xec7aec3a, 0xdb851dfa, 0x63094366, 0xc464c3d2, 0xef1c1847, 0x3215d908,
0xdd433b37, 0x24c2ba16, 0x12a14d43, 0x2a65c451, 0x50940002, 0x133ae4dd,
0x71dff89e, 0x10314e55, 0x81ac77d6, 0x5f11199b, 0x043556f1, 0xd7a3c76b,
0x3c11183b, 0x5924a509, 0xf28fe6ed, 0x97f1fbfa, 0x9ebabf2c, 0x1e153c6e,
0x86e34570, 0xeae96fb1, 0x860e5e0a, 0x5a3e2ab3, 0x771fe71c, 0x4e3d06fa,
0x2965dcb9, 0x99e71d0f, 0x803e89d6, 0x5266c825, 0x2e4cc978, 0x9c10b36a,
0xc6150eba, 0x94e2ea78, 0xa5fc3c53, 0x1e0a2df4, 0xf2f74ea7, 0x361d2b3d,
0x1939260f, 0x19c27960, 0x5223a708, 0xf71312b6, 0xebadfe6e, 0xeac31f66,
0xe3bc4595, 0xa67bc883, 0xb17f37d1, 0x018cff28, 0xc332ddef, 0xbe6c5aa5,
0x65582185, 0x68ab9802, 0xeecea50f, 0xdb2f953b, 0x2aef7dad, 0x5b6e2f84,
0x1521b628, 0x29076170, 0xecdd4775, 0x619f1510, 0x13cca830, 0xeb61bd96,
0x0334fe1e, 0xaa0363cf, 0xb5735c90, 0x4c70a239, 0xd59e9e0b, 0xcbaade14,
0xeecc86bc, 0x60622ca7, 0x9cab5cab, 0xb2f3846e, 0x648b1eaf, 0x19bdf0ca,
0xa02369b9, 0x655abb50, 0x40685a32, 0x3c2ab4b3, 0x319ee9d5, 0xc021b8f7,
0x9b540b19, 0x875fa099, 0x95f7997e, 0x623d7da8, 0xf837889a, 0x97e32d77,
0x11ed935f, 0x16681281, 0x0e358829, 0xc7e61fd6, 0x96dedfa1, 0x7858ba99,
0x57f584a5, 0x1b227263, 0x9b83c3ff, 0x1ac24696, 0xcdb30aeb, 0x532e3054,
0x8fd948e4, 0x6dbc3128, 0x58ebf2ef, 0x34c6ffea, 0xfe28ed61, 0xee7c3c73,
0x5d4a14d9, 0xe864b7e3, 0x42105d14, 0x203e13e0, 0x45eee2b6, 0xa3aaabea,
0xdb6c4f15, 0xfacb4fd0, 0xc742f442, 0xef6abbb5, 0x654f3b1d, 0x41cd2105,
0xd81e799e, 0x86854dc7, 0xe44b476a, 0x3d816250, 0xcf62a1f2, 0x5b8d2646,
0xfc8883a0, 0xc1c7b6a3, 0x7f1524c3, 0x69cb7492, 0x47848a0b, 0x5692b285,
0x095bbf00, 0xad19489d, 0x1462b174, 0x23820e00, 0x58428d2a, 0x0c55f5ea,
0x1dadf43e, 0x233f7061, 0x3372f092, 0x8d937e41, 0xd65fecf1, 0x6c223bdb,
0x7cde3759, 0xcbee7460, 0x4085f2a7, 0xce77326e, 0xa6078084, 0x19f8509e,
0xe8efd855, 0x61d99735, 0xa969a7aa, 0xc50c06c2, 0x5a04abfc, 0x800bcadc,
0x9e447a2e, 0xc3453484, 0xfdd56705, 0x0e1e9ec9, 0xdb73dbd3, 0x105588cd,
0x675fda79, 0xe3674340, 0xc5c43465, 0x713e38d8, 0x3d28f89e, 0xf16dff20,
0x153e21e7, 0x8fb03d4a, 0xe6e39f2b, 0xdb83adf7,
}
var s2 = [256]uint32{
0xe93d5a68, 0x948140f7, 0xf64c261c, 0x94692934, 0x411520f7, 0x7602d4f7,
0xbcf46b2e, 0xd4a20068, 0xd4082471, 0x3320f46a, 0x43b7d4b7, 0x500061af,
0x1e39f62e, 0x97244546, 0x14214f74, 0xbf8b8840, 0x4d95fc1d, 0x96b591af,
0x70f4ddd3, 0x66a02f45, 0xbfbc09ec, 0x03bd9785, 0x7fac6dd0, 0x31cb8504,
0x96eb27b3, 0x55fd3941, 0xda2547e6, 0xabca0a9a, 0x28507825, 0x530429f4,
0x0a2c86da, 0xe9b66dfb, 0x68dc1462, 0xd7486900, 0x680ec0a4, 0x27a18dee,
0x4f3ffea2, 0xe887ad8c, 0xb58ce006, 0x7af4d6b6, 0xaace1e7c, 0xd3375fec,
0xce78a399, 0x406b2a42, 0x20fe9e35, 0xd9f385b9, 0xee39d7ab, 0x3b124e8b,
0x1dc9faf7, 0x4b6d1856, 0x26a36631, 0xeae397b2, 0x3a6efa74, 0xdd5b4332,
0x6841e7f7, 0xca7820fb, 0xfb0af54e, 0xd8feb397, 0x454056ac, 0xba489527,
0x55533a3a, 0x20838d87, 0xfe6ba9b7, 0xd096954b, 0x55a867bc, 0xa1159a58,
0xcca92963, 0x99e1db33, 0xa62a4a56, 0x3f3125f9, 0x5ef47e1c, 0x9029317c,
0xfdf8e802, 0x04272f70, 0x80bb155c, 0x05282ce3, 0x95c11548, 0xe4c66d22,
0x48c1133f, 0xc70f86dc, 0x07f9c9ee, 0x41041f0f, 0x404779a4, 0x5d886e17,
0x325f51eb, 0xd59bc0d1, 0xf2bcc18f, 0x41113564, 0x257b7834, 0x602a9c60,
0xdff8e8a3, 0x1f636c1b, 0x0e12b4c2, 0x02e1329e, 0xaf664fd1, 0xcad18115,
0x6b2395e0, 0x333e92e1, 0x3b240b62, 0xeebeb922, 0x85b2a20e, 0xe6ba0d99,
0xde720c8c, 0x2da2f728, 0xd0127845, 0x95b794fd, 0x647d0862, 0xe7ccf5f0,
0x5449a36f, 0x877d48fa, 0xc39dfd27, 0xf33e8d1e, 0x0a476341, 0x992eff74,
0x3a6f6eab, 0xf4f8fd37, 0xa812dc60, 0xa1ebddf8, 0x991be14c, 0xdb6e6b0d,
0xc67b5510, 0x6d672c37, 0x2765d43b, 0xdcd0e804, 0xf1290dc7, 0xcc00ffa3,
0xb5390f92, 0x690fed0b, 0x667b9ffb, 0xcedb7d9c, 0xa091cf0b, 0xd9155ea3,
0xbb132f88, 0x515bad24, 0x7b9479bf, 0x763bd6eb, 0x37392eb3, 0xcc115979,
0x8026e297, 0xf42e312d, 0x6842ada7, 0xc66a2b3b, 0x12754ccc, 0x782ef11c,
0x6a124237, 0xb79251e7, 0x06a1bbe6, 0x4bfb6350, 0x1a6b1018, 0x11caedfa,
0x3d25bdd8, 0xe2e1c3c9, 0x44421659, 0x0a121386, 0xd90cec6e, 0xd5abea2a,
0x64af674e, 0xda86a85f, 0xbebfe988, 0x64e4c3fe, 0x9dbc8057, 0xf0f7c086,
0x60787bf8, 0x6003604d, 0xd1fd8346, 0xf6381fb0, 0x7745ae04, 0xd736fccc,
0x83426b33, 0xf01eab71, 0xb0804187, 0x3c005e5f, 0x77a057be, 0xbde8ae24,
0x55464299, 0xbf582e61, 0x4e58f48f, 0xf2ddfda2, 0xf474ef38, 0x8789bdc2,
0x5366f9c3, 0xc8b38e74, 0xb475f255, 0x46fcd9b9, 0x7aeb2661, 0x8b1ddf84,
0x846a0e79, 0x915f95e2, 0x466e598e, 0x20b45770, 0x8cd55591, 0xc902de4c,
0xb90bace1, 0xbb8205d0, 0x11a86248, 0x7574a99e, 0xb77f19b6, 0xe0a9dc09,
0x662d09a1, 0xc4324633, 0xe85a1f02, 0x09f0be8c, 0x4a99a025, 0x1d6efe10,
0x1ab93d1d, 0x0ba5a4df, 0xa186f20f, 0x2868f169, 0xdcb7da83, 0x573906fe,
0xa1e2ce9b, 0x4fcd7f52, 0x50115e01, 0xa70683fa, 0xa002b5c4, 0x0de6d027,
0x9af88c27, 0x773f8641, 0xc3604c06, 0x61a806b5, 0xf0177a28, 0xc0f586e0,
0x006058aa, 0x30dc7d62, 0x11e69ed7, 0x2338ea63, 0x53c2dd94, 0xc2c21634,
0xbbcbee56, 0x90bcb6de, 0xebfc7da1, 0xce591d76, 0x6f05e409, 0x4b7c0188,
0x39720a3d, 0x7c927c24, 0x86e3725f, 0x724d9db9, 0x1ac15bb4, 0xd39eb8fc,
0xed545578, 0x08fca5b5, 0xd83d7cd3, 0x4dad0fc4, 0x1e50ef5e, 0xb161e6f8,
0xa28514d9, 0x6c51133c, 0x6fd5c7e7, 0x56e14ec4, 0x362abfce, 0xddc6c837,
0xd79a3234, 0x92638212, 0x670efa8e, 0x406000e0,
}
var s3 = [256]uint32{
0x3a39ce37, 0xd3faf5cf, 0xabc27737, 0x5ac52d1b, 0x5cb0679e, 0x4fa33742,
0xd3822740, 0x99bc9bbe, 0xd5118e9d, 0xbf0f7315, 0xd62d1c7e, 0xc700c47b,
0xb78c1b6b, 0x21a19045, 0xb26eb1be, 0x6a366eb4, 0x5748ab2f, 0xbc946e79,
0xc6a376d2, 0x6549c2c8, 0x530ff8ee, 0x468dde7d, 0xd5730a1d, 0x4cd04dc6,
0x2939bbdb, 0xa9ba4650, 0xac9526e8, 0xbe5ee304, 0xa1fad5f0, 0x6a2d519a,
0x63ef8ce2, 0x9a86ee22, 0xc089c2b8, 0x43242ef6, 0xa51e03aa, 0x9cf2d0a4,
0x83c061ba, 0x9be96a4d, 0x8fe51550, 0xba645bd6, 0x2826a2f9, 0xa73a3ae1,
0x4ba99586, 0xef5562e9, 0xc72fefd3, 0xf752f7da, 0x3f046f69, 0x77fa0a59,
0x80e4a915, 0x87b08601, 0x9b09e6ad, 0x3b3ee593, 0xe990fd5a, 0x9e34d797,
0x2cf0b7d9, 0x022b8b51, 0x96d5ac3a, 0x017da67d, 0xd1cf3ed6, 0x7c7d2d28,
0x1f9f25cf, 0xadf2b89b, 0x5ad6b472, 0x5a88f54c, 0xe029ac71, 0xe019a5e6,
0x47b0acfd, 0xed93fa9b, 0xe8d3c48d, 0x283b57cc, 0xf8d56629, 0x79132e28,
0x785f0191, 0xed756055, 0xf7960e44, 0xe3d35e8c, 0x15056dd4, 0x88f46dba,
0x03a16125, 0x0564f0bd, 0xc3eb9e15, 0x3c9057a2, 0x97271aec, 0xa93a072a,
0x1b3f6d9b, 0x1e6321f5, 0xf59c66fb, 0x26dcf319, 0x7533d928, 0xb155fdf5,
0x03563482, 0x8aba3cbb, 0x28517711, 0xc20ad9f8, 0xabcc5167, 0xccad925f,
0x4de81751, 0x3830dc8e, 0x379d5862, 0x9320f991, 0xea7a90c2, 0xfb3e7bce,
0x5121ce64, 0x774fbe32, 0xa8b6e37e, 0xc3293d46, 0x48de5369, 0x6413e680,
0xa2ae0810, 0xdd6db224, 0x69852dfd, 0x09072166, 0xb39a460a, 0x6445c0dd,
0x586cdecf, 0x1c20c8ae, 0x5bbef7dd, 0x1b588d40, 0xccd2017f, 0x6bb4e3bb,
0xdda26a7e, 0x3a59ff45, 0x3e350a44, 0xbcb4cdd5, 0x72eacea8, 0xfa6484bb,
0x8d6612ae, 0xbf3c6f47, 0xd29be463, 0x542f5d9e, 0xaec2771b, 0xf64e6370,
0x740e0d8d, 0xe75b1357, 0xf8721671, 0xaf537d5d, 0x4040cb08, 0x4eb4e2cc,
0x34d2466a, 0x0115af84, 0xe1b00428, 0x95983a1d, 0x06b89fb4, 0xce6ea048,
0x6f3f3b82, 0x3520ab82, 0x011a1d4b, 0x277227f8, 0x611560b1, 0xe7933fdc,
0xbb3a792b, 0x344525bd, 0xa08839e1, 0x51ce794b, 0x2f32c9b7, 0xa01fbac9,
0xe01cc87e, 0xbcc7d1f6, 0xcf0111c3, 0xa1e8aac7, 0x1a908749, 0xd44fbd9a,
0xd0dadecb, 0xd50ada38, 0x0339c32a, 0xc6913667, 0x8df9317c, 0xe0b12b4f,
0xf79e59b7, 0x43f5bb3a, 0xf2d519ff, 0x27d9459c, 0xbf97222c, 0x15e6fc2a,
0x0f91fc71, 0x9b941525, 0xfae59361, 0xceb69ceb, 0xc2a86459, 0x12baa8d1,
0xb6c1075e, 0xe3056a0c, 0x10d25065, 0xcb03a442, 0xe0ec6e0e, 0x1698db3b,
0x4c98a0be, 0x3278e964, 0x9f1f9532, 0xe0d392df, 0xd3a0342b, 0x8971f21e,
0x1b0a7441, 0x4ba3348c, 0xc5be7120, 0xc37632d8, 0xdf359f8d, 0x9b992f2e,
0xe60b6f47, 0x0fe3f11d, 0xe54cda54, 0x1edad891, 0xce6279cf, 0xcd3e7e6f,
0x1618b166, 0xfd2c1d05, 0x848fd2c5, 0xf6fb2299, 0xf523f357, 0xa6327623,
0x93a83531, 0x56cccd02, 0xacf08162, 0x5a75ebb5, 0x6e163697, 0x88d273cc,
0xde966292, 0x81b949d0, 0x4c50901b, 0x71c65614, 0xe6c6c7bd, 0x327a140a,
0x45e1d006, 0xc3f27b9a, 0xc9aa53fd, 0x62a80f00, 0xbb25bfe2, 0x35bdd2f6,
0x71126905, 0xb2040222, 0xb6cbcf7c, 0xcd769c2b, 0x53113ec0, 0x1640e3d3,
0x38abbd60, 0x2547adf0, 0xba38209c, 0xf746ce76, 0x77afa1c5, 0x20756060,
0x85cbfe4e, 0x8ae88dd8, 0x7aaaf9b0, 0x4cf9aa7e, 0x1948c25c, 0x02fb8a8c,
0x01c36ae4, 0xd6ebe1f9, 0x90d4f869, 0xa65cdea0, 0x3f09252d, 0xc208e69f,
0xb74e6132, 0xce77e25b, 0x578fdfe3, 0x3ac372e6,
}
var p = [18]uint32{
0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, 0xa4093822, 0x299f31d0,
0x082efa98, 0xec4e6c89, 0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c,
0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917, 0x9216d5d9, 0x8979fb1b,
}

View File

@@ -0,0 +1,37 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package transform_test
import (
"fmt"
"unicode"
"code.google.com/p/go.text/transform"
"code.google.com/p/go.text/unicode/norm"
)
func ExampleRemoveFunc() {
input := []byte(`tschüß; до свидания`)
b := make([]byte, len(input))
t := transform.RemoveFunc(unicode.IsSpace)
n, _, _ := t.Transform(b, input, true)
fmt.Println(string(b[:n]))
t = transform.RemoveFunc(func(r rune) bool {
return !unicode.Is(unicode.Latin, r)
})
n, _, _ = t.Transform(b, input, true)
fmt.Println(string(b[:n]))
n, _, _ = t.Transform(b, norm.NFD.Bytes(input), true)
fmt.Println(string(b[:n]))
// Output:
// tschüß;досвидания
// tschüß
// tschuß
}

View File

@@ -0,0 +1,496 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package transform provides reader and writer wrappers that transform the
// bytes passing through as well as various transformations. Example
// transformations provided by other packages include normalization and
// conversion between character sets.
package transform
import (
"errors"
"io"
"unicode/utf8"
)
var (
// ErrShortDst means that the destination buffer was too short to
// receive all of the transformed bytes.
ErrShortDst = errors.New("transform: short destination buffer")
// ErrShortSrc means that the source buffer has insufficient data to
// complete the transformation.
ErrShortSrc = errors.New("transform: short source buffer")
// errInconsistentByteCount means that Transform returned success (nil
// error) but also returned nSrc inconsistent with the src argument.
errInconsistentByteCount = errors.New("transform: inconsistent byte count returned")
// errShortInternal means that an internal buffer is not large enough
// to make progress and the Transform operation must be aborted.
errShortInternal = errors.New("transform: short internal buffer")
)
// Transformer transforms bytes.
type Transformer interface {
// Transform writes to dst the transformed bytes read from src, and
// returns the number of dst bytes written and src bytes read. The
// atEOF argument tells whether src represents the last bytes of the
// input.
//
// Callers should always process the nDst bytes produced and account
// for the nSrc bytes consumed before considering the error err.
//
// A nil error means that all of the transformed bytes (whether freshly
// transformed from src or left over from previous Transform calls)
// were written to dst. A nil error can be returned regardless of
// whether atEOF is true. If err is nil then nSrc must equal len(src);
// the converse is not necessarily true.
//
// ErrShortDst means that dst was too short to receive all of the
// transformed bytes. ErrShortSrc means that src had insufficient data
// to complete the transformation. If both conditions apply, then
// either error may be returned. Other than the error conditions listed
// here, implementations are free to report other errors that arise.
Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
}
// TODO: Do we require that a Transformer be reusable if it returns a nil error
// or do we always require a reset after use? Is Reset mandatory or optional?
// Reader wraps another io.Reader by transforming the bytes read.
type Reader struct {
r io.Reader
t Transformer
err error
// dst[dst0:dst1] contains bytes that have been transformed by t but
// not yet copied out via Read.
dst []byte
dst0, dst1 int
// src[src0:src1] contains bytes that have been read from r but not
// yet transformed through t.
src []byte
src0, src1 int
// transformComplete is whether the transformation is complete,
// regardless of whether or not it was successful.
transformComplete bool
}
const defaultBufSize = 4096
// NewReader returns a new Reader that wraps r by transforming the bytes read
// via t.
func NewReader(r io.Reader, t Transformer) *Reader {
return &Reader{
r: r,
t: t,
dst: make([]byte, defaultBufSize),
src: make([]byte, defaultBufSize),
}
}
// Read implements the io.Reader interface.
func (r *Reader) Read(p []byte) (int, error) {
n, err := 0, error(nil)
for {
// Copy out any transformed bytes and return the final error if we are done.
if r.dst0 != r.dst1 {
n = copy(p, r.dst[r.dst0:r.dst1])
r.dst0 += n
if r.dst0 == r.dst1 && r.transformComplete {
return n, r.err
}
return n, nil
} else if r.transformComplete {
return 0, r.err
}
// Try to transform some source bytes, or to flush the transformer if we
// are out of source bytes. We do this even if r.r.Read returned an error.
// As the io.Reader documentation says, "process the n > 0 bytes returned
// before considering the error".
if r.src0 != r.src1 || r.err != nil {
r.dst0 = 0
r.dst1, n, err = r.t.Transform(r.dst, r.src[r.src0:r.src1], r.err == io.EOF)
r.src0 += n
switch {
case err == nil:
if r.src0 != r.src1 {
r.err = errInconsistentByteCount
}
// The Transform call was successful; we are complete if we
// cannot read more bytes into src.
r.transformComplete = r.err != nil
continue
case err == ErrShortDst && r.dst1 != 0:
// Make room in dst by copying out, and try again.
continue
case err == ErrShortSrc && r.src1-r.src0 != len(r.src) && r.err == nil:
// Read more bytes into src via the code below, and try again.
default:
r.transformComplete = true
// The reader error (r.err) takes precedence over the
// transformer error (err) unless r.err is nil or io.EOF.
if r.err == nil || r.err == io.EOF {
r.err = err
}
continue
}
}
// Move any untransformed source bytes to the start of the buffer
// and read more bytes.
if r.src0 != 0 {
r.src0, r.src1 = 0, copy(r.src, r.src[r.src0:r.src1])
}
n, r.err = r.r.Read(r.src[r.src1:])
r.src1 += n
}
}
// TODO: implement ReadByte (and ReadRune??).
// Writer wraps another io.Writer by transforming the bytes read.
// The user needs to call Close to flush unwritten bytes that may
// be buffered.
type Writer struct {
w io.Writer
t Transformer
dst []byte
// src[:n] contains bytes that have not yet passed through t.
src []byte
n int
}
// NewWriter returns a new Writer that wraps w by transforming the bytes written
// via t.
func NewWriter(w io.Writer, t Transformer) *Writer {
return &Writer{
w: w,
t: t,
dst: make([]byte, defaultBufSize),
src: make([]byte, defaultBufSize),
}
}
// Write implements the io.Writer interface. If there are not enough
// bytes available to complete a Transform, the bytes will be buffered
// for the next write. Call Close to convert the remaining bytes.
func (w *Writer) Write(data []byte) (n int, err error) {
src := data
if w.n > 0 {
// Append bytes from data to the last remainder.
// TODO: limit the amount copied on first try.
n = copy(w.src[w.n:], data)
w.n += n
src = w.src[:w.n]
}
for {
nDst, nSrc, err := w.t.Transform(w.dst, src, false)
if _, werr := w.w.Write(w.dst[:nDst]); werr != nil {
return n, werr
}
src = src[nSrc:]
if w.n > 0 && len(src) <= n {
// Enough bytes from w.src have been consumed. We make src point
// to data instead to reduce the copying.
w.n = 0
n -= len(src)
src = data[n:]
if n < len(data) && (err == nil || err == ErrShortSrc) {
continue
}
} else {
n += nSrc
}
switch {
case err == ErrShortDst && nDst > 0:
case err == ErrShortSrc && len(src) < len(w.src):
m := copy(w.src, src)
// If w.n > 0, bytes from data were already copied to w.src and n
// was already set to the number of bytes consumed.
if w.n == 0 {
n += m
}
w.n = m
return n, nil
case err == nil && w.n > 0:
return n, errInconsistentByteCount
default:
return n, err
}
}
}
// Close implements the io.Closer interface.
func (w *Writer) Close() error {
for src := w.src[:w.n]; len(src) > 0; {
nDst, nSrc, err := w.t.Transform(w.dst, src, true)
if nDst == 0 {
return err
}
if _, werr := w.w.Write(w.dst[:nDst]); werr != nil {
return werr
}
if err != ErrShortDst {
return err
}
src = src[nSrc:]
}
return nil
}
type nop struct{}
func (nop) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
n := copy(dst, src)
if n < len(src) {
err = ErrShortDst
}
return n, n, err
}
type discard struct{}
func (discard) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
return 0, len(src), nil
}
var (
// Discard is a Transformer for which all Transform calls succeed
// by consuming all bytes and writing nothing.
Discard Transformer = discard{}
// Nop is a Transformer that copies src to dst.
Nop Transformer = nop{}
)
// chain is a sequence of links. A chain with N Transformers has N+1 links and
// N+1 buffers. Of those N+1 buffers, the first and last are the src and dst
// buffers given to chain.Transform and the middle N-1 buffers are intermediate
// buffers owned by the chain. The i'th link transforms bytes from the i'th
// buffer chain.link[i].b at read offset chain.link[i].p to the i+1'th buffer
// chain.link[i+1].b at write offset chain.link[i+1].n, for i in [0, N).
type chain struct {
link []link
err error
// errStart is the index at which the error occurred plus 1. Processing
// errStart at this level at the next call to Transform. As long as
// errStart > 0, chain will not consume any more source bytes.
errStart int
}
func (c *chain) fatalError(errIndex int, err error) {
if i := errIndex + 1; i > c.errStart {
c.errStart = i
c.err = err
}
}
type link struct {
t Transformer
// b[p:n] holds the bytes to be transformed by t.
b []byte
p int
n int
}
func (l *link) src() []byte {
return l.b[l.p:l.n]
}
func (l *link) dst() []byte {
return l.b[l.n:]
}
// Chain returns a Transformer that applies t in sequence.
func Chain(t ...Transformer) Transformer {
if len(t) == 0 {
return nop{}
}
c := &chain{link: make([]link, len(t)+1)}
for i, tt := range t {
c.link[i].t = tt
}
// Allocate intermediate buffers.
b := make([][defaultBufSize]byte, len(t)-1)
for i := range b {
c.link[i+1].b = b[i][:]
}
return c
}
// Transform applies the transformers of c in sequence.
func (c *chain) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
// Set up src and dst in the chain.
srcL := &c.link[0]
dstL := &c.link[len(c.link)-1]
srcL.b, srcL.p, srcL.n = src, 0, len(src)
dstL.b, dstL.n = dst, 0
var lastFull, needProgress bool // for detecting progress
// i is the index of the next Transformer to apply, for i in [low, high].
// low is the lowest index for which c.link[low] may still produce bytes.
// high is the highest index for which c.link[high] has a Transformer.
// The error returned by Transform determines whether to increase or
// decrease i. We try to completely fill a buffer before converting it.
for low, i, high := c.errStart, c.errStart, len(c.link)-2; low <= i && i <= high; {
in, out := &c.link[i], &c.link[i+1]
nDst, nSrc, err0 := in.t.Transform(out.dst(), in.src(), atEOF && low == i)
out.n += nDst
in.p += nSrc
if i > 0 && in.p == in.n {
in.p, in.n = 0, 0
}
needProgress, lastFull = lastFull, false
switch err0 {
case ErrShortDst:
// Process the destination buffer next. Return if we are already
// at the high index.
if i == high {
return dstL.n, srcL.p, ErrShortDst
}
if out.n != 0 {
i++
// If the Transformer at the next index is not able to process any
// source bytes there is nothing that can be done to make progress
// and the bytes will remain unprocessed. lastFull is used to
// detect this and break out of the loop with a fatal error.
lastFull = true
continue
}
// The destination buffer was too small, but is completely empty.
// Return a fatal error as this transformation can never complete.
c.fatalError(i, errShortInternal)
case ErrShortSrc:
if i == 0 {
// Save ErrShortSrc in err. All other errors take precedence.
err = ErrShortSrc
break
}
// Source bytes were depleted before filling up the destination buffer.
// Verify we made some progress, move the remaining bytes to the errStart
// and try to get more source bytes.
if needProgress && nSrc == 0 || in.n-in.p == len(in.b) {
// There were not enough source bytes to proceed while the source
// buffer cannot hold any more bytes. Return a fatal error as this
// transformation can never complete.
c.fatalError(i, errShortInternal)
break
}
// in.b is an internal buffer and we can make progress.
in.p, in.n = 0, copy(in.b, in.src())
fallthrough
case nil:
// if i == low, we have depleted the bytes at index i or any lower levels.
// In that case we increase low and i. In all other cases we decrease i to
// fetch more bytes before proceeding to the next index.
if i > low {
i--
continue
}
default:
c.fatalError(i, err0)
}
// Exhausted level low or fatal error: increase low and continue
// to process the bytes accepted so far.
i++
low = i
}
// If c.errStart > 0, this means we found a fatal error. We will clear
// all upstream buffers. At this point, no more progress can be made
// downstream, as Transform would have bailed while handling ErrShortDst.
if c.errStart > 0 {
for i := 1; i < c.errStart; i++ {
c.link[i].p, c.link[i].n = 0, 0
}
err, c.errStart, c.err = c.err, 0, nil
}
return dstL.n, srcL.p, err
}
// RemoveFunc returns a Transformer that removes from the input all runes r for
// which f(r) is true. Illegal bytes in the input are replaced by RuneError.
func RemoveFunc(f func(r rune) bool) Transformer {
return removeF(f)
}
type removeF func(r rune) bool
// Transform implements the Transformer interface.
func (t removeF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for r, sz := rune(0), 0; len(src) > 0; src = src[sz:] {
if r = rune(src[0]); r < utf8.RuneSelf {
sz = 1
} else {
r, sz = utf8.DecodeRune(src)
if sz == 1 {
// Invalid rune.
if !atEOF && !utf8.FullRune(src[nSrc:]) {
err = ErrShortSrc
break
}
// We replace illegal bytes with RuneError. Not doing so might
// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
// The resulting byte sequence may subsequently contain runes
// for which t(r) is true that were passed unnoticed.
if !t(r) {
if nDst+3 > len(dst) {
err = ErrShortDst
break
}
nDst += copy(dst[nDst:], "\uFFFD")
}
nSrc++
continue
}
}
if !t(r) {
if nDst+sz > len(dst) {
err = ErrShortDst
break
}
nDst += copy(dst[nDst:], src[:sz])
}
nSrc += sz
}
return
}
// Bytes returns a new byte slice with the result of converting b using t.
// If any unrecoverable error occurs it returns nil.
func Bytes(t Transformer, b []byte) []byte {
out := make([]byte, len(b))
n := 0
for {
nDst, nSrc, err := t.Transform(out[n:], b, true)
n += nDst
if err == nil {
return out[:n]
} else if err != ErrShortDst {
return nil
}
b = b[nSrc:]
// Grow the destination buffer.
sz := len(out)
if sz <= 256 {
sz *= 2
} else {
sz += sz >> 1
}
out2 := make([]byte, sz)
copy(out2, out[:n])
out = out2
}
}

View File

@@ -0,0 +1,901 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package transform
import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"strconv"
"strings"
"testing"
"unicode/utf8"
)
type lowerCaseASCII struct{}
func (lowerCaseASCII) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
n := len(src)
if n > len(dst) {
n, err = len(dst), ErrShortDst
}
for i, c := range src[:n] {
if 'A' <= c && c <= 'Z' {
c += 'a' - 'A'
}
dst[i] = c
}
return n, n, err
}
var errYouMentionedX = errors.New("you mentioned X")
type dontMentionX struct{}
func (dontMentionX) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
n := len(src)
if n > len(dst) {
n, err = len(dst), ErrShortDst
}
for i, c := range src[:n] {
if c == 'X' {
return i, i, errYouMentionedX
}
dst[i] = c
}
return n, n, err
}
// doublerAtEOF is a strange Transformer that transforms "this" to "tthhiiss",
// but only if atEOF is true.
type doublerAtEOF struct{}
func (doublerAtEOF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if !atEOF {
return 0, 0, ErrShortSrc
}
for i, c := range src {
if 2*i+2 >= len(dst) {
return 2 * i, i, ErrShortDst
}
dst[2*i+0] = c
dst[2*i+1] = c
}
return 2 * len(src), len(src), nil
}
// rleDecode and rleEncode implement a toy run-length encoding: "aabbbbbbbbbb"
// is encoded as "2a10b". The decoding is assumed to not contain any numbers.
type rleDecode struct{}
func (rleDecode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
loop:
for len(src) > 0 {
n := 0
for i, c := range src {
if '0' <= c && c <= '9' {
n = 10*n + int(c-'0')
continue
}
if i == 0 {
return nDst, nSrc, errors.New("rleDecode: bad input")
}
if n > len(dst) {
return nDst, nSrc, ErrShortDst
}
for j := 0; j < n; j++ {
dst[j] = c
}
dst, src = dst[n:], src[i+1:]
nDst, nSrc = nDst+n, nSrc+i+1
continue loop
}
if atEOF {
return nDst, nSrc, errors.New("rleDecode: bad input")
}
return nDst, nSrc, ErrShortSrc
}
return nDst, nSrc, nil
}
type rleEncode struct {
// allowStutter means that "xxxxxxxx" can be encoded as "5x3x"
// instead of always as "8x".
allowStutter bool
}
func (e rleEncode) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
for len(src) > 0 {
n, c0 := len(src), src[0]
for i, c := range src[1:] {
if c != c0 {
n = i + 1
break
}
}
if n == len(src) && !atEOF && !e.allowStutter {
return nDst, nSrc, ErrShortSrc
}
s := strconv.Itoa(n)
if len(s) >= len(dst) {
return nDst, nSrc, ErrShortDst
}
copy(dst, s)
dst[len(s)] = c0
dst, src = dst[len(s)+1:], src[n:]
nDst, nSrc = nDst+len(s)+1, nSrc+n
}
return nDst, nSrc, nil
}
type testCase struct {
desc string
t Transformer
src string
dstSize int
srcSize int
ioSize int
wantStr string
wantErr error
wantIter int // number of iterations taken; 0 means we don't care.
}
func (t testCase) String() string {
return tstr(t.t) + "; " + t.desc
}
func tstr(t Transformer) string {
if stringer, ok := t.(fmt.Stringer); ok {
return stringer.String()
}
s := fmt.Sprintf("%T", t)
return s[1+strings.Index(s, "."):]
}
func (c chain) String() string {
buf := &bytes.Buffer{}
buf.WriteString("Chain(")
for i, l := range c.link[:len(c.link)-1] {
if i != 0 {
fmt.Fprint(buf, ", ")
}
buf.WriteString(tstr(l.t))
}
buf.WriteString(")")
return buf.String()
}
var testCases = []testCase{
{
desc: "basic",
t: lowerCaseASCII{},
src: "Hello WORLD.",
dstSize: 100,
srcSize: 100,
wantStr: "hello world.",
},
{
desc: "small dst",
t: lowerCaseASCII{},
src: "Hello WORLD.",
dstSize: 3,
srcSize: 100,
wantStr: "hello world.",
},
{
desc: "small src",
t: lowerCaseASCII{},
src: "Hello WORLD.",
dstSize: 100,
srcSize: 4,
wantStr: "hello world.",
},
{
desc: "small buffers",
t: lowerCaseASCII{},
src: "Hello WORLD.",
dstSize: 3,
srcSize: 4,
wantStr: "hello world.",
},
{
desc: "very small buffers",
t: lowerCaseASCII{},
src: "Hello WORLD.",
dstSize: 1,
srcSize: 1,
wantStr: "hello world.",
},
{
desc: "basic",
t: dontMentionX{},
src: "The First Rule of Transform Club: don't mention Mister X, ever.",
dstSize: 100,
srcSize: 100,
wantStr: "The First Rule of Transform Club: don't mention Mister ",
wantErr: errYouMentionedX,
},
{
desc: "small buffers",
t: dontMentionX{},
src: "The First Rule of Transform Club: don't mention Mister X, ever.",
dstSize: 10,
srcSize: 10,
wantStr: "The First Rule of Transform Club: don't mention Mister ",
wantErr: errYouMentionedX,
},
{
desc: "very small buffers",
t: dontMentionX{},
src: "The First Rule of Transform Club: don't mention Mister X, ever.",
dstSize: 1,
srcSize: 1,
wantStr: "The First Rule of Transform Club: don't mention Mister ",
wantErr: errYouMentionedX,
},
{
desc: "only transform at EOF",
t: doublerAtEOF{},
src: "this",
dstSize: 100,
srcSize: 100,
wantStr: "tthhiiss",
},
{
desc: "basic",
t: rleDecode{},
src: "1a2b3c10d11e0f1g",
dstSize: 100,
srcSize: 100,
wantStr: "abbcccddddddddddeeeeeeeeeeeg",
},
{
desc: "long",
t: rleDecode{},
src: "12a23b34c45d56e99z",
dstSize: 100,
srcSize: 100,
wantStr: strings.Repeat("a", 12) +
strings.Repeat("b", 23) +
strings.Repeat("c", 34) +
strings.Repeat("d", 45) +
strings.Repeat("e", 56) +
strings.Repeat("z", 99),
},
{
desc: "tight buffers",
t: rleDecode{},
src: "1a2b3c10d11e0f1g",
dstSize: 11,
srcSize: 3,
wantStr: "abbcccddddddddddeeeeeeeeeeeg",
},
{
desc: "short dst",
t: rleDecode{},
src: "1a2b3c10d11e0f1g",
dstSize: 10,
srcSize: 3,
wantStr: "abbcccdddddddddd",
wantErr: ErrShortDst,
},
{
desc: "short src",
t: rleDecode{},
src: "1a2b3c10d11e0f1g",
dstSize: 11,
srcSize: 2,
ioSize: 2,
wantStr: "abbccc",
wantErr: ErrShortSrc,
},
{
desc: "basic",
t: rleEncode{},
src: "abbcccddddddddddeeeeeeeeeeeg",
dstSize: 100,
srcSize: 100,
wantStr: "1a2b3c10d11e1g",
},
{
desc: "long",
t: rleEncode{},
src: strings.Repeat("a", 12) +
strings.Repeat("b", 23) +
strings.Repeat("c", 34) +
strings.Repeat("d", 45) +
strings.Repeat("e", 56) +
strings.Repeat("z", 99),
dstSize: 100,
srcSize: 100,
wantStr: "12a23b34c45d56e99z",
},
{
desc: "tight buffers",
t: rleEncode{},
src: "abbcccddddddddddeeeeeeeeeeeg",
dstSize: 3,
srcSize: 12,
wantStr: "1a2b3c10d11e1g",
},
{
desc: "short dst",
t: rleEncode{},
src: "abbcccddddddddddeeeeeeeeeeeg",
dstSize: 2,
srcSize: 12,
wantStr: "1a2b3c",
wantErr: ErrShortDst,
},
{
desc: "short src",
t: rleEncode{},
src: "abbcccddddddddddeeeeeeeeeeeg",
dstSize: 3,
srcSize: 11,
ioSize: 11,
wantStr: "1a2b3c10d",
wantErr: ErrShortSrc,
},
{
desc: "allowStutter = false",
t: rleEncode{allowStutter: false},
src: "aaaabbbbbbbbccccddddd",
dstSize: 10,
srcSize: 10,
wantStr: "4a8b4c5d",
},
{
desc: "allowStutter = true",
t: rleEncode{allowStutter: true},
src: "aaaabbbbbbbbccccddddd",
dstSize: 10,
srcSize: 10,
ioSize: 10,
wantStr: "4a6b2b4c4d1d",
},
}
func TestReader(t *testing.T) {
for _, tc := range testCases {
reset(tc.t)
r := NewReader(strings.NewReader(tc.src), tc.t)
// Differently sized dst and src buffers are not part of the
// exported API. We override them manually.
r.dst = make([]byte, tc.dstSize)
r.src = make([]byte, tc.srcSize)
got, err := ioutil.ReadAll(r)
str := string(got)
if str != tc.wantStr || err != tc.wantErr {
t.Errorf("%s:\ngot %q, %v\nwant %q, %v", tc, str, err, tc.wantStr, tc.wantErr)
}
}
}
func reset(t Transformer) {
var dst [128]byte
for err := ErrShortDst; err != nil; {
_, _, err = t.Transform(dst[:], nil, true)
}
}
func TestWriter(t *testing.T) {
tests := append(testCases, chainTests()...)
for _, tc := range tests {
sizes := []int{1, 2, 3, 4, 5, 10, 100, 1000}
if tc.ioSize > 0 {
sizes = []int{tc.ioSize}
}
for _, sz := range sizes {
bb := &bytes.Buffer{}
reset(tc.t)
w := NewWriter(bb, tc.t)
// Differently sized dst and src buffers are not part of the
// exported API. We override them manually.
w.dst = make([]byte, tc.dstSize)
w.src = make([]byte, tc.srcSize)
src := make([]byte, sz)
var err error
for b := tc.src; len(b) > 0 && err == nil; {
n := copy(src, b)
b = b[n:]
m := 0
m, err = w.Write(src[:n])
if m != n && err == nil {
t.Errorf("%s:%d: did not consume all bytes %d < %d", tc, sz, m, n)
}
}
if err == nil {
err = w.Close()
}
str := bb.String()
if str != tc.wantStr || err != tc.wantErr {
t.Errorf("%s:%d:\ngot %q, %v\nwant %q, %v", tc, sz, str, err, tc.wantStr, tc.wantErr)
}
}
}
}
func TestNop(t *testing.T) {
testCases := []struct {
str string
dstSize int
err error
}{
{"", 0, nil},
{"", 10, nil},
{"a", 0, ErrShortDst},
{"a", 1, nil},
{"a", 10, nil},
}
for i, tc := range testCases {
dst := make([]byte, tc.dstSize)
nDst, nSrc, err := Nop.Transform(dst, []byte(tc.str), true)
want := tc.str
if tc.dstSize < len(want) {
want = want[:tc.dstSize]
}
if got := string(dst[:nDst]); got != want || err != tc.err || nSrc != nDst {
t.Errorf("%d:\ngot %q, %d, %v\nwant %q, %d, %v", i, got, nSrc, err, want, nDst, tc.err)
}
}
}
func TestDiscard(t *testing.T) {
testCases := []struct {
str string
dstSize int
}{
{"", 0},
{"", 10},
{"a", 0},
{"ab", 10},
}
for i, tc := range testCases {
nDst, nSrc, err := Discard.Transform(make([]byte, tc.dstSize), []byte(tc.str), true)
if nDst != 0 || nSrc != len(tc.str) || err != nil {
t.Errorf("%d:\ngot %q, %d, %v\nwant 0, %d, nil", i, nDst, nSrc, err, len(tc.str))
}
}
}
// mkChain creates a Chain transformer. x must be alternating between transformer
// and bufSize, like T, (sz, T)*
func mkChain(x ...interface{}) *chain {
t := []Transformer{}
for i := 0; i < len(x); i += 2 {
t = append(t, x[i].(Transformer))
}
c := Chain(t...).(*chain)
for i, j := 1, 1; i < len(x); i, j = i+2, j+1 {
c.link[j].b = make([]byte, x[i].(int))
}
return c
}
func chainTests() []testCase {
return []testCase{
{
desc: "nil error",
t: mkChain(rleEncode{}, 100, lowerCaseASCII{}),
src: "ABB",
dstSize: 100,
srcSize: 100,
wantStr: "1a2b",
wantErr: nil,
wantIter: 1,
},
{
desc: "short dst buffer",
t: mkChain(lowerCaseASCII{}, 3, rleDecode{}),
src: "1a2b3c10d11e0f1g",
dstSize: 10,
srcSize: 3,
wantStr: "abbcccdddddddddd",
wantErr: ErrShortDst,
},
{
desc: "short internal dst buffer",
t: mkChain(lowerCaseASCII{}, 3, rleDecode{}, 10, Nop),
src: "1a2b3c10d11e0f1g",
dstSize: 100,
srcSize: 3,
wantStr: "abbcccdddddddddd",
wantErr: errShortInternal,
},
{
desc: "short internal dst buffer from input",
t: mkChain(rleDecode{}, 10, Nop),
src: "1a2b3c10d11e0f1g",
dstSize: 100,
srcSize: 3,
wantStr: "abbcccdddddddddd",
wantErr: errShortInternal,
},
{
desc: "empty short internal dst buffer",
t: mkChain(lowerCaseASCII{}, 3, rleDecode{}, 10, Nop),
src: "4a7b11e0f1g",
dstSize: 100,
srcSize: 3,
wantStr: "aaaabbbbbbb",
wantErr: errShortInternal,
},
{
desc: "empty short internal dst buffer from input",
t: mkChain(rleDecode{}, 10, Nop),
src: "4a7b11e0f1g",
dstSize: 100,
srcSize: 3,
wantStr: "aaaabbbbbbb",
wantErr: errShortInternal,
},
{
desc: "short internal src buffer after full dst buffer",
t: mkChain(Nop, 5, rleEncode{}, 10, Nop),
src: "cccccddddd",
dstSize: 100,
srcSize: 100,
wantStr: "",
wantErr: errShortInternal,
wantIter: 1,
},
{
desc: "short internal src buffer after short dst buffer; test lastFull",
t: mkChain(rleDecode{}, 5, rleEncode{}, 4, Nop),
src: "2a1b4c6d",
dstSize: 100,
srcSize: 100,
wantStr: "2a1b",
wantErr: errShortInternal,
},
{
desc: "short internal src buffer after successful complete fill",
t: mkChain(Nop, 3, rleDecode{}),
src: "123a4b",
dstSize: 4,
srcSize: 3,
wantStr: "",
wantErr: errShortInternal,
wantIter: 1,
},
{
desc: "short internal src buffer after short dst buffer; test lastFull",
t: mkChain(rleDecode{}, 5, rleEncode{}),
src: "2a1b4c6d",
dstSize: 4,
srcSize: 100,
wantStr: "2a1b",
wantErr: errShortInternal,
},
{
desc: "short src buffer",
t: mkChain(rleEncode{}, 5, Nop),
src: "abbcccddddeeeee",
dstSize: 4,
srcSize: 4,
ioSize: 4,
wantStr: "1a2b3c",
wantErr: ErrShortSrc,
},
{
desc: "process all in one go",
t: mkChain(rleEncode{}, 5, Nop),
src: "abbcccddddeeeeeffffff",
dstSize: 100,
srcSize: 100,
wantStr: "1a2b3c4d5e6f",
wantErr: nil,
wantIter: 1,
},
{
desc: "complete processing downstream after error",
t: mkChain(dontMentionX{}, 2, rleDecode{}, 5, Nop),
src: "3a4b5eX",
dstSize: 100,
srcSize: 100,
ioSize: 100,
wantStr: "aaabbbbeeeee",
wantErr: errYouMentionedX,
},
{
desc: "return downstream fatal errors first (followed by short dst)",
t: mkChain(dontMentionX{}, 8, rleDecode{}, 4, Nop),
src: "3a4b5eX",
dstSize: 100,
srcSize: 100,
ioSize: 100,
wantStr: "aaabbbb",
wantErr: errShortInternal,
},
{
desc: "return downstream fatal errors first (followed by short src)",
t: mkChain(dontMentionX{}, 5, Nop, 1, rleDecode{}),
src: "1a5bX",
dstSize: 100,
srcSize: 100,
ioSize: 100,
wantStr: "",
wantErr: errShortInternal,
},
{
desc: "short internal",
t: mkChain(Nop, 11, rleEncode{}, 3, Nop),
src: "abbcccddddddddddeeeeeeeeeeeg",
dstSize: 3,
srcSize: 100,
wantStr: "1a2b3c10d",
wantErr: errShortInternal,
},
}
}
func doTransform(tc testCase) (res string, iter int, err error) {
reset(tc.t)
dst := make([]byte, tc.dstSize)
out, in := make([]byte, 0, 2*len(tc.src)), []byte(tc.src)
for {
iter++
src, atEOF := in, true
if len(src) > tc.srcSize {
src, atEOF = src[:tc.srcSize], false
}
nDst, nSrc, err := tc.t.Transform(dst, src, atEOF)
out = append(out, dst[:nDst]...)
in = in[nSrc:]
switch {
case err == nil && len(in) != 0:
case err == ErrShortSrc && nSrc > 0:
case err == ErrShortDst && nDst > 0:
default:
return string(out), iter, err
}
}
}
func TestChain(t *testing.T) {
if c, ok := Chain().(nop); !ok {
t.Errorf("empty chain: %v; want Nop", c)
}
// Test Chain for a single Transformer.
for _, tc := range testCases {
tc.t = Chain(tc.t)
str, _, err := doTransform(tc)
if str != tc.wantStr || err != tc.wantErr {
t.Errorf("%s:\ngot %q, %v\nwant %q, %v", tc, str, err, tc.wantStr, tc.wantErr)
}
}
tests := chainTests()
sizes := []int{1, 2, 3, 4, 5, 7, 10, 100, 1000}
addTest := func(tc testCase, t *chain) {
if t.link[0].t != tc.t && tc.wantErr == ErrShortSrc {
tc.wantErr = errShortInternal
}
if t.link[len(t.link)-2].t != tc.t && tc.wantErr == ErrShortDst {
tc.wantErr = errShortInternal
}
tc.t = t
tests = append(tests, tc)
}
for _, tc := range testCases {
for _, sz := range sizes {
tt := tc
tt.dstSize = sz
addTest(tt, mkChain(tc.t, tc.dstSize, Nop))
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 2, Nop))
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop))
if sz >= tc.dstSize && (tc.wantErr != ErrShortDst || sz == tc.dstSize) {
addTest(tt, mkChain(Nop, tc.srcSize, tc.t))
addTest(tt, mkChain(Nop, 100, Nop, tc.srcSize, tc.t))
}
}
}
for _, tc := range testCases {
tt := tc
tt.dstSize = 1
tt.wantStr = ""
addTest(tt, mkChain(tc.t, tc.dstSize, Discard))
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Discard))
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, tc.dstSize, Discard))
}
for _, tc := range testCases {
tt := tc
tt.dstSize = 100
tt.wantStr = strings.Replace(tc.src, "0f", "", -1)
// Chain encoders and decoders.
if _, ok := tc.t.(rleEncode); ok && tc.wantErr == nil {
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 1000, rleDecode{}))
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, tc.dstSize, rleDecode{}))
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleDecode{}))
// decoding needs larger destinations
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, rleDecode{}, 100, Nop))
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleDecode{}, 100, Nop))
} else if _, ok := tc.t.(rleDecode); ok && tc.wantErr == nil {
// The internal buffer size may need to be the sum of the maximum segment
// size of the two encoders!
addTest(tt, mkChain(tc.t, 2*tc.dstSize, rleEncode{}))
addTest(tt, mkChain(tc.t, tc.dstSize, Nop, 101, rleEncode{}))
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 100, rleEncode{}))
addTest(tt, mkChain(Nop, tc.srcSize, tc.t, tc.dstSize, Nop, 200, rleEncode{}, 100, Nop))
}
}
for _, tc := range tests {
str, iter, err := doTransform(tc)
mi := tc.wantIter != 0 && tc.wantIter != iter
if str != tc.wantStr || err != tc.wantErr || mi {
t.Errorf("%s:\ngot iter:%d, %q, %v\nwant iter:%d, %q, %v", tc, iter, str, err, tc.wantIter, tc.wantStr, tc.wantErr)
}
break
}
}
func TestRemoveFunc(t *testing.T) {
filter := RemoveFunc(func(r rune) bool {
return strings.IndexRune("ab\u0300\u1234,", r) != -1
})
tests := []testCase{
{
src: ",",
wantStr: "",
},
{
src: "c",
wantStr: "c",
},
{
src: "\u2345",
wantStr: "\u2345",
},
{
src: "tschüß",
wantStr: "tschüß",
},
{
src: ",до,свидания,",
wantStr: "досвидания",
},
{
src: "a\xbd\xb2=\xbc ⌘",
wantStr: "\uFFFD\uFFFD=\uFFFD ⌘",
},
{
// If we didn't replace illegal bytes with RuneError, the result
// would be \u0300 or the code would need to be more complex.
src: "\xcc\u0300\x80",
wantStr: "\uFFFD\uFFFD",
},
{
src: "\xcc\u0300\x80",
dstSize: 3,
wantStr: "\uFFFD\uFFFD",
wantIter: 2,
},
{
src: "\u2345",
dstSize: 2,
wantStr: "",
wantErr: ErrShortDst,
},
{
src: "\xcc",
dstSize: 2,
wantStr: "",
wantErr: ErrShortDst,
},
{
src: "\u0300",
dstSize: 2,
srcSize: 1,
wantStr: "",
wantErr: ErrShortSrc,
},
{
t: RemoveFunc(func(r rune) bool {
return r == utf8.RuneError
}),
src: "\xcc\u0300\x80",
wantStr: "\u0300",
},
}
for _, tc := range tests {
tc.desc = tc.src
if tc.t == nil {
tc.t = filter
}
if tc.dstSize == 0 {
tc.dstSize = 100
}
if tc.srcSize == 0 {
tc.srcSize = 100
}
str, iter, err := doTransform(tc)
mi := tc.wantIter != 0 && tc.wantIter != iter
if str != tc.wantStr || err != tc.wantErr || mi {
t.Errorf("%+q:\ngot iter:%d, %+q, %v\nwant iter:%d, %+q, %v", tc.src, iter, str, err, tc.wantIter, tc.wantStr, tc.wantErr)
}
tc.src = str
idem, _, _ := doTransform(tc)
if str != idem {
t.Errorf("%+q: found %+q; want %+q", tc.src, idem, str)
}
}
}
func TestBytes(t *testing.T) {
for _, tt := range append(testCases, chainTests()...) {
if tt.desc == "allowStutter = true" {
// We don't have control over the buffer size, so we eliminate tests
// that depend on a specific buffer size being set.
continue
}
got := Bytes(tt.t, []byte(tt.src))
if tt.wantErr != nil {
if tt.wantErr != ErrShortDst && tt.wantErr != ErrShortSrc {
// Bytes should return nil for non-recoverable errors.
if g, w := (got == nil), (tt.wantErr != nil); g != w {
t.Errorf("%s:error: got %v; want %v", tt.desc, g, w)
}
}
// The output strings in the tests that expect an error will
// almost certainly not be the same as the result of Bytes.
continue
}
if string(got) != tt.wantStr {
t.Errorf("%s:string: got %q; want %q", tt.desc, got, tt.wantStr)
}
}
}

View File

@@ -0,0 +1,30 @@
# Copyright 2011 The Go Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
maketables: maketables.go triegen.go
go build $^
maketesttables: maketesttables.go triegen.go
go build $^
normregtest: normregtest.go
go build $^
tables: maketables
./maketables > tables.go
gofmt -w tables.go
trietesttables: maketesttables
./maketesttables > triedata_test.go
gofmt -w triedata_test.go
# Downloads from www.unicode.org, so not part
# of standard test scripts.
test: testtables regtest
testtables: maketables
./maketables -test > data_test.go && go test -tags=test
regtest: normregtest
./normregtest

View File

@@ -0,0 +1,514 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "unicode/utf8"
const (
maxNonStarters = 30
// The maximum number of characters needed for a buffer is
// maxNonStarters + 1 for the starter + 1 for the GCJ
maxBufferSize = maxNonStarters + 2
maxNFCExpansion = 3 // NFC(0x1D160)
maxNFKCExpansion = 18 // NFKC(0xFDFA)
maxByteBufferSize = utf8.UTFMax * maxBufferSize // 128
)
// ssState is used for reporting the segment state after inserting a rune.
// It is returned by streamSafe.next.
type ssState int
const (
// Indicates a rune was successfully added to the segment.
ssSuccess ssState = iota
// Indicates a rune starts a new segment and should not be added.
ssStarter
// Indicates a rune caused a segment overflow and a CGJ should be inserted.
ssOverflow
)
// streamSafe implements the policy of when a CGJ should be inserted.
type streamSafe uint8
// mkStreamSafe is a shorthand for declaring a streamSafe var and calling
// first on it.
func mkStreamSafe(p Properties) streamSafe {
return streamSafe(p.nTrailingNonStarters())
}
// first inserts the first rune of a segment.
func (ss *streamSafe) first(p Properties) {
if *ss != 0 {
panic("!= 0")
}
*ss = streamSafe(p.nTrailingNonStarters())
}
// insert returns a ssState value to indicate whether a rune represented by p
// can be inserted.
func (ss *streamSafe) next(p Properties) ssState {
if *ss > maxNonStarters {
panic("streamSafe was not reset")
}
n := p.nLeadingNonStarters()
if *ss += streamSafe(n); *ss > maxNonStarters {
*ss = 0
return ssOverflow
}
// The Stream-Safe Text Processing prescribes that the counting can stop
// as soon as a starter is encountered. However, there are some starters,
// like Jamo V and T, that can combine with other runes, leaving their
// successive non-starters appended to the previous, possibly causing an
// overflow. We will therefore consider any rune with a non-zero nLead to
// be a non-starter. Note that it always hold that if nLead > 0 then
// nLead == nTrail.
if n == 0 {
*ss = 0
return ssStarter
}
return ssSuccess
}
// backwards is used for checking for overflow and segment starts
// when traversing a string backwards. Users do not need to call first
// for the first rune. The state of the streamSafe retains the count of
// the non-starters loaded.
func (ss *streamSafe) backwards(p Properties) ssState {
if *ss > maxNonStarters {
panic("streamSafe was not reset")
}
c := *ss + streamSafe(p.nTrailingNonStarters())
if c > maxNonStarters {
return ssOverflow
}
*ss = c
if p.nLeadingNonStarters() == 0 {
return ssStarter
}
return ssSuccess
}
func (ss streamSafe) isMax() bool {
return ss == maxNonStarters
}
// GraphemeJoiner is inserted after maxNonStarters non-starter runes.
const GraphemeJoiner = "\u034F"
// reorderBuffer is used to normalize a single segment. Characters inserted with
// insert are decomposed and reordered based on CCC. The compose method can
// be used to recombine characters. Note that the byte buffer does not hold
// the UTF-8 characters in order. Only the rune array is maintained in sorted
// order. flush writes the resulting segment to a byte array.
type reorderBuffer struct {
rune [maxBufferSize]Properties // Per character info.
byte [maxByteBufferSize]byte // UTF-8 buffer. Referenced by runeInfo.pos.
nbyte uint8 // Number or bytes.
ss streamSafe // For limiting length of non-starter sequence.
nrune int // Number of runeInfos.
f formInfo
src input
nsrc int
tmpBytes input
out []byte
flushF func(*reorderBuffer) bool
}
func (rb *reorderBuffer) init(f Form, src []byte) {
rb.f = *formTable[f]
rb.src.setBytes(src)
rb.nsrc = len(src)
rb.ss = 0
}
func (rb *reorderBuffer) initString(f Form, src string) {
rb.f = *formTable[f]
rb.src.setString(src)
rb.nsrc = len(src)
rb.ss = 0
}
func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) {
rb.out = out
rb.flushF = f
}
// reset discards all characters from the buffer.
func (rb *reorderBuffer) reset() {
rb.nrune = 0
rb.nbyte = 0
rb.ss = 0
}
func (rb *reorderBuffer) doFlush() bool {
if rb.f.composing {
rb.compose()
}
res := rb.flushF(rb)
rb.reset()
return res
}
// appendFlush appends the normalized segment to rb.out.
func appendFlush(rb *reorderBuffer) bool {
for i := 0; i < rb.nrune; i++ {
start := rb.rune[i].pos
end := start + rb.rune[i].size
rb.out = append(rb.out, rb.byte[start:end]...)
}
return true
}
// flush appends the normalized segment to out and resets rb.
func (rb *reorderBuffer) flush(out []byte) []byte {
for i := 0; i < rb.nrune; i++ {
start := rb.rune[i].pos
end := start + rb.rune[i].size
out = append(out, rb.byte[start:end]...)
}
rb.reset()
return out
}
// flushCopy copies the normalized segment to buf and resets rb.
// It returns the number of bytes written to buf.
func (rb *reorderBuffer) flushCopy(buf []byte) int {
p := 0
for i := 0; i < rb.nrune; i++ {
runep := rb.rune[i]
p += copy(buf[p:], rb.byte[runep.pos:runep.pos+runep.size])
}
rb.reset()
return p
}
// insertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class.
// It returns false if the buffer is not large enough to hold the rune.
// It is used internally by insert and insertString only.
func (rb *reorderBuffer) insertOrdered(info Properties) {
n := rb.nrune
b := rb.rune[:]
cc := info.ccc
if cc > 0 {
// Find insertion position + move elements to make room.
for ; n > 0; n-- {
if b[n-1].ccc <= cc {
break
}
b[n] = b[n-1]
}
}
rb.nrune += 1
pos := uint8(rb.nbyte)
rb.nbyte += utf8.UTFMax
info.pos = pos
b[n] = info
}
// insertErr is an error code returned by insert. Using this type instead
// of error improves performance up to 20% for many of the benchmarks.
type insertErr int
const (
iSuccess insertErr = -iota
iShortDst
iShortSrc
)
// insertFlush inserts the given rune in the buffer ordered by CCC.
// If a decomposition with multiple segments are encountered, they leading
// ones are flushed.
// It returns a non-zero error code if the rune was not inserted.
func (rb *reorderBuffer) insertFlush(src input, i int, info Properties) insertErr {
if rune := src.hangul(i); rune != 0 {
rb.decomposeHangul(rune)
return iSuccess
}
if info.hasDecomposition() {
return rb.insertDecomposed(info.Decomposition())
}
rb.insertSingle(src, i, info)
return iSuccess
}
// insertUnsafe inserts the given rune in the buffer ordered by CCC.
// It is assumed there is sufficient space to hold the runes. It is the
// responsibility of the caller to ensure this. This can be done by checking
// the state returned by the streamSafe type.
func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) {
if rune := src.hangul(i); rune != 0 {
rb.decomposeHangul(rune)
}
if info.hasDecomposition() {
// TODO: inline.
rb.insertDecomposed(info.Decomposition())
} else {
rb.insertSingle(src, i, info)
}
}
// insertDecomposed inserts an entry in to the reorderBuffer for each rune
// in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes.
// It flushes the buffer on each new segment start.
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr {
rb.tmpBytes.setBytes(dcomp)
for i := 0; i < len(dcomp); {
info := rb.f.info(rb.tmpBytes, i)
if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() {
return iShortDst
}
i += copy(rb.byte[rb.nbyte:], dcomp[i:i+int(info.size)])
rb.insertOrdered(info)
}
return iSuccess
}
// insertSingle inserts an entry in the reorderBuffer for the rune at
// position i. info is the runeInfo for the rune at position i.
func (rb *reorderBuffer) insertSingle(src input, i int, info Properties) {
src.copySlice(rb.byte[rb.nbyte:], i, i+int(info.size))
rb.insertOrdered(info)
}
// insertCGJ inserts a Combining Grapheme Joiner (0x034f) into rb.
func (rb *reorderBuffer) insertCGJ() {
rb.insertSingle(input{str: GraphemeJoiner}, 0, Properties{size: uint8(len(GraphemeJoiner))})
}
// appendRune inserts a rune at the end of the buffer. It is used for Hangul.
func (rb *reorderBuffer) appendRune(r rune) {
bn := rb.nbyte
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.nbyte += utf8.UTFMax
rb.rune[rb.nrune] = Properties{pos: bn, size: uint8(sz)}
rb.nrune++
}
// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
func (rb *reorderBuffer) assignRune(pos int, r rune) {
bn := rb.rune[pos].pos
sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.rune[pos] = Properties{pos: bn, size: uint8(sz)}
}
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
func (rb *reorderBuffer) runeAt(n int) rune {
inf := rb.rune[n]
r, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
return r
}
// bytesAt returns the UTF-8 encoding of the rune at position n.
// It is used for Hangul and recomposition.
func (rb *reorderBuffer) bytesAt(n int) []byte {
inf := rb.rune[n]
return rb.byte[inf.pos : int(inf.pos)+int(inf.size)]
}
// For Hangul we combine algorithmically, instead of using tables.
const (
hangulBase = 0xAC00 // UTF-8(hangulBase) -> EA B0 80
hangulBase0 = 0xEA
hangulBase1 = 0xB0
hangulBase2 = 0x80
hangulEnd = hangulBase + jamoLVTCount // UTF-8(0xD7A4) -> ED 9E A4
hangulEnd0 = 0xED
hangulEnd1 = 0x9E
hangulEnd2 = 0xA4
jamoLBase = 0x1100 // UTF-8(jamoLBase) -> E1 84 00
jamoLBase0 = 0xE1
jamoLBase1 = 0x84
jamoLEnd = 0x1113
jamoVBase = 0x1161
jamoVEnd = 0x1176
jamoTBase = 0x11A7
jamoTEnd = 0x11C3
jamoTCount = 28
jamoVCount = 21
jamoVTCount = 21 * 28
jamoLVTCount = 19 * 21 * 28
)
const hangulUTF8Size = 3
func isHangul(b []byte) bool {
if len(b) < hangulUTF8Size {
return false
}
b0 := b[0]
if b0 < hangulBase0 {
return false
}
b1 := b[1]
switch {
case b0 == hangulBase0:
return b1 >= hangulBase1
case b0 < hangulEnd0:
return true
case b0 > hangulEnd0:
return false
case b1 < hangulEnd1:
return true
}
return b1 == hangulEnd1 && b[2] < hangulEnd2
}
func isHangulString(b string) bool {
if len(b) < hangulUTF8Size {
return false
}
b0 := b[0]
if b0 < hangulBase0 {
return false
}
b1 := b[1]
switch {
case b0 == hangulBase0:
return b1 >= hangulBase1
case b0 < hangulEnd0:
return true
case b0 > hangulEnd0:
return false
case b1 < hangulEnd1:
return true
}
return b1 == hangulEnd1 && b[2] < hangulEnd2
}
// Caller must ensure len(b) >= 2.
func isJamoVT(b []byte) bool {
// True if (rune & 0xff00) == jamoLBase
return b[0] == jamoLBase0 && (b[1]&0xFC) == jamoLBase1
}
func isHangulWithoutJamoT(b []byte) bool {
c, _ := utf8.DecodeRune(b)
c -= hangulBase
return c < jamoLVTCount && c%jamoTCount == 0
}
// decomposeHangul writes the decomposed Hangul to buf and returns the number
// of bytes written. len(buf) should be at least 9.
func decomposeHangul(buf []byte, r rune) int {
const JamoUTF8Len = 3
r -= hangulBase
x := r % jamoTCount
r /= jamoTCount
utf8.EncodeRune(buf, jamoLBase+r/jamoVCount)
utf8.EncodeRune(buf[JamoUTF8Len:], jamoVBase+r%jamoVCount)
if x != 0 {
utf8.EncodeRune(buf[2*JamoUTF8Len:], jamoTBase+x)
return 3 * JamoUTF8Len
}
return 2 * JamoUTF8Len
}
// decomposeHangul algorithmically decomposes a Hangul rune into
// its Jamo components.
// See http://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
func (rb *reorderBuffer) decomposeHangul(r rune) {
r -= hangulBase
x := r % jamoTCount
r /= jamoTCount
rb.appendRune(jamoLBase + r/jamoVCount)
rb.appendRune(jamoVBase + r%jamoVCount)
if x != 0 {
rb.appendRune(jamoTBase + x)
}
}
// combineHangul algorithmically combines Jamo character components into Hangul.
// See http://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
func (rb *reorderBuffer) combineHangul(s, i, k int) {
b := rb.rune[:]
bn := rb.nrune
for ; i < bn; i++ {
cccB := b[k-1].ccc
cccC := b[i].ccc
if cccB == 0 {
s = k - 1
}
if s != k-1 && cccB >= cccC {
// b[i] is blocked by greater-equal cccX below it
b[k] = b[i]
k++
} else {
l := rb.runeAt(s) // also used to compare to hangulBase
v := rb.runeAt(i) // also used to compare to jamoT
switch {
case jamoLBase <= l && l < jamoLEnd &&
jamoVBase <= v && v < jamoVEnd:
// 11xx plus 116x to LV
rb.assignRune(s, hangulBase+
(l-jamoLBase)*jamoVTCount+(v-jamoVBase)*jamoTCount)
case hangulBase <= l && l < hangulEnd &&
jamoTBase < v && v < jamoTEnd &&
((l-hangulBase)%jamoTCount) == 0:
// ACxx plus 11Ax to LVT
rb.assignRune(s, l+v-jamoTBase)
default:
b[k] = b[i]
k++
}
}
}
rb.nrune = k
}
// compose recombines the runes in the buffer.
// It should only be used to recompose a single segment, as it will not
// handle alternations between Hangul and non-Hangul characters correctly.
func (rb *reorderBuffer) compose() {
// UAX #15, section X5 , including Corrigendum #5
// "In any character sequence beginning with starter S, a character C is
// blocked from S if and only if there is some character B between S
// and C, and either B is a starter or it has the same or higher
// combining class as C."
bn := rb.nrune
if bn == 0 {
return
}
k := 1
b := rb.rune[:]
for s, i := 0, 1; i < bn; i++ {
if isJamoVT(rb.bytesAt(i)) {
// Redo from start in Hangul mode. Necessary to support
// U+320E..U+321E in NFKC mode.
rb.combineHangul(s, i, k)
return
}
ii := b[i]
// We can only use combineForward as a filter if we later
// get the info for the combined character. This is more
// expensive than using the filter. Using combinesBackward()
// is safe.
if ii.combinesBackward() {
cccB := b[k-1].ccc
cccC := ii.ccc
blocked := false // b[i] blocked by starter or greater or equal CCC?
if cccB == 0 {
s = k - 1
} else {
blocked = s != k-1 && cccB >= cccC
}
if !blocked {
combined := combine(rb.runeAt(s), rb.runeAt(i))
if combined != 0 {
rb.assignRune(s, combined)
continue
}
}
}
b[k] = b[i]
k++
}
rb.nrune = k
}

View File

@@ -0,0 +1,130 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "testing"
// TestCase is used for most tests.
type TestCase struct {
in []rune
out []rune
}
func runTests(t *testing.T, name string, fm Form, tests []TestCase) {
rb := reorderBuffer{}
rb.init(fm, nil)
for i, test := range tests {
rb.setFlusher(nil, appendFlush)
for j, rune := range test.in {
b := []byte(string(rune))
src := inputBytes(b)
info := rb.f.info(src, 0)
if j == 0 {
rb.ss.first(info)
} else {
rb.ss.next(info)
}
if rb.insertFlush(src, 0, info) < 0 {
t.Errorf("%s:%d: insert failed for rune %d", name, i, j)
}
}
rb.doFlush()
was := string(rb.out)
want := string(test.out)
if len(was) != len(want) {
t.Errorf("%s:%d: length = %d; want %d", name, i, len(was), len(want))
}
if was != want {
k, pfx := pidx(was, want)
t.Errorf("%s:%d: \nwas %s%+q; \nwant %s%+q", name, i, pfx, was[k:], pfx, want[k:])
}
}
}
func TestFlush(t *testing.T) {
const (
hello = "Hello "
world = "world!"
)
buf := make([]byte, maxByteBufferSize)
p := copy(buf, hello)
out := buf[p:]
rb := reorderBuffer{}
rb.initString(NFC, world)
if i := rb.flushCopy(out); i != 0 {
t.Errorf("wrote bytes on flush of empty buffer. (len(out) = %d)", i)
}
for i := range world {
// No need to set streamSafe values for this test.
rb.insertFlush(rb.src, i, rb.f.info(rb.src, i))
n := rb.flushCopy(out)
out = out[n:]
p += n
}
was := buf[:p]
want := hello + world
if string(was) != want {
t.Errorf(`output after flush was "%s"; want "%s"`, string(was), want)
}
if rb.nrune != 0 {
t.Errorf("non-null size of info buffer (rb.nrune == %d)", rb.nrune)
}
if rb.nbyte != 0 {
t.Errorf("non-null size of byte buffer (rb.nbyte == %d)", rb.nbyte)
}
}
var insertTests = []TestCase{
{[]rune{'a'}, []rune{'a'}},
{[]rune{0x300}, []rune{0x300}},
{[]rune{0x300, 0x316}, []rune{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220
{[]rune{0x316, 0x300}, []rune{0x316, 0x300}},
{[]rune{0x41, 0x316, 0x300}, []rune{0x41, 0x316, 0x300}},
{[]rune{0x41, 0x300, 0x316}, []rune{0x41, 0x316, 0x300}},
{[]rune{0x300, 0x316, 0x41}, []rune{0x316, 0x300, 0x41}},
{[]rune{0x41, 0x300, 0x40, 0x316}, []rune{0x41, 0x300, 0x40, 0x316}},
}
func TestInsert(t *testing.T) {
runTests(t, "TestInsert", NFD, insertTests)
}
var decompositionNFDTest = []TestCase{
{[]rune{0xC0}, []rune{0x41, 0x300}},
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
{[]rune{0x01C4}, []rune{0x01C4}},
{[]rune{0x320E}, []rune{0x320E}},
{[]rune("음ẻ과"), []rune{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}},
}
var decompositionNFKDTest = []TestCase{
{[]rune{0xC0}, []rune{0x41, 0x300}},
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
{[]rune{0x01C4}, []rune{0x44, 0x5A, 0x030C}},
{[]rune{0x320E}, []rune{0x28, 0x1100, 0x1161, 0x29}},
}
func TestDecomposition(t *testing.T) {
runTests(t, "TestDecompositionNFD", NFD, decompositionNFDTest)
runTests(t, "TestDecompositionNFKD", NFKD, decompositionNFKDTest)
}
var compositionTest = []TestCase{
{[]rune{0x41, 0x300}, []rune{0xC0}},
{[]rune{0x41, 0x316}, []rune{0x41, 0x316}},
{[]rune{0x41, 0x300, 0x35D}, []rune{0xC0, 0x35D}},
{[]rune{0x41, 0x316, 0x300}, []rune{0xC0, 0x316}},
// blocking starter
{[]rune{0x41, 0x316, 0x40, 0x300}, []rune{0x41, 0x316, 0x40, 0x300}},
{[]rune{0x1100, 0x1161}, []rune{0xAC00}},
// parenthesized Hangul, alternate between ASCII and Hangul.
{[]rune{0x28, 0x1100, 0x1161, 0x29}, []rune{0x28, 0xAC00, 0x29}},
}
func TestComposition(t *testing.T) {
runTests(t, "TestComposition", NFC, compositionTest)
}

View File

@@ -0,0 +1,82 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm_test
import (
"bytes"
"fmt"
"unicode/utf8"
"code.google.com/p/go.text/unicode/norm"
)
// EqualSimple uses a norm.Iter to compare two non-normalized
// strings for equivalence.
func EqualSimple(a, b string) bool {
var ia, ib norm.Iter
ia.InitString(norm.NFKD, a)
ib.InitString(norm.NFKD, b)
for !ia.Done() && !ib.Done() {
if !bytes.Equal(ia.Next(), ib.Next()) {
return false
}
}
return ia.Done() && ib.Done()
}
// FindPrefix finds the longest common prefix of ASCII characters
// of a and b.
func FindPrefix(a, b string) int {
i := 0
for ; i < len(a) && i < len(b) && a[i] < utf8.RuneSelf && a[i] == b[i]; i++ {
}
return i
}
// EqualOpt is like EqualSimple, but optimizes the special
// case for ASCII characters.
func EqualOpt(a, b string) bool {
n := FindPrefix(a, b)
a, b = a[n:], b[n:]
var ia, ib norm.Iter
ia.InitString(norm.NFKD, a)
ib.InitString(norm.NFKD, b)
for !ia.Done() && !ib.Done() {
if !bytes.Equal(ia.Next(), ib.Next()) {
return false
}
if n := int64(FindPrefix(a[ia.Pos():], b[ib.Pos():])); n != 0 {
ia.Seek(n, 1)
ib.Seek(n, 1)
}
}
return ia.Done() && ib.Done()
}
var compareTests = []struct{ a, b string }{
{"aaa", "aaa"},
{"aaa", "aab"},
{"a\u0300a", "\u00E0a"},
{"a\u0300\u0320b", "a\u0320\u0300b"},
{"\u1E0A\u0323", "\x44\u0323\u0307"},
// A character that decomposes into multiple segments
// spans several iterations.
{"\u3304", "\u30A4\u30CB\u30F3\u30AF\u3099"},
}
func ExampleIter() {
for i, t := range compareTests {
r0 := EqualSimple(t.a, t.b)
r1 := EqualOpt(t.a, t.b)
fmt.Printf("%d: %v %v\n", i, r0, r1)
}
// Output:
// 0: true true
// 1: false false
// 2: true true
// 3: true true
// 4: true true
// 5: true true
}

View File

@@ -0,0 +1,256 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
// This file contains Form-specific logic and wrappers for data in tables.go.
// Rune info is stored in a separate trie per composing form. A composing form
// and its corresponding decomposing form share the same trie. Each trie maps
// a rune to a uint16. The values take two forms. For v >= 0x8000:
// bits
// 15: 1 (inverse of NFD_QD bit of qcInfo)
// 13..7: qcInfo (see below). isYesD is always true (no decompostion).
// 6..0: ccc (compressed CCC value).
// For v < 0x8000, the respective rune has a decomposition and v is an index
// into a byte array of UTF-8 decomposition sequences and additional info and
// has the form:
// <header> <decomp_byte>* [<tccc> [<lccc>]]
// The header contains the number of bytes in the decomposition (excluding this
// length byte). The two most significant bits of this length byte correspond
// to bit 5 and 4 of qcInfo (see below). The byte sequence itself starts at v+1.
// The byte sequence is followed by a trailing and leading CCC if the values
// for these are not zero. The value of v determines which ccc are appended
// to the sequences. For v < firstCCC, there are none, for v >= firstCCC,
// the sequence is followed by a trailing ccc, and for v >= firstLeadingCC
// there is an additional leading ccc. The value of tccc itself is the
// trailing CCC shifted left 2 bits. The two least-significant bits of tccc
// are the number of trailing non-starters.
const (
qcInfoMask = 0x3F // to clear all but the relevant bits in a qcInfo
headerLenMask = 0x3F // extract the length value from the header byte
headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte
)
// Properties provides access to normalization properties of a rune.
type Properties struct {
pos uint8 // start position in reorderBuffer; used in composition.go
size uint8 // length of UTF-8 encoding of this rune
ccc uint8 // leading canonical combining class (ccc if not decomposition)
tccc uint8 // trailing canonical combining class (ccc if not decomposition)
nLead uint8 // number of leading non-starters.
flags qcInfo // quick check flags
index uint16
}
// functions dispatchable per form
type lookupFunc func(b input, i int) Properties
// formInfo holds Form-specific functions and tables.
type formInfo struct {
form Form
composing, compatibility bool // form type
info lookupFunc
nextMain iterFunc
}
var formTable []*formInfo
func init() {
formTable = make([]*formInfo, 4)
for i := range formTable {
f := &formInfo{}
formTable[i] = f
f.form = Form(i)
if Form(i) == NFKD || Form(i) == NFKC {
f.compatibility = true
f.info = lookupInfoNFKC
} else {
f.info = lookupInfoNFC
}
f.nextMain = nextDecomposed
if Form(i) == NFC || Form(i) == NFKC {
f.nextMain = nextComposed
f.composing = true
}
}
}
// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
// unexpected behavior for the user. For example, in NFD, there is a boundary
// after 'a'. However, 'a' might combine with modifiers, so from the application's
// perspective it is not a good boundary. We will therefore always use the
// boundaries for the combining variants.
// BoundaryBefore returns true if this rune starts a new segment and
// cannot combine with any rune on the left.
func (p Properties) BoundaryBefore() bool {
if p.ccc == 0 && !p.combinesBackward() {
return true
}
// We assume that the CCC of the first character in a decomposition
// is always non-zero if different from info.ccc and that we can return
// false at this point. This is verified by maketables.
return false
}
// BoundaryAfter returns true if runes cannot combine with or otherwise
// interact with this or previous runes.
func (p Properties) BoundaryAfter() bool {
// TODO: loosen these conditions.
return p.isInert()
}
// We pack quick check data in 4 bits:
// 5: Combines forward (0 == false, 1 == true)
// 4..3: NFC_QC Yes(00), No (10), or Maybe (11)
// 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition.
// 1..0: Number of trailing non-starters.
//
// When all 4 bits are zero, the character is inert, meaning it is never
// influenced by normalization.
type qcInfo uint8
func (p Properties) isYesC() bool { return p.flags&0x10 == 0 }
func (p Properties) isYesD() bool { return p.flags&0x4 == 0 }
func (p Properties) combinesForward() bool { return p.flags&0x20 != 0 }
func (p Properties) combinesBackward() bool { return p.flags&0x8 != 0 } // == isMaybe
func (p Properties) hasDecomposition() bool { return p.flags&0x4 != 0 } // == isNoD
func (p Properties) isInert() bool {
return p.flags&qcInfoMask == 0 && p.ccc == 0
}
func (p Properties) multiSegment() bool {
return p.index >= firstMulti && p.index < endMulti
}
func (p Properties) nLeadingNonStarters() uint8 {
return p.nLead
}
func (p Properties) nTrailingNonStarters() uint8 {
return uint8(p.flags & 0x03)
}
// Decomposition returns the decomposition for the underlying rune
// or nil if there is none.
func (p Properties) Decomposition() []byte {
// TODO: create the decomposition for Hangul?
if p.index == 0 {
return nil
}
i := p.index
n := decomps[i] & headerLenMask
i++
return decomps[i : i+uint16(n)]
}
// Size returns the length of UTF-8 encoding of the rune.
func (p Properties) Size() int {
return int(p.size)
}
// CCC returns the canonical combining class of the underlying rune.
func (p Properties) CCC() uint8 {
if p.index >= firstCCCZeroExcept {
return 0
}
return ccc[p.ccc]
}
// LeadCCC returns the CCC of the first rune in the decomposition.
// If there is no decomposition, LeadCCC equals CCC.
func (p Properties) LeadCCC() uint8 {
return ccc[p.ccc]
}
// TrailCCC returns the CCC of the last rune in the decomposition.
// If there is no decomposition, TrailCCC equals CCC.
func (p Properties) TrailCCC() uint8 {
return ccc[p.tccc]
}
// Recomposition
// We use 32-bit keys instead of 64-bit for the two codepoint keys.
// This clips off the bits of three entries, but we know this will not
// result in a collision. In the unlikely event that changes to
// UnicodeData.txt introduce collisions, the compiler will catch it.
// Note that the recomposition map for NFC and NFKC are identical.
// combine returns the combined rune or 0 if it doesn't exist.
func combine(a, b rune) rune {
key := uint32(uint16(a))<<16 + uint32(uint16(b))
return recompMap[key]
}
func lookupInfoNFC(b input, i int) Properties {
v, sz := b.charinfoNFC(i)
return compInfo(v, sz)
}
func lookupInfoNFKC(b input, i int) Properties {
v, sz := b.charinfoNFKC(i)
return compInfo(v, sz)
}
// Properties returns properties for the first rune in s.
func (f Form) Properties(s []byte) Properties {
if f == NFC || f == NFD {
return compInfo(nfcTrie.lookup(s))
}
return compInfo(nfkcTrie.lookup(s))
}
// PropertiesString returns properties for the first rune in s.
func (f Form) PropertiesString(s string) Properties {
if f == NFC || f == NFD {
return compInfo(nfcTrie.lookupString(s))
}
return compInfo(nfkcTrie.lookupString(s))
}
// compInfo converts the information contained in v and sz
// to a Properties. See the comment at the top of the file
// for more information on the format.
func compInfo(v uint16, sz int) Properties {
if v == 0 {
return Properties{size: uint8(sz)}
} else if v >= 0x8000 {
p := Properties{
size: uint8(sz),
ccc: uint8(v),
tccc: uint8(v),
flags: qcInfo(v >> 8),
}
if p.ccc > 0 || p.combinesBackward() {
p.nLead = uint8(p.flags & 0x3)
}
return p
}
// has decomposition
h := decomps[v]
f := (qcInfo(h&headerFlagsMask) >> 2) | 0x4
p := Properties{size: uint8(sz), flags: f, index: v}
if v >= firstCCC {
v += uint16(h&headerLenMask) + 1
c := decomps[v]
p.tccc = c >> 2
p.flags |= qcInfo(c & 0x3)
if v >= firstLeadingCCC {
p.nLead = c & 0x3
if v >= firstStarterWithNLead {
// We were tricked. Remove the decomposition.
p.flags &= 0x03
p.index = 0
return p
}
p.ccc = decomps[v+1]
}
}
return p
}

View File

@@ -0,0 +1,54 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build test
package norm
import "testing"
func TestProperties(t *testing.T) {
var d runeData
CK := [2]string{"C", "K"}
for k, r := 1, rune(0); r < 0x2ffff; r++ {
if k < len(testData) && r == testData[k].r {
d = testData[k]
k++
}
s := string(r)
for j, p := range []Properties{NFC.PropertiesString(s), NFKC.PropertiesString(s)} {
f := d.f[j]
if p.CCC() != d.ccc {
t.Errorf("%U: ccc(%s): was %d; want %d %X", r, CK[j], p.CCC(), d.ccc, p.index)
}
if p.isYesC() != (f.qc == Yes) {
t.Errorf("%U: YesC(%s): was %v; want %v", r, CK[j], p.isYesC(), f.qc == Yes)
}
if p.combinesBackward() != (f.qc == Maybe) {
t.Errorf("%U: combines backwards(%s): was %v; want %v", r, CK[j], p.combinesBackward(), f.qc == Maybe)
}
if p.nLeadingNonStarters() != d.nLead {
t.Errorf("%U: nLead(%s): was %d; want %d %#v %#v", r, CK[j], p.nLeadingNonStarters(), d.nLead, p, d)
}
if p.nTrailingNonStarters() != d.nTrail {
t.Errorf("%U: nTrail(%s): was %d; want %d %#v %#v", r, CK[j], p.nTrailingNonStarters(), d.nTrail, p, d)
}
if p.combinesForward() != f.combinesForward {
t.Errorf("%U: combines forward(%s): was %v; want %v %#v", r, CK[j], p.combinesForward(), f.combinesForward, p)
}
// Skip Hangul as it is algorithmically computed.
if r >= hangulBase && r < hangulEnd {
continue
}
if p.hasDecomposition() {
if has := f.decomposition != ""; !has {
t.Errorf("%U: hasDecomposition(%s): was %v; want %v", r, CK[j], p.hasDecomposition(), has)
}
if string(p.Decomposition()) != f.decomposition {
t.Errorf("%U: decomp(%s): was %+q; want %+q", r, CK[j], p.Decomposition(), f.decomposition)
}
}
}
}
}

View File

@@ -0,0 +1,105 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "unicode/utf8"
type input struct {
str string
bytes []byte
}
func inputBytes(str []byte) input {
return input{bytes: str}
}
func inputString(str string) input {
return input{str: str}
}
func (in *input) setBytes(str []byte) {
in.str = ""
in.bytes = str
}
func (in *input) setString(str string) {
in.str = str
in.bytes = nil
}
func (in *input) _byte(p int) byte {
if in.bytes == nil {
return in.str[p]
}
return in.bytes[p]
}
func (in *input) skipASCII(p, max int) int {
if in.bytes == nil {
for ; p < max && in.str[p] < utf8.RuneSelf; p++ {
}
} else {
for ; p < max && in.bytes[p] < utf8.RuneSelf; p++ {
}
}
return p
}
func (in *input) skipContinuationBytes(p int) int {
if in.bytes == nil {
for ; p < len(in.str) && !utf8.RuneStart(in.str[p]); p++ {
}
} else {
for ; p < len(in.bytes) && !utf8.RuneStart(in.bytes[p]); p++ {
}
}
return p
}
func (in *input) appendSlice(buf []byte, b, e int) []byte {
if in.bytes != nil {
return append(buf, in.bytes[b:e]...)
}
for i := b; i < e; i++ {
buf = append(buf, in.str[i])
}
return buf
}
func (in *input) copySlice(buf []byte, b, e int) int {
if in.bytes == nil {
return copy(buf, in.str[b:e])
}
return copy(buf, in.bytes[b:e])
}
func (in *input) charinfoNFC(p int) (uint16, int) {
if in.bytes == nil {
return nfcTrie.lookupString(in.str[p:])
}
return nfcTrie.lookup(in.bytes[p:])
}
func (in *input) charinfoNFKC(p int) (uint16, int) {
if in.bytes == nil {
return nfkcTrie.lookupString(in.str[p:])
}
return nfkcTrie.lookup(in.bytes[p:])
}
func (in *input) hangul(p int) (r rune) {
if in.bytes == nil {
if !isHangulString(in.str[p:]) {
return 0
}
r, _ = utf8.DecodeRuneInString(in.str[p:])
} else {
if !isHangul(in.bytes[p:]) {
return 0
}
r, _ = utf8.DecodeRune(in.bytes[p:])
}
return r
}

View File

@@ -0,0 +1,448 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"fmt"
"unicode/utf8"
)
const MaxSegmentSize = maxByteBufferSize
// An Iter iterates over a string or byte slice, while normalizing it
// to a given Form.
type Iter struct {
rb reorderBuffer
buf [maxByteBufferSize]byte
info Properties // first character saved from previous iteration
next iterFunc // implementation of next depends on form
asciiF iterFunc
p int // current position in input source
multiSeg []byte // remainder of multi-segment decomposition
}
type iterFunc func(*Iter) []byte
// Init initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) Init(f Form, src []byte) {
i.p = 0
if len(src) == 0 {
i.setDone()
i.rb.nsrc = 0
return
}
i.multiSeg = nil
i.rb.init(f, src)
i.next = i.rb.f.nextMain
i.asciiF = nextASCIIBytes
i.info = i.rb.f.info(i.rb.src, i.p)
}
// InitString initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) InitString(f Form, src string) {
i.p = 0
if len(src) == 0 {
i.setDone()
i.rb.nsrc = 0
return
}
i.multiSeg = nil
i.rb.initString(f, src)
i.next = i.rb.f.nextMain
i.asciiF = nextASCIIString
i.info = i.rb.f.info(i.rb.src, i.p)
}
// Seek sets the segment to be returned by the next call to Next to start
// at position p. It is the responsibility of the caller to set p to the
// start of a UTF8 rune.
func (i *Iter) Seek(offset int64, whence int) (int64, error) {
var abs int64
switch whence {
case 0:
abs = offset
case 1:
abs = int64(i.p) + offset
case 2:
abs = int64(i.rb.nsrc) + offset
default:
return 0, fmt.Errorf("norm: invalid whence")
}
if abs < 0 {
return 0, fmt.Errorf("norm: negative position")
}
if int(abs) >= i.rb.nsrc {
i.setDone()
return int64(i.p), nil
}
i.p = int(abs)
i.multiSeg = nil
i.next = i.rb.f.nextMain
i.info = i.rb.f.info(i.rb.src, i.p)
return abs, nil
}
// returnSlice returns a slice of the underlying input type as a byte slice.
// If the underlying is of type []byte, it will simply return a slice.
// If the underlying is of type string, it will copy the slice to the buffer
// and return that.
func (i *Iter) returnSlice(a, b int) []byte {
if i.rb.src.bytes == nil {
return i.buf[:copy(i.buf[:], i.rb.src.str[a:b])]
}
return i.rb.src.bytes[a:b]
}
// Pos returns the byte position at which the next call to Next will commence processing.
func (i *Iter) Pos() int {
return i.p
}
func (i *Iter) setDone() {
i.next = nextDone
i.p = i.rb.nsrc
}
// Done returns true if there is no more input to process.
func (i *Iter) Done() bool {
return i.p >= i.rb.nsrc
}
// Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input.
// For any input a and b for which f(a) == f(b), subsequent calls
// to Next will return the same segments.
// Modifying runes are grouped together with the preceding starter, if such a starter exists.
// Although not guaranteed, n will typically be the smallest possible n.
func (i *Iter) Next() []byte {
return i.next(i)
}
func nextASCIIBytes(i *Iter) []byte {
p := i.p + 1
if p >= i.rb.nsrc {
i.setDone()
return i.rb.src.bytes[i.p:p]
}
if i.rb.src.bytes[p] < utf8.RuneSelf {
p0 := i.p
i.p = p
return i.rb.src.bytes[p0:p]
}
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
}
func nextASCIIString(i *Iter) []byte {
p := i.p + 1
if p >= i.rb.nsrc {
i.buf[0] = i.rb.src.str[i.p]
i.setDone()
return i.buf[:1]
}
if i.rb.src.str[p] < utf8.RuneSelf {
i.buf[0] = i.rb.src.str[i.p]
i.p = p
return i.buf[:1]
}
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
}
func nextHangul(i *Iter) []byte {
p := i.p
next := p + hangulUTF8Size
if next >= i.rb.nsrc {
i.setDone()
} else if i.rb.src.hangul(next) == 0 {
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
}
i.p = next
return i.buf[:decomposeHangul(i.buf[:], i.rb.src.hangul(p))]
}
func nextDone(i *Iter) []byte {
return nil
}
// nextMulti is used for iterating over multi-segment decompositions
// for decomposing normal forms.
func nextMulti(i *Iter) []byte {
j := 0
d := i.multiSeg
// skip first rune
for j = 1; j < len(d) && !utf8.RuneStart(d[j]); j++ {
}
for j < len(d) {
info := i.rb.f.info(input{bytes: d}, j)
if info.BoundaryBefore() {
i.multiSeg = d[j:]
return d[:j]
}
j += int(info.size)
}
// treat last segment as normal decomposition
i.next = i.rb.f.nextMain
return i.next(i)
}
// nextMultiNorm is used for iterating over multi-segment decompositions
// for composing normal forms.
func nextMultiNorm(i *Iter) []byte {
j := 0
d := i.multiSeg
for j < len(d) {
info := i.rb.f.info(input{bytes: d}, j)
if info.BoundaryBefore() {
i.rb.compose()
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
i.rb.ss.first(info)
i.rb.insertUnsafe(input{bytes: d}, j, info)
i.multiSeg = d[j+int(info.size):]
return seg
}
i.rb.ss.next(info)
i.rb.insertUnsafe(input{bytes: d}, j, info)
j += int(info.size)
}
i.multiSeg = nil
i.next = nextComposed
return doNormComposed(i)
}
// nextDecomposed is the implementation of Next for forms NFD and NFKD.
func nextDecomposed(i *Iter) (next []byte) {
outp := 0
inCopyStart, outCopyStart := i.p, 0
ss := mkStreamSafe(i.info)
for {
if sz := int(i.info.size); sz <= 1 {
p := i.p
i.p++ // ASCII or illegal byte. Either way, advance by 1.
if i.p >= i.rb.nsrc {
i.setDone()
return i.returnSlice(p, i.p)
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
i.next = i.asciiF
return i.returnSlice(p, i.p)
}
outp++
} else if d := i.info.Decomposition(); d != nil {
// Note: If leading CCC != 0, then len(d) == 2 and last is also non-zero.
// Case 1: there is a leftover to copy. In this case the decomposition
// must begin with a modifier and should always be appended.
// Case 2: no leftover. Simply return d if followed by a ccc == 0 value.
p := outp + len(d)
if outp > 0 {
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
if p > len(i.buf) {
return i.buf[:outp]
}
} else if i.info.multiSegment() {
// outp must be 0 as multi-segment decompositions always
// start a new segment.
if i.multiSeg == nil {
i.multiSeg = d
i.next = nextMulti
return nextMulti(i)
}
// We are in the last segment. Treat as normal decomposition.
d = i.multiSeg
i.multiSeg = nil
p = len(d)
}
prevCC := i.info.tccc
if i.p += sz; i.p >= i.rb.nsrc {
i.setDone()
i.info = Properties{} // Force BoundaryBefore to succeed.
} else {
i.info = i.rb.f.info(i.rb.src, i.p)
}
switch ss.next(i.info) {
case ssOverflow:
i.next = nextCGJDecompose
fallthrough
case ssStarter:
if outp > 0 {
copy(i.buf[outp:], d)
return i.buf[:p]
}
return d
}
copy(i.buf[outp:], d)
outp = p
inCopyStart, outCopyStart = i.p, outp
if i.info.ccc < prevCC {
goto doNorm
}
continue
} else if r := i.rb.src.hangul(i.p); r != 0 {
outp = decomposeHangul(i.buf[:], r)
i.p += hangulUTF8Size
inCopyStart, outCopyStart = i.p, outp
if i.p >= i.rb.nsrc {
i.setDone()
break
} else if i.rb.src.hangul(i.p) != 0 {
i.next = nextHangul
return i.buf[:outp]
}
} else {
p := outp + sz
if p > len(i.buf) {
break
}
outp = p
i.p += sz
}
if i.p >= i.rb.nsrc {
i.setDone()
break
}
prevCC := i.info.tccc
i.info = i.rb.f.info(i.rb.src, i.p)
if v := ss.next(i.info); v == ssStarter {
break
} else if v == ssOverflow {
i.next = nextCGJDecompose
break
}
if i.info.ccc < prevCC {
goto doNorm
}
}
if outCopyStart == 0 {
return i.returnSlice(inCopyStart, i.p)
} else if inCopyStart < i.p {
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
}
return i.buf[:outp]
doNorm:
// Insert what we have decomposed so far in the reorderBuffer.
// As we will only reorder, there will always be enough room.
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
i.rb.insertDecomposed(i.buf[0:outp])
return doNormDecomposed(i)
}
func doNormDecomposed(i *Iter) []byte {
for {
if s := i.rb.ss.next(i.info); s == ssOverflow {
i.next = nextCGJDecompose
break
}
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
i.setDone()
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
if i.info.ccc == 0 {
break
}
}
// new segment or too many combining characters: exit normalization
return i.buf[:i.rb.flushCopy(i.buf[:])]
}
func nextCGJDecompose(i *Iter) []byte {
i.rb.ss = 0
i.rb.insertCGJ()
i.next = nextDecomposed
buf := doNormDecomposed(i)
return buf
}
// nextComposed is the implementation of Next for forms NFC and NFKC.
func nextComposed(i *Iter) []byte {
outp, startp := 0, i.p
var prevCC uint8
ss := mkStreamSafe(i.info)
for {
if !i.info.isYesC() {
goto doNorm
}
prevCC = i.info.tccc
sz := int(i.info.size)
if sz == 0 {
sz = 1 // illegal rune: copy byte-by-byte
}
p := outp + sz
if p > len(i.buf) {
break
}
outp = p
i.p += sz
if i.p >= i.rb.nsrc {
i.setDone()
break
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
i.next = i.asciiF
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
if v := ss.next(i.info); v == ssStarter {
break
} else if v == ssOverflow {
i.next = nextCGJCompose
break
}
if i.info.ccc < prevCC {
goto doNorm
}
}
return i.returnSlice(startp, i.p)
doNorm:
i.p = startp
i.info = i.rb.f.info(i.rb.src, i.p)
if i.info.multiSegment() {
d := i.info.Decomposition()
info := i.rb.f.info(input{bytes: d}, 0)
i.rb.insertUnsafe(input{bytes: d}, 0, info)
i.multiSeg = d[int(info.size):]
i.next = nextMultiNorm
return nextMultiNorm(i)
}
i.rb.ss.first(i.info)
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
return doNormComposed(i)
}
func doNormComposed(i *Iter) []byte {
// First rune should already be inserted.
for {
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
i.setDone()
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
if s := i.rb.ss.next(i.info); s == ssStarter {
break
} else if s == ssOverflow {
i.next = nextCGJCompose
break
}
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
}
i.rb.compose()
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
return seg
}
func nextCGJCompose(i *Iter) []byte {
i.rb.ss = 0 // instead of first
i.rb.insertCGJ()
i.next = nextComposed
// Note that we treat any rune with nLeadingNonStarters > 0 as a non-starter,
// even if they are not. This is particularly dubious for U+FF9E and UFF9A.
// If we ever change that, insert a check here.
i.rb.ss.first(i.info)
i.rb.insertUnsafe(i.rb.src, i.p, i.info)
return doNormComposed(i)
}

View File

@@ -0,0 +1,98 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"strings"
"testing"
)
func doIterNorm(f Form, s string) []byte {
acc := []byte{}
i := Iter{}
i.InitString(f, s)
for !i.Done() {
acc = append(acc, i.Next()...)
}
return acc
}
func TestIterNext(t *testing.T) {
runNormTests(t, "IterNext", func(f Form, out []byte, s string) []byte {
return doIterNorm(f, string(append(out, s...)))
})
}
type SegmentTest struct {
in string
out []string
}
var segmentTests = []SegmentTest{
{"\u1E0A\u0323a", []string{"\x44\u0323\u0307", "a", ""}},
{rep('a', segSize), append(strings.Split(rep('a', segSize), ""), "")},
{rep('a', segSize+2), append(strings.Split(rep('a', segSize+2), ""), "")},
{rep('a', segSize) + "\u0300aa",
append(strings.Split(rep('a', segSize-1), ""), "a\u0300", "a", "a", "")},
// U+0f73 is NOT treated as a starter as it is a modifier
{"a" + grave(29) + "\u0f73", []string{"a" + grave(29), cgj + "\u0f73"}},
{"a\u0f73", []string{"a\u0f73"}},
// U+ff9e is treated as a non-starter.
// TODO: should we? Note that this will only affect iteration, as whether
// or not we do so does not affect the normalization output and will either
// way result in consistent iteration output.
{"a" + grave(30) + "\uff9e", []string{"a" + grave(30), cgj + "\uff9e"}},
{"a\uff9e", []string{"a\uff9e"}},
}
var segmentTestsK = []SegmentTest{
{"\u3332", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u3099", ""}},
// last segment of multi-segment decomposition needs normalization
{"\u3332\u093C", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u093C\u3099", ""}},
{"\u320E", []string{"\x28", "\uAC00", "\x29"}},
// last segment should be copied to start of buffer.
{"\ufdfa", []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645", ""}},
{"\ufdfa" + grave(30), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), ""}},
{"\uFDFA" + grave(64), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), cgj + grave(30), cgj + grave(4), ""}},
// Hangul and Jamo are grouped togeter.
{"\uAC00", []string{"\u1100\u1161", ""}},
{"\uAC01", []string{"\u1100\u1161\u11A8", ""}},
{"\u1100\u1161", []string{"\u1100\u1161", ""}},
}
// Note that, by design, segmentation is equal for composing and decomposing forms.
func TestIterSegmentation(t *testing.T) {
segmentTest(t, "SegmentTestD", NFD, segmentTests)
segmentTest(t, "SegmentTestC", NFC, segmentTests)
segmentTest(t, "SegmentTestKD", NFKD, segmentTestsK)
segmentTest(t, "SegmentTestKC", NFKC, segmentTestsK)
}
func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) {
iter := Iter{}
for i, tt := range tests {
iter.InitString(f, tt.in)
for j, seg := range tt.out {
if seg == "" {
if !iter.Done() {
res := string(iter.Next())
t.Errorf(`%s:%d:%d: expected Done()==true, found segment %+q`, name, i, j, res)
}
continue
}
if iter.Done() {
t.Errorf("%s:%d:%d: Done()==true, want false", name, i, j)
}
seg = f.String(seg)
if res := string(iter.Next()); res != seg {
t.Errorf(`%s:%d:%d" segment was %+q (%d); want %+q (%d)`, name, i, j, pc(res), len(res), pc(seg), len(seg))
}
}
}
}

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,45 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Generate test data for trie code.
package main
import (
"fmt"
)
func main() {
printTestTables()
}
// We take the smallest, largest and an arbitrary value for each
// of the UTF-8 sequence lengths.
var testRunes = []rune{
0x01, 0x0C, 0x7F, // 1-byte sequences
0x80, 0x100, 0x7FF, // 2-byte sequences
0x800, 0x999, 0xFFFF, // 3-byte sequences
0x10000, 0x10101, 0x10FFFF, // 4-byte sequences
0x200, 0x201, 0x202, 0x210, 0x215, // five entries in one sparse block
}
const fileHeader = `// Generated by running
// maketesttables
// DO NOT EDIT
package norm
`
func printTestTables() {
fmt.Print(fileHeader)
fmt.Printf("var testRunes = %#v\n\n", testRunes)
t := newNode()
for i, r := range testRunes {
t.insert(r, uint16(i))
}
t.printTables("testdata")
}

View File

@@ -0,0 +1,14 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm_test
import (
"testing"
)
func TestPlaceHolder(t *testing.T) {
// Does nothing, just allows the Makefile to be canonical
// while waiting for the package itself to be written.
}

View File

@@ -0,0 +1,524 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package norm contains types and functions for normalizing Unicode strings.
package norm
import "unicode/utf8"
// A Form denotes a canonical representation of Unicode code points.
// The Unicode-defined normalization and equivalence forms are:
//
// NFC Unicode Normalization Form C
// NFD Unicode Normalization Form D
// NFKC Unicode Normalization Form KC
// NFKD Unicode Normalization Form KD
//
// For a Form f, this documentation uses the notation f(x) to mean
// the bytes or string x converted to the given form.
// A position n in x is called a boundary if conversion to the form can
// proceed independently on both sides:
// f(x) == append(f(x[0:n]), f(x[n:])...)
//
// References: http://unicode.org/reports/tr15/ and
// http://unicode.org/notes/tn5/.
type Form int
const (
NFC Form = iota
NFD
NFKC
NFKD
)
// Bytes returns f(b). May return b if f(b) = b.
func (f Form) Bytes(b []byte) []byte {
src := inputBytes(b)
ft := formTable[f]
n, ok := ft.quickSpan(src, 0, len(b), true)
if ok {
return b
}
out := make([]byte, n, len(b))
copy(out, b[0:n])
rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush}
return doAppendInner(&rb, n)
}
// String returns f(s).
func (f Form) String(s string) string {
src := inputString(s)
ft := formTable[f]
n, ok := ft.quickSpan(src, 0, len(s), true)
if ok {
return s
}
out := make([]byte, n, len(s))
copy(out, s[0:n])
rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush}
return string(doAppendInner(&rb, n))
}
// IsNormal returns true if b == f(b).
func (f Form) IsNormal(b []byte) bool {
src := inputBytes(b)
ft := formTable[f]
bp, ok := ft.quickSpan(src, 0, len(b), true)
if ok {
return true
}
rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)}
rb.setFlusher(nil, cmpNormalBytes)
for bp < len(b) {
rb.out = b[bp:]
if bp = decomposeSegment(&rb, bp, true); bp < 0 {
return false
}
bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true)
}
return true
}
func cmpNormalBytes(rb *reorderBuffer) bool {
b := rb.out
for i := 0; i < rb.nrune; i++ {
info := rb.rune[i]
if int(info.size) > len(b) {
return false
}
p := info.pos
pe := p + info.size
for ; p < pe; p++ {
if b[0] != rb.byte[p] {
return false
}
b = b[1:]
}
}
return true
}
// IsNormalString returns true if s == f(s).
func (f Form) IsNormalString(s string) bool {
src := inputString(s)
ft := formTable[f]
bp, ok := ft.quickSpan(src, 0, len(s), true)
if ok {
return true
}
rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)}
rb.setFlusher(nil, func(rb *reorderBuffer) bool {
for i := 0; i < rb.nrune; i++ {
info := rb.rune[i]
if bp+int(info.size) > len(s) {
return false
}
p := info.pos
pe := p + info.size
for ; p < pe; p++ {
if s[bp] != rb.byte[p] {
return false
}
bp++
}
}
return true
})
for bp < len(s) {
if bp = decomposeSegment(&rb, bp, true); bp < 0 {
return false
}
bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true)
}
return true
}
// patchTail fixes a case where a rune may be incorrectly normalized
// if it is followed by illegal continuation bytes. It returns the
// patched buffer and whether the decomposition is still in progress.
func patchTail(rb *reorderBuffer) bool {
info, p := lastRuneStart(&rb.f, rb.out)
if p == -1 || info.size == 0 {
return true
}
end := p + int(info.size)
extra := len(rb.out) - end
if extra > 0 {
// Potentially allocating memory. However, this only
// happens with ill-formed UTF-8.
x := make([]byte, 0)
x = append(x, rb.out[len(rb.out)-extra:]...)
rb.out = rb.out[:end]
decomposeToLastBoundary(rb)
rb.doFlush()
rb.out = append(rb.out, x...)
return false
}
buf := rb.out[p:]
rb.out = rb.out[:p]
decomposeToLastBoundary(rb)
if s := rb.ss.next(info); s == ssStarter {
rb.doFlush()
rb.ss.first(info)
} else if s == ssOverflow {
rb.doFlush()
rb.insertCGJ()
rb.ss = 0
}
rb.insertUnsafe(inputBytes(buf), 0, info)
return true
}
func appendQuick(rb *reorderBuffer, i int) int {
if rb.nsrc == i {
return i
}
end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true)
rb.out = rb.src.appendSlice(rb.out, i, end)
return end
}
// Append returns f(append(out, b...)).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) Append(out []byte, src ...byte) []byte {
return f.doAppend(out, inputBytes(src), len(src))
}
func (f Form) doAppend(out []byte, src input, n int) []byte {
if n == 0 {
return out
}
ft := formTable[f]
// Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer.
if len(out) == 0 {
p, _ := ft.quickSpan(src, 0, n, true)
out = src.appendSlice(out, 0, p)
if p == n {
return out
}
rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush}
return doAppendInner(&rb, p)
}
rb := reorderBuffer{f: *ft, src: src, nsrc: n}
return doAppend(&rb, out, 0)
}
func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
rb.setFlusher(out, appendFlush)
src, n := rb.src, rb.nsrc
doMerge := len(out) > 0
if q := src.skipContinuationBytes(p); q > p {
// Move leading non-starters to destination.
rb.out = src.appendSlice(rb.out, p, q)
p = q
doMerge = patchTail(rb)
}
fd := &rb.f
if doMerge {
var info Properties
if p < n {
info = fd.info(src, p)
if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 {
if p == 0 {
decomposeToLastBoundary(rb)
}
p = decomposeSegment(rb, p, true)
}
}
if info.size == 0 {
rb.doFlush()
// Append incomplete UTF-8 encoding.
return src.appendSlice(rb.out, p, n)
}
if rb.nrune > 0 {
return doAppendInner(rb, p)
}
}
p = appendQuick(rb, p)
return doAppendInner(rb, p)
}
func doAppendInner(rb *reorderBuffer, p int) []byte {
for n := rb.nsrc; p < n; {
p = decomposeSegment(rb, p, true)
p = appendQuick(rb, p)
}
return rb.out
}
// AppendString returns f(append(out, []byte(s))).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) AppendString(out []byte, src string) []byte {
return f.doAppend(out, inputString(src), len(src))
}
// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpan(b []byte) int {
n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true)
return n
}
// quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and
// whether any non-normalized parts were found. If atEOF is false, n will
// not point past the last segment if this segment might be become
// non-normalized by appending other runes.
func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) {
var lastCC uint8
ss := streamSafe(0)
lastSegStart := i
for n = end; i < n; {
if j := src.skipASCII(i, n); i != j {
i = j
lastSegStart = i - 1
lastCC = 0
ss = 0
continue
}
info := f.info(src, i)
if info.size == 0 {
if atEOF {
// include incomplete runes
return n, true
}
return lastSegStart, true
}
// This block needs to be before the next, because it is possible to
// have an overflow for runes that are starters (e.g. with U+FF9E).
switch ss.next(info) {
case ssStarter:
ss.first(info)
lastSegStart = i
case ssOverflow:
return lastSegStart, false
case ssSuccess:
if lastCC > info.ccc {
return lastSegStart, false
}
}
if f.composing {
if !info.isYesC() {
break
}
} else {
if !info.isYesD() {
break
}
}
lastCC = info.ccc
i += int(info.size)
}
if i == n {
if !atEOF {
n = lastSegStart
}
return n, true
}
return lastSegStart, false
}
// QuickSpanString returns a boundary n such that b[0:n] == f(s[0:n]).
// It is not guaranteed to return the largest such n.
func (f Form) QuickSpanString(s string) int {
n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true)
return n
}
// FirstBoundary returns the position i of the first boundary in b
// or -1 if b contains no boundary.
func (f Form) FirstBoundary(b []byte) int {
return f.firstBoundary(inputBytes(b), len(b))
}
func (f Form) firstBoundary(src input, nsrc int) int {
i := src.skipContinuationBytes(0)
if i >= nsrc {
return -1
}
fd := formTable[f]
ss := streamSafe(0)
// We should call ss.first here, but we can't as the first rune is
// skipped already. This means FirstBoundary can't really determine
// CGJ insertion points correctly. Luckily it doesn't have to.
// TODO: consider adding NextBoundary
for {
info := fd.info(src, i)
if info.size == 0 {
return -1
}
if s := ss.next(info); s != ssSuccess {
return i
}
i += int(info.size)
if i >= nsrc {
if !info.BoundaryAfter() && !ss.isMax() {
return -1
}
return nsrc
}
}
}
// FirstBoundaryInString returns the position i of the first boundary in s
// or -1 if s contains no boundary.
func (f Form) FirstBoundaryInString(s string) int {
return f.firstBoundary(inputString(s), len(s))
}
// LastBoundary returns the position i of the last boundary in b
// or -1 if b contains no boundary.
func (f Form) LastBoundary(b []byte) int {
return lastBoundary(formTable[f], b)
}
func lastBoundary(fd *formInfo, b []byte) int {
i := len(b)
info, p := lastRuneStart(fd, b)
if p == -1 {
return -1
}
if info.size == 0 { // ends with incomplete rune
if p == 0 { // starts with incomplete rune
return -1
}
i = p
info, p = lastRuneStart(fd, b[:i])
if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
return i
}
}
if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
return i
}
if info.BoundaryAfter() {
return i
}
ss := streamSafe(0)
v := ss.backwards(info)
for i = p; i >= 0 && v != ssStarter; i = p {
info, p = lastRuneStart(fd, b[:i])
if v = ss.backwards(info); v == ssOverflow {
break
}
if p+int(info.size) != i {
if p == -1 { // no boundary found
return -1
}
return i // boundary after an illegal UTF-8 encoding
}
}
return i
}
// decomposeSegment scans the first segment in src into rb. It inserts 0x034f
// (Grapheme Joiner) when it encounters a sequence of more than 30 non-starters
// and returns the number of bytes consumed from src or iShortDst or iShortSrc.
func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
// Force one character to be consumed.
info := rb.f.info(rb.src, sp)
if info.size == 0 {
return 0
}
if rb.nrune > 0 {
if s := rb.ss.next(info); s == ssStarter {
goto end
} else if s == ssOverflow {
rb.insertCGJ()
goto end
}
} else {
rb.ss.first(info)
}
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
return int(err)
}
for {
sp += int(info.size)
if sp >= rb.nsrc {
if !atEOF && !info.BoundaryAfter() {
return int(iShortSrc)
}
break
}
info = rb.f.info(rb.src, sp)
if info.size == 0 {
if !atEOF {
return int(iShortSrc)
}
break
}
if s := rb.ss.next(info); s == ssStarter {
break
} else if s == ssOverflow {
rb.insertCGJ()
break
}
if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
return int(err)
}
}
end:
if !rb.doFlush() {
return int(iShortDst)
}
return sp
}
// lastRuneStart returns the runeInfo and position of the last
// rune in buf or the zero runeInfo and -1 if no rune was found.
func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
p := len(buf) - 1
for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- {
}
if p < 0 {
return Properties{}, -1
}
return fd.info(inputBytes(buf), p), p
}
// decomposeToLastBoundary finds an open segment at the end of the buffer
// and scans it into rb. Returns the buffer minus the last segment.
func decomposeToLastBoundary(rb *reorderBuffer) {
fd := &rb.f
info, i := lastRuneStart(fd, rb.out)
if int(info.size) != len(rb.out)-i {
// illegal trailing continuation bytes
return
}
if info.BoundaryAfter() {
return
}
var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order
padd := 0
ss := streamSafe(0)
p := len(rb.out)
for {
add[padd] = info
v := ss.backwards(info)
if v == ssOverflow {
// Note that if we have an overflow, it the string we are appending to
// is not correctly normalized. In this case the behavior is undefined.
break
}
padd++
p -= int(info.size)
if v == ssStarter || p < 0 {
break
}
info, i = lastRuneStart(fd, rb.out[:p])
if int(info.size) != p-i {
break
}
}
rb.ss = ss
// Copy bytes for insertion as we may need to overwrite rb.out.
var buf [maxBufferSize * utf8.UTFMax]byte
cp := buf[:copy(buf[:], rb.out[p:])]
rb.out = rb.out[:p]
for padd--; padd >= 0; padd-- {
info = add[padd]
rb.insertUnsafe(inputBytes(cp), 0, info)
cp = cp[info.size:]
}
}

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,318 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bufio"
"bytes"
"flag"
"fmt"
"log"
"net/http"
"os"
"path"
"regexp"
"runtime"
"strconv"
"strings"
"time"
"unicode"
"unicode/utf8"
"code.google.com/p/go.text/unicode/norm"
)
func main() {
flag.Parse()
loadTestData()
CharacterByCharacterTests()
StandardTests()
PerformanceTest()
if errorCount == 0 {
fmt.Println("PASS")
}
}
const file = "NormalizationTest.txt"
var url = flag.String("url",
"http://www.unicode.org/Public/"+unicode.Version+"/ucd/"+file,
"URL of Unicode database directory")
var localFiles = flag.Bool("local",
false,
"data files have been copied to the current directory; for debugging only")
var logger = log.New(os.Stderr, "", log.Lshortfile)
// This regression test runs the test set in NormalizationTest.txt
// (taken from http://www.unicode.org/Public/<unicode.Version>/ucd/).
//
// NormalizationTest.txt has form:
// @Part0 # Specific cases
// #
// 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE
// 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW
//
// Each test has 5 columns (c1, c2, c3, c4, c5), where
// (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1))
//
// CONFORMANCE:
// 1. The following invariants must be true for all conformant implementations
//
// NFC
// c2 == NFC(c1) == NFC(c2) == NFC(c3)
// c4 == NFC(c4) == NFC(c5)
//
// NFD
// c3 == NFD(c1) == NFD(c2) == NFD(c3)
// c5 == NFD(c4) == NFD(c5)
//
// NFKC
// c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
//
// NFKD
// c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
//
// 2. For every code point X assigned in this version of Unicode that is not
// specifically listed in Part 1, the following invariants must be true
// for all conformant implementations:
//
// X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
//
// Column types.
const (
cRaw = iota
cNFC
cNFD
cNFKC
cNFKD
cMaxColumns
)
// Holds data from NormalizationTest.txt
var part []Part
type Part struct {
name string
number int
tests []Test
}
type Test struct {
name string
partnr int
number int
r rune // used for character by character test
cols [cMaxColumns]string // Each has 5 entries, see below.
}
func (t Test) Name() string {
if t.number < 0 {
return part[t.partnr].name
}
return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number)
}
var partRe = regexp.MustCompile(`@Part(\d) # (.*)$`)
var testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)$`)
var counter int
// Load the data form NormalizationTest.txt
func loadTestData() {
if *localFiles {
pwd, _ := os.Getwd()
*url = "file://" + path.Join(pwd, file)
}
t := &http.Transport{}
t.RegisterProtocol("file", http.NewFileTransport(http.Dir("/")))
c := &http.Client{Transport: t}
resp, err := c.Get(*url)
if err != nil {
logger.Fatal(err)
}
if resp.StatusCode != 200 {
logger.Fatal("bad GET status for "+file, resp.Status)
}
f := resp.Body
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := scanner.Text()
if len(line) == 0 || line[0] == '#' {
continue
}
m := partRe.FindStringSubmatch(line)
if m != nil {
if len(m) < 3 {
logger.Fatal("Failed to parse Part: ", line)
}
i, err := strconv.Atoi(m[1])
if err != nil {
logger.Fatal(err)
}
name := m[2]
part = append(part, Part{name: name[:len(name)-1], number: i})
continue
}
m = testRe.FindStringSubmatch(line)
if m == nil || len(m) < 7 {
logger.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
}
test := Test{name: m[6], partnr: len(part) - 1, number: counter}
counter++
for j := 1; j < len(m)-1; j++ {
for _, split := range strings.Split(m[j], " ") {
r, err := strconv.ParseUint(split, 16, 64)
if err != nil {
logger.Fatal(err)
}
if test.r == 0 {
// save for CharacterByCharacterTests
test.r = rune(r)
}
var buf [utf8.UTFMax]byte
sz := utf8.EncodeRune(buf[:], rune(r))
test.cols[j-1] += string(buf[:sz])
}
}
part := &part[len(part)-1]
part.tests = append(part.tests, test)
}
if scanner.Err() != nil {
logger.Fatal(scanner.Err())
}
}
var fstr = []string{"NFC", "NFD", "NFKC", "NFKD"}
var errorCount int
func cmpResult(t *Test, name string, f norm.Form, gold, test, result string) {
if gold != result {
errorCount++
if errorCount > 20 {
return
}
logger.Printf("%s:%s: %s(%+q)=%+q; want %+q: %s",
t.Name(), name, fstr[f], test, result, gold, t.name)
}
}
func cmpIsNormal(t *Test, name string, f norm.Form, test string, result, want bool) {
if result != want {
errorCount++
if errorCount > 20 {
return
}
logger.Printf("%s:%s: %s(%+q)=%v; want %v", t.Name(), name, fstr[f], test, result, want)
}
}
func doTest(t *Test, f norm.Form, gold, test string) {
testb := []byte(test)
result := f.Bytes(testb)
cmpResult(t, "Bytes", f, gold, test, string(result))
sresult := f.String(test)
cmpResult(t, "String", f, gold, test, sresult)
acc := []byte{}
i := norm.Iter{}
i.InitString(f, test)
for !i.Done() {
acc = append(acc, i.Next()...)
}
cmpResult(t, "Iter.Next", f, gold, test, string(acc))
buf := make([]byte, 128)
acc = nil
for p := 0; p < len(testb); {
nDst, nSrc, _ := f.Transform(buf, testb[p:], true)
acc = append(acc, buf[:nDst]...)
p += nSrc
}
cmpResult(t, "Transform", f, gold, test, string(acc))
for i := range test {
out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...)
cmpResult(t, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out))
}
cmpIsNormal(t, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold)
cmpIsNormal(t, "IsNormalString", f, test, f.IsNormalString(test), test == gold)
}
func doConformanceTests(t *Test, partn int) {
for i := 0; i <= 2; i++ {
doTest(t, norm.NFC, t.cols[1], t.cols[i])
doTest(t, norm.NFD, t.cols[2], t.cols[i])
doTest(t, norm.NFKC, t.cols[3], t.cols[i])
doTest(t, norm.NFKD, t.cols[4], t.cols[i])
}
for i := 3; i <= 4; i++ {
doTest(t, norm.NFC, t.cols[3], t.cols[i])
doTest(t, norm.NFD, t.cols[4], t.cols[i])
doTest(t, norm.NFKC, t.cols[3], t.cols[i])
doTest(t, norm.NFKD, t.cols[4], t.cols[i])
}
}
func CharacterByCharacterTests() {
tests := part[1].tests
var last rune = 0
for i := 0; i <= len(tests); i++ { // last one is special case
var r rune
if i == len(tests) {
r = 0x2FA1E // Don't have to go to 0x10FFFF
} else {
r = tests[i].r
}
for last++; last < r; last++ {
// Check all characters that were not explicitly listed in the test.
t := &Test{partnr: 1, number: -1}
char := string(last)
doTest(t, norm.NFC, char, char)
doTest(t, norm.NFD, char, char)
doTest(t, norm.NFKC, char, char)
doTest(t, norm.NFKD, char, char)
}
if i < len(tests) {
doConformanceTests(&tests[i], 1)
}
}
}
func StandardTests() {
for _, j := range []int{0, 2, 3} {
for _, test := range part[j].tests {
doConformanceTests(&test, j)
}
}
}
// PerformanceTest verifies that normalization is O(n). If any of the
// code does not properly check for maxCombiningChars, normalization
// may exhibit O(n**2) behavior.
func PerformanceTest() {
runtime.GOMAXPROCS(2)
success := make(chan bool, 1)
go func() {
buf := bytes.Repeat([]byte("\u035D"), 1024*1024)
buf = append(buf, "\u035B"...)
norm.NFC.Append(nil, buf...)
success <- true
}()
timeout := time.After(1 * time.Second)
select {
case <-success:
// test completed before the timeout
case <-timeout:
errorCount++
logger.Printf(`unexpectedly long time to complete PerformanceTest`)
}
}

View File

@@ -0,0 +1,126 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "io"
type normWriter struct {
rb reorderBuffer
w io.Writer
buf []byte
}
// Write implements the standard write interface. If the last characters are
// not at a normalization boundary, the bytes will be buffered for the next
// write. The remaining bytes will be written on close.
func (w *normWriter) Write(data []byte) (n int, err error) {
// Process data in pieces to keep w.buf size bounded.
const chunk = 4000
for len(data) > 0 {
// Normalize into w.buf.
m := len(data)
if m > chunk {
m = chunk
}
w.rb.src = inputBytes(data[:m])
w.rb.nsrc = m
w.buf = doAppend(&w.rb, w.buf, 0)
data = data[m:]
n += m
// Write out complete prefix, save remainder.
// Note that lastBoundary looks back at most 31 runes.
i := lastBoundary(&w.rb.f, w.buf)
if i == -1 {
i = 0
}
if i > 0 {
if _, err = w.w.Write(w.buf[:i]); err != nil {
break
}
bn := copy(w.buf, w.buf[i:])
w.buf = w.buf[:bn]
}
}
return n, err
}
// Close forces data that remains in the buffer to be written.
func (w *normWriter) Close() error {
if len(w.buf) > 0 {
_, err := w.w.Write(w.buf)
if err != nil {
return err
}
}
return nil
}
// Writer returns a new writer that implements Write(b)
// by writing f(b) to w. The returned writer may use an
// an internal buffer to maintain state across Write calls.
// Calling its Close method writes any buffered data to w.
func (f Form) Writer(w io.Writer) io.WriteCloser {
wr := &normWriter{rb: reorderBuffer{}, w: w}
wr.rb.init(f, nil)
return wr
}
type normReader struct {
rb reorderBuffer
r io.Reader
inbuf []byte
outbuf []byte
bufStart int
lastBoundary int
err error
}
// Read implements the standard read interface.
func (r *normReader) Read(p []byte) (int, error) {
for {
if r.lastBoundary-r.bufStart > 0 {
n := copy(p, r.outbuf[r.bufStart:r.lastBoundary])
r.bufStart += n
if r.lastBoundary-r.bufStart > 0 {
return n, nil
}
return n, r.err
}
if r.err != nil {
return 0, r.err
}
outn := copy(r.outbuf, r.outbuf[r.lastBoundary:])
r.outbuf = r.outbuf[0:outn]
r.bufStart = 0
n, err := r.r.Read(r.inbuf)
r.rb.src = inputBytes(r.inbuf[0:n])
r.rb.nsrc, r.err = n, err
if n > 0 {
r.outbuf = doAppend(&r.rb, r.outbuf, 0)
}
if err == io.EOF {
r.lastBoundary = len(r.outbuf)
} else {
r.lastBoundary = lastBoundary(&r.rb.f, r.outbuf)
if r.lastBoundary == -1 {
r.lastBoundary = 0
}
}
}
panic("should not reach here")
}
// Reader returns a new reader that implements Read
// by reading data from r and returning f(data).
func (f Form) Reader(r io.Reader) io.Reader {
const chunk = 4000
buf := make([]byte, chunk)
rr := &normReader{rb: reorderBuffer{}, r: r, inbuf: buf}
rr.rb.init(f, buf)
return rr
}

View File

@@ -0,0 +1,56 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"bytes"
"fmt"
"testing"
)
var bufSizes = []int{1, 2, 3, 4, 5, 6, 7, 8, 100, 101, 102, 103, 4000, 4001, 4002, 4003}
func readFunc(size int) appendFunc {
return func(f Form, out []byte, s string) []byte {
out = append(out, s...)
r := f.Reader(bytes.NewBuffer(out))
buf := make([]byte, size)
result := []byte{}
for n, err := 0, error(nil); err == nil; {
n, err = r.Read(buf)
result = append(result, buf[:n]...)
}
return result
}
}
func TestReader(t *testing.T) {
for _, s := range bufSizes {
name := fmt.Sprintf("TestReader%d", s)
runNormTests(t, name, readFunc(s))
}
}
func writeFunc(size int) appendFunc {
return func(f Form, out []byte, s string) []byte {
in := append(out, s...)
result := new(bytes.Buffer)
w := f.Writer(result)
buf := make([]byte, size)
for n := 0; len(in) > 0; in = in[n:] {
n = copy(buf, in)
_, _ = w.Write(buf[:n])
}
w.Close()
return result.Bytes()
}
}
func TestWriter(t *testing.T) {
for _, s := range bufSizes {
name := fmt.Sprintf("TestWriter%d", s)
runNormTests(t, name, writeFunc(s))
}
}

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,85 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"unicode/utf8"
"code.google.com/p/go.text/transform"
)
// Transform implements the transform.Transformer interface. It may need to
// write segments of up to MaxSegmentSize at once. Users should either catch
// ErrShortDst and allow dst to grow or have dst be at least of size
// MaxTransformChunkSize to be guaranteed of progress.
func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
n := 0
// Cap the maximum number of src bytes to check.
b := src
eof := atEOF
if ns := len(dst); ns < len(b) {
err = transform.ErrShortDst
eof = false
b = b[:ns]
}
i, ok := formTable[f].quickSpan(inputBytes(b), n, len(b), eof)
n += copy(dst[n:], b[n:i])
if !ok {
nDst, nSrc, err = f.transform(dst[n:], src[n:], atEOF)
return nDst + n, nSrc + n, err
}
if n < len(src) && !atEOF {
err = transform.ErrShortSrc
}
return n, n, err
}
func flushTransform(rb *reorderBuffer) bool {
// Write out (must fully fit in dst, or else it is a ErrShortDst).
if len(rb.out) < rb.nrune*utf8.UTFMax {
return false
}
rb.out = rb.out[rb.flushCopy(rb.out):]
return true
}
var errs = []error{nil, transform.ErrShortDst, transform.ErrShortSrc}
// transform implements the transform.Transformer interface. It is only called
// when quickSpan does not pass for a given string.
func (f Form) transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
// TODO: get rid of reorderBuffer. See CL 23460044.
rb := reorderBuffer{}
rb.init(f, src)
for {
// Load segment into reorder buffer.
rb.setFlusher(dst[nDst:], flushTransform)
end := decomposeSegment(&rb, nSrc, atEOF)
if end < 0 {
return nDst, nSrc, errs[-end]
}
nDst = len(dst) - len(rb.out)
nSrc = end
// Next quickSpan.
end = rb.nsrc
eof := atEOF
if n := nSrc + len(dst) - nDst; n < end {
err = transform.ErrShortDst
end = n
eof = false
}
end, ok := rb.f.quickSpan(rb.src, nSrc, end, eof)
n := copy(dst[nDst:], rb.src.bytes[nSrc:end])
nSrc += n
nDst += n
if ok {
if n < rb.nsrc && !atEOF {
err = transform.ErrShortSrc
}
return nDst, nSrc, err
}
}
}

View File

@@ -0,0 +1,101 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"fmt"
"testing"
"code.google.com/p/go.text/transform"
)
func TestTransform(t *testing.T) {
tests := []struct {
f Form
in, out string
eof bool
dstSize int
err error
}{
{NFC, "ab", "ab", true, 2, nil},
{NFC, "qx", "qx", true, 2, nil},
{NFD, "qx", "qx", true, 2, nil},
{NFC, "", "", true, 1, nil},
{NFD, "", "", true, 1, nil},
{NFC, "", "", false, 1, nil},
{NFD, "", "", false, 1, nil},
// Normalized segment does not fit in destination.
{NFD, "ö", "", true, 1, transform.ErrShortDst},
{NFD, "ö", "", true, 2, transform.ErrShortDst},
// As an artifact of the algorithm, only full segments are written.
// This is not strictly required, and some bytes could be written.
// In practice, for Transform to not block, the destination buffer
// should be at least MaxSegmentSize to work anyway and these edge
// conditions will be relatively rare.
{NFC, "ab", "", true, 1, transform.ErrShortDst},
// This is even true for inert runes.
{NFC, "qx", "", true, 1, transform.ErrShortDst},
{NFC, "a\u0300abc", "\u00e0a", true, 4, transform.ErrShortDst},
// We cannot write a segment if succesive runes could still change the result.
{NFD, "ö", "", false, 3, transform.ErrShortSrc},
{NFC, "a\u0300", "", false, 4, transform.ErrShortSrc},
{NFD, "a\u0300", "", false, 4, transform.ErrShortSrc},
{NFC, "ö", "", false, 3, transform.ErrShortSrc},
{NFC, "a\u0300", "", true, 1, transform.ErrShortDst},
// Theoretically could fit, but won't due to simplified checks.
{NFC, "a\u0300", "", true, 2, transform.ErrShortDst},
{NFC, "a\u0300", "", true, 3, transform.ErrShortDst},
{NFC, "a\u0300", "\u00e0", true, 4, nil},
{NFD, "öa\u0300", "o\u0308", false, 8, transform.ErrShortSrc},
{NFD, "öa\u0300ö", "o\u0308a\u0300", true, 8, transform.ErrShortDst},
{NFD, "öa\u0300ö", "o\u0308a\u0300", false, 12, transform.ErrShortSrc},
// Illegal input is copied verbatim.
{NFD, "\xbd\xb2=\xbc ", "\xbd\xb2=\xbc ", true, 8, nil},
}
b := make([]byte, 100)
for i, tt := range tests {
nDst, _, err := tt.f.Transform(b[:tt.dstSize], []byte(tt.in), tt.eof)
out := string(b[:nDst])
if out != tt.out || err != tt.err {
t.Errorf("%d: was %+q (%v); want %+q (%v)", i, out, err, tt.out, tt.err)
}
if want := tt.f.String(tt.in)[:nDst]; want != out {
t.Errorf("%d: incorect normalization: was %+q; want %+q", i, out, want)
}
}
}
var transBufSizes = []int{
MaxTransformChunkSize,
3 * MaxTransformChunkSize / 2,
2 * MaxTransformChunkSize,
3 * MaxTransformChunkSize,
100 * MaxTransformChunkSize,
}
func doTransNorm(f Form, buf []byte, b []byte) []byte {
acc := []byte{}
for p := 0; p < len(b); {
nd, ns, _ := f.Transform(buf[:], b[p:], true)
p += ns
acc = append(acc, buf[:nd]...)
}
return acc
}
func TestTransformNorm(t *testing.T) {
for _, sz := range transBufSizes {
buf := make([]byte, sz)
runNormTests(t, fmt.Sprintf("Transform:%d", sz), func(f Form, out []byte, s string) []byte {
return doTransNorm(f, buf, append(out, s...))
})
}
}

View File

@@ -0,0 +1,232 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
type valueRange struct {
value uint16 // header: value:stride
lo, hi byte // header: lo:n
}
type trie struct {
index []uint8
values []uint16
sparse []valueRange
sparseOffset []uint16
cutoff uint8 // indices >= cutoff are sparse
}
// lookupValue determines the type of block n and looks up the value for b.
// For n < t.cutoff, the block is a simple lookup table. Otherwise, the block
// is a list of ranges with an accompanying value. Given a matching range r,
// the value for b is by r.value + (b - r.lo) * stride.
func (t *trie) lookupValue(n uint8, b byte) uint16 {
if n < t.cutoff {
return t.values[uint16(n)<<6+uint16(b)]
}
offset := t.sparseOffset[n-t.cutoff]
header := t.sparse[offset]
lo := offset + 1
hi := lo + uint16(header.lo)
for lo < hi {
m := lo + (hi-lo)/2
r := t.sparse[m]
if r.lo <= b && b <= r.hi {
return r.value + uint16(b-r.lo)*header.value
}
if b < r.lo {
hi = m
} else {
lo = m + 1
}
}
return 0
}
const (
t1 = 0x00 // 0000 0000
tx = 0x80 // 1000 0000
t2 = 0xC0 // 1100 0000
t3 = 0xE0 // 1110 0000
t4 = 0xF0 // 1111 0000
t5 = 0xF8 // 1111 1000
t6 = 0xFC // 1111 1100
te = 0xFE // 1111 1110
)
// lookup returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *trie) lookup(s []byte) (v uint16, sz int) {
c0 := s[0]
switch {
case c0 < tx:
return t.values[c0], 1
case c0 < t2:
return 0, 1
case c0 < t3:
if len(s) < 2 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
return t.lookupValue(i, c1), 2
case c0 < t4:
if len(s) < 3 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
return t.lookupValue(i, c2), 3
case c0 < t5:
if len(s) < 4 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = uint16(i)<<6 + uint16(c2)
i = t.index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
return 0, 3
}
return t.lookupValue(i, c3), 4
}
// Illegal rune
return 0, 1
}
// lookupString returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *trie) lookupString(s string) (v uint16, sz int) {
c0 := s[0]
switch {
case c0 < tx:
return t.values[c0], 1
case c0 < t2:
return 0, 1
case c0 < t3:
if len(s) < 2 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
return t.lookupValue(i, c1), 2
case c0 < t4:
if len(s) < 3 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
return t.lookupValue(i, c2), 3
case c0 < t5:
if len(s) < 4 {
return 0, 0
}
i := t.index[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := uint16(i)<<6 + uint16(c1)
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = uint16(i)<<6 + uint16(c2)
i = t.index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
return 0, 3
}
return t.lookupValue(i, c3), 4
}
// Illegal rune
return 0, 1
}
// lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
// s must hold a full encoding.
func (t *trie) lookupUnsafe(s []byte) uint16 {
c0 := s[0]
if c0 < tx {
return t.values[c0]
}
if c0 < t2 {
return 0
}
i := t.index[c0]
if c0 < t3 {
return t.lookupValue(i, s[1])
}
i = t.index[uint16(i)<<6+uint16(s[1])]
if c0 < t4 {
return t.lookupValue(i, s[2])
}
i = t.index[uint16(i)<<6+uint16(s[2])]
if c0 < t5 {
return t.lookupValue(i, s[3])
}
return 0
}
// lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
// s must hold a full encoding.
func (t *trie) lookupStringUnsafe(s string) uint16 {
c0 := s[0]
if c0 < tx {
return t.values[c0]
}
if c0 < t2 {
return 0
}
i := t.index[c0]
if c0 < t3 {
return t.lookupValue(i, s[1])
}
i = t.index[uint16(i)<<6+uint16(s[1])]
if c0 < t4 {
return t.lookupValue(i, s[2])
}
i = t.index[uint16(i)<<6+uint16(s[2])]
if c0 < t5 {
return t.lookupValue(i, s[3])
}
return 0
}

View File

@@ -0,0 +1,152 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"testing"
"unicode/utf8"
)
// Test data is located in triedata_test.go; generated by maketesttables.
var testdata = testdataTrie
type rangeTest struct {
block uint8
lookup byte
result uint16
table []valueRange
offsets []uint16
}
var range1Off = []uint16{0, 2}
var range1 = []valueRange{
{0, 1, 0},
{1, 0x80, 0x80},
{0, 2, 0},
{1, 0x80, 0x80},
{9, 0xff, 0xff},
}
var rangeTests = []rangeTest{
{10, 0x80, 1, range1, range1Off},
{10, 0x00, 0, range1, range1Off},
{11, 0x80, 1, range1, range1Off},
{11, 0xff, 9, range1, range1Off},
{11, 0x00, 0, range1, range1Off},
}
func TestLookupSparse(t *testing.T) {
for i, test := range rangeTests {
n := trie{sparse: test.table, sparseOffset: test.offsets, cutoff: 10}
v := n.lookupValue(test.block, test.lookup)
if v != test.result {
t.Errorf("LookupSparse:%d: found %X; want %X", i, v, test.result)
}
}
}
// Test cases for illegal runes.
type trietest struct {
size int
bytes []byte
}
var tests = []trietest{
// illegal runes
{1, []byte{0x80}},
{1, []byte{0xFF}},
{1, []byte{t2, tx - 1}},
{1, []byte{t2, t2}},
{2, []byte{t3, tx, tx - 1}},
{2, []byte{t3, tx, t2}},
{1, []byte{t3, tx - 1, tx}},
{3, []byte{t4, tx, tx, tx - 1}},
{3, []byte{t4, tx, tx, t2}},
{1, []byte{t4, t2, tx, tx - 1}},
{2, []byte{t4, tx, t2, tx - 1}},
// short runes
{0, []byte{t2}},
{0, []byte{t3, tx}},
{0, []byte{t4, tx, tx}},
// we only support UTF-8 up to utf8.UTFMax bytes (4 bytes)
{1, []byte{t5, tx, tx, tx, tx}},
{1, []byte{t6, tx, tx, tx, tx, tx}},
}
func mkUTF8(r rune) ([]byte, int) {
var b [utf8.UTFMax]byte
sz := utf8.EncodeRune(b[:], r)
return b[:sz], sz
}
func TestLookup(t *testing.T) {
for i, tt := range testRunes {
b, szg := mkUTF8(tt)
v, szt := testdata.lookup(b)
if int(v) != i {
t.Errorf("lookup(%U): found value %#x, expected %#x", tt, v, i)
}
if szt != szg {
t.Errorf("lookup(%U): found size %d, expected %d", tt, szt, szg)
}
}
for i, tt := range tests {
v, sz := testdata.lookup(tt.bytes)
if v != 0 {
t.Errorf("lookup of illegal rune, case %d: found value %#x, expected 0", i, v)
}
if sz != tt.size {
t.Errorf("lookup of illegal rune, case %d: found size %d, expected %d", i, sz, tt.size)
}
}
// Verify defaults.
if v, _ := testdata.lookup([]byte{0xC1, 0x8C}); v != 0 {
t.Errorf("lookup of non-existing rune should be 0; found %X", v)
}
}
func TestLookupUnsafe(t *testing.T) {
for i, tt := range testRunes {
b, _ := mkUTF8(tt)
v := testdata.lookupUnsafe(b)
if int(v) != i {
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
}
}
}
func TestLookupString(t *testing.T) {
for i, tt := range testRunes {
b, szg := mkUTF8(tt)
v, szt := testdata.lookupString(string(b))
if int(v) != i {
t.Errorf("lookup(%U): found value %#x, expected %#x", i, v, i)
}
if szt != szg {
t.Errorf("lookup(%U): found size %d, expected %d", i, szt, szg)
}
}
for i, tt := range tests {
v, sz := testdata.lookupString(string(tt.bytes))
if int(v) != 0 {
t.Errorf("lookup of illegal rune, case %d: found value %#x, expected 0", i, v)
}
if sz != tt.size {
t.Errorf("lookup of illegal rune, case %d: found size %d, expected %d", i, sz, tt.size)
}
}
}
func TestLookupStringUnsafe(t *testing.T) {
for i, tt := range testRunes {
b, _ := mkUTF8(tt)
v := testdata.lookupStringUnsafe(string(b))
if int(v) != i {
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
}
}
}

View File

@@ -0,0 +1,85 @@
// Generated by running
// maketesttables
// DO NOT EDIT
package norm
var testRunes = []int32{1, 12, 127, 128, 256, 2047, 2048, 2457, 65535, 65536, 65793, 1114111, 512, 513, 514, 528, 533}
// testdataValues: 192 entries, 384 bytes
// Block 2 is the null block.
var testdataValues = [192]uint16{
// Block 0x0, offset 0x0
0x000c: 0x0001,
// Block 0x1, offset 0x40
0x007f: 0x0002,
// Block 0x2, offset 0x80
}
// testdataSparseOffset: 10 entries, 20 bytes
var testdataSparseOffset = []uint16{0x0, 0x2, 0x4, 0x8, 0xa, 0xc, 0xe, 0x10, 0x12, 0x14}
// testdataSparseValues: 22 entries, 88 bytes
var testdataSparseValues = [22]valueRange{
// Block 0x0, offset 0x1
{value: 0x0000, lo: 0x01},
{value: 0x0003, lo: 0x80, hi: 0x80},
// Block 0x1, offset 0x2
{value: 0x0000, lo: 0x01},
{value: 0x0004, lo: 0x80, hi: 0x80},
// Block 0x2, offset 0x3
{value: 0x0001, lo: 0x03},
{value: 0x000c, lo: 0x80, hi: 0x82},
{value: 0x000f, lo: 0x90, hi: 0x90},
{value: 0x0010, lo: 0x95, hi: 0x95},
// Block 0x3, offset 0x4
{value: 0x0000, lo: 0x01},
{value: 0x0005, lo: 0xbf, hi: 0xbf},
// Block 0x4, offset 0x5
{value: 0x0000, lo: 0x01},
{value: 0x0006, lo: 0x80, hi: 0x80},
// Block 0x5, offset 0x6
{value: 0x0000, lo: 0x01},
{value: 0x0007, lo: 0x99, hi: 0x99},
// Block 0x6, offset 0x7
{value: 0x0000, lo: 0x01},
{value: 0x0008, lo: 0xbf, hi: 0xbf},
// Block 0x7, offset 0x8
{value: 0x0000, lo: 0x01},
{value: 0x0009, lo: 0x80, hi: 0x80},
// Block 0x8, offset 0x9
{value: 0x0000, lo: 0x01},
{value: 0x000a, lo: 0x81, hi: 0x81},
// Block 0x9, offset 0xa
{value: 0x0000, lo: 0x01},
{value: 0x000b, lo: 0xbf, hi: 0xbf},
}
// testdataLookup: 640 bytes
// Block 0 is the null block.
var testdataLookup = [640]uint8{
// Block 0x0, offset 0x0
// Block 0x1, offset 0x40
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0x0c2: 0x01, 0x0c4: 0x02,
0x0c8: 0x03,
0x0df: 0x04,
0x0e0: 0x02,
0x0ef: 0x03,
0x0f0: 0x05, 0x0f4: 0x07,
// Block 0x4, offset 0x100
0x120: 0x05, 0x126: 0x06,
// Block 0x5, offset 0x140
0x17f: 0x07,
// Block 0x6, offset 0x180
0x180: 0x08, 0x184: 0x09,
// Block 0x7, offset 0x1c0
0x1d0: 0x04,
// Block 0x8, offset 0x200
0x23f: 0x0a,
// Block 0x9, offset 0x240
0x24f: 0x06,
}
var testdataTrie = trie{testdataLookup[:], testdataValues[:], testdataSparseValues[:], testdataSparseOffset[:], 1}

View File

@@ -0,0 +1,317 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// Trie table generator.
// Used by make*tables tools to generate a go file with trie data structures
// for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte
// sequence are used to lookup offsets in the index table to be used for the
// next byte. The last byte is used to index into a table with 16-bit values.
package main
import (
"fmt"
"hash/crc32"
"log"
"unicode/utf8"
)
const (
blockSize = 64
blockOffset = 2 // Subtract two blocks to compensate for the 0x80 added to continuation bytes.
maxSparseEntries = 16
)
// Intermediate trie structure
type trieNode struct {
table [256]*trieNode
value int
b byte
leaf bool
}
func newNode() *trieNode {
return new(trieNode)
}
func (n trieNode) String() string {
s := fmt.Sprint("trieNode{table: { non-nil at index: ")
for i, v := range n.table {
if v != nil {
s += fmt.Sprintf("%d, ", i)
}
}
s += fmt.Sprintf("}, value:%#x, b:%#x leaf:%v}", n.value, n.b, n.leaf)
return s
}
func (n trieNode) isInternal() bool {
internal := true
for i := 0; i < 256; i++ {
if nn := n.table[i]; nn != nil {
if !internal && !nn.leaf {
log.Fatalf("triegen: isInternal: node contains both leaf and non-leaf children (%v)", n)
}
internal = internal && !nn.leaf
}
}
return internal
}
func (n trieNode) mostFrequentStride() int {
counts := make(map[int]int)
v := 0
for _, t := range n.table[0x80 : 0x80+blockSize] {
if t != nil {
if stride := t.value - v; v != 0 && stride >= 0 {
counts[stride]++
}
v = t.value
} else {
v = 0
}
}
var maxs, maxc int
for stride, cnt := range counts {
if cnt > maxc || (cnt == maxc && stride < maxs) {
maxs, maxc = stride, cnt
}
}
return maxs
}
func (n trieNode) countSparseEntries() int {
stride := n.mostFrequentStride()
var count, v int
for _, t := range n.table[0x80 : 0x80+blockSize] {
tv := 0
if t != nil {
tv = t.value
}
if tv-v != stride {
if tv != 0 {
count++
}
}
v = tv
}
return count
}
func (n *trieNode) insert(r rune, value uint16) {
var p [utf8.UTFMax]byte
sz := utf8.EncodeRune(p[:], r)
for i := 0; i < sz; i++ {
if n.leaf {
log.Fatalf("triegen: insert: node (%#v) should not be a leaf", n)
}
nn := n.table[p[i]]
if nn == nil {
nn = newNode()
nn.b = p[i]
n.table[p[i]] = nn
}
n = nn
}
n.value = int(value)
n.leaf = true
}
type nodeIndex struct {
lookupBlocks []*trieNode
valueBlocks []*trieNode
sparseBlocks []*trieNode
sparseOffset []uint16
sparseCount int
lookupBlockIdx map[uint32]int
valueBlockIdx map[uint32]int
}
func newIndex() *nodeIndex {
index := &nodeIndex{}
index.lookupBlocks = make([]*trieNode, 0)
index.valueBlocks = make([]*trieNode, 0)
index.sparseBlocks = make([]*trieNode, 0)
index.sparseOffset = make([]uint16, 1)
index.lookupBlockIdx = make(map[uint32]int)
index.valueBlockIdx = make(map[uint32]int)
return index
}
func computeOffsets(index *nodeIndex, n *trieNode) int {
if n.leaf {
return n.value
}
hasher := crc32.New(crc32.MakeTable(crc32.IEEE))
// We only index continuation bytes.
for i := 0; i < blockSize; i++ {
v := 0
if nn := n.table[0x80+i]; nn != nil {
v = computeOffsets(index, nn)
}
hasher.Write([]byte{uint8(v >> 8), uint8(v)})
}
h := hasher.Sum32()
if n.isInternal() {
v, ok := index.lookupBlockIdx[h]
if !ok {
v = len(index.lookupBlocks) - blockOffset
index.lookupBlocks = append(index.lookupBlocks, n)
index.lookupBlockIdx[h] = v
}
n.value = v
} else {
v, ok := index.valueBlockIdx[h]
if !ok {
if c := n.countSparseEntries(); c > maxSparseEntries {
v = len(index.valueBlocks) - blockOffset
index.valueBlocks = append(index.valueBlocks, n)
index.valueBlockIdx[h] = v
} else {
v = -len(index.sparseOffset)
index.sparseBlocks = append(index.sparseBlocks, n)
index.sparseOffset = append(index.sparseOffset, uint16(index.sparseCount))
index.sparseCount += c + 1
index.valueBlockIdx[h] = v
}
}
n.value = v
}
return n.value
}
func printValueBlock(nr int, n *trieNode, offset int) {
boff := nr * blockSize
fmt.Printf("\n// Block %#x, offset %#x", nr, boff)
var printnewline bool
for i := 0; i < blockSize; i++ {
if i%6 == 0 {
printnewline = true
}
v := 0
if nn := n.table[i+offset]; nn != nil {
v = nn.value
}
if v != 0 {
if printnewline {
fmt.Printf("\n")
printnewline = false
}
fmt.Printf("%#04x:%#04x, ", boff+i, v)
}
}
}
func printSparseBlock(nr int, n *trieNode) {
boff := -n.value
fmt.Printf("\n// Block %#x, offset %#x", nr, boff)
v := 0
//stride := f(n)
stride := n.mostFrequentStride()
c := n.countSparseEntries()
fmt.Printf("\n{value:%#04x,lo:%#02x},", stride, uint8(c))
for i, nn := range n.table[0x80 : 0x80+blockSize] {
nv := 0
if nn != nil {
nv = nn.value
}
if nv-v != stride {
if v != 0 {
fmt.Printf(",hi:%#02x},", 0x80+i-1)
}
if nv != 0 {
fmt.Printf("\n{value:%#04x,lo:%#02x", nv, nn.b)
}
}
v = nv
}
if v != 0 {
fmt.Printf(",hi:%#02x},", 0x80+blockSize-1)
}
}
func printLookupBlock(nr int, n *trieNode, offset, cutoff int) {
boff := nr * blockSize
fmt.Printf("\n// Block %#x, offset %#x", nr, boff)
var printnewline bool
for i := 0; i < blockSize; i++ {
if i%8 == 0 {
printnewline = true
}
v := 0
if nn := n.table[i+offset]; nn != nil {
v = nn.value
}
if v != 0 {
if v < 0 {
v = -v - 1 + cutoff
}
if printnewline {
fmt.Printf("\n")
printnewline = false
}
fmt.Printf("%#03x:%#02x, ", boff+i, v)
}
}
}
// printTables returns the size in bytes of the generated tables.
func (t *trieNode) printTables(name string) int {
index := newIndex()
// Values for 7-bit ASCII are stored in first two block, followed by nil block.
index.valueBlocks = append(index.valueBlocks, nil, nil, nil)
// First byte of multi-byte UTF-8 codepoints are indexed in 4th block.
index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil, nil)
// Index starter bytes of multi-byte UTF-8.
for i := 0xC0; i < 0x100; i++ {
if t.table[i] != nil {
computeOffsets(index, t.table[i])
}
}
nv := len(index.valueBlocks) * blockSize
fmt.Printf("// %sValues: %d entries, %d bytes\n", name, nv, nv*2)
fmt.Printf("// Block 2 is the null block.\n")
fmt.Printf("var %sValues = [%d]uint16 {", name, nv)
printValueBlock(0, t, 0)
printValueBlock(1, t, 64)
printValueBlock(2, newNode(), 0)
for i := 3; i < len(index.valueBlocks); i++ {
printValueBlock(i, index.valueBlocks[i], 0x80)
}
fmt.Print("\n}\n\n")
ls := len(index.sparseBlocks)
fmt.Printf("// %sSparseOffset: %d entries, %d bytes\n", name, ls, ls*2)
fmt.Printf("var %sSparseOffset = %#v\n\n", name, index.sparseOffset[1:])
ns := index.sparseCount
fmt.Printf("// %sSparseValues: %d entries, %d bytes\n", name, ns, ns*4)
fmt.Printf("var %sSparseValues = [%d]valueRange {", name, ns)
for i, n := range index.sparseBlocks {
printSparseBlock(i, n)
}
fmt.Print("\n}\n\n")
cutoff := len(index.valueBlocks) - blockOffset
ni := len(index.lookupBlocks) * blockSize
fmt.Printf("// %sLookup: %d bytes\n", name, ni)
fmt.Printf("// Block 0 is the null block.\n")
fmt.Printf("var %sLookup = [%d]uint8 {", name, ni)
printLookupBlock(0, newNode(), 0, cutoff)
printLookupBlock(1, newNode(), 0, cutoff)
printLookupBlock(2, newNode(), 0, cutoff)
printLookupBlock(3, t, 0xC0, cutoff)
for i := 4; i < len(index.lookupBlocks); i++ {
printLookupBlock(i, index.lookupBlocks[i], 0x80, cutoff)
}
fmt.Print("\n}\n\n")
fmt.Printf("var %sTrie = trie{ %sLookup[:], %sValues[:], %sSparseValues[:], %sSparseOffset[:], %d}\n\n",
name, name, name, name, name, cutoff)
return nv*2 + ns*4 + ni + ls*2
}

View File

@@ -0,0 +1,2 @@
inject
inject.test

View File

@@ -0,0 +1,20 @@
The MIT License (MIT)
Copyright (c) 2013 Jeremy Saenz
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -0,0 +1,4 @@
inject
======
Dependency injection for go

View File

@@ -0,0 +1,168 @@
// Package inject provides utilities for mapping and injecting dependencies in various ways.
package inject
import (
"fmt"
"reflect"
)
// Injector represents an interface for mapping and injecting dependencies into structs
// and function arguments.
type Injector interface {
Applicator
Invoker
TypeMapper
// SetParent sets the parent of the injector. If the injector cannot find a
// dependency in its Type map it will check its parent before returning an
// error.
SetParent(Injector)
}
// Applicator represents an interface for mapping dependencies to a struct.
type Applicator interface {
// Maps dependencies in the Type map to each field in the struct
// that is tagged with 'inject'. Returns an error if the injection
// fails.
Apply(interface{}) error
}
// Invoker represents an interface for calling functions via reflection.
type Invoker interface {
// Invoke attempts to call the interface{} provided as a function,
// providing dependencies for function arguments based on Type. Returns
// a slice of reflect.Value representing the returned values of the function.
// Returns an error if the injection fails.
Invoke(interface{}) ([]reflect.Value, error)
}
// TypeMapper represents an interface for mapping interface{} values based on type.
type TypeMapper interface {
// Maps the interface{} value based on its immediate type from reflect.TypeOf.
Map(interface{}) TypeMapper
// Maps the interface{} value based on the pointer of an Interface provided.
// This is really only useful for mapping a value as an interface, as interfaces
// cannot at this time be referenced directly without a pointer.
MapTo(interface{}, interface{}) TypeMapper
// Provides a possibility to directly insert a mapping based on type and value.
// This makes it possible to directly map type arguments not possible to instantiate
// with reflect like unidirectional channels.
Set(reflect.Type, reflect.Value) TypeMapper
// Returns the Value that is mapped to the current type. Returns a zeroed Value if
// the Type has not been mapped.
Get(reflect.Type) reflect.Value
}
type injector struct {
values map[reflect.Type]reflect.Value
parent Injector
}
// InterfaceOf dereferences a pointer to an Interface type.
// It panics if value is not an pointer to an interface.
func InterfaceOf(value interface{}) reflect.Type {
t := reflect.TypeOf(value)
for t.Kind() == reflect.Ptr {
t = t.Elem()
}
if t.Kind() != reflect.Interface {
panic("Called inject.InterfaceOf with a value that is not a pointer to an interface. (*MyInterface)(nil)")
}
return t
}
// New returns a new Injector.
func New() Injector {
return &injector{
values: make(map[reflect.Type]reflect.Value),
}
}
// Invoke attempts to call the interface{} provided as a function,
// providing dependencies for function arguments based on Type.
// Returns a slice of reflect.Value representing the returned values of the function.
// Returns an error if the injection fails.
// It panics if f is not a function
func (inj *injector) Invoke(f interface{}) ([]reflect.Value, error) {
t := reflect.TypeOf(f)
var in = make([]reflect.Value, t.NumIn()) //Panic if t is not kind of Func
for i := 0; i < t.NumIn(); i++ {
argType := t.In(i)
val := inj.Get(argType)
if !val.IsValid() {
return nil, fmt.Errorf("Value not found for type %v", argType)
}
in[i] = val
}
return reflect.ValueOf(f).Call(in), nil
}
// Maps dependencies in the Type map to each field in the struct
// that is tagged with 'inject'.
// Returns an error if the injection fails.
func (inj *injector) Apply(val interface{}) error {
v := reflect.ValueOf(val)
for v.Kind() == reflect.Ptr {
v = v.Elem()
}
if v.Kind() != reflect.Struct {
return nil // Should not panic here ?
}
t := v.Type()
for i := 0; i < v.NumField(); i++ {
f := v.Field(i)
structField := t.Field(i)
if f.CanSet() && structField.Tag == "inject" {
ft := f.Type()
v := inj.Get(ft)
if !v.IsValid() {
return fmt.Errorf("Value not found for type %v", ft)
}
f.Set(v)
}
}
return nil
}
// Maps the concrete value of val to its dynamic type using reflect.TypeOf,
// It returns the TypeMapper registered in.
func (i *injector) Map(val interface{}) TypeMapper {
i.values[reflect.TypeOf(val)] = reflect.ValueOf(val)
return i
}
func (i *injector) MapTo(val interface{}, ifacePtr interface{}) TypeMapper {
i.values[InterfaceOf(ifacePtr)] = reflect.ValueOf(val)
return i
}
// Maps the given reflect.Type to the given reflect.Value and returns
// the Typemapper the mapping has been registered in.
func (i *injector) Set(typ reflect.Type, val reflect.Value) TypeMapper {
i.values[typ] = val
return i
}
func (i *injector) Get(t reflect.Type) reflect.Value {
val := i.values[t]
if !val.IsValid() && i.parent != nil {
val = i.parent.Get(t)
}
return val
}
func (i *injector) SetParent(parent Injector) {
i.parent = parent
}

View File

@@ -0,0 +1,142 @@
package inject_test
import (
"github.com/codegangsta/inject"
"reflect"
"testing"
)
type SpecialString interface {
}
type TestStruct struct {
Dep1 string `inject`
Dep2 SpecialString `inject`
Dep3 string
}
/* Test Helpers */
func expect(t *testing.T, a interface{}, b interface{}) {
if a != b {
t.Errorf("Expected %v (type %v) - Got %v (type %v)", b, reflect.TypeOf(b), a, reflect.TypeOf(a))
}
}
func refute(t *testing.T, a interface{}, b interface{}) {
if a == b {
t.Errorf("Did not expect %v (type %v) - Got %v (type %v)", b, reflect.TypeOf(b), a, reflect.TypeOf(a))
}
}
func Test_InjectorInvoke(t *testing.T) {
injector := inject.New()
expect(t, injector == nil, false)
dep := "some dependency"
injector.Map(dep)
dep2 := "another dep"
injector.MapTo(dep2, (*SpecialString)(nil))
dep3 := make(chan *SpecialString)
dep4 := make(chan *SpecialString)
typRecv := reflect.ChanOf(reflect.RecvDir, reflect.TypeOf(dep3).Elem())
typSend := reflect.ChanOf(reflect.SendDir, reflect.TypeOf(dep4).Elem())
injector.Set(typRecv, reflect.ValueOf(dep3))
injector.Set(typSend, reflect.ValueOf(dep4))
_, err := injector.Invoke(func(d1 string, d2 SpecialString, d3 <-chan *SpecialString, d4 chan<- *SpecialString) {
expect(t, d1, dep)
expect(t, d2, dep2)
expect(t, reflect.TypeOf(d3).Elem(), reflect.TypeOf(dep3).Elem())
expect(t, reflect.TypeOf(d4).Elem(), reflect.TypeOf(dep4).Elem())
expect(t, reflect.TypeOf(d3).ChanDir(), reflect.RecvDir)
expect(t, reflect.TypeOf(d4).ChanDir(), reflect.SendDir)
})
expect(t, err, nil)
}
func Test_InjectorInvokeReturnValues(t *testing.T) {
injector := inject.New()
expect(t, injector == nil, false)
dep := "some dependency"
injector.Map(dep)
dep2 := "another dep"
injector.MapTo(dep2, (*SpecialString)(nil))
result, err := injector.Invoke(func(d1 string, d2 SpecialString) string {
expect(t, d1, dep)
expect(t, d2, dep2)
return "Hello world"
})
expect(t, result[0].String(), "Hello world")
expect(t, err, nil)
}
func Test_InjectorApply(t *testing.T) {
injector := inject.New()
injector.Map("a dep").MapTo("another dep", (*SpecialString)(nil))
s := TestStruct{}
err := injector.Apply(&s)
expect(t, err, nil)
expect(t, s.Dep1, "a dep")
expect(t, s.Dep2, "another dep")
}
func Test_InterfaceOf(t *testing.T) {
iType := inject.InterfaceOf((*SpecialString)(nil))
expect(t, iType.Kind(), reflect.Interface)
iType = inject.InterfaceOf((**SpecialString)(nil))
expect(t, iType.Kind(), reflect.Interface)
// Expecting nil
defer func() {
rec := recover()
refute(t, rec, nil)
}()
iType = inject.InterfaceOf((*testing.T)(nil))
}
func Test_InjectorSet(t *testing.T) {
injector := inject.New()
typ := reflect.TypeOf("string")
typSend := reflect.ChanOf(reflect.SendDir, typ)
typRecv := reflect.ChanOf(reflect.RecvDir, typ)
// instantiating unidirectional channels is not possible using reflect
// http://golang.org/src/pkg/reflect/value.go?s=60463:60504#L2064
chanRecv := reflect.MakeChan(reflect.ChanOf(reflect.BothDir, typ), 0)
chanSend := reflect.MakeChan(reflect.ChanOf(reflect.BothDir, typ), 0)
injector.Set(typSend, chanSend)
injector.Set(typRecv, chanRecv)
expect(t, injector.Get(typSend).IsValid(), true)
expect(t, injector.Get(typRecv).IsValid(), true)
expect(t, injector.Get(chanSend.Type()).IsValid(), false)
}
func Test_InjectorGet(t *testing.T) {
injector := inject.New()
injector.Map("some dependency")
expect(t, injector.Get(reflect.TypeOf("string")).IsValid(), true)
expect(t, injector.Get(reflect.TypeOf(11)).IsValid(), false)
}
func Test_InjectorSetParent(t *testing.T) {
injector := inject.New()
injector.MapTo("another dep", (*SpecialString)(nil))
injector2 := inject.New()
injector2.SetParent(injector)
expect(t, injector2.Get(inject.InterfaceOf((*SpecialString)(nil))).IsValid(), true)
}

View File

@@ -0,0 +1,23 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test

View File

@@ -0,0 +1,20 @@
The MIT License (MIT)
Copyright (c) 2013 Jeremy Saenz
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -0,0 +1,345 @@
# Martini [![wercker status](https://app.wercker.com/status/174bef7e3c999e103cacfe2770102266 "wercker status")](https://app.wercker.com/project/bykey/174bef7e3c999e103cacfe2770102266) [![GoDoc](https://godoc.org/github.com/codegangsta/martini?status.png)](http://godoc.org/github.com/codegangsta/martini)
Martini is a powerful package for quickly writing modular web applications/services in Golang.
Language Translations: [Simplified Chinese (zh_CN)](translations/README_zh_cn.md)
## Getting Started
After installing Go and setting up your [GOPATH](http://golang.org/doc/code.html#GOPATH), create your first `.go` file. We'll call it `server.go`.
~~~ go
package main
import "github.com/codegangsta/martini"
func main() {
m := martini.Classic()
m.Get("/", func() string {
return "Hello world!"
})
m.Run()
}
~~~
Then install the Martini package (**go 1.1** and greater is required):
~~~
go get github.com/codegangsta/martini
~~~
Then run your server:
~~~
go run server.go
~~~
You will now have a Martini webserver running on `localhost:3000`.
## Getting Help
Join the [Mailing list](https://groups.google.com/forum/#!forum/martini-go)
Watch the [Demo Video](http://martini.codegangsta.io/#demo)
## Features
* Extremely simple to use.
* Non-intrusive design.
* Plays nice with other Golang packages.
* Awesome path matching and routing.
* Modular design - Easy to add functionality, easy to rip stuff out.
* Lots of good handlers/middlewares to use.
* Great 'out of the box' feature set.
* **Fully compatible with the [http.HandlerFunc](http://godoc.org/net/http#HandlerFunc) interface.**
## More Middleware
For more middleware and functionality, check out the repositories in the [martini-contrib](https://github.com/martini-contrib) organization.
## Table of Contents
* [Classic Martini](#classic-martini)
* [Handlers](#handlers)
* [Routing](#routing)
* [Services](#services)
* [Serving Static Files](#serving-static-files)
* [Middleware Handlers](#middleware-handlers)
* [Next()](#next)
* [Martini Env](#martini-env)
* [FAQ](#faq)
## Classic Martini
To get up and running quickly, [martini.Classic()](http://godoc.org/github.com/codegangsta/martini#Classic) provides some reasonable defaults that work well for most web applications:
~~~ go
m := martini.Classic()
// ... middleware and routing goes here
m.Run()
~~~
Below is some of the functionality [martini.Classic()](http://godoc.org/github.com/codegangsta/martini#Classic) pulls in automatically:
* Request/Response Logging - [martini.Logger](http://godoc.org/github.com/codegangsta/martini#Logger)
* Panic Recovery - [martini.Recovery](http://godoc.org/github.com/codegangsta/martini#Recovery)
* Static File serving - [martini.Static](http://godoc.org/github.com/codegangsta/martini#Static)
* Routing - [martini.Router](http://godoc.org/github.com/codegangsta/martini#Router)
### Handlers
Handlers are the heart and soul of Martini. A handler is basically any kind of callable function:
~~~ go
m.Get("/", func() {
println("hello world")
})
~~~
#### Return Values
If a handler returns something, Martini will write the result to the current [http.ResponseWriter](http://godoc.org/net/http#ResponseWriter) as a string:
~~~ go
m.Get("/", func() string {
return "hello world" // HTTP 200 : "hello world"
})
~~~
You can also optionally return a status code:
~~~ go
m.Get("/", func() (int, string) {
return 418, "i'm a teapot" // HTTP 418 : "i'm a teapot"
})
~~~
#### Service Injection
Handlers are invoked via reflection. Martini makes use of *Dependency Injection* to resolve dependencies in a Handlers argument list. **This makes Martini completely compatible with golang's `http.HandlerFunc` interface.**
If you add an argument to your Handler, Martini will search its list of services and attempt to resolve the dependency via type assertion:
~~~ go
m.Get("/", func(res http.ResponseWriter, req *http.Request) { // res and req are injected by Martini
res.WriteHeader(200) // HTTP 200
})
~~~
The following services are included with [martini.Classic()](http://godoc.org/github.com/codegangsta/martini#Classic):
* [*log.Logger](http://godoc.org/log#Logger) - Global logger for Martini.
* [martini.Context](http://godoc.org/github.com/codegangsta/martini#Context) - http request context.
* [martini.Params](http://godoc.org/github.com/codegangsta/martini#Params) - `map[string]string` of named params found by route matching.
* [martini.Routes](http://godoc.org/github.com/codegangsta/martini#Routes) - Route helper service.
* [http.ResponseWriter](http://godoc.org/net/http/#ResponseWriter) - http Response writer interface.
* [*http.Request](http://godoc.org/net/http/#Request) - http Request.
### Routing
In Martini, a route is an HTTP method paired with a URL-matching pattern.
Each route can take one or more handler methods:
~~~ go
m.Get("/", func() {
// show something
})
m.Patch("/", func() {
// update something
})
m.Post("/", func() {
// create something
})
m.Put("/", func() {
// replace something
})
m.Delete("/", func() {
// destroy something
})
m.Options("/", func() {
// http options
})
m.NotFound(func() {
// handle 404
})
~~~
Routes are matched in the order they are defined. The first route that
matches the request is invoked.
Route patterns may include named parameters, accessible via the [martini.Params](http://godoc.org/github.com/codegangsta/martini#Params) service:
~~~ go
m.Get("/hello/:name", func(params martini.Params) string {
return "Hello " + params["name"]
})
~~~
Routes can be matched with regular expressions and globs as well:
~~~ go
m.Get("/hello/**", func(params martini.Params) string {
return "Hello " + params["_1"]
})
~~~
Route handlers can be stacked on top of each other, which is useful for things like authentication and authorization:
~~~ go
m.Get("/secret", authorize, func() {
// this will execute as long as authorize doesn't write a response
})
~~~
Route groups can be added too using the Group method.
~~~ go
m.Group("/books", func(r Router) {
r.Get("/:id", GetBooks)
r.Post("/new", NewBook)
r.Put("/update/:id", UpdateBook)
r.Delete("/delete/:id", DeleteBook)
})
~~~
Just like you can pass middlewares to a handler you can pass middlewares to groups.
~~~ go
m.Group("/books", func(r Router) {
r.Get("/:id", GetBooks)
r.Post("/new", NewBook)
r.Put("/update/:id", UpdateBook)
r.Delete("/delete/:id", DeleteBook)
}, MyMiddleware1, MyMiddleware2)
~~~
### Services
Services are objects that are available to be injected into a Handler's argument list. You can map a service on a *Global* or *Request* level.
#### Global Mapping
A Martini instance implements the inject.Injector interface, so mapping a service is easy:
~~~ go
db := &MyDatabase{}
m := martini.Classic()
m.Map(db) // the service will be available to all handlers as *MyDatabase
// ...
m.Run()
~~~
#### Request-Level Mapping
Mapping on the request level can be done in a handler via [martini.Context](http://godoc.org/github.com/codegangsta/martini#Context):
~~~ go
func MyCustomLoggerHandler(c martini.Context, req *http.Request) {
logger := &MyCustomLogger{req}
c.Map(logger) // mapped as *MyCustomLogger
}
~~~
#### Mapping values to Interfaces
One of the most powerful parts about services is the ability to map a service to an interface. For instance, if you wanted to override the [http.ResponseWriter](http://godoc.org/net/http#ResponseWriter) with an object that wrapped it and performed extra operations, you can write the following handler:
~~~ go
func WrapResponseWriter(res http.ResponseWriter, c martini.Context) {
rw := NewSpecialResponseWriter(res)
c.MapTo(rw, (*http.ResponseWriter)(nil)) // override ResponseWriter with our wrapper ResponseWriter
}
~~~
### Serving Static Files
A [martini.Classic()](http://godoc.org/github.com/codegangsta/martini#Classic) instance automatically serves static files from the "public" directory in the root of your server.
You can serve from more directories by adding more [martini.Static](http://godoc.org/github.com/codegangsta/martini#Static) handlers.
~~~ go
m.Use(martini.Static("assets")) // serve from the "assets" directory as well
~~~
## Middleware Handlers
Middleware Handlers sit between the incoming http request and the router. In essence they are no different than any other Handler in Martini. You can add a middleware handler to the stack like so:
~~~ go
m.Use(func() {
// do some middleware stuff
})
~~~
You can have full control over the middleware stack with the `Handlers` function. This will replace any handlers that have been previously set:
~~~ go
m.Handlers(
Middleware1,
Middleware2,
Middleware3,
)
~~~
Middleware Handlers work really well for things like logging, authorization, authentication, sessions, gzipping, error pages and any other operations that must happen before or after an http request:
~~~ go
// validate an api key
m.Use(func(res http.ResponseWriter, req *http.Request) {
if req.Header.Get("X-API-KEY") != "secret123" {
res.WriteHeader(http.StatusUnauthorized)
}
})
~~~
### Next()
[Context.Next()](http://godoc.org/github.com/codegangsta/martini#Context) is an optional function that Middleware Handlers can call to yield the until after the other Handlers have been executed. This works really well for any operations that must happen after an http request:
~~~ go
// log before and after a request
m.Use(func(c martini.Context, log *log.Logger){
log.Println("before a request")
c.Next()
log.Println("after a request")
})
~~~
## Martini Env
Some Martini handlers make use of the `martini.Env` global variable to provide special functionality for development environments vs production environments. It is reccomended that the `MARTINI_ENV=production` environment variable to be set when deploying a Martini server into a production environment.
## FAQ
### Where do I find middleware X?
Start by looking in the [martini-contrib](https://github.com/martini-contrib) projects. If it is not there feel free to contact a martini-contrib team member about adding a new repo to the organization.
* [auth](https://github.com/martini-contrib/auth) - Handlers for authentication.
* [binding](https://github.com/martini-contrib/binding) - Handler for mapping/validating a raw request into a structure.
* [gzip](https://github.com/martini-contrib/gzip) - Handler for adding gzip compress to requests
* [render](https://github.com/martini-contrib/render) - Handler that provides a service for easily rendering JSON and HTML templates.
* [acceptlang](https://github.com/martini-contrib/acceptlang) - Handler for parsing the `Accept-Language` HTTP header.
* [sessions](https://github.com/martini-contrib/sessions) - Handler that provides a Session service.
* [strip](https://github.com/martini-contrib/strip) - URL Prefix stripping.
* [method](https://github.com/martini-contrib/method) - HTTP method overriding via Header or form fields.
* [secure](https://github.com/martini-contrib/secure) - Implements a few quick security wins.
* [encoder](https://github.com/martini-contrib/encoder) - Encoder service for rendering data in several formats and content negotiation.
* [cors](https://github.com/martini-contrib/cors) - Handler that enables CORS support.
* [oauth2](https://github.com/martini-contrib/oauth2) - Handler that provides OAuth 2.0 login for Martini apps. Google Sign-in, Facebook Connect and Github login is supported.
### How do I integrate with existing servers?
A Martini instance implements `http.Handler`, so it can easily be used to serve subtrees
on existing Go servers. For example this is a working Martini app for Google App Engine:
~~~ go
package hello
import (
"net/http"
"github.com/codegangsta/martini"
)
func init() {
m := martini.Classic()
m.Get("/", func() string {
return "Hello world!"
})
http.Handle("/", m)
}
~~~
### How do I change the port/host?
Martini's `Run` function looks for the PORT and HOST environment variables and uses those. Otherwise Martini will default to localhost:3000.
To have more flexibility over port and host, use the `http.ListenAndServe` function instead.
~~~ go
m := martini.Classic()
// ...
log.Fatal(http.ListenAndServe(":8080", m))
~~~
### Live code reload?
[gin](https://github.com/codegangsta/gin) and [fresh](https://github.com/pilu/fresh) both live reload martini apps.
## Contributing
Martini is meant to be kept tiny and clean. Most contributions should end up in a repository in the [martini-contrib](https://github.com/martini-contrib) organization. If you do have a contribution for the core of Martini feel free to put up a Pull Request.
## About
Inspired by [express](https://github.com/visionmedia/express) and [sinatra](https://github.com/sinatra/sinatra)
Martini is obsessively designed by none other than the [Code Gangsta](http://codegangsta.io/)

View File

@@ -0,0 +1,25 @@
package martini
import (
"os"
)
// Envs
const (
Dev string = "development"
Prod string = "production"
Test string = "test"
)
// Env is the environment that Martini is executing in. The MARTINI_ENV is read on initialization to set this variable.
var Env = Dev
func setENV(e string) {
if len(e) > 0 {
Env = e
}
}
func init() {
setENV(os.Getenv("MARTINI_ENV"))
}

View File

@@ -0,0 +1,22 @@
package martini
import (
"testing"
)
func Test_SetENV(t *testing.T) {
tests := []struct {
in string
out string
}{
{"", "development"},
{"not_development", "not_development"},
}
for _, test := range tests {
setENV(test.in)
if Env != test.out {
expect(t, Env, test.out)
}
}
}

View File

@@ -0,0 +1,7 @@
// +build !go1.1
package martini
func MartiniDoesNotSupportGo1Point0() {
"Martini requires Go 1.1 or greater."
}

View File

@@ -0,0 +1,20 @@
package martini
import (
"log"
"net/http"
"time"
)
// Logger returns a middleware handler that logs the request as it goes in and the response as it goes out.
func Logger() Handler {
return func(res http.ResponseWriter, req *http.Request, c Context, log *log.Logger) {
start := time.Now()
log.Printf("Started %s %s", req.Method, req.URL.Path)
rw := res.(ResponseWriter)
c.Next()
log.Printf("Completed %v %s in %v\n", rw.Status(), http.StatusText(rw.Status()), time.Since(start))
}
}

View File

@@ -0,0 +1,31 @@
package martini
import (
"bytes"
"log"
"net/http"
"net/http/httptest"
"testing"
)
func Test_Logger(t *testing.T) {
buff := bytes.NewBufferString("")
recorder := httptest.NewRecorder()
m := New()
// replace log for testing
m.Map(log.New(buff, "[martini] ", 0))
m.Use(Logger())
m.Use(func(res http.ResponseWriter) {
res.WriteHeader(http.StatusNotFound)
})
req, err := http.NewRequest("GET", "http://localhost:3000/foobar", nil)
if err != nil {
t.Error(err)
}
m.ServeHTTP(recorder, req)
expect(t, recorder.Code, http.StatusNotFound)
refute(t, len(buff.String()), 0)
}

View File

@@ -0,0 +1,173 @@
// Package martini is a powerful package for quickly writing modular web applications/services in Golang.
//
// For a full guide visit http://github.com/codegangsta/martini
//
// package main
//
// import "github.com/codegangsta/martini"
//
// func main() {
// m := martini.Classic()
//
// m.Get("/", func() string {
// return "Hello world!"
// })
//
// m.Run()
// }
package martini
import (
"log"
"net/http"
"os"
"reflect"
"github.com/codegangsta/inject"
)
// Martini represents the top level web application. inject.Injector methods can be invoked to map services on a global level.
type Martini struct {
inject.Injector
handlers []Handler
action Handler
logger *log.Logger
}
// New creates a bare bones Martini instance. Use this method if you want to have full control over the middleware that is used.
func New() *Martini {
m := &Martini{Injector: inject.New(), action: func() {}, logger: log.New(os.Stdout, "[martini] ", 0)}
m.Map(m.logger)
m.Map(defaultReturnHandler())
return m
}
// Handlers sets the entire middleware stack with the given Handlers. This will clear any current middleware handlers.
// Will panic if any of the handlers is not a callable function
func (m *Martini) Handlers(handlers ...Handler) {
m.handlers = make([]Handler, 0)
for _, handler := range handlers {
m.Use(handler)
}
}
// Action sets the handler that will be called after all the middleware has been invoked. This is set to martini.Router in a martini.Classic().
func (m *Martini) Action(handler Handler) {
validateHandler(handler)
m.action = handler
}
// Use adds a middleware Handler to the stack. Will panic if the handler is not a callable func. Middleware Handlers are invoked in the order that they are added.
func (m *Martini) Use(handler Handler) {
validateHandler(handler)
m.handlers = append(m.handlers, handler)
}
// ServeHTTP is the HTTP Entry point for a Martini instance. Useful if you want to control your own HTTP server.
func (m *Martini) ServeHTTP(res http.ResponseWriter, req *http.Request) {
m.createContext(res, req).run()
}
// Run the http server. Listening on os.GetEnv("PORT") or 3000 by default.
func (m *Martini) Run() {
port := os.Getenv("PORT")
if port == "" {
port = "3000"
}
host := os.Getenv("HOST")
m.logger.Println("listening on " + host + ":" + port)
m.logger.Fatalln(http.ListenAndServe(host+":"+port, m))
}
func (m *Martini) createContext(res http.ResponseWriter, req *http.Request) *context {
c := &context{inject.New(), m.handlers, m.action, NewResponseWriter(res), 0}
c.SetParent(m)
c.MapTo(c, (*Context)(nil))
c.MapTo(c.rw, (*http.ResponseWriter)(nil))
c.Map(req)
return c
}
// ClassicMartini represents a Martini with some reasonable defaults. Embeds the router functions for convenience.
type ClassicMartini struct {
*Martini
Router
}
// Classic creates a classic Martini with some basic default middleware - martini.Logger, martini.Recovery and martini.Static.
// Classic also maps martini.Routes as a service.
func Classic() *ClassicMartini {
r := NewRouter()
m := New()
m.Use(Logger())
m.Use(Recovery())
m.Use(Static("public"))
m.MapTo(r, (*Routes)(nil))
m.Action(r.Handle)
return &ClassicMartini{m, r}
}
// Handler can be any callable function. Martini attempts to inject services into the handler's argument list.
// Martini will panic if an argument could not be fullfilled via dependency injection.
type Handler interface{}
func validateHandler(handler Handler) {
if reflect.TypeOf(handler).Kind() != reflect.Func {
panic("martini handler must be a callable func")
}
}
// Context represents a request context. Services can be mapped on the request level from this interface.
type Context interface {
inject.Injector
// Next is an optional function that Middleware Handlers can call to yield the until after
// the other Handlers have been executed. This works really well for any operations that must
// happen after an http request
Next()
// Written returns whether or not the response for this context has been written.
Written() bool
}
type context struct {
inject.Injector
handlers []Handler
action Handler
rw ResponseWriter
index int
}
func (c *context) handler() Handler {
if c.index < len(c.handlers) {
return c.handlers[c.index]
}
if c.index == len(c.handlers) {
return c.action
}
panic("invalid index for context handler")
}
func (c *context) Next() {
c.index += 1
c.run()
}
func (c *context) Written() bool {
return c.rw.Written()
}
func (c *context) run() {
for c.index <= len(c.handlers) {
_, err := c.Invoke(c.handler())
if err != nil {
panic(err)
}
c.index += 1
if c.Written() {
return
}
}
}

View File

@@ -0,0 +1,141 @@
package martini
import (
"net/http"
"net/http/httptest"
"reflect"
"testing"
)
/* Test Helpers */
func expect(t *testing.T, a interface{}, b interface{}) {
if a != b {
t.Errorf("Expected %v (type %v) - Got %v (type %v)", b, reflect.TypeOf(b), a, reflect.TypeOf(a))
}
}
func refute(t *testing.T, a interface{}, b interface{}) {
if a == b {
t.Errorf("Did not expect %v (type %v) - Got %v (type %v)", b, reflect.TypeOf(b), a, reflect.TypeOf(a))
}
}
func Test_New(t *testing.T) {
m := New()
if m == nil {
t.Error("martini.New() cannot return nil")
}
}
func Test_Martini_Run(t *testing.T) {
// just test that Run doesn't bomb
go New().Run()
}
func Test_Martini_ServeHTTP(t *testing.T) {
result := ""
response := httptest.NewRecorder()
m := New()
m.Use(func(c Context) {
result += "foo"
c.Next()
result += "ban"
})
m.Use(func(c Context) {
result += "bar"
c.Next()
result += "baz"
})
m.Action(func(res http.ResponseWriter, req *http.Request) {
result += "bat"
res.WriteHeader(http.StatusBadRequest)
})
m.ServeHTTP(response, (*http.Request)(nil))
expect(t, result, "foobarbatbazban")
expect(t, response.Code, http.StatusBadRequest)
}
func Test_Martini_Handlers(t *testing.T) {
result := ""
response := httptest.NewRecorder()
batman := func(c Context) {
result += "batman!"
}
m := New()
m.Use(func(c Context) {
result += "foo"
c.Next()
result += "ban"
})
m.Handlers(
batman,
batman,
batman,
)
m.Action(func(res http.ResponseWriter, req *http.Request) {
result += "bat"
res.WriteHeader(http.StatusBadRequest)
})
m.ServeHTTP(response, (*http.Request)(nil))
expect(t, result, "batman!batman!batman!bat")
expect(t, response.Code, http.StatusBadRequest)
}
func Test_Martini_EarlyWrite(t *testing.T) {
result := ""
response := httptest.NewRecorder()
m := New()
m.Use(func(res http.ResponseWriter) {
result += "foobar"
res.Write([]byte("Hello world"))
})
m.Use(func() {
result += "bat"
})
m.Action(func(res http.ResponseWriter) {
result += "baz"
res.WriteHeader(http.StatusBadRequest)
})
m.ServeHTTP(response, (*http.Request)(nil))
expect(t, result, "foobar")
expect(t, response.Code, http.StatusOK)
}
func Test_Martini_Written(t *testing.T) {
response := httptest.NewRecorder()
m := New()
m.Handlers(func(res http.ResponseWriter) {
res.WriteHeader(http.StatusOK)
})
ctx := m.createContext(response, (*http.Request)(nil))
expect(t, ctx.Written(), false)
ctx.run()
expect(t, ctx.Written(), true)
}
func Test_Martini_Basic_NoRace(t *testing.T) {
m := New()
handlers := []Handler{func() {}, func() {}}
// Ensure append will not realloc to trigger the race condition
m.handlers = handlers[:1]
req, _ := http.NewRequest("GET", "/", nil)
for i := 0; i < 2; i++ {
go func() {
response := httptest.NewRecorder()
m.ServeHTTP(response, req)
}()
}
}

View File

@@ -0,0 +1,142 @@
package martini
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"net/http"
"runtime"
"github.com/codegangsta/inject"
)
const (
panicHtml = `<html>
<head><title>PANIC: %s</title>
<style type="text/css">
html, body {
font-family: "Roboto", sans-serif;
color: #333333;
background-color: #ea5343;
margin: 0px;
}
h1 {
color: #d04526;
background-color: #ffffff;
padding: 20px;
border-bottom: 1px dashed #2b3848;
}
pre {
margin: 20px;
padding: 20px;
border: 2px solid #2b3848;
background-color: #ffffff;
}
</style>
</head><body>
<h1>PANIC</h1>
<pre style="font-weight: bold;">%s</pre>
<pre>%s</pre>
</body>
</html>`
)
var (
dunno = []byte("???")
centerDot = []byte("·")
dot = []byte(".")
slash = []byte("/")
)
// stack returns a nicely formated stack frame, skipping skip frames
func stack(skip int) []byte {
buf := new(bytes.Buffer) // the returned data
// As we loop, we open files and read them. These variables record the currently
// loaded file.
var lines [][]byte
var lastFile string
for i := skip; ; i++ { // Skip the expected number of frames
pc, file, line, ok := runtime.Caller(i)
if !ok {
break
}
// Print this much at least. If we can't find the source, it won't show.
fmt.Fprintf(buf, "%s:%d (0x%x)\n", file, line, pc)
if file != lastFile {
data, err := ioutil.ReadFile(file)
if err != nil {
continue
}
lines = bytes.Split(data, []byte{'\n'})
lastFile = file
}
fmt.Fprintf(buf, "\t%s: %s\n", function(pc), source(lines, line))
}
return buf.Bytes()
}
// source returns a space-trimmed slice of the n'th line.
func source(lines [][]byte, n int) []byte {
n-- // in stack trace, lines are 1-indexed but our array is 0-indexed
if n < 0 || n >= len(lines) {
return dunno
}
return bytes.TrimSpace(lines[n])
}
// function returns, if possible, the name of the function containing the PC.
func function(pc uintptr) []byte {
fn := runtime.FuncForPC(pc)
if fn == nil {
return dunno
}
name := []byte(fn.Name())
// The name includes the path name to the package, which is unnecessary
// since the file name is already included. Plus, it has center dots.
// That is, we see
// runtime/debug.*T·ptrmethod
// and want
// *T.ptrmethod
// Also the package path might contains dot (e.g. code.google.com/...),
// so first eliminate the path prefix
if lastslash := bytes.LastIndex(name, slash); lastslash >= 0 {
name = name[lastslash+1:]
}
if period := bytes.Index(name, dot); period >= 0 {
name = name[period+1:]
}
name = bytes.Replace(name, centerDot, dot, -1)
return name
}
// Recovery returns a middleware that recovers from any panics and writes a 500 if there was one.
// While Martini is in development mode, Recovery will also output the panic as HTML.
func Recovery() Handler {
return func(c Context, log *log.Logger) {
defer func() {
if err := recover(); err != nil {
stack := stack(3)
log.Printf("PANIC: %s\n%s", err, stack)
// Lookup the current responsewriter
val := c.Get(inject.InterfaceOf((*http.ResponseWriter)(nil)))
res := val.Interface().(http.ResponseWriter)
// respond with panic message while in development mode
var body []byte
if Env == Dev {
res.Header().Set("Content-Type", "text/html")
body = []byte(fmt.Sprintf(panicHtml, err, err, stack))
}
res.WriteHeader(http.StatusInternalServerError)
if nil != body {
res.Write(body)
}
}
}()
c.Next()
}
}

Some files were not shown because too many files have changed in this diff Show More