Compare commits

...

29 Commits

Author SHA1 Message Date
Michael
52bad03c8d Prepare release v1.5.2 2018-02-12 11:46:03 +01:00
Ludovic Fernandez
2fde3e8679 Continue refresh the configuration after a failure. 2018-02-12 09:28:03 +01:00
Michael
1e71f52b72 Explain how to write entrypoints definition in a compose file 2018-02-09 18:16:04 +01:00
NicoMen
2b1d2853cd Compress ACME certificates in KV stores. 2018-02-09 10:38:03 +01:00
SALLEYRON Julien
f07e8f58e6 Fix goroutine leaks in websocket 2018-02-08 08:24:03 +01:00
Ludovic Fernandez
7b19cb5631 Migrate to dep 0.4 2018-02-07 23:30:05 +01:00
djeeg
dbd173b4e4 Docs: regex+replacement hints for URL rewriting 2018-02-07 13:42:04 +01:00
Sune Keller
85cfd87c44 Clarify how setting a frontend priority works 2018-02-07 13:00:04 +01:00
Ludovic Fernandez
c867f48f11 Change go-bindata 2018-02-07 12:40:03 +01:00
Timo Reimann
514f9a7215 Reduce oxy round trip logs to debug. 2018-02-07 11:32:03 +01:00
Wilhelm Uschtrin
0b0380b690 Fix typo 2018-02-06 14:30:04 +01:00
Sonu Kumar
4d0c8c189a Fixed typo. 2018-02-06 14:04:03 +01:00
SALLEYRON Julien
afe4c307f9 Traefik still start when Let's encrypt is down 2018-02-05 18:20:04 +01:00
Michael
ce3a0fdd46 Fix dnsrr endpoint mode excluded when not using swarm LB 2018-02-05 11:34:03 +01:00
Ludovic Fernandez
203a5c5c48 Hide the pflag error when displaying help. 2018-02-05 09:12:03 +01:00
Ludovic Fernandez
be4aeaacde Add documentation about entry points definition with CLI. 2018-02-05 08:54:03 +01:00
Ludovic Fernandez
26dc2f4d61 doc: option not available in 1.5. 2018-01-30 17:16:03 +01:00
Alexandre Guédon
6aac78fc36 typo in "i"ngress annotations. 2018-01-29 16:48:05 +01:00
Ludovic Fernandez
f6c53f0450 Rebuild experimental image 2018-01-29 16:08:03 +01:00
NicoMen
54e09b98c7 Prepare release v1.5.1 2018-01-29 15:04:03 +01:00
Ludovic Fernandez
4eebaa1a80 Enhance file provider documentation. 2018-01-29 14:36:03 +01:00
NicoMen
cb9bf3ce68 Fix domain names in dynamic TLS configuration 2018-01-29 10:48:03 +01:00
SALLEYRON Julien
49a8cb76f5 Add note on redirect for ACME http challenge 2018-01-26 09:22:03 +01:00
SALLEYRON Julien
bf12306f17 Change gzipwriter receiver to implement CloseNotifier 2018-01-25 21:46:04 +01:00
SALLEYRON Julien
323b8237a0 Handle undefined entrypoint on ACME config and frontend config 2018-01-25 12:02:04 +01:00
Michael
039ccaf4f1 Fix tar gz source only on tags on travis 2018-01-24 16:10:04 +01:00
Michael
4afb39778a Fix add src.tar.gz in Træfik release 2018-01-24 10:40:04 +01:00
Ludovic Fernandez
751781a3b7 Increase integration tests timeout. 2018-01-24 09:14:02 +01:00
Ludovic Fernandez
f5d150c3b4 Fix the k8s redirection template. 2018-01-24 08:12:03 +01:00
289 changed files with 2472 additions and 35368 deletions

View File

@@ -24,9 +24,10 @@ before_deploy:
sudo -E apt-get -yq update;
sudo -E apt-get -yq --no-install-suggests --no-install-recommends --force-yes install docker-ce=${DOCKER_VERSION}*;
docker version;
make image;
if [ "$TRAVIS_TAG" ]; then
make -j${N_MAKE_JOBS} crossbinary-parallel;
make image-dirty;
tar cfz dist/traefik-${VERSION}.src.tar.gz --exclude-vcs --exclude dist .;
fi;
curl -sI https://github.com/containous/structor/releases/latest | grep -Fi Location | tr -d '\r' | sed "s/tag/download/g" | awk -F " " '{ print $2 "/structor_linux-amd64"}' | wget --output-document=$GOPATH/bin/structor -i -;
chmod +x $GOPATH/bin/structor;

View File

@@ -1,5 +1,39 @@
# Change Log
## [v1.5.2](https://github.com/containous/traefik/tree/v1.5.2) (2018-02-12)
[All Commits](https://github.com/containous/traefik/compare/v1.5.1...v1.5.2)
**Bug fixes:**
- **[acme,cluster,kv]** Compress ACME certificates in KV stores. ([#2814](https://github.com/containous/traefik/pull/2814) by [nmengin](https://github.com/nmengin))
- **[acme]** Traefik still start when Let's encrypt is down ([#2794](https://github.com/containous/traefik/pull/2794) by [Juliens](https://github.com/Juliens))
- **[docker]** Fix dnsrr endpoint mode excluded when not using swarm LB ([#2795](https://github.com/containous/traefik/pull/2795) by [mmatur](https://github.com/mmatur))
- **[eureka]** Continue refresh the configuration after a failure. ([#2838](https://github.com/containous/traefik/pull/2838) by [ldez](https://github.com/ldez))
- **[logs]** Reduce oxy round trip logs to debug. ([#2821](https://github.com/containous/traefik/pull/2821) by [timoreimann](https://github.com/timoreimann))
- **[websocket]** Fix goroutine leaks in websocket ([#2825](https://github.com/containous/traefik/pull/2825) by [Juliens](https://github.com/Juliens))
- Hide the pflag error when displaying help. ([#2800](https://github.com/containous/traefik/pull/2800) by [ldez](https://github.com/ldez))
**Documentation:**
- **[docker]** Explain how to write entrypoints definition in a compose file ([#2834](https://github.com/containous/traefik/pull/2834) by [mmatur](https://github.com/mmatur))
- **[docker]** Fix typo ([#2813](https://github.com/containous/traefik/pull/2813) by [uschtwill](https://github.com/uschtwill))
- **[k8s]** typo in "i"ngress annotations. ([#2780](https://github.com/containous/traefik/pull/2780) by [RRAlex](https://github.com/RRAlex))
- Clarify how setting a frontend priority works ([#2818](https://github.com/containous/traefik/pull/2818) by [sirlatrom](https://github.com/sirlatrom))
- Fixed typo. ([#2811](https://github.com/containous/traefik/pull/2811) by [sonus21](https://github.com/sonus21))
- Docs: regex+replacement hints for URL rewriting ([#2802](https://github.com/containous/traefik/pull/2802) by [djeeg](https://github.com/djeeg))
- Add documentation about entry points definition with CLI. ([#2798](https://github.com/containous/traefik/pull/2798) by [ldez](https://github.com/ldez))
## [v1.5.1](https://github.com/containous/traefik/tree/v1.5.1) (2018-01-29)
[All Commits](https://github.com/containous/traefik/compare/v1.5.0...v1.5.1)
**Bug fixes:**
- **[acme]** Handle undefined entrypoint on ACME config and frontend config ([#2756](https://github.com/containous/traefik/pull/2756) by [Juliens](https://github.com/Juliens))
- **[k8s]** Fix the k8s redirection template. ([#2748](https://github.com/containous/traefik/pull/2748) by [ldez](https://github.com/ldez))
- **[middleware]** Change gzipwriter receiver to implement CloseNotifier ([#2766](https://github.com/containous/traefik/pull/2766) by [Juliens](https://github.com/Juliens))
- **[tls]** Fix domain names in dynamic TLS configuration ([#2768](https://github.com/containous/traefik/pull/2768) by [nmengin](https://github.com/nmengin))
**Documentation:**
- **[acme]** Add note on redirect for ACME http challenge ([#2767](https://github.com/containous/traefik/pull/2767) by [Juliens](https://github.com/Juliens))
- **[file]** Enhance file provider documentation. ([#2777](https://github.com/containous/traefik/pull/2777) by [ldez](https://github.com/ldez))
## [v1.5.0](https://github.com/containous/traefik/tree/v1.5.0) (2018-01-23)
[All Commits](https://github.com/containous/traefik/compare/v1.4.0-rc1...v1.5.0)

View File

@@ -64,7 +64,7 @@ Once your environment is set up and the Træfik repository cloned you can build
cd ~/go/src/github.com/containous/traefik
# Get go-bindata. Please note, the ellipses are required
go get github.com/jteeuwen/go-bindata/...
go get github.com/containous/go-bindata/...
# Start build
@@ -87,9 +87,11 @@ If you happen to update the provider templates (in `/templates`), you need to ru
[dep](https://github.com/golang/dep) is not required for building; however, it is necessary to modify dependencies (i.e., add, update, or remove third-party packages)
You need to use [dep](https://github.com/golang/dep) >= O.4.1.
If you want to add a dependency, use `dep ensure -add` to have [dep](https://github.com/golang/dep) put it into the vendor folder and update the dep manifest/lock files (`Gopkg.toml` and `Gopkg.lock`, respectively).
A following `make prune-dep` run should be triggered to trim down the size of the vendor folder.
A following `make dep-prune` run should be triggered to trim down the size of the vendor folder.
The final result must be committed into VCS.
Here's a full example using dep to add a new dependency:

11
Gopkg.lock generated
View File

@@ -89,7 +89,7 @@
branch = "master"
name = "github.com/NYTimes/gziphandler"
packages = ["."]
revision = "47ca22a0aeea4c9ceddfb935d818d636d934c312"
revision = "289a3b81f5aedc99f8d6eb0f67827c142f1310d8"
[[projects]]
name = "github.com/Nvveen/Gotty"
@@ -223,8 +223,8 @@
[[projects]]
name = "github.com/containous/staert"
packages = ["."]
revision = "af517d5b70db9c4b0505e0144fcc62b054057d2a"
version = "v2.0.0"
revision = "68c67b32c3a986672d994d38127cd5c78d53eb26"
version = "v2.1.0"
[[projects]]
name = "github.com/containous/traefik-extra-service-fabric"
@@ -962,7 +962,8 @@
"mock",
"require"
]
revision = "4d4bfba8f1d1027c4fdbe371823030df51419987"
revision = "69483b4bd14f5845b5a1e55bca19e954e827f1d0"
version = "v1.1.4"
[[projects]]
branch = "master"
@@ -1027,7 +1028,7 @@
"roundrobin",
"utils"
]
revision = "fd6f71c694e2ab8b584c50b98ab4825027feb315"
revision = "af377749f48ff0ae9974b30ce12a816738b94558"
source = "https://github.com/containous/oxy.git"
[[projects]]

View File

@@ -64,7 +64,7 @@ ignored = ["github.com/sirupsen/logrus"]
[[constraint]]
name = "github.com/containous/staert"
version = "2.0.0"
version = "2.1.0"
[[constraint]]
name = "github.com/containous/traefik-extra-service-fabric"
@@ -190,3 +190,8 @@ ignored = ["github.com/sirupsen/logrus"]
# remove override on master
name = "github.com/coreos/bbolt"
revision = "32c383e75ce054674c53b5a07e55de85332aee14"
[prune]
non-go = true
go-tests = true
unused-packages = true

View File

@@ -127,7 +127,11 @@ fmt:
pull-images:
grep --no-filename -E '^\s+image:' ./integration/resources/compose/*.yml | awk '{print $$2}' | sort | uniq | xargs -P 6 -n 1 docker pull
prune-dep:
dep-ensure:
dep ensure -v
./script/prune-dep.sh
dep-prune:
./script/prune-dep.sh
help: ## this help

View File

@@ -295,6 +295,7 @@ func (a *ACME) leadershipListener(elected bool) error {
// CreateLocalConfig creates a tls.config using local ACME configuration
func (a *ACME) CreateLocalConfig(tlsConfig *tls.Config, certs *safe.Safe, checkOnDemandDomain func(domain string) bool) error {
defer a.runJobs()
err := a.init()
if err != nil {
return err
@@ -333,7 +334,9 @@ func (a *ACME) CreateLocalConfig(tlsConfig *tls.Config, certs *safe.Safe, checkO
a.client, err = a.buildACMEClient(account)
if err != nil {
return err
log.Errorf(`Failed to build ACME client: %s
Let's Encrypt functionality will be limited until traefik is restarted.`, err)
return nil
}
if needRegister {
@@ -374,7 +377,6 @@ func (a *ACME) CreateLocalConfig(tlsConfig *tls.Config, certs *safe.Safe, checkO
a.retrieveCertificates()
a.renewCertificates()
a.runJobs()
ticker := time.NewTicker(24 * time.Hour)
safe.Go(func() {

View File

@@ -441,9 +441,9 @@ var _templatesKubernetesTmpl = []byte(`[backends]{{range $backendName, $backend
{{if $frontend.Redirect}}
[frontends."{{$frontendName}}".redirect]
entryPoint = "{{$frontend.RedirectEntryPoint}}"
regex = "{{$frontend.RedirectRegex}}"
replacement = "{{$frontend.RedirectReplacement}}"
entryPoint = "{{$frontend.Redirect.EntryPoint}}"
regex = "{{$frontend.Redirect.Regex}}"
replacement = "{{$frontend.Redirect.Replacement}}"
{{end}}
{{ if $frontend.Headers }}

View File

@@ -4,22 +4,20 @@ RUN apk --update upgrade \
&& apk --no-cache --no-progress add git mercurial bash gcc musl-dev curl tar \
&& rm -rf /var/cache/apk/*
RUN go get github.com/jteeuwen/go-bindata/... \
RUN go get github.com/containous/go-bindata/... \
&& go get github.com/golang/lint/golint \
&& go get github.com/kisielk/errcheck \
&& go get github.com/client9/misspell/cmd/misspell
# Which docker version to test on
ARG DOCKER_VERSION=17.03.2
ARG DEP_VERSION=0.3.2
ARG DEP_VERSION=0.4.1
# Download dep binary to bin folder in $GOPATH
RUN mkdir -p /usr/local/bin \
&& curl -fsSL -o /usr/local/bin/dep https://github.com/golang/dep/releases/download/v${DEP_VERSION}/dep-linux-amd64 \
&& chmod +x /usr/local/bin/dep
# Download docker
RUN mkdir -p /usr/local/bin \
&& curl -fL https://download.docker.com/linux/static/stable/x86_64/docker-${DOCKER_VERSION}-ce.tgz \

View File

@@ -28,6 +28,7 @@ import (
"github.com/containous/traefik/types"
"github.com/containous/traefik/version"
"github.com/coreos/go-systemd/daemon"
"github.com/ogier/pflag"
)
func main() {
@@ -75,6 +76,9 @@ Complete documentation is available at https://traefik.io`,
}
if _, err := f.Parse(usedCmd); err != nil {
if err == pflag.ErrHelp {
os.Exit(0)
}
fmtlog.Printf("Error parsing command: %s\n", err)
os.Exit(-1)
}
@@ -142,6 +146,7 @@ func run(globalConfiguration *configuration.GlobalConfiguration, configFile stri
http.DefaultTransport.(*http.Transport).Proxy = http.ProxyFromEnvironment
globalConfiguration.SetEffectiveConfiguration(configFile)
globalConfiguration.ValidateConfiguration()
jsonConf, _ := json.Marshal(globalConfiguration)
log.Infof("Traefik version %s built on %s", version.Version, version.BuildDate)

View File

@@ -259,6 +259,19 @@ func (gc *GlobalConfiguration) SetEffectiveConfiguration(configFile string) {
}
}
// ValidateConfiguration validate that configuration is coherent
func (gc *GlobalConfiguration) ValidateConfiguration() {
if gc.ACME != nil {
if _, ok := gc.EntryPoints[gc.ACME.EntryPoint]; !ok {
log.Fatalf("Unknown entrypoint %q for ACME configuration", gc.ACME.EntryPoint)
} else {
if gc.EntryPoints[gc.ACME.EntryPoint].TLS == nil {
log.Fatalf("Entrypoint without TLS %q for ACME configuration", gc.ACME.EntryPoint)
}
}
}
}
// DefaultEntryPoints holds default entry points
type DefaultEntryPoints []string

View File

@@ -236,7 +236,7 @@ The following rules are both `Matchers` and `Modifiers`, so the `Matcher` portio
By default, routes will be sorted (in descending order) using rules length (to avoid path overlap):
`PathPrefix:/12345` will be matched before `PathPrefix:/1234` that will be matched before `PathPrefix:/1`.
You can customize priority by frontend:
You can customize priority by frontend. The priority value is added to the rule length during sorting:
```toml
[frontends]
@@ -254,7 +254,7 @@ You can customize priority by frontend:
rule = "PathPrefix:/toto"
```
Here, `frontend1` will be matched before `frontend2` (`10 > 5`).
Here, `frontend1` will be matched before `frontend2` (`(3 + 10 == 13) > (4 + 5 == 9)`).
#### Custom headers
@@ -612,6 +612,7 @@ Those data help us prioritize our developments and focus on what's more importan
### What ?
Once a day (the first call begins 10 minutes after the start of Træfik), we collect:
- the Træfik version
- a hash of the configuration
- an **anonymous version** of the static configuration:

View File

@@ -144,6 +144,18 @@ entryPoint = "https"
If `HTTP-01` challenge is used, `acme.httpChallenge.entryPoint` has to be defined and reachable by Let's Encrypt through the port 80.
These are Let's Encrypt limitations as described on the [community forum](https://community.letsencrypt.org/t/support-for-ports-other-than-80-and-443/3419/72).
### Let's Encrypt downtime
Let's Encrypt functionality will be limited until Træfik is restarted.
If Let's Encrypt is not reachable, these certificates will be used :
- ACME certificates already generated before downtime
- Expired ACME certificates
- Provided certificates
!!! note
Default Træfik certificate will be used instead of ACME certificates for new (sub)domains (which need Let's Encrypt challenge).
### `storage`
```toml
@@ -153,27 +165,13 @@ storage = "acme.json"
# ...
```
File or key used for certificates storage.
The `storage` option sets where are stored your ACME certificates.
**WARNING:** If you use Træfik in Docker, you have 2 options:
There are two kind of `storage` :
- a JSON file,
- a KV store entry.
- create a file on your host and mount it as a volume:
```toml
storage = "acme.json"
```
```bash
docker run -v "/my/host/acme.json:acme.json" traefik
```
- mount the folder containing the file as a volume
```toml
storage = "/etc/traefik/acme/acme.json"
```
```bash
docker run -v "/my/host/acme:/etc/traefik/acme" traefik
```
!!! note
!!! danger "DEPRECATED"
`storage` replaces `storageFile` which is deprecated.
!!! note
@@ -182,10 +180,53 @@ docker run -v "/my/host/acme:/etc/traefik/acme" traefik
- `storageFile` will contain the path to the `acme.json` file to migrate.
- `storage` will contain the key where the certificates will be stored.
#### Store data in a file
ACME certificates can be stored in a JSON file which with the `600` right mode.
There are two ways to store ACME certificates in a file from Docker:
- create a file on your host and mount it as a volume:
```toml
storage = "acme.json"
```
```bash
docker run -v "/my/host/acme.json:acme.json" traefik
```
- mount the folder containing the file as a volume
```toml
storage = "/etc/traefik/acme/acme.json"
```
```bash
docker run -v "/my/host/acme:/etc/traefik/acme" traefik
```
!!! warning
This file cannot be shared per many instances of Træfik at the same time.
If you have to use Træfik cluster mode, please use [a KV Store entry](/configuration/acme/#storage-kv-entry).
#### Store data in a KV store entry
ACME certificates can be stored in a KV Store entry.
```toml
storage = "traefik/acme/account"
```
**This kind of storage is mandatory in cluster mode.**
Because KV stores (like Consul) have limited entries size, the certificates list is compressed before to be set in a KV store entry.
!!! note
It's possible to store up to approximately 100 ACME certificates in Consul.
### `acme.httpChallenge`
Use `HTTP-01` challenge to generate/renew ACME certificates.
The redirection is fully compatible with the HTTP-01 challenge.
You can use redirection with HTTP-01 challenge without problem.
```toml
[acme]
# ...
@@ -273,7 +314,7 @@ Useful if internal networks block external DNS queries.
### `onDemand` (Deprecated)
!!! warning
!!! danger "DEPRECATED"
This option is deprecated.
```toml
@@ -350,12 +391,12 @@ Each domain & SANs will lead to a certificate request.
### `dnsProvider` (Deprecated)
!!! warning
!!! danger "DEPRECATED"
This option is deprecated.
Please refer to [DNS challenge provider section](/configuration/acme/#provider)
### `delayDontCheckDNS` (Deprecated)
!!! warning
!!! danger "DEPRECATED"
This option is deprecated.
Please refer to [DNS challenge delayBeforeCheck section](/configuration/acme/#delaybeforecheck)

View File

@@ -1,6 +1,140 @@
# File Backends
Like any other reverse proxy, Træfik can be configured with a file.
Træfik can be configured with a file.
## Reference
```toml
# Backends
[backends]
[backends.backend1]
[backends.backend1.servers]
[backends.backend1.servers.server0]
url = "http://10.10.10.1:80"
weight = 1
[backends.backend1.servers.server1]
url = "http://10.10.10.2:80"
weight = 2
# ...
[backends.backend1.circuitBreaker]
expression = "NetworkErrorRatio() > 0.5"
[backends.backend1.loadBalancer]
method = "drr"
[backends.backend1.loadBalancer.stickiness]
cookieName = "foobar"
[backends.backend1.maxConn]
amount = 10
extractorfunc = "request.host"
[backends.backend1.healthCheck]
path = "/health"
port = 88
interval = "30s"
[backends.backend2]
# ...
# Frontends
[frontends]
[frontends.frontend1]
entryPoints = ["http", "https"]
backend = "backend1"
passHostHeader = true
passTLSCert = true
priority = 42
basicAuth = [
"test:$apr1$H6uskkkW$IgXLP6ewTrSuBkTrqE8wj/",
"test2:$apr1$d9hr9HBB$4HxwgUir3HP4EsggP/QNo0",
]
whitelistSourceRange = ["10.42.0.0/16", "152.89.1.33/32", "afed:be44::/16"]
[frontends.frontend1.routes]
[frontends.frontend1.routes.route0]
rule = "Host:test.localhost"
[frontends.frontend1.routes.Route1]
rule = "Method:GET"
# ...
[frontends.frontend1.headers]
allowedHosts = ["foobar", "foobar"]
hostsProxyHeaders = ["foobar", "foobar"]
SSLRedirect = true
SSLTemporaryRedirect = true
SSLHost = "foobar"
STSSeconds = 42
STSIncludeSubdomains = true
STSPreload = true
forceSTSHeader = true
frameDeny = true
customFrameOptionsValue = "foobar"
contentTypeNosniff = true
browserXSSFilter = true
contentSecurityPolicy = "foobar"
publicKey = "foobar"
referrerPolicy = "foobar"
isDevelopment = true
[frontends.frontend1.headers.customRequestHeaders]
X-Foo-Bar-01 = "foobar"
X-Foo-Bar-02 = "foobar"
# ...
[frontends.frontend1.headers.customResponseHeaders]
X-Foo-Bar-03 = "foobar"
X-Foo-Bar-04 = "foobar"
# ...
[frontends.frontend1.headers.SSLProxyHeaders]
X-Foo-Bar-05 = "foobar"
X-Foo-Bar-06 = "foobar"
# ...
[frontends.frontend1.errors]
[frontends.frontend1.errors.errorPage0]
status = ["500-599"]
backend = "error"
query = "/{status}.html"
[frontends.frontend1.errors.errorPage1]
status = ["404", "403"]
backend = "error"
query = "/{status}.html"
# ...
[frontends.frontend1.ratelimit]
extractorfunc = "client.ip"
[frontends.frontend1.ratelimit.rateset.rateset1]
period = "10s"
average = 100
burst = 200
[frontends.frontend1.ratelimit.rateset.rateset2]
period = "3s"
average = 5
burst = 10
# ...
[frontends.frontend1.redirect]
entryPoint = "https"
regex = "^http://localhost/(.*)"
replacement = "http://mydomain/$1"
[frontends.frontend2]
# ...
# HTTPS certificates
[[tls]]
entryPoints = ["https"]
[tls.certificate]
certFile = "path/to/my.cert"
keyFile = "path/to/my.key"
[[tls]]
# ...
```
## Configuration mode
You have three choices:
@@ -12,7 +146,7 @@ To enable the file backend, you must either pass the `--file` option to the Træ
The configuration file allows managing both backends/frontends and HTTPS certificates (which are not [Let's Encrypt](https://letsencrypt.org) certificates generated through Træfik).
## Simple
### Simple
Add your configuration at the end of the global configuration file `traefik.toml`:
@@ -21,172 +155,93 @@ defaultEntryPoints = ["http", "https"]
[entryPoints]
[entryPoints.http]
address = ":80"
[entryPoints.http.redirect]
entryPoint = "https"
# ...
[entryPoints.https]
address = ":443"
[entryPoints.https.tls]
[[entryPoints.https.tls.certificates]]
certFile = "integration/fixtures/https/snitest.org.cert"
keyFile = "integration/fixtures/https/snitest.org.key"
# ...
[file]
# rules
[backends]
[backends.backend1]
[backends.backend1.circuitbreaker]
expression = "NetworkErrorRatio() > 0.5"
[backends.backend1.servers.server1]
url = "http://172.17.0.2:80"
weight = 10
[backends.backend1.servers.server2]
url = "http://172.17.0.3:80"
weight = 1
# ...
[backends.backend2]
[backends.backend2.maxconn]
amount = 10
extractorfunc = "request.host"
[backends.backend2.LoadBalancer]
method = "drr"
[backends.backend2.servers.server1]
url = "http://172.17.0.4:80"
weight = 1
[backends.backend2.servers.server2]
url = "http://172.17.0.5:80"
weight = 2
# ...
[frontends]
[frontends.frontend1]
backend = "backend2"
[frontends.frontend1.routes.test_1]
rule = "Host:test.localhost"
# ...
[frontends.frontend2]
backend = "backend1"
passHostHeader = true
priority = 10
# restrict access to this frontend to the specified list of IPv4/IPv6 CIDR Nets
# an unset or empty list allows all Source-IPs to access
# if one of the Net-Specifications are invalid, the whole list is invalid
# and allows all Source-IPs to access.
whitelistSourceRange = ["10.42.0.0/16", "152.89.1.33/32", "afed:be44::/16"]
entrypoints = ["https"] # overrides defaultEntryPoints
[frontends.frontend2.routes.test_1]
rule = "Host:{subdomain:[a-z]+}.localhost"
# ...
[frontends.frontend3]
entrypoints = ["http", "https"] # overrides defaultEntryPoints
backend = "backend2"
rule = "Path:/test"
# ...
# HTTPS certificate
[[tls]]
entryPoints = ["https"]
[tls.certificate]
certFile = "path/to/my.cert"
keyFile = "path/to/my.key"
# ...
[[tls]]
entryPoints = ["https"]
[tls.certificate]
certFile = "path/to/my/other.cert"
keyFile = "path/to/my/other.key"
# ...
```
!!! note
adding certificates directly to the entrypoint is still maintained but certificates declared in this way cannot be managed dynamically.
It's recommended to use the file provider to declare certificates.
## Rules in a Separate File
### Rules in a Separate File
Put your rules in a separate file, for example `rules.toml`:
```toml
# traefik.toml
defaultEntryPoints = ["http", "https"]
[entryPoints]
[entryPoints.http]
address = ":80"
[entryPoints.http.redirect]
entryPoint = "https"
# ...
[entryPoints.https]
address = ":443"
[entryPoints.https.tls]
# ...
[file]
filename = "rules.toml"
filename = "rules.toml"
```
```toml
# rules.toml
[backends]
[backends.backend1]
[backends.backend1.circuitbreaker]
expression = "NetworkErrorRatio() > 0.5"
[backends.backend1.servers.server1]
url = "http://172.17.0.2:80"
weight = 10
[backends.backend1.servers.server2]
url = "http://172.17.0.3:80"
weight = 1
# ...
[backends.backend2]
[backends.backend2.maxconn]
amount = 10
extractorfunc = "request.host"
[backends.backend2.LoadBalancer]
method = "drr"
[backends.backend2.servers.server1]
url = "http://172.17.0.4:80"
weight = 1
[backends.backend2.servers.server2]
url = "http://172.17.0.5:80"
weight = 2
# ...
[frontends]
[frontends.frontend1]
backend = "backend2"
[frontends.frontend1.routes.test_1]
rule = "Host:test.localhost"
# ...
[frontends.frontend2]
backend = "backend1"
passHostHeader = true
priority = 10
entrypoints = ["https"] # overrides defaultEntryPoints
[frontends.frontend2.routes.test_1]
rule = "Host:{subdomain:[a-z]+}.localhost"
# ...
[frontends.frontend3]
entrypoints = ["http", "https"] # overrides defaultEntryPoints
backend = "backend2"
rule = "Path:/test"
# ...
# HTTPS certificate
[[tls]]
entryPoints = ["https"]
[tls.certificate]
certFile = "path/to/my.cert"
keyFile = "path/to/my.key"
[[tls]]
entryPoints = ["https"]
[tls.certificate]
certFile = "path/to/my/other.cert"
keyFile = "path/to/my/other.key"
# ...
## Multiple `.toml` Files
[[tls]]
# ...
```
### Multiple `.toml` Files
You could have multiple `.toml` files in a directory (and recursively in its sub-directories):
```toml
[file]
directory = "/path/to/config/"
directory = "/path/to/config/"
```
If you want Træfik to watch file changes automatically, just add:
```toml
[file]
watch = true
watch = true
```

View File

@@ -150,7 +150,7 @@ The following security annotations are applicable on the Ingress object:
| `ingress.kubernetes.io/ssl-host:HOST` | This setting configures the hostname that redirects will be based on. Default is "", which is the same host as the request. |
| `ingress.kubernetes.io/ssl-proxy-headers:EXPR` | Header combinations that would signify a proper SSL Request (Such as `X-Forwarded-For:https`). Format: <code>HEADER:value&vert;&vert;HEADER2:value2</code> |
| `ingress.kubernetes.io/hsts-max-age:315360000` | Sets the max-age of the HSTS header. |
| `ngress.kubernetes.io/hsts-include-subdomains:true` | Adds the IncludeSubdomains section of the STS header. |
| `ingress.kubernetes.io/hsts-include-subdomains:true` | Adds the IncludeSubdomains section of the STS header. |
| `ingress.kubernetes.io/hsts-preload:true` | Adds the preload flag to the HSTS header. |
| `ingress.kubernetes.io/force-hsts:false` | Adds the STS header to non-SSL requests. |
| `ingress.kubernetes.io/frame-deny:false` | Adds the `X-Frame-Options` header with the value of `DENY`. |

View File

@@ -285,21 +285,17 @@ Multiple sets of rates can be added to each frontend, but the time periods must
```toml
[frontends]
[frontends.frontend1]
passHostHeader = true
entrypoints = ["http"]
backend = "backend1"
[frontends.frontend1.routes.test_1]
rule = "Path:/"
[frontends.frontend1.ratelimit]
extractorfunc = "client.ip"
[frontends.frontend1.ratelimit.rateset.rateset1]
period = "10s"
average = 100
burst = 200
[frontends.frontend1.ratelimit.rateset.rateset2]
period = "3s"
average = 5
burst = 10
# ...
[frontends.frontend1.ratelimit]
extractorfunc = "client.ip"
[frontends.frontend1.ratelimit.rateset.rateset1]
period = "10s"
average = 100
burst = 200
[frontends.frontend1.ratelimit.rateset.rateset2]
period = "3s"
average = 5
burst = 10
```
In the above example, frontend1 is configured to limit requests by the client's ip address.

View File

@@ -1,5 +1,120 @@
# Entry Points Definition
## Reference
### TOML
```toml
[entryPoints]
[entryPoints.http]
address = ":80"
whitelistSourceRange = ["10.42.0.0/16", "152.89.1.33/32", "afed:be44::/16"]
compress = true
[entryPoints.http.tls]
minVersion = "VersionTLS12"
cipherSuites = ["TLS_RSA_WITH_AES_256_GCM_SHA384"]
[[entryPoints.http.tls.certificates]]
certFile = "path/to/my.cert"
keyFile = "path/to/my.key"
[[entryPoints.http.tls.certificates]]
certFile = "path/to/other.cert"
keyFile = "path/to/other.key"
# ...
[entryPoints.http.tls.clientCA]
files = ["path/to/ca1.crt", "path/to/ca2.crt"]
optional = false
[entryPoints.http.redirect]
entryPoint = "https"
regex = "^http://localhost/(.*)"
replacement = "http://mydomain/$1"
[entryPoints.http.auth]
headerField = "X-WebAuth-User"
[entryPoints.http.auth.basic]
users = [
"test:$apr1$H6uskkkW$IgXLP6ewTrSuBkTrqE8wj/",
"test2:$apr1$d9hr9HBB$4HxwgUir3HP4EsggP/QNo0",
]
usersFile = "/path/to/.htpasswd"
[entryPoints.http.auth.digest]
users = [
"test:traefik:a2688e031edb4be6a3797f3882655c05",
"test2:traefik:518845800f9e2bfb1f1f740ec24f074e",
]
usersFile = "/path/to/.htdigest"
[entryPoints.http.auth.forward]
address = "https://authserver.com/auth"
trustForwardHeader = true
[entryPoints.http.auth.forward.tls]
ca = [ "path/to/local.crt"]
caOptional = true
cert = "path/to/foo.cert"
key = "path/to/foo.key"
insecureSkipVerify = true
[entryPoints.http.proxyProtocol]
insecure = true
trustedIPs = ["10.10.10.1", "10.10.10.2"]
[entryPoints.http.forwardedHeaders]
trustedIPs = ["10.10.10.1", "10.10.10.2"]
[entryPoints.https]
# ...
```
### CLI
For more information about the CLI, see the documentation about [Traefik command](/basics/#traefik).
```shell
--entryPoints='Name:http Address::80'
--entryPoints='Name:https Address::443 TLS'
```
!!! note
Whitespace is used as option separator and `,` is used as value separator for the list.
The names of the options are case-insensitive.
In compose file the entrypoint syntax is different:
```yaml
traefik:
image: traefik
command:
- --defaultentrypoints=powpow
- "--entryPoints=Name:powpow Address::42 Compress:true"
```
or
```yaml
traefik:
image: traefik
command: --defaultentrypoints=powpow --entryPoints='Name:powpow Address::42 Compress:true'
```
#### All available options:
```ini
Name:foo
Address::80
TLS:goo,gii
TLS
CA:car
CA.Optional:true
Redirect.EntryPoint:https
Redirect.Regex:http://localhost/(.*)
Redirect.Replacement:http://mydomain/$1
Compress:true
WhiteListSourceRange:10.42.0.0/16,152.89.1.33/32,afed:be44::/16
ProxyProtocol.TrustedIPs:192.168.0.1
ProxyProtocol.Insecure:tue
ForwardedHeaders.TrustedIPs:10.0.0.3/24,20.0.0.3/24
```
## Basic
```toml
# Entrypoints definition
#
@@ -51,7 +166,11 @@ To redirect an entrypoint rewriting the URL.
```
!!! note
Please note that `regex` and `replacement` do not have to be set in the `redirect` structure if an entrypoint is defined for the redirection (they will not be used in this case).
Please note that `regex` and `replacement` do not have to be set in the `redirect` structure if an `entrypoint` is defined for the redirection (they will not be used in this case).
Care should be taken when defining replacement expand variables: `$1x` is equivalent to `${1x}`, not `${1}x` (see [Regexp.Expand](https://golang.org/pkg/regexp/#Regexp.Expand)), so use `${1}` syntax.
Regular expressions and replacements can be tested using online tools such as [Go Playground](https://play.golang.org/p/mWU9p-wk2ru) or the [Regex101](https://regex101.com/r/58sIgx/2).
## TLS
@@ -71,7 +190,7 @@ Define an entrypoint with SNI support.
!!! note
If an empty TLS configuration is done, default self-signed certificates are generated.
### Dynamic Certificates
@@ -108,9 +227,8 @@ In the example below both `snitest.com` and `snitest.org` will require client ce
```
!!! note
The deprecated argument `ClientCAFiles` allows adding Client CA files which are mandatory.
If this parameter exists, the new ones are not checked.
The deprecated argument `ClientCAFiles` allows adding Client CA files which are mandatory.
If this parameter exists, the new ones are not checked.
## Authentication
@@ -162,7 +280,7 @@ Otherwise, the response from the auth server is returned.
# To enable forward auth on an entrypoint
[entryPoints.http.auth.forward]
address = "https://authserver.com/auth"
# Trust existing X-Forwarded-* headers.
# Useful with another reverse proxy in front of Traefik.
#
@@ -170,7 +288,7 @@ Otherwise, the response from the auth server is returned.
# Default: false
#
trustForwardHeader = true
# Enable forward auth TLS connection.
#
# Optional
@@ -234,7 +352,7 @@ Only IPs in `trustedIPs` will lead to remote client address replacement: you sho
!!! danger
When queuing Træfik behind another load-balancer, be sure to carefully configure Proxy Protocol on both sides.
Otherwise, it could introduce a security risk in your system by forging requests.
Otherwise, it could introduce a security risk in your system by forging requests.
```toml
[entryPoints]

View File

@@ -35,14 +35,14 @@ TL;DR:
```shell
$ traefik \
--entrypoints=Name:http Address::80 Redirect.EntryPoint:https \
--entrypoints=Name:https Address::443 TLS \
--entrypoints='Name:http Address::80 Redirect.EntryPoint:https' \
--entrypoints='Name:https Address::443 TLS' \
--defaultentrypoints=http,https
```
To listen to different ports, we need to create an entry point for each.
The CLI syntax is `--entrypoints=Name:a_name Address:an_ip_or_empty:a_port options`.
The CLI syntax is `--entrypoints='Name:a_name Address:an_ip_or_empty:a_port options'`.
If you want to redirect traffic from one entry point to another, it's the option `Redirect.EntryPoint:entrypoint_name`.
By default, we don't want to configure all our services to listen on http and https, we add a default entry point configuration: `--defaultentrypoints=http,https`.

View File

@@ -23,3 +23,11 @@ A Træfik cluster is based on a manager/worker model.
When starting, Træfik will elect a manager.
If this instance fails, another manager will be automatically elected.
## Træfik cluster and Let's Encrypt
**In cluster mode, ACME certificates have to be stored in [a KV Store entry](/configuration/acme/#storage-kv-entry).**
Thanks to the Træfik cluster mode algorithm (based on [the Raft Consensus Algorithm](https://raft.github.io/)), only one instance will contact Let's encrypt to solve the challenges.
The others instances will get ACME certificate from the KV Store entry.

View File

@@ -110,7 +110,7 @@ entryPoint = "http"
This is the minimum configuration required to do the following:
- Log `ERROR`-level messages (or more severe) to the console, but silence `DEBUG`-level messagse
- Log `ERROR`-level messages (or more severe) to the console, but silence `DEBUG`-level messages
- Check for new versions of Træfik periodically
- Create two entry points, namely an `HTTP` endpoint on port `80`, and an `HTTPS` endpoint on port `443` where all incoming traffic on port `80` will immediately get redirected to `HTTPS`.
- Enable the Docker configuration backend and listen for container events on the Docker unix socket we've mounted earlier. However, **new containers will not be exposed by Træfik by default, we'll get into this in a bit!**

View File

@@ -1,6 +1,6 @@
# Key-value store configuration
Both [static global configuration](/user-guide/kv-config/#static-configuration-in-key-value-store) and [dynamic](/user-guide/kv-config/#dynamic-configuration-in-key-value-store) configuration can be sorted in a Key-value store.
Both [static global configuration](/user-guide/kv-config/#static-configuration-in-key-value-store) and [dynamic](/user-guide/kv-config/#dynamic-configuration-in-key-value-store) configuration can be stored in a Key-value store.
This section explains how to launch Træfik using a configuration loaded from a Key-value store.

View File

@@ -29,6 +29,8 @@ services :
- bhsm
- bmysql
- brabbitmq
volumes:
- "./rate-limit-policies.yml:/go/src/github.com/letsencrypt/boulder/test/rate-limit-policies.yml:ro"
bhsm:
image: letsencrypt/boulder-tools:2016-11-02

View File

@@ -0,0 +1,42 @@
totalCertificates:
window: 1h
threshold: 100000
certificatesPerName:
window: 1h
threshold: 100000
overrides:
ratelimit.me: 1
lim.it: 0
# Hostnames used by the letsencrypt client integration test.
le.wtf: 10000
le1.wtf: 10000
le2.wtf: 10000
le3.wtf: 10000
nginx.wtf: 10000
good-caa-reserved.com: 10000
bad-caa-reserved.com: 10000
ecdsa.le.wtf: 10000
must-staple.le.wtf: 10000
registrationOverrides:
101: 1000
registrationsPerIP:
window: 1h
threshold: 100000
overrides:
127.0.0.1: 1000000
pendingAuthorizationsPerAccount:
window: 1h
threshold: 100000
certificatesPerFQDNSet:
window: 1h
threshold: 100000
overrides:
le.wtf: 10000
le1.wtf: 10000
le2.wtf: 10000
le3.wtf: 10000
le.wtf,le1.wtf: 10000
good-caa-reserved.com: 10000
nginx.wtf: 10000
ecdsa.le.wtf: 10000
must-staple.le.wtf: 10000

View File

@@ -73,6 +73,8 @@ services:
- bhsm
- bmysql
- brabbitmq
volumes:
- "./rate-limit-policies.yml:/go/src/github.com/letsencrypt/boulder/test/rate-limit-policies.yml:ro"
networks:
net:
ipv4_address: 10.0.1.3

View File

@@ -0,0 +1,42 @@
totalCertificates:
window: 1h
threshold: 100000
certificatesPerName:
window: 1h
threshold: 100000
overrides:
ratelimit.me: 1
lim.it: 0
# Hostnames used by the letsencrypt client integration test.
le.wtf: 10000
le1.wtf: 10000
le2.wtf: 10000
le3.wtf: 10000
nginx.wtf: 10000
good-caa-reserved.com: 10000
bad-caa-reserved.com: 10000
ecdsa.le.wtf: 10000
must-staple.le.wtf: 10000
registrationOverrides:
101: 1000
registrationsPerIP:
window: 1h
threshold: 100000
overrides:
127.0.0.1: 1000000
pendingAuthorizationsPerAccount:
window: 1h
threshold: 100000
certificatesPerFQDNSet:
window: 1h
threshold: 100000
overrides:
le.wtf: 10000
le1.wtf: 10000
le2.wtf: 10000
le3.wtf: 10000
le.wtf,le1.wtf: 10000
good-caa-reserved.com: 10000
nginx.wtf: 10000
ecdsa.le.wtf: 10000
must-staple.le.wtf: 10000

View File

@@ -19,8 +19,7 @@ caServer = "http://traefik.boulder.com:4000/directory"
entryPoint="http"
[web]
address = ":8080"
[api]
[docker]
endpoint = "unix:///var/run/docker.sock"

View File

@@ -142,6 +142,19 @@ func (s *AcmeSuite) TestOnHostRuleRetrieveAcmeCertificateWithDynamicWildcard(c *
s.retrieveAcmeCertificate(c, testCase)
}
// Test Let's encrypt down
func (s *AcmeSuite) TestNoValidLetsEncryptServer(c *check.C) {
cmd, display := s.traefikCmd(withConfigFile("fixtures/acme/wrong_acme.toml"))
defer display(c)
err := cmd.Start()
c.Assert(err, checker.IsNil)
defer cmd.Process.Kill()
// Expected traefik works
err = try.GetRequest("http://127.0.0.1:8080/api/providers", 10*time.Second, try.StatusCodeIs(http.StatusOK))
c.Assert(err, checker.IsNil)
}
// Doing an HTTPS request and test the response certificate
func (s *AcmeSuite) retrieveAcmeCertificate(c *check.C, testCase AcmeTestCase) {
file := s.adaptFile(c, testCase.traefikConfFilePath, struct {

View File

@@ -0,0 +1,34 @@
logLevel = "DEBUG"
defaultEntryPoints = ["http", "https"]
[api]
[entryPoints]
[entryPoints.http]
address = ":8081"
[entryPoints.https]
address = ":5001"
[entryPoints.https.tls]
[acme]
email = "test@traefik.io"
storage = "/dev/null"
entryPoint = "https"
OnHostRule = true
caServer = "http://wrongurl:4000/directory"
[file]
[backends]
[backends.backend]
[backends.backend.servers.server1]
url = "http://127.0.0.1:9010"
[frontends]
[frontends.frontend]
backend = "backend"
[frontends.frontend.routes.test]
rule = "Host:traefik.acme.wtf"

View File

@@ -777,19 +777,21 @@ func listServices(ctx context.Context, dockerClient client.APIClient) ([]dockerD
for _, service := range serviceList {
dockerData := parseService(service, networkMap)
if len(dockerData.NetworkSettings.Networks) > 0 {
useSwarmLB, _ := strconv.ParseBool(getIsBackendLBSwarm(dockerData))
if useSwarmLB {
useSwarmLB, _ := strconv.ParseBool(getIsBackendLBSwarm(dockerData))
if useSwarmLB {
if len(dockerData.NetworkSettings.Networks) > 0 {
dockerDataList = append(dockerDataList, dockerData)
} else {
isGlobalSvc := service.Spec.Mode.Global != nil
dockerDataListTasks, err = listTasks(ctx, dockerClient, service.ID, dockerData, networkMap, isGlobalSvc)
for _, dockerDataTask := range dockerDataListTasks {
dockerDataList = append(dockerDataList, dockerDataTask)
}
}
} else {
isGlobalSvc := service.Spec.Mode.Global != nil
dockerDataListTasks, err = listTasks(ctx, dockerClient, service.ID, dockerData, networkMap, isGlobalSvc)
for _, dockerDataTask := range dockerDataListTasks {
dockerDataList = append(dockerDataList, dockerDataTask)
}
}
}
return dockerDataList, err
@@ -805,7 +807,10 @@ func parseService(service swarmtypes.Service, networkMap map[string]*dockertypes
if service.Spec.EndpointSpec != nil {
if service.Spec.EndpointSpec.Mode == swarmtypes.ResolutionModeDNSRR {
log.Warnf("Ignored endpoint-mode not supported, service name: %s", service.Spec.Annotations.Name)
useSwarmLB, _ := strconv.ParseBool(getIsBackendLBSwarm(dockerData))
if useSwarmLB {
log.Warnf("Ignored %s endpoint-mode not supported, service name: %s. Fallback to Træfik load balancing", swarmtypes.ResolutionModeDNSRR, service.Spec.Annotations.Name)
}
} else if service.Spec.EndpointSpec.Mode == swarmtypes.ResolutionModeVIP {
dockerData.NetworkSettings.Networks = make(map[string]*networkData)
for _, virtualIP := range service.Endpoint.VirtualIPs {

View File

@@ -773,6 +773,7 @@ type fakeServicesClient struct {
dockerVersion string
networks []dockertypes.NetworkResource
services []swarm.Service
tasks []swarm.Task
err error
}
@@ -788,10 +789,15 @@ func (c *fakeServicesClient) NetworkList(ctx context.Context, options dockertype
return c.networks, c.err
}
func (c *fakeServicesClient) TaskList(ctx context.Context, options dockertypes.TaskListOptions) ([]swarm.Task, error) {
return c.tasks, c.err
}
func TestListServices(t *testing.T) {
testCases := []struct {
desc string
services []swarm.Service
tasks []swarm.Task
dockerVersion string
networks []dockertypes.NetworkResource
expectedServices []string
@@ -813,7 +819,8 @@ func TestListServices(t *testing.T) {
swarmService(
serviceName("service2"),
serviceLabels(map[string]string{
labelDockerNetwork: "barnet",
labelDockerNetwork: "barnet",
labelBackendLoadBalancerSwarm: "true",
}),
withEndpointSpec(modeDNSSR)),
},
@@ -838,7 +845,8 @@ func TestListServices(t *testing.T) {
swarmService(
serviceName("service2"),
serviceLabels(map[string]string{
labelDockerNetwork: "barnet",
labelDockerNetwork: "barnet",
labelBackendLoadBalancerSwarm: "true",
}),
withEndpointSpec(modeDNSSR)),
},
@@ -867,13 +875,65 @@ func TestListServices(t *testing.T) {
"service1",
},
},
{
desc: "Should return service1 and service2",
services: []swarm.Service{
swarmService(
serviceName("service1"),
serviceLabels(map[string]string{
labelDockerNetwork: "barnet",
}),
withEndpointSpec(modeVIP),
withEndpoint(
virtualIP("yk6l57rfwizjzxxzftn4amaot", "10.11.12.13/24"),
virtualIP("2", "10.11.12.99/24"),
)),
swarmService(
serviceName("service2"),
serviceLabels(map[string]string{
labelDockerNetwork: "barnet",
}),
withEndpointSpec(modeDNSSR)),
},
tasks: []swarm.Task{
swarmTask("id1", taskStatus(taskState(swarm.TaskStateRunning))),
swarmTask("id2", taskStatus(taskState(swarm.TaskStateRunning))),
},
dockerVersion: "1.30",
networks: []dockertypes.NetworkResource{
{
Name: "network_name",
ID: "yk6l57rfwizjzxxzftn4amaot",
Created: time.Now(),
Scope: "swarm",
Driver: "overlay",
EnableIPv6: false,
Internal: true,
Ingress: false,
ConfigOnly: false,
Options: map[string]string{
"com.docker.network.driver.overlay.vxlanid_list": "4098",
"com.docker.network.enable_ipv6": "false",
},
Labels: map[string]string{
"com.docker.stack.namespace": "test",
},
},
},
expectedServices: []string{
"service1.0",
"service1.0",
"service2.0",
"service2.0",
},
},
}
for caseID, test := range testCases {
test := test
t.Run(strconv.Itoa(caseID), func(t *testing.T) {
t.Parallel()
dockerClient := &fakeServicesClient{services: test.services, dockerVersion: test.dockerVersion, networks: test.networks}
dockerClient := &fakeServicesClient{services: test.services, tasks: test.tasks, dockerVersion: test.dockerVersion, networks: test.networks}
serviceDockerData, _ := listServices(context.Background(), dockerClient)
assert.Equal(t, len(test.expectedServices), len(serviceDockerData))

View File

@@ -55,12 +55,12 @@ func (p *Provider) Provide(configurationChan chan<- types.ConfigMessage, pool *s
safe.Go(func() {
for t := range ticker.C {
log.Debug("Refreshing Provider " + t.String())
log.Debugf("Refreshing Provider %s", t.String())
configuration, err := p.buildConfiguration()
if err != nil {
log.Errorf("Failed to refresh Provider configuration, error: %s", err)
return
continue
}
configurationChan <- types.ConfigMessage{

View File

@@ -28,14 +28,6 @@ echo $VERSION | git commit --file -
echo $VERSION | git tag -a $VERSION --file -
git push -q --follow-tags -u origin master > /dev/null 2>&1
# create docker image emilevauge/traefik (compatibility)
echo "Updating docker emilevauge/traefik image..."
docker login -u $DOCKER_USER -p $DOCKER_PASS
docker tag containous/traefik emilevauge/traefik:latest
docker push emilevauge/traefik:latest
docker tag emilevauge/traefik:latest emilevauge/traefik:${VERSION}
docker push emilevauge/traefik:${VERSION}
cd ..
rm -Rf traefik-library-image/

View File

@@ -6,8 +6,6 @@ set -o nounset
echo "Prune dependencies"
dep prune
find vendor -name '*_test.go' -exec rm {} \;
find vendor -type f \( ! -iname 'licen[cs]e*' \
@@ -32,3 +30,28 @@ find vendor -type f \( ! -iname 'licen[cs]e*' \
-a ! -iname '*.hpp' \
-a ! -iname '*.hxx' \
-a ! -iname '*.s' \) -exec rm -f {} +
find -type d \( -iname '*Godeps*' \) -exec rm -rf {} +
find vendor -type l \( ! -iname 'licen[cs]e*' \
-a ! -iname '*notice*' \
-a ! -iname '*patent*' \
-a ! -iname '*copying*' \
-a ! -iname '*unlicense*' \
-a ! -iname '*copyright*' \
-a ! -iname '*copyleft*' \
-a ! -iname '*legal*' \
-a ! -iname 'disclaimer*' \
-a ! -iname 'third-party*' \
-a ! -iname 'thirdparty*' \
-a ! -iname '*.go' \
-a ! -iname '*.c' \
-a ! -iname '*.S' \
-a ! -iname '*.cc' \
-a ! -iname '*.cpp' \
-a ! -iname '*.cxx' \
-a ! -iname '*.h' \
-a ! -iname '*.hh' \
-a ! -iname '*.hpp' \
-a ! -iname '*.hxx' \
-a ! -iname '*.s' \) -exec rm -f {} +

View File

@@ -4,7 +4,7 @@ set -e
export SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
export DEST=.
TESTFLAGS="${TESTFLAGS} -test.timeout=9m -check.v"
TESTFLAGS="${TESTFLAGS} -test.timeout=20m -check.v"
if [ -n "$VERBOSE" ]; then
TESTFLAGS="${TESTFLAGS} -v"

View File

@@ -673,31 +673,27 @@ func (s *Server) createTLSConfig(entryPointName string, tlsOption *traefikTls.TL
}
if s.globalConfiguration.ACME != nil {
if _, ok := s.serverEntryPoints[s.globalConfiguration.ACME.EntryPoint]; ok {
if entryPointName == s.globalConfiguration.ACME.EntryPoint {
checkOnDemandDomain := func(domain string) bool {
routeMatch := &mux.RouteMatch{}
router := router.GetHandler()
match := router.Match(&http.Request{URL: &url.URL{}, Host: domain}, routeMatch)
if match && routeMatch.Route != nil {
return true
}
return false
if entryPointName == s.globalConfiguration.ACME.EntryPoint {
checkOnDemandDomain := func(domain string) bool {
routeMatch := &mux.RouteMatch{}
router := router.GetHandler()
match := router.Match(&http.Request{URL: &url.URL{}, Host: domain}, routeMatch)
if match && routeMatch.Route != nil {
return true
}
if s.leadership == nil {
err := s.globalConfiguration.ACME.CreateLocalConfig(config, &s.serverEntryPoints[entryPointName].certs, checkOnDemandDomain)
if err != nil {
return nil, err
}
} else {
err := s.globalConfiguration.ACME.CreateClusterConfig(s.leadership, config, &s.serverEntryPoints[entryPointName].certs, checkOnDemandDomain)
if err != nil {
return nil, err
}
return false
}
if s.leadership == nil {
err := s.globalConfiguration.ACME.CreateLocalConfig(config, &s.serverEntryPoints[entryPointName].certs, checkOnDemandDomain)
if err != nil {
return nil, err
}
} else {
err := s.globalConfiguration.ACME.CreateClusterConfig(s.leadership, config, &s.serverEntryPoints[entryPointName].certs, checkOnDemandDomain)
if err != nil {
return nil, err
}
}
} else {
return nil, errors.New("Unknown entrypoint " + s.globalConfiguration.ACME.EntryPoint + " for ACME configuration")
}
} else {
config.GetCertificate = s.serverEntryPoints[entryPointName].getCertificate
@@ -925,23 +921,23 @@ func (s *Server) loadConfig(configurations types.Configurations, globalConfigura
log.Debugf("Creating frontend %s", frontendName)
var frontendEntryPoints []string
for _, entryPointName := range frontend.EntryPoints {
if _, ok := serverEntryPoints[entryPointName]; !ok {
log.Errorf("Undefined entrypoint '%s' for frontend %s", entryPointName, frontendName)
} else {
frontendEntryPoints = append(frontendEntryPoints, entryPointName)
}
}
frontend.EntryPoints = frontendEntryPoints
if len(frontend.EntryPoints) == 0 {
log.Errorf("No entrypoint defined for frontend %s, defaultEntryPoints:%s", frontendName, globalConfiguration.DefaultEntryPoints)
log.Errorf("No entrypoint defined for frontend %s", frontendName)
log.Errorf("Skipping frontend %s...", frontendName)
continue frontend
}
var failedEntrypoints int
for _, entryPointName := range frontend.EntryPoints {
log.Debugf("Wiring frontend %s to entryPoint %s", frontendName, entryPointName)
if _, ok := serverEntryPoints[entryPointName]; !ok {
log.Errorf("Undefined entrypoint '%s' for frontend %s", entryPointName, frontendName)
failedEntrypoints++
if failedEntrypoints == len(frontend.EntryPoints) {
log.Errorf("Skipping frontend %s...", frontendName)
continue frontend
}
continue
}
newServerRoute := &serverRoute{route: serverEntryPoints[entryPointName].httpRouter.GetHandler().NewRoute().Name(frontendName)}
for routeName, route := range frontend.Routes {

View File

@@ -37,9 +37,9 @@
{{if $frontend.Redirect}}
[frontends."{{$frontendName}}".redirect]
entryPoint = "{{$frontend.RedirectEntryPoint}}"
regex = "{{$frontend.RedirectRegex}}"
replacement = "{{$frontend.RedirectReplacement}}"
entryPoint = "{{$frontend.Redirect.EntryPoint}}"
regex = "{{$frontend.Redirect.Regex}}"
replacement = "{{$frontend.Redirect.Replacement}}"
{{end}}
{{ if $frontend.Headers }}

View File

@@ -144,7 +144,12 @@ func (c *Certificate) AppendCertificates(certs map[string]*DomainsCertificates,
certKey := parsedCert.Subject.CommonName
if parsedCert.DNSNames != nil {
sort.Strings(parsedCert.DNSNames)
certKey += fmt.Sprintf("%s,%s", parsedCert.Subject.CommonName, strings.Join(parsedCert.DNSNames, ","))
for _, dnsName := range parsedCert.DNSNames {
if dnsName != parsedCert.Subject.CommonName {
certKey += fmt.Sprintf(",%s", dnsName)
}
}
}
certExists := false

20
vendor/cloud.google.com/go/cloud.go generated vendored
View File

@@ -1,20 +0,0 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package cloud is the root of the packages used to access Google Cloud
// Services. See https://godoc.org/cloud.google.com/go for a full list
// of sub-packages.
//
// This package documents how to authorize and authenticate the sub packages.
package cloud // import "cloud.google.com/go"

View File

@@ -0,0 +1,14 @@
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHAT THE FUCK YOU WANT TO.

View File

@@ -0,0 +1,14 @@
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHAT THE FUCK YOU WANT TO.

14
vendor/github.com/BurntSushi/toml/cmd/tomlv/COPYING generated vendored Normal file
View File

@@ -0,0 +1,14 @@
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
Version 2, December 2004
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
Everyone is permitted to copy and distribute verbatim or modified
copies of this license document, and changing it is allowed as long
as the name is changed.
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. You just DO WHAT THE FUCK YOU WANT TO.

View File

@@ -1,13 +0,0 @@
package main
import (
"os"
"github.com/JamesClonk/vultr/cmd"
)
func main() {
cli := cmd.NewCLI()
cli.RegisterCommands()
cli.Run(os.Args)
}

View File

@@ -0,0 +1,27 @@
Copyright (c) 2012 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -88,7 +88,7 @@ type GzipResponseWriterWithCloseNotify struct {
*GzipResponseWriter
}
func (w *GzipResponseWriterWithCloseNotify) CloseNotify() <-chan bool {
func (w GzipResponseWriterWithCloseNotify) CloseNotify() <-chan bool {
return w.ResponseWriter.(http.CloseNotifier).CloseNotify()
}

View File

@@ -1,7 +0,0 @@
// Package sdk is the official AWS SDK for the Go programming language.
//
// See our Developer Guide for information for on getting started and using
// the SDK.
//
// https://github.com/aws/aws-sdk-go/wiki
package sdk

View File

@@ -1,5 +0,0 @@
// Package service contains automatically generated AWS clients.
package service
//go:generate go run -tags codegen ../private/model/cli/gen-api/main.go -path=../service ../models/apis/*/*/api-2.json
//go:generate gofmt -s -w ../service

View File

@@ -1,10 +1,14 @@
package staert
import (
"bytes"
"compress/gzip"
"encoding"
"encoding/base64"
"errors"
"fmt"
"io"
"io/ioutil"
"reflect"
"sort"
"strconv"
@@ -155,16 +159,32 @@ func decodeHook(fromType reflect.Type, toType reflect.Type, data interface{}) (i
return dataOutput, nil
} else if fromType.Kind() == reflect.String {
b, err := base64.StdEncoding.DecodeString(data.(string))
if err != nil {
return nil, err
}
return b, nil
return readCompressedData(data.(string), gzipReader, base64Reader)
}
}
return data, nil
}
func readCompressedData(data string, fs ...func(io.Reader) (io.Reader, error)) ([]byte, error) {
var err error
for _, f := range fs {
var reader io.Reader
reader, err = f(bytes.NewBufferString(data))
if err == nil {
return ioutil.ReadAll(reader)
}
}
return nil, err
}
func base64Reader(r io.Reader) (io.Reader, error) {
return base64.NewDecoder(base64.StdEncoding, r), nil
}
func gzipReader(r io.Reader) (io.Reader, error) {
return gzip.NewReader(r)
}
// StoreConfig stores the config into the KV Store
func (kv *KvSource) StoreConfig(config interface{}) error {
kvMap := map[string]string{}
@@ -263,7 +283,11 @@ func collateKvRecursive(objValue reflect.Value, kv map[string]string, key string
case reflect.Array, reflect.Slice:
// Byte slices get special treatment
if objValue.Type().Elem().Kind() == reflect.Uint8 {
kv[name] = base64.StdEncoding.EncodeToString(objValue.Bytes())
compressedData, err := writeCompressedData(objValue.Bytes())
if err != nil {
return err
}
kv[name] = compressedData
} else {
for i := 0; i < objValue.Len(); i++ {
name = key + "/" + strconv.Itoa(i)
@@ -286,6 +310,17 @@ func collateKvRecursive(objValue reflect.Value, kv map[string]string, key string
return nil
}
func writeCompressedData(data []byte) (string, error) {
var buffer bytes.Buffer
gzipWriter := gzip.NewWriter(&buffer)
_, err := gzipWriter.Write(data)
if err != nil {
return "", err
}
gzipWriter.Close()
return buffer.String(), nil
}
// ListRecursive lists all key value children under key
func (kv *KvSource) ListRecursive(key string, pairs map[string][]byte) error {
pairsN1, err := kv.List(key, nil)

View File

@@ -1,16 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package auth provides client role authentication for accessing keys in etcd.
package auth

View File

@@ -1,137 +0,0 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package auth
import (
"crypto/rsa"
"io/ioutil"
jwt "github.com/dgrijalva/jwt-go"
"golang.org/x/net/context"
)
type tokenJWT struct {
signMethod string
signKey *rsa.PrivateKey
verifyKey *rsa.PublicKey
}
func (t *tokenJWT) enable() {}
func (t *tokenJWT) disable() {}
func (t *tokenJWT) invalidateUser(string) {}
func (t *tokenJWT) genTokenPrefix() (string, error) { return "", nil }
func (t *tokenJWT) info(ctx context.Context, token string, rev uint64) (*AuthInfo, bool) {
// rev isn't used in JWT, it is only used in simple token
var (
username string
revision uint64
)
parsed, err := jwt.Parse(token, func(token *jwt.Token) (interface{}, error) {
return t.verifyKey, nil
})
switch err.(type) {
case nil:
if !parsed.Valid {
plog.Warningf("invalid jwt token: %s", token)
return nil, false
}
claims := parsed.Claims.(jwt.MapClaims)
username = claims["username"].(string)
revision = uint64(claims["revision"].(float64))
default:
plog.Warningf("failed to parse jwt token: %s", err)
return nil, false
}
return &AuthInfo{Username: username, Revision: revision}, true
}
func (t *tokenJWT) assign(ctx context.Context, username string, revision uint64) (string, error) {
// Future work: let a jwt token include permission information would be useful for
// permission checking in proxy side.
tk := jwt.NewWithClaims(jwt.GetSigningMethod(t.signMethod),
jwt.MapClaims{
"username": username,
"revision": revision,
})
token, err := tk.SignedString(t.signKey)
if err != nil {
plog.Debugf("failed to sign jwt token: %s", err)
return "", err
}
plog.Debugf("jwt token: %s", token)
return token, err
}
func prepareOpts(opts map[string]string) (jwtSignMethod, jwtPubKeyPath, jwtPrivKeyPath string, err error) {
for k, v := range opts {
switch k {
case "sign-method":
jwtSignMethod = v
case "pub-key":
jwtPubKeyPath = v
case "priv-key":
jwtPrivKeyPath = v
default:
plog.Errorf("unknown token specific option: %s", k)
return "", "", "", ErrInvalidAuthOpts
}
}
return jwtSignMethod, jwtPubKeyPath, jwtPrivKeyPath, nil
}
func newTokenProviderJWT(opts map[string]string) (*tokenJWT, error) {
jwtSignMethod, jwtPubKeyPath, jwtPrivKeyPath, err := prepareOpts(opts)
if err != nil {
return nil, ErrInvalidAuthOpts
}
t := &tokenJWT{}
t.signMethod = jwtSignMethod
verifyBytes, err := ioutil.ReadFile(jwtPubKeyPath)
if err != nil {
plog.Errorf("failed to read public key (%s) for jwt: %s", jwtPubKeyPath, err)
return nil, err
}
t.verifyKey, err = jwt.ParseRSAPublicKeyFromPEM(verifyBytes)
if err != nil {
plog.Errorf("failed to parse public key (%s): %s", jwtPubKeyPath, err)
return nil, err
}
signBytes, err := ioutil.ReadFile(jwtPrivKeyPath)
if err != nil {
plog.Errorf("failed to read private key (%s) for jwt: %s", jwtPrivKeyPath, err)
return nil, err
}
t.signKey, err = jwt.ParseRSAPrivateKeyFromPEM(signBytes)
if err != nil {
plog.Errorf("failed to parse private key (%s): %s", jwtPrivKeyPath, err)
return nil, err
}
return t, nil
}

View File

@@ -1,133 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package auth
import (
"github.com/coreos/etcd/auth/authpb"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/pkg/adt"
)
func getMergedPerms(tx backend.BatchTx, userName string) *unifiedRangePermissions {
user := getUser(tx, userName)
if user == nil {
plog.Errorf("invalid user name %s", userName)
return nil
}
readPerms := &adt.IntervalTree{}
writePerms := &adt.IntervalTree{}
for _, roleName := range user.Roles {
role := getRole(tx, roleName)
if role == nil {
continue
}
for _, perm := range role.KeyPermission {
var ivl adt.Interval
var rangeEnd []byte
if len(perm.RangeEnd) != 1 || perm.RangeEnd[0] != 0 {
rangeEnd = perm.RangeEnd
}
if len(perm.RangeEnd) != 0 {
ivl = adt.NewBytesAffineInterval(perm.Key, rangeEnd)
} else {
ivl = adt.NewBytesAffinePoint(perm.Key)
}
switch perm.PermType {
case authpb.READWRITE:
readPerms.Insert(ivl, struct{}{})
writePerms.Insert(ivl, struct{}{})
case authpb.READ:
readPerms.Insert(ivl, struct{}{})
case authpb.WRITE:
writePerms.Insert(ivl, struct{}{})
}
}
}
return &unifiedRangePermissions{
readPerms: readPerms,
writePerms: writePerms,
}
}
func checkKeyInterval(cachedPerms *unifiedRangePermissions, key, rangeEnd []byte, permtyp authpb.Permission_Type) bool {
if len(rangeEnd) == 1 && rangeEnd[0] == 0 {
rangeEnd = nil
}
ivl := adt.NewBytesAffineInterval(key, rangeEnd)
switch permtyp {
case authpb.READ:
return cachedPerms.readPerms.Contains(ivl)
case authpb.WRITE:
return cachedPerms.writePerms.Contains(ivl)
default:
plog.Panicf("unknown auth type: %v", permtyp)
}
return false
}
func checkKeyPoint(cachedPerms *unifiedRangePermissions, key []byte, permtyp authpb.Permission_Type) bool {
pt := adt.NewBytesAffinePoint(key)
switch permtyp {
case authpb.READ:
return cachedPerms.readPerms.Intersects(pt)
case authpb.WRITE:
return cachedPerms.writePerms.Intersects(pt)
default:
plog.Panicf("unknown auth type: %v", permtyp)
}
return false
}
func (as *authStore) isRangeOpPermitted(tx backend.BatchTx, userName string, key, rangeEnd []byte, permtyp authpb.Permission_Type) bool {
// assumption: tx is Lock()ed
_, ok := as.rangePermCache[userName]
if !ok {
perms := getMergedPerms(tx, userName)
if perms == nil {
plog.Errorf("failed to create a unified permission of user %s", userName)
return false
}
as.rangePermCache[userName] = perms
}
if len(rangeEnd) == 0 {
return checkKeyPoint(as.rangePermCache[userName], key, permtyp)
}
return checkKeyInterval(as.rangePermCache[userName], key, rangeEnd, permtyp)
}
func (as *authStore) clearCachedPerm() {
as.rangePermCache = make(map[string]*unifiedRangePermissions)
}
func (as *authStore) invalidateCachedPerm(userName string) {
delete(as.rangePermCache, userName)
}
type unifiedRangePermissions struct {
readPerms *adt.IntervalTree
writePerms *adt.IntervalTree
}

View File

@@ -1,220 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package auth
// CAUTION: This randum number based token mechanism is only for testing purpose.
// JWT based mechanism will be added in the near future.
import (
"crypto/rand"
"fmt"
"math/big"
"strconv"
"strings"
"sync"
"time"
"golang.org/x/net/context"
)
const (
letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
defaultSimpleTokenLength = 16
)
// var for testing purposes
var (
simpleTokenTTL = 5 * time.Minute
simpleTokenTTLResolution = 1 * time.Second
)
type simpleTokenTTLKeeper struct {
tokens map[string]time.Time
donec chan struct{}
stopc chan struct{}
deleteTokenFunc func(string)
mu *sync.Mutex
}
func (tm *simpleTokenTTLKeeper) stop() {
select {
case tm.stopc <- struct{}{}:
case <-tm.donec:
}
<-tm.donec
}
func (tm *simpleTokenTTLKeeper) addSimpleToken(token string) {
tm.tokens[token] = time.Now().Add(simpleTokenTTL)
}
func (tm *simpleTokenTTLKeeper) resetSimpleToken(token string) {
if _, ok := tm.tokens[token]; ok {
tm.tokens[token] = time.Now().Add(simpleTokenTTL)
}
}
func (tm *simpleTokenTTLKeeper) deleteSimpleToken(token string) {
delete(tm.tokens, token)
}
func (tm *simpleTokenTTLKeeper) run() {
tokenTicker := time.NewTicker(simpleTokenTTLResolution)
defer func() {
tokenTicker.Stop()
close(tm.donec)
}()
for {
select {
case <-tokenTicker.C:
nowtime := time.Now()
tm.mu.Lock()
for t, tokenendtime := range tm.tokens {
if nowtime.After(tokenendtime) {
tm.deleteTokenFunc(t)
delete(tm.tokens, t)
}
}
tm.mu.Unlock()
case <-tm.stopc:
return
}
}
}
type tokenSimple struct {
indexWaiter func(uint64) <-chan struct{}
simpleTokenKeeper *simpleTokenTTLKeeper
simpleTokensMu sync.Mutex
simpleTokens map[string]string // token -> username
}
func (t *tokenSimple) genTokenPrefix() (string, error) {
ret := make([]byte, defaultSimpleTokenLength)
for i := 0; i < defaultSimpleTokenLength; i++ {
bInt, err := rand.Int(rand.Reader, big.NewInt(int64(len(letters))))
if err != nil {
return "", err
}
ret[i] = letters[bInt.Int64()]
}
return string(ret), nil
}
func (t *tokenSimple) assignSimpleTokenToUser(username, token string) {
t.simpleTokensMu.Lock()
_, ok := t.simpleTokens[token]
if ok {
plog.Panicf("token %s is alredy used", token)
}
t.simpleTokens[token] = username
t.simpleTokenKeeper.addSimpleToken(token)
t.simpleTokensMu.Unlock()
}
func (t *tokenSimple) invalidateUser(username string) {
if t.simpleTokenKeeper == nil {
return
}
t.simpleTokensMu.Lock()
for token, name := range t.simpleTokens {
if strings.Compare(name, username) == 0 {
delete(t.simpleTokens, token)
t.simpleTokenKeeper.deleteSimpleToken(token)
}
}
t.simpleTokensMu.Unlock()
}
func (t *tokenSimple) enable() {
delf := func(tk string) {
if username, ok := t.simpleTokens[tk]; ok {
plog.Infof("deleting token %s for user %s", tk, username)
delete(t.simpleTokens, tk)
}
}
t.simpleTokenKeeper = &simpleTokenTTLKeeper{
tokens: make(map[string]time.Time),
donec: make(chan struct{}),
stopc: make(chan struct{}),
deleteTokenFunc: delf,
mu: &t.simpleTokensMu,
}
go t.simpleTokenKeeper.run()
}
func (t *tokenSimple) disable() {
t.simpleTokensMu.Lock()
tk := t.simpleTokenKeeper
t.simpleTokenKeeper = nil
t.simpleTokens = make(map[string]string) // invalidate all tokens
t.simpleTokensMu.Unlock()
if tk != nil {
tk.stop()
}
}
func (t *tokenSimple) info(ctx context.Context, token string, revision uint64) (*AuthInfo, bool) {
if !t.isValidSimpleToken(ctx, token) {
return nil, false
}
t.simpleTokensMu.Lock()
username, ok := t.simpleTokens[token]
if ok && t.simpleTokenKeeper != nil {
t.simpleTokenKeeper.resetSimpleToken(token)
}
t.simpleTokensMu.Unlock()
return &AuthInfo{Username: username, Revision: revision}, ok
}
func (t *tokenSimple) assign(ctx context.Context, username string, rev uint64) (string, error) {
// rev isn't used in simple token, it is only used in JWT
index := ctx.Value("index").(uint64)
simpleToken := ctx.Value("simpleToken").(string)
token := fmt.Sprintf("%s.%d", simpleToken, index)
t.assignSimpleTokenToUser(username, token)
return token, nil
}
func (t *tokenSimple) isValidSimpleToken(ctx context.Context, token string) bool {
splitted := strings.Split(token, ".")
if len(splitted) != 2 {
return false
}
index, err := strconv.Atoi(splitted[1])
if err != nil {
return false
}
select {
case <-t.indexWaiter(uint64(index)):
return true
case <-ctx.Done():
}
return false
}
func newTokenProviderSimple(indexWaiter func(uint64) <-chan struct{}) *tokenSimple {
return &tokenSimple{
simpleTokens: make(map[string]string),
indexWaiter: indexWaiter,
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,86 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package api
import (
"sync"
"github.com/coreos/etcd/version"
"github.com/coreos/go-semver/semver"
"github.com/coreos/pkg/capnslog"
)
type Capability string
const (
AuthCapability Capability = "auth"
V3rpcCapability Capability = "v3rpc"
)
var (
plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcdserver/api")
// capabilityMaps is a static map of version to capability map.
capabilityMaps = map[string]map[Capability]bool{
"3.0.0": {AuthCapability: true, V3rpcCapability: true},
"3.1.0": {AuthCapability: true, V3rpcCapability: true},
"3.2.0": {AuthCapability: true, V3rpcCapability: true},
}
enableMapMu sync.RWMutex
// enabledMap points to a map in capabilityMaps
enabledMap map[Capability]bool
curVersion *semver.Version
)
func init() {
enabledMap = map[Capability]bool{
AuthCapability: true,
V3rpcCapability: true,
}
}
// UpdateCapability updates the enabledMap when the cluster version increases.
func UpdateCapability(v *semver.Version) {
if v == nil {
// if recovered but version was never set by cluster
return
}
enableMapMu.Lock()
if curVersion != nil && !curVersion.LessThan(*v) {
enableMapMu.Unlock()
return
}
curVersion = v
enabledMap = capabilityMaps[curVersion.String()]
enableMapMu.Unlock()
plog.Infof("enabled capabilities for version %s", version.Cluster(v.String()))
}
func IsCapabilityEnabled(c Capability) bool {
enableMapMu.RLock()
defer enableMapMu.RUnlock()
if enabledMap == nil {
return false
}
return enabledMap[c]
}
func EnableCapability(c Capability) {
enableMapMu.Lock()
defer enableMapMu.Unlock()
enabledMap[c] = true
}

View File

@@ -1,41 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package api
import (
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/go-semver/semver"
)
// Cluster is an interface representing a collection of members in one etcd cluster.
type Cluster interface {
// ID returns the cluster ID
ID() types.ID
// ClientURLs returns an aggregate set of all URLs on which this
// cluster is listening for client requests
ClientURLs() []string
// Members returns a slice of members sorted by their ID
Members() []*membership.Member
// Member retrieves a particular member based on ID, or nil if the
// member does not exist in the cluster
Member(id types.ID) *membership.Member
// IsIDRemoved checks whether the given ID has been removed from this
// cluster at some point in the past
IsIDRemoved(id types.ID) bool
// Version is the cluster-wide minimum major.minor version.
Version() *semver.Version
}

View File

@@ -1,16 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package api manages the capabilities and features that are exposed to clients by the etcd cluster.
package api

View File

@@ -1,157 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"github.com/coreos/etcd/etcdserver"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"golang.org/x/net/context"
)
type AuthServer struct {
authenticator etcdserver.Authenticator
}
func NewAuthServer(s *etcdserver.EtcdServer) *AuthServer {
return &AuthServer{authenticator: s}
}
func (as *AuthServer) AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error) {
resp, err := as.authenticator.AuthEnable(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) AuthDisable(ctx context.Context, r *pb.AuthDisableRequest) (*pb.AuthDisableResponse, error) {
resp, err := as.authenticator.AuthDisable(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) Authenticate(ctx context.Context, r *pb.AuthenticateRequest) (*pb.AuthenticateResponse, error) {
resp, err := as.authenticator.Authenticate(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
resp, err := as.authenticator.RoleAdd(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleDelete(ctx context.Context, r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) {
resp, err := as.authenticator.RoleDelete(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleGet(ctx context.Context, r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
resp, err := as.authenticator.RoleGet(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleList(ctx context.Context, r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
resp, err := as.authenticator.RoleList(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleRevokePermission(ctx context.Context, r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
resp, err := as.authenticator.RoleRevokePermission(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleGrantPermission(ctx context.Context, r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) {
resp, err := as.authenticator.RoleGrantPermission(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
resp, err := as.authenticator.UserAdd(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error) {
resp, err := as.authenticator.UserDelete(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserGet(ctx context.Context, r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
resp, err := as.authenticator.UserGet(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserList(ctx context.Context, r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
resp, err := as.authenticator.UserList(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserGrantRole(ctx context.Context, r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
resp, err := as.authenticator.UserGrantRole(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserRevokeRole(ctx context.Context, r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) {
resp, err := as.authenticator.UserRevokeRole(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) {
resp, err := as.authenticator.UserChangePassword(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}

View File

@@ -1,34 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import "github.com/gogo/protobuf/proto"
type codec struct{}
func (c *codec) Marshal(v interface{}) ([]byte, error) {
b, err := proto.Marshal(v.(proto.Message))
sentBytes.Add(float64(len(b)))
return b, err
}
func (c *codec) Unmarshal(data []byte, v interface{}) error {
receivedBytes.Add(float64(len(data)))
return proto.Unmarshal(data, v.(proto.Message))
}
func (c *codec) String() string {
return "proto"
}

View File

@@ -1,53 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"crypto/tls"
"math"
"github.com/coreos/etcd/etcdserver"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/grpclog"
)
const maxStreams = math.MaxUint32
func init() {
grpclog.SetLogger(plog)
}
func Server(s *etcdserver.EtcdServer, tls *tls.Config) *grpc.Server {
var opts []grpc.ServerOption
opts = append(opts, grpc.CustomCodec(&codec{}))
if tls != nil {
opts = append(opts, grpc.Creds(credentials.NewTLS(tls)))
}
opts = append(opts, grpc.UnaryInterceptor(newUnaryInterceptor(s)))
opts = append(opts, grpc.StreamInterceptor(newStreamInterceptor(s)))
opts = append(opts, grpc.MaxConcurrentStreams(maxStreams))
grpcServer := grpc.NewServer(opts...)
pb.RegisterKVServer(grpcServer, NewQuotaKVServer(s))
pb.RegisterWatchServer(grpcServer, NewWatchServer(s))
pb.RegisterLeaseServer(grpcServer, NewQuotaLeaseServer(s))
pb.RegisterClusterServer(grpcServer, NewClusterServer(s))
pb.RegisterAuthServer(grpcServer, NewAuthServer(s))
pb.RegisterMaintenanceServer(grpcServer, NewMaintenanceServer(s))
return grpcServer
}

View File

@@ -1,46 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"github.com/coreos/etcd/etcdserver"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
)
type header struct {
clusterID int64
memberID int64
raftTimer etcdserver.RaftTimer
rev func() int64
}
func newHeader(s *etcdserver.EtcdServer) header {
return header{
clusterID: int64(s.Cluster().ID()),
memberID: int64(s.ID()),
raftTimer: s,
rev: func() int64 { return s.KV().Rev() },
}
}
// fill populates pb.ResponseHeader using etcdserver information
func (h *header) fill(rh *pb.ResponseHeader) {
rh.ClusterId = uint64(h.clusterID)
rh.MemberId = uint64(h.memberID)
rh.RaftTerm = h.raftTimer.Term()
if rh.Revision == 0 {
rh.Revision = h.rev()
}
}

View File

@@ -1,144 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"sync"
"time"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft"
prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
"golang.org/x/net/context"
"google.golang.org/grpc"
"google.golang.org/grpc/metadata"
)
const (
maxNoLeaderCnt = 3
)
type streamsMap struct {
mu sync.Mutex
streams map[grpc.ServerStream]struct{}
}
func newUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor {
return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) {
if !api.IsCapabilityEnabled(api.V3rpcCapability) {
return nil, rpctypes.ErrGRPCNotCapable
}
md, ok := metadata.FromContext(ctx)
if ok {
if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader {
if s.Leader() == types.ID(raft.None) {
return nil, rpctypes.ErrGRPCNoLeader
}
}
}
return prometheus.UnaryServerInterceptor(ctx, req, info, handler)
}
}
func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor {
smap := monitorLeader(s)
return func(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error {
if !api.IsCapabilityEnabled(api.V3rpcCapability) {
return rpctypes.ErrGRPCNotCapable
}
md, ok := metadata.FromContext(ss.Context())
if ok {
if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader {
if s.Leader() == types.ID(raft.None) {
return rpctypes.ErrGRPCNoLeader
}
cctx, cancel := context.WithCancel(ss.Context())
ss = serverStreamWithCtx{ctx: cctx, cancel: &cancel, ServerStream: ss}
smap.mu.Lock()
smap.streams[ss] = struct{}{}
smap.mu.Unlock()
defer func() {
smap.mu.Lock()
delete(smap.streams, ss)
smap.mu.Unlock()
cancel()
}()
}
}
return prometheus.StreamServerInterceptor(srv, ss, info, handler)
}
}
type serverStreamWithCtx struct {
grpc.ServerStream
ctx context.Context
cancel *context.CancelFunc
}
func (ssc serverStreamWithCtx) Context() context.Context { return ssc.ctx }
func monitorLeader(s *etcdserver.EtcdServer) *streamsMap {
smap := &streamsMap{
streams: make(map[grpc.ServerStream]struct{}),
}
go func() {
election := time.Duration(s.Cfg.TickMs) * time.Duration(s.Cfg.ElectionTicks) * time.Millisecond
noLeaderCnt := 0
for {
select {
case <-s.StopNotify():
return
case <-time.After(election):
if s.Leader() == types.ID(raft.None) {
noLeaderCnt++
} else {
noLeaderCnt = 0
}
// We are more conservative on canceling existing streams. Reconnecting streams
// cost much more than just rejecting new requests. So we wait until the member
// cannot find a leader for maxNoLeaderCnt election timeouts to cancel existing streams.
if noLeaderCnt >= maxNoLeaderCnt {
smap.mu.Lock()
for ss := range smap.streams {
if ssWithCtx, ok := ss.(serverStreamWithCtx); ok {
(*ssWithCtx.cancel)()
<-ss.Context().Done()
}
}
smap.streams = make(map[grpc.ServerStream]struct{})
smap.mu.Unlock()
}
}
}
}()
return smap
}

View File

@@ -1,259 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package v3rpc implements etcd v3 RPC system based on gRPC.
package v3rpc
import (
"sort"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/pkg/capnslog"
"golang.org/x/net/context"
)
var (
plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcdserver/api/v3rpc")
// Max operations per txn list. For example, Txn.Success can have at most 128 operations,
// and Txn.Failure can have at most 128 operations.
MaxOpsPerTxn = 128
)
type kvServer struct {
hdr header
kv etcdserver.RaftKV
}
func NewKVServer(s *etcdserver.EtcdServer) pb.KVServer {
return &kvServer{hdr: newHeader(s), kv: s}
}
func (s *kvServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) {
if err := checkRangeRequest(r); err != nil {
return nil, err
}
resp, err := s.kv.Range(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
if resp.Header == nil {
plog.Panic("unexpected nil resp.Header")
}
s.hdr.fill(resp.Header)
return resp, nil
}
func (s *kvServer) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
if err := checkPutRequest(r); err != nil {
return nil, err
}
resp, err := s.kv.Put(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
if resp.Header == nil {
plog.Panic("unexpected nil resp.Header")
}
s.hdr.fill(resp.Header)
return resp, nil
}
func (s *kvServer) DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
if err := checkDeleteRequest(r); err != nil {
return nil, err
}
resp, err := s.kv.DeleteRange(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
if resp.Header == nil {
plog.Panic("unexpected nil resp.Header")
}
s.hdr.fill(resp.Header)
return resp, nil
}
func (s *kvServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
if err := checkTxnRequest(r); err != nil {
return nil, err
}
resp, err := s.kv.Txn(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
if resp.Header == nil {
plog.Panic("unexpected nil resp.Header")
}
s.hdr.fill(resp.Header)
return resp, nil
}
func (s *kvServer) Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error) {
resp, err := s.kv.Compact(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
if resp.Header == nil {
plog.Panic("unexpected nil resp.Header")
}
s.hdr.fill(resp.Header)
return resp, nil
}
func checkRangeRequest(r *pb.RangeRequest) error {
if len(r.Key) == 0 {
return rpctypes.ErrGRPCEmptyKey
}
return nil
}
func checkPutRequest(r *pb.PutRequest) error {
if len(r.Key) == 0 {
return rpctypes.ErrGRPCEmptyKey
}
if r.IgnoreValue && len(r.Value) != 0 {
return rpctypes.ErrGRPCValueProvided
}
if r.IgnoreLease && r.Lease != 0 {
return rpctypes.ErrGRPCLeaseProvided
}
return nil
}
func checkDeleteRequest(r *pb.DeleteRangeRequest) error {
if len(r.Key) == 0 {
return rpctypes.ErrGRPCEmptyKey
}
return nil
}
func checkTxnRequest(r *pb.TxnRequest) error {
if len(r.Compare) > MaxOpsPerTxn || len(r.Success) > MaxOpsPerTxn || len(r.Failure) > MaxOpsPerTxn {
return rpctypes.ErrGRPCTooManyOps
}
for _, c := range r.Compare {
if len(c.Key) == 0 {
return rpctypes.ErrGRPCEmptyKey
}
}
for _, u := range r.Success {
if err := checkRequestOp(u); err != nil {
return err
}
}
if err := checkRequestDupKeys(r.Success); err != nil {
return err
}
for _, u := range r.Failure {
if err := checkRequestOp(u); err != nil {
return err
}
}
return checkRequestDupKeys(r.Failure)
}
// checkRequestDupKeys gives rpctypes.ErrGRPCDuplicateKey if the same key is modified twice
func checkRequestDupKeys(reqs []*pb.RequestOp) error {
// check put overlap
keys := make(map[string]struct{})
for _, requ := range reqs {
tv, ok := requ.Request.(*pb.RequestOp_RequestPut)
if !ok {
continue
}
preq := tv.RequestPut
if preq == nil {
continue
}
if _, ok := keys[string(preq.Key)]; ok {
return rpctypes.ErrGRPCDuplicateKey
}
keys[string(preq.Key)] = struct{}{}
}
// no need to check deletes if no puts; delete overlaps are permitted
if len(keys) == 0 {
return nil
}
// sort keys for range checking
sortedKeys := []string{}
for k := range keys {
sortedKeys = append(sortedKeys, k)
}
sort.Strings(sortedKeys)
// check put overlap with deletes
for _, requ := range reqs {
tv, ok := requ.Request.(*pb.RequestOp_RequestDeleteRange)
if !ok {
continue
}
dreq := tv.RequestDeleteRange
if dreq == nil {
continue
}
if dreq.RangeEnd == nil {
if _, found := keys[string(dreq.Key)]; found {
return rpctypes.ErrGRPCDuplicateKey
}
} else {
lo := sort.SearchStrings(sortedKeys, string(dreq.Key))
hi := sort.SearchStrings(sortedKeys, string(dreq.RangeEnd))
if lo != hi {
// element between lo and hi => overlap
return rpctypes.ErrGRPCDuplicateKey
}
}
}
return nil
}
func checkRequestOp(u *pb.RequestOp) error {
// TODO: ensure only one of the field is set.
switch uv := u.Request.(type) {
case *pb.RequestOp_RequestRange:
if uv.RequestRange != nil {
return checkRangeRequest(uv.RequestRange)
}
case *pb.RequestOp_RequestPut:
if uv.RequestPut != nil {
return checkPutRequest(uv.RequestPut)
}
case *pb.RequestOp_RequestDeleteRange:
if uv.RequestDeleteRange != nil {
return checkDeleteRequest(uv.RequestDeleteRange)
}
default:
// empty op / nil entry
return rpctypes.ErrGRPCKeyNotFound
}
return nil
}

View File

@@ -1,123 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"io"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/lease"
"golang.org/x/net/context"
)
type LeaseServer struct {
hdr header
le etcdserver.Lessor
}
func NewLeaseServer(s *etcdserver.EtcdServer) pb.LeaseServer {
return &LeaseServer{le: s, hdr: newHeader(s)}
}
func (ls *LeaseServer) LeaseGrant(ctx context.Context, cr *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
resp, err := ls.le.LeaseGrant(ctx, cr)
if err != nil {
return nil, togRPCError(err)
}
ls.hdr.fill(resp.Header)
return resp, nil
}
func (ls *LeaseServer) LeaseRevoke(ctx context.Context, rr *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
resp, err := ls.le.LeaseRevoke(ctx, rr)
if err != nil {
return nil, togRPCError(err)
}
ls.hdr.fill(resp.Header)
return resp, nil
}
func (ls *LeaseServer) LeaseTimeToLive(ctx context.Context, rr *pb.LeaseTimeToLiveRequest) (*pb.LeaseTimeToLiveResponse, error) {
resp, err := ls.le.LeaseTimeToLive(ctx, rr)
if err != nil && err != lease.ErrLeaseNotFound {
return nil, togRPCError(err)
}
if err == lease.ErrLeaseNotFound {
resp = &pb.LeaseTimeToLiveResponse{
Header: &pb.ResponseHeader{},
ID: rr.ID,
TTL: -1,
}
}
ls.hdr.fill(resp.Header)
return resp, nil
}
func (ls *LeaseServer) LeaseKeepAlive(stream pb.Lease_LeaseKeepAliveServer) (err error) {
errc := make(chan error, 1)
go func() {
errc <- ls.leaseKeepAlive(stream)
}()
select {
case err = <-errc:
case <-stream.Context().Done():
// the only server-side cancellation is noleader for now.
err = stream.Context().Err()
if err == context.Canceled {
err = rpctypes.ErrGRPCNoLeader
}
}
return err
}
func (ls *LeaseServer) leaseKeepAlive(stream pb.Lease_LeaseKeepAliveServer) error {
for {
req, err := stream.Recv()
if err == io.EOF {
return nil
}
if err != nil {
return err
}
// Create header before we sent out the renew request.
// This can make sure that the revision is strictly smaller or equal to
// when the keepalive happened at the local server (when the local server is the leader)
// or remote leader.
// Without this, a lease might be revoked at rev 3 but client can see the keepalive succeeded
// at rev 4.
resp := &pb.LeaseKeepAliveResponse{ID: req.ID, Header: &pb.ResponseHeader{}}
ls.hdr.fill(resp.Header)
ttl, err := ls.le.LeaseRenew(stream.Context(), lease.LeaseID(req.ID))
if err == lease.ErrLeaseNotFound {
err = nil
ttl = 0
}
if err != nil {
return togRPCError(err)
}
resp.TTL = ttl
err = stream.Send(resp)
if err != nil {
return err
}
}
}

View File

@@ -1,190 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"crypto/sha256"
"io"
"github.com/coreos/etcd/auth"
"github.com/coreos/etcd/etcdserver"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/version"
"golang.org/x/net/context"
)
type KVGetter interface {
KV() mvcc.ConsistentWatchableKV
}
type BackendGetter interface {
Backend() backend.Backend
}
type Alarmer interface {
Alarm(ctx context.Context, ar *pb.AlarmRequest) (*pb.AlarmResponse, error)
}
type RaftStatusGetter interface {
Index() uint64
Term() uint64
Leader() types.ID
}
type AuthGetter interface {
AuthInfoFromCtx(ctx context.Context) (*auth.AuthInfo, error)
AuthStore() auth.AuthStore
}
type maintenanceServer struct {
rg RaftStatusGetter
kg KVGetter
bg BackendGetter
a Alarmer
hdr header
}
func NewMaintenanceServer(s *etcdserver.EtcdServer) pb.MaintenanceServer {
srv := &maintenanceServer{rg: s, kg: s, bg: s, a: s, hdr: newHeader(s)}
return &authMaintenanceServer{srv, s}
}
func (ms *maintenanceServer) Defragment(ctx context.Context, sr *pb.DefragmentRequest) (*pb.DefragmentResponse, error) {
plog.Noticef("starting to defragment the storage backend...")
err := ms.bg.Backend().Defrag()
if err != nil {
plog.Errorf("failed to defragment the storage backend (%v)", err)
return nil, err
}
plog.Noticef("finished defragmenting the storage backend")
return &pb.DefragmentResponse{}, nil
}
func (ms *maintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance_SnapshotServer) error {
snap := ms.bg.Backend().Snapshot()
pr, pw := io.Pipe()
defer pr.Close()
go func() {
snap.WriteTo(pw)
if err := snap.Close(); err != nil {
plog.Errorf("error closing snapshot (%v)", err)
}
pw.Close()
}()
// send file data
h := sha256.New()
br := int64(0)
buf := make([]byte, 32*1024)
sz := snap.Size()
for br < sz {
n, err := io.ReadFull(pr, buf)
if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
return togRPCError(err)
}
br += int64(n)
resp := &pb.SnapshotResponse{
RemainingBytes: uint64(sz - br),
Blob: buf[:n],
}
if err = srv.Send(resp); err != nil {
return togRPCError(err)
}
h.Write(buf[:n])
}
// send sha
sha := h.Sum(nil)
hresp := &pb.SnapshotResponse{RemainingBytes: 0, Blob: sha}
if err := srv.Send(hresp); err != nil {
return togRPCError(err)
}
return nil
}
func (ms *maintenanceServer) Hash(ctx context.Context, r *pb.HashRequest) (*pb.HashResponse, error) {
h, rev, err := ms.kg.KV().Hash()
if err != nil {
return nil, togRPCError(err)
}
resp := &pb.HashResponse{Header: &pb.ResponseHeader{Revision: rev}, Hash: h}
ms.hdr.fill(resp.Header)
return resp, nil
}
func (ms *maintenanceServer) Alarm(ctx context.Context, ar *pb.AlarmRequest) (*pb.AlarmResponse, error) {
return ms.a.Alarm(ctx, ar)
}
func (ms *maintenanceServer) Status(ctx context.Context, ar *pb.StatusRequest) (*pb.StatusResponse, error) {
resp := &pb.StatusResponse{
Header: &pb.ResponseHeader{Revision: ms.hdr.rev()},
Version: version.Version,
DbSize: ms.bg.Backend().Size(),
Leader: uint64(ms.rg.Leader()),
RaftIndex: ms.rg.Index(),
RaftTerm: ms.rg.Term(),
}
ms.hdr.fill(resp.Header)
return resp, nil
}
type authMaintenanceServer struct {
*maintenanceServer
ag AuthGetter
}
func (ams *authMaintenanceServer) isAuthenticated(ctx context.Context) error {
authInfo, err := ams.ag.AuthInfoFromCtx(ctx)
if err != nil {
return err
}
return ams.ag.AuthStore().IsAdminPermitted(authInfo)
}
func (ams *authMaintenanceServer) Defragment(ctx context.Context, sr *pb.DefragmentRequest) (*pb.DefragmentResponse, error) {
if err := ams.isAuthenticated(ctx); err != nil {
return nil, err
}
return ams.maintenanceServer.Defragment(ctx, sr)
}
func (ams *authMaintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance_SnapshotServer) error {
if err := ams.isAuthenticated(srv.Context()); err != nil {
return err
}
return ams.maintenanceServer.Snapshot(sr, srv)
}
func (ams *authMaintenanceServer) Hash(ctx context.Context, r *pb.HashRequest) (*pb.HashResponse, error) {
if err := ams.isAuthenticated(ctx); err != nil {
return nil, err
}
return ams.maintenanceServer.Hash(ctx, r)
}
func (ams *authMaintenanceServer) Status(ctx context.Context, ar *pb.StatusRequest) (*pb.StatusResponse, error) {
return ams.maintenanceServer.Status(ctx, ar)
}

View File

@@ -1,103 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"time"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/pkg/types"
"golang.org/x/net/context"
)
type ClusterServer struct {
cluster api.Cluster
server etcdserver.Server
raftTimer etcdserver.RaftTimer
}
func NewClusterServer(s *etcdserver.EtcdServer) *ClusterServer {
return &ClusterServer{
cluster: s.Cluster(),
server: s,
raftTimer: s,
}
}
func (cs *ClusterServer) MemberAdd(ctx context.Context, r *pb.MemberAddRequest) (*pb.MemberAddResponse, error) {
urls, err := types.NewURLs(r.PeerURLs)
if err != nil {
return nil, rpctypes.ErrGRPCMemberBadURLs
}
now := time.Now()
m := membership.NewMember("", urls, "", &now)
membs, merr := cs.server.AddMember(ctx, *m)
if merr != nil {
return nil, togRPCError(merr)
}
return &pb.MemberAddResponse{
Header: cs.header(),
Member: &pb.Member{ID: uint64(m.ID), PeerURLs: m.PeerURLs},
Members: membersToProtoMembers(membs),
}, nil
}
func (cs *ClusterServer) MemberRemove(ctx context.Context, r *pb.MemberRemoveRequest) (*pb.MemberRemoveResponse, error) {
membs, err := cs.server.RemoveMember(ctx, r.ID)
if err != nil {
return nil, togRPCError(err)
}
return &pb.MemberRemoveResponse{Header: cs.header(), Members: membersToProtoMembers(membs)}, nil
}
func (cs *ClusterServer) MemberUpdate(ctx context.Context, r *pb.MemberUpdateRequest) (*pb.MemberUpdateResponse, error) {
m := membership.Member{
ID: types.ID(r.ID),
RaftAttributes: membership.RaftAttributes{PeerURLs: r.PeerURLs},
}
membs, err := cs.server.UpdateMember(ctx, m)
if err != nil {
return nil, togRPCError(err)
}
return &pb.MemberUpdateResponse{Header: cs.header(), Members: membersToProtoMembers(membs)}, nil
}
func (cs *ClusterServer) MemberList(ctx context.Context, r *pb.MemberListRequest) (*pb.MemberListResponse, error) {
membs := membersToProtoMembers(cs.cluster.Members())
return &pb.MemberListResponse{Header: cs.header(), Members: membs}, nil
}
func (cs *ClusterServer) header() *pb.ResponseHeader {
return &pb.ResponseHeader{ClusterId: uint64(cs.cluster.ID()), MemberId: uint64(cs.server.ID()), RaftTerm: cs.raftTimer.Term()}
}
func membersToProtoMembers(membs []*membership.Member) []*pb.Member {
protoMembs := make([]*pb.Member, len(membs))
for i := range membs {
protoMembs[i] = &pb.Member{
Name: membs[i].Name,
ID: uint64(membs[i].ID),
PeerURLs: membs[i].PeerURLs,
ClientURLs: membs[i].ClientURLs,
}
}
return protoMembs
}

View File

@@ -1,38 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import "github.com/prometheus/client_golang/prometheus"
var (
sentBytes = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "network",
Name: "client_grpc_sent_bytes_total",
Help: "The total number of bytes sent to grpc clients.",
})
receivedBytes = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "network",
Name: "client_grpc_received_bytes_total",
Help: "The total number of bytes received from grpc clients.",
})
)
func init() {
prometheus.MustRegister(sentBytes)
prometheus.MustRegister(receivedBytes)
}

View File

@@ -1,89 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/pkg/types"
"golang.org/x/net/context"
)
type quotaKVServer struct {
pb.KVServer
qa quotaAlarmer
}
type quotaAlarmer struct {
q etcdserver.Quota
a Alarmer
id types.ID
}
// check whether request satisfies the quota. If there is not enough space,
// ignore request and raise the free space alarm.
func (qa *quotaAlarmer) check(ctx context.Context, r interface{}) error {
if qa.q.Available(r) {
return nil
}
req := &pb.AlarmRequest{
MemberID: uint64(qa.id),
Action: pb.AlarmRequest_ACTIVATE,
Alarm: pb.AlarmType_NOSPACE,
}
qa.a.Alarm(ctx, req)
return rpctypes.ErrGRPCNoSpace
}
func NewQuotaKVServer(s *etcdserver.EtcdServer) pb.KVServer {
return &quotaKVServer{
NewKVServer(s),
quotaAlarmer{etcdserver.NewBackendQuota(s), s, s.ID()},
}
}
func (s *quotaKVServer) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
if err := s.qa.check(ctx, r); err != nil {
return nil, err
}
return s.KVServer.Put(ctx, r)
}
func (s *quotaKVServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
if err := s.qa.check(ctx, r); err != nil {
return nil, err
}
return s.KVServer.Txn(ctx, r)
}
type quotaLeaseServer struct {
pb.LeaseServer
qa quotaAlarmer
}
func (s *quotaLeaseServer) LeaseGrant(ctx context.Context, cr *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
if err := s.qa.check(ctx, cr); err != nil {
return nil, err
}
return s.LeaseServer.LeaseGrant(ctx, cr)
}
func NewQuotaLeaseServer(s *etcdserver.EtcdServer) pb.LeaseServer {
return &quotaLeaseServer{
NewLeaseServer(s),
quotaAlarmer{etcdserver.NewBackendQuota(s), s, s.ID()},
}
}

View File

@@ -1,103 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"github.com/coreos/etcd/auth"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
)
func togRPCError(err error) error {
switch err {
case membership.ErrIDRemoved:
return rpctypes.ErrGRPCMemberNotFound
case membership.ErrIDNotFound:
return rpctypes.ErrGRPCMemberNotFound
case membership.ErrIDExists:
return rpctypes.ErrGRPCMemberExist
case membership.ErrPeerURLexists:
return rpctypes.ErrGRPCPeerURLExist
case etcdserver.ErrNotEnoughStartedMembers:
return rpctypes.ErrMemberNotEnoughStarted
case mvcc.ErrCompacted:
return rpctypes.ErrGRPCCompacted
case mvcc.ErrFutureRev:
return rpctypes.ErrGRPCFutureRev
case etcdserver.ErrRequestTooLarge:
return rpctypes.ErrGRPCRequestTooLarge
case etcdserver.ErrNoSpace:
return rpctypes.ErrGRPCNoSpace
case etcdserver.ErrTooManyRequests:
return rpctypes.ErrTooManyRequests
case etcdserver.ErrNoLeader:
return rpctypes.ErrGRPCNoLeader
case etcdserver.ErrStopped:
return rpctypes.ErrGRPCStopped
case etcdserver.ErrTimeout:
return rpctypes.ErrGRPCTimeout
case etcdserver.ErrTimeoutDueToLeaderFail:
return rpctypes.ErrGRPCTimeoutDueToLeaderFail
case etcdserver.ErrTimeoutDueToConnectionLost:
return rpctypes.ErrGRPCTimeoutDueToConnectionLost
case etcdserver.ErrUnhealthy:
return rpctypes.ErrGRPCUnhealthy
case etcdserver.ErrKeyNotFound:
return rpctypes.ErrGRPCKeyNotFound
case lease.ErrLeaseNotFound:
return rpctypes.ErrGRPCLeaseNotFound
case lease.ErrLeaseExists:
return rpctypes.ErrGRPCLeaseExist
case auth.ErrRootUserNotExist:
return rpctypes.ErrGRPCRootUserNotExist
case auth.ErrRootRoleNotExist:
return rpctypes.ErrGRPCRootRoleNotExist
case auth.ErrUserAlreadyExist:
return rpctypes.ErrGRPCUserAlreadyExist
case auth.ErrUserEmpty:
return rpctypes.ErrGRPCUserEmpty
case auth.ErrUserNotFound:
return rpctypes.ErrGRPCUserNotFound
case auth.ErrRoleAlreadyExist:
return rpctypes.ErrGRPCRoleAlreadyExist
case auth.ErrRoleNotFound:
return rpctypes.ErrGRPCRoleNotFound
case auth.ErrAuthFailed:
return rpctypes.ErrGRPCAuthFailed
case auth.ErrPermissionDenied:
return rpctypes.ErrGRPCPermissionDenied
case auth.ErrRoleNotGranted:
return rpctypes.ErrGRPCRoleNotGranted
case auth.ErrPermissionNotGranted:
return rpctypes.ErrGRPCPermissionNotGranted
case auth.ErrAuthNotEnabled:
return rpctypes.ErrGRPCAuthNotEnabled
case auth.ErrInvalidAuthToken:
return rpctypes.ErrGRPCInvalidAuthToken
case auth.ErrInvalidAuthMgmt:
return rpctypes.ErrGRPCInvalidAuthMgmt
default:
return grpc.Errorf(codes.Unknown, err.Error())
}
}

View File

@@ -1,426 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"io"
"sync"
"time"
"golang.org/x/net/context"
"github.com/coreos/etcd/auth"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/mvccpb"
)
type watchServer struct {
clusterID int64
memberID int64
raftTimer etcdserver.RaftTimer
watchable mvcc.WatchableKV
ag AuthGetter
}
func NewWatchServer(s *etcdserver.EtcdServer) pb.WatchServer {
return &watchServer{
clusterID: int64(s.Cluster().ID()),
memberID: int64(s.ID()),
raftTimer: s,
watchable: s.Watchable(),
ag: s,
}
}
var (
// External test can read this with GetProgressReportInterval()
// and change this to a small value to finish fast with
// SetProgressReportInterval().
progressReportInterval = 10 * time.Minute
progressReportIntervalMu sync.RWMutex
)
func GetProgressReportInterval() time.Duration {
progressReportIntervalMu.RLock()
defer progressReportIntervalMu.RUnlock()
return progressReportInterval
}
func SetProgressReportInterval(newTimeout time.Duration) {
progressReportIntervalMu.Lock()
defer progressReportIntervalMu.Unlock()
progressReportInterval = newTimeout
}
const (
// We send ctrl response inside the read loop. We do not want
// send to block read, but we still want ctrl response we sent to
// be serialized. Thus we use a buffered chan to solve the problem.
// A small buffer should be OK for most cases, since we expect the
// ctrl requests are infrequent.
ctrlStreamBufLen = 16
)
// serverWatchStream is an etcd server side stream. It receives requests
// from client side gRPC stream. It receives watch events from mvcc.WatchStream,
// and creates responses that forwarded to gRPC stream.
// It also forwards control message like watch created and canceled.
type serverWatchStream struct {
clusterID int64
memberID int64
raftTimer etcdserver.RaftTimer
watchable mvcc.WatchableKV
gRPCStream pb.Watch_WatchServer
watchStream mvcc.WatchStream
ctrlStream chan *pb.WatchResponse
// mu protects progress, prevKV
mu sync.Mutex
// progress tracks the watchID that stream might need to send
// progress to.
// TODO: combine progress and prevKV into a single struct?
progress map[mvcc.WatchID]bool
prevKV map[mvcc.WatchID]bool
// closec indicates the stream is closed.
closec chan struct{}
// wg waits for the send loop to complete
wg sync.WaitGroup
ag AuthGetter
}
func (ws *watchServer) Watch(stream pb.Watch_WatchServer) (err error) {
sws := serverWatchStream{
clusterID: ws.clusterID,
memberID: ws.memberID,
raftTimer: ws.raftTimer,
watchable: ws.watchable,
gRPCStream: stream,
watchStream: ws.watchable.NewWatchStream(),
// chan for sending control response like watcher created and canceled.
ctrlStream: make(chan *pb.WatchResponse, ctrlStreamBufLen),
progress: make(map[mvcc.WatchID]bool),
prevKV: make(map[mvcc.WatchID]bool),
closec: make(chan struct{}),
ag: ws.ag,
}
sws.wg.Add(1)
go func() {
sws.sendLoop()
sws.wg.Done()
}()
errc := make(chan error, 1)
// Ideally recvLoop would also use sws.wg to signal its completion
// but when stream.Context().Done() is closed, the stream's recv
// may continue to block since it uses a different context, leading to
// deadlock when calling sws.close().
go func() {
if rerr := sws.recvLoop(); rerr != nil {
errc <- rerr
}
}()
select {
case err = <-errc:
close(sws.ctrlStream)
case <-stream.Context().Done():
err = stream.Context().Err()
// the only server-side cancellation is noleader for now.
if err == context.Canceled {
err = rpctypes.ErrGRPCNoLeader
}
}
sws.close()
return err
}
func (sws *serverWatchStream) isWatchPermitted(wcr *pb.WatchCreateRequest) bool {
authInfo, err := sws.ag.AuthInfoFromCtx(sws.gRPCStream.Context())
if err != nil {
return false
}
if authInfo == nil {
// if auth is enabled, IsRangePermitted() can cause an error
authInfo = &auth.AuthInfo{}
}
return sws.ag.AuthStore().IsRangePermitted(authInfo, wcr.Key, wcr.RangeEnd) == nil
}
func (sws *serverWatchStream) recvLoop() error {
for {
req, err := sws.gRPCStream.Recv()
if err == io.EOF {
return nil
}
if err != nil {
return err
}
switch uv := req.RequestUnion.(type) {
case *pb.WatchRequest_CreateRequest:
if uv.CreateRequest == nil {
break
}
creq := uv.CreateRequest
if len(creq.Key) == 0 {
// \x00 is the smallest key
creq.Key = []byte{0}
}
if len(creq.RangeEnd) == 0 {
// force nil since watchstream.Watch distinguishes
// between nil and []byte{} for single key / >=
creq.RangeEnd = nil
}
if len(creq.RangeEnd) == 1 && creq.RangeEnd[0] == 0 {
// support >= key queries
creq.RangeEnd = []byte{}
}
if !sws.isWatchPermitted(creq) {
wr := &pb.WatchResponse{
Header: sws.newResponseHeader(sws.watchStream.Rev()),
WatchId: -1,
Canceled: true,
Created: true,
CancelReason: rpctypes.ErrGRPCPermissionDenied.Error(),
}
select {
case sws.ctrlStream <- wr:
case <-sws.closec:
}
return nil
}
filters := FiltersFromRequest(creq)
wsrev := sws.watchStream.Rev()
rev := creq.StartRevision
if rev == 0 {
rev = wsrev + 1
}
id := sws.watchStream.Watch(creq.Key, creq.RangeEnd, rev, filters...)
if id != -1 {
sws.mu.Lock()
if creq.ProgressNotify {
sws.progress[id] = true
}
if creq.PrevKv {
sws.prevKV[id] = true
}
sws.mu.Unlock()
}
wr := &pb.WatchResponse{
Header: sws.newResponseHeader(wsrev),
WatchId: int64(id),
Created: true,
Canceled: id == -1,
}
select {
case sws.ctrlStream <- wr:
case <-sws.closec:
return nil
}
case *pb.WatchRequest_CancelRequest:
if uv.CancelRequest != nil {
id := uv.CancelRequest.WatchId
err := sws.watchStream.Cancel(mvcc.WatchID(id))
if err == nil {
sws.ctrlStream <- &pb.WatchResponse{
Header: sws.newResponseHeader(sws.watchStream.Rev()),
WatchId: id,
Canceled: true,
}
sws.mu.Lock()
delete(sws.progress, mvcc.WatchID(id))
delete(sws.prevKV, mvcc.WatchID(id))
sws.mu.Unlock()
}
}
default:
// we probably should not shutdown the entire stream when
// receive an valid command.
// so just do nothing instead.
continue
}
}
}
func (sws *serverWatchStream) sendLoop() {
// watch ids that are currently active
ids := make(map[mvcc.WatchID]struct{})
// watch responses pending on a watch id creation message
pending := make(map[mvcc.WatchID][]*pb.WatchResponse)
interval := GetProgressReportInterval()
progressTicker := time.NewTicker(interval)
defer func() {
progressTicker.Stop()
// drain the chan to clean up pending events
for ws := range sws.watchStream.Chan() {
mvcc.ReportEventReceived(len(ws.Events))
}
for _, wrs := range pending {
for _, ws := range wrs {
mvcc.ReportEventReceived(len(ws.Events))
}
}
}()
for {
select {
case wresp, ok := <-sws.watchStream.Chan():
if !ok {
return
}
// TODO: evs is []mvccpb.Event type
// either return []*mvccpb.Event from the mvcc package
// or define protocol buffer with []mvccpb.Event.
evs := wresp.Events
events := make([]*mvccpb.Event, len(evs))
sws.mu.Lock()
needPrevKV := sws.prevKV[wresp.WatchID]
sws.mu.Unlock()
for i := range evs {
events[i] = &evs[i]
if needPrevKV {
opt := mvcc.RangeOptions{Rev: evs[i].Kv.ModRevision - 1}
r, err := sws.watchable.Range(evs[i].Kv.Key, nil, opt)
if err == nil && len(r.KVs) != 0 {
events[i].PrevKv = &(r.KVs[0])
}
}
}
wr := &pb.WatchResponse{
Header: sws.newResponseHeader(wresp.Revision),
WatchId: int64(wresp.WatchID),
Events: events,
CompactRevision: wresp.CompactRevision,
}
if _, hasId := ids[wresp.WatchID]; !hasId {
// buffer if id not yet announced
wrs := append(pending[wresp.WatchID], wr)
pending[wresp.WatchID] = wrs
continue
}
mvcc.ReportEventReceived(len(evs))
if err := sws.gRPCStream.Send(wr); err != nil {
return
}
sws.mu.Lock()
if len(evs) > 0 && sws.progress[wresp.WatchID] {
// elide next progress update if sent a key update
sws.progress[wresp.WatchID] = false
}
sws.mu.Unlock()
case c, ok := <-sws.ctrlStream:
if !ok {
return
}
if err := sws.gRPCStream.Send(c); err != nil {
return
}
// track id creation
wid := mvcc.WatchID(c.WatchId)
if c.Canceled {
delete(ids, wid)
continue
}
if c.Created {
// flush buffered events
ids[wid] = struct{}{}
for _, v := range pending[wid] {
mvcc.ReportEventReceived(len(v.Events))
if err := sws.gRPCStream.Send(v); err != nil {
return
}
}
delete(pending, wid)
}
case <-progressTicker.C:
sws.mu.Lock()
for id, ok := range sws.progress {
if ok {
sws.watchStream.RequestProgress(id)
}
sws.progress[id] = true
}
sws.mu.Unlock()
case <-sws.closec:
return
}
}
}
func (sws *serverWatchStream) close() {
sws.watchStream.Close()
close(sws.closec)
sws.wg.Wait()
}
func (sws *serverWatchStream) newResponseHeader(rev int64) *pb.ResponseHeader {
return &pb.ResponseHeader{
ClusterId: uint64(sws.clusterID),
MemberId: uint64(sws.memberID),
Revision: rev,
RaftTerm: sws.raftTimer.Term(),
}
}
func filterNoDelete(e mvccpb.Event) bool {
return e.Type == mvccpb.DELETE
}
func filterNoPut(e mvccpb.Event) bool {
return e.Type == mvccpb.PUT
}
func FiltersFromRequest(creq *pb.WatchCreateRequest) []mvcc.FilterFunc {
filters := make([]mvcc.FilterFunc, 0, len(creq.Filters))
for _, ft := range creq.Filters {
switch ft {
case pb.WatchCreateRequest_NOPUT:
filters = append(filters, filterNoPut)
case pb.WatchCreateRequest_NODELETE:
filters = append(filters, filterNoDelete)
default:
}
}
return filters
}

View File

@@ -1,878 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"bytes"
"sort"
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/mvccpb"
"github.com/coreos/etcd/pkg/types"
"github.com/gogo/protobuf/proto"
"golang.org/x/net/context"
)
const (
warnApplyDuration = 100 * time.Millisecond
)
type applyResult struct {
resp proto.Message
err error
// physc signals the physical effect of the request has completed in addition
// to being logically reflected by the node. Currently only used for
// Compaction requests.
physc <-chan struct{}
}
// applierV3 is the interface for processing V3 raft messages
type applierV3 interface {
Apply(r *pb.InternalRaftRequest) *applyResult
Put(txn mvcc.TxnWrite, p *pb.PutRequest) (*pb.PutResponse, error)
Range(txn mvcc.TxnRead, r *pb.RangeRequest) (*pb.RangeResponse, error)
DeleteRange(txn mvcc.TxnWrite, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error)
Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error)
Compaction(compaction *pb.CompactionRequest) (*pb.CompactionResponse, <-chan struct{}, error)
LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error)
LeaseRevoke(lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error)
Alarm(*pb.AlarmRequest) (*pb.AlarmResponse, error)
Authenticate(r *pb.InternalAuthenticateRequest) (*pb.AuthenticateResponse, error)
AuthEnable() (*pb.AuthEnableResponse, error)
AuthDisable() (*pb.AuthDisableResponse, error)
UserAdd(ua *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error)
UserDelete(ua *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error)
UserChangePassword(ua *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error)
UserGrantRole(ua *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error)
UserGet(ua *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error)
UserRevokeRole(ua *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error)
RoleAdd(ua *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error)
RoleGrantPermission(ua *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error)
RoleGet(ua *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error)
RoleRevokePermission(ua *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error)
RoleDelete(ua *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error)
UserList(ua *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error)
RoleList(ua *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error)
}
type applierV3backend struct {
s *EtcdServer
}
func (s *EtcdServer) newApplierV3() applierV3 {
return newAuthApplierV3(
s.AuthStore(),
newQuotaApplierV3(s, &applierV3backend{s}),
)
}
func (a *applierV3backend) Apply(r *pb.InternalRaftRequest) *applyResult {
ar := &applyResult{}
// call into a.s.applyV3.F instead of a.F so upper appliers can check individual calls
switch {
case r.Range != nil:
ar.resp, ar.err = a.s.applyV3.Range(nil, r.Range)
case r.Put != nil:
ar.resp, ar.err = a.s.applyV3.Put(nil, r.Put)
case r.DeleteRange != nil:
ar.resp, ar.err = a.s.applyV3.DeleteRange(nil, r.DeleteRange)
case r.Txn != nil:
ar.resp, ar.err = a.s.applyV3.Txn(r.Txn)
case r.Compaction != nil:
ar.resp, ar.physc, ar.err = a.s.applyV3.Compaction(r.Compaction)
case r.LeaseGrant != nil:
ar.resp, ar.err = a.s.applyV3.LeaseGrant(r.LeaseGrant)
case r.LeaseRevoke != nil:
ar.resp, ar.err = a.s.applyV3.LeaseRevoke(r.LeaseRevoke)
case r.Alarm != nil:
ar.resp, ar.err = a.s.applyV3.Alarm(r.Alarm)
case r.Authenticate != nil:
ar.resp, ar.err = a.s.applyV3.Authenticate(r.Authenticate)
case r.AuthEnable != nil:
ar.resp, ar.err = a.s.applyV3.AuthEnable()
case r.AuthDisable != nil:
ar.resp, ar.err = a.s.applyV3.AuthDisable()
case r.AuthUserAdd != nil:
ar.resp, ar.err = a.s.applyV3.UserAdd(r.AuthUserAdd)
case r.AuthUserDelete != nil:
ar.resp, ar.err = a.s.applyV3.UserDelete(r.AuthUserDelete)
case r.AuthUserChangePassword != nil:
ar.resp, ar.err = a.s.applyV3.UserChangePassword(r.AuthUserChangePassword)
case r.AuthUserGrantRole != nil:
ar.resp, ar.err = a.s.applyV3.UserGrantRole(r.AuthUserGrantRole)
case r.AuthUserGet != nil:
ar.resp, ar.err = a.s.applyV3.UserGet(r.AuthUserGet)
case r.AuthUserRevokeRole != nil:
ar.resp, ar.err = a.s.applyV3.UserRevokeRole(r.AuthUserRevokeRole)
case r.AuthRoleAdd != nil:
ar.resp, ar.err = a.s.applyV3.RoleAdd(r.AuthRoleAdd)
case r.AuthRoleGrantPermission != nil:
ar.resp, ar.err = a.s.applyV3.RoleGrantPermission(r.AuthRoleGrantPermission)
case r.AuthRoleGet != nil:
ar.resp, ar.err = a.s.applyV3.RoleGet(r.AuthRoleGet)
case r.AuthRoleRevokePermission != nil:
ar.resp, ar.err = a.s.applyV3.RoleRevokePermission(r.AuthRoleRevokePermission)
case r.AuthRoleDelete != nil:
ar.resp, ar.err = a.s.applyV3.RoleDelete(r.AuthRoleDelete)
case r.AuthUserList != nil:
ar.resp, ar.err = a.s.applyV3.UserList(r.AuthUserList)
case r.AuthRoleList != nil:
ar.resp, ar.err = a.s.applyV3.RoleList(r.AuthRoleList)
default:
panic("not implemented")
}
return ar
}
func (a *applierV3backend) Put(txn mvcc.TxnWrite, p *pb.PutRequest) (resp *pb.PutResponse, err error) {
resp = &pb.PutResponse{}
resp.Header = &pb.ResponseHeader{}
val, leaseID := p.Value, lease.LeaseID(p.Lease)
if txn == nil {
if leaseID != lease.NoLease {
if l := a.s.lessor.Lookup(leaseID); l == nil {
return nil, lease.ErrLeaseNotFound
}
}
txn = a.s.KV().Write()
defer txn.End()
}
var rr *mvcc.RangeResult
if p.IgnoreValue || p.IgnoreLease || p.PrevKv {
rr, err = txn.Range(p.Key, nil, mvcc.RangeOptions{})
if err != nil {
return nil, err
}
}
if p.IgnoreValue || p.IgnoreLease {
if rr == nil || len(rr.KVs) == 0 {
// ignore_{lease,value} flag expects previous key-value pair
return nil, ErrKeyNotFound
}
}
if p.IgnoreValue {
val = rr.KVs[0].Value
}
if p.IgnoreLease {
leaseID = lease.LeaseID(rr.KVs[0].Lease)
}
if p.PrevKv {
if rr != nil && len(rr.KVs) != 0 {
resp.PrevKv = &rr.KVs[0]
}
}
resp.Header.Revision = txn.Put(p.Key, val, leaseID)
return resp, nil
}
func (a *applierV3backend) DeleteRange(txn mvcc.TxnWrite, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
resp := &pb.DeleteRangeResponse{}
resp.Header = &pb.ResponseHeader{}
if txn == nil {
txn = a.s.kv.Write()
defer txn.End()
}
if isGteRange(dr.RangeEnd) {
dr.RangeEnd = []byte{}
}
if dr.PrevKv {
rr, err := txn.Range(dr.Key, dr.RangeEnd, mvcc.RangeOptions{})
if err != nil {
return nil, err
}
if rr != nil {
for i := range rr.KVs {
resp.PrevKvs = append(resp.PrevKvs, &rr.KVs[i])
}
}
}
resp.Deleted, resp.Header.Revision = txn.DeleteRange(dr.Key, dr.RangeEnd)
return resp, nil
}
func (a *applierV3backend) Range(txn mvcc.TxnRead, r *pb.RangeRequest) (*pb.RangeResponse, error) {
resp := &pb.RangeResponse{}
resp.Header = &pb.ResponseHeader{}
if txn == nil {
txn = a.s.kv.Read()
defer txn.End()
}
if isGteRange(r.RangeEnd) {
r.RangeEnd = []byte{}
}
limit := r.Limit
if r.SortOrder != pb.RangeRequest_NONE ||
r.MinModRevision != 0 || r.MaxModRevision != 0 ||
r.MinCreateRevision != 0 || r.MaxCreateRevision != 0 {
// fetch everything; sort and truncate afterwards
limit = 0
}
if limit > 0 {
// fetch one extra for 'more' flag
limit = limit + 1
}
ro := mvcc.RangeOptions{
Limit: limit,
Rev: r.Revision,
Count: r.CountOnly,
}
rr, err := txn.Range(r.Key, r.RangeEnd, ro)
if err != nil {
return nil, err
}
if r.MaxModRevision != 0 {
f := func(kv *mvccpb.KeyValue) bool { return kv.ModRevision > r.MaxModRevision }
pruneKVs(rr, f)
}
if r.MinModRevision != 0 {
f := func(kv *mvccpb.KeyValue) bool { return kv.ModRevision < r.MinModRevision }
pruneKVs(rr, f)
}
if r.MaxCreateRevision != 0 {
f := func(kv *mvccpb.KeyValue) bool { return kv.CreateRevision > r.MaxCreateRevision }
pruneKVs(rr, f)
}
if r.MinCreateRevision != 0 {
f := func(kv *mvccpb.KeyValue) bool { return kv.CreateRevision < r.MinCreateRevision }
pruneKVs(rr, f)
}
sortOrder := r.SortOrder
if r.SortTarget != pb.RangeRequest_KEY && sortOrder == pb.RangeRequest_NONE {
// Since current mvcc.Range implementation returns results
// sorted by keys in lexiographically ascending order,
// sort ASCEND by default only when target is not 'KEY'
sortOrder = pb.RangeRequest_ASCEND
}
if sortOrder != pb.RangeRequest_NONE {
var sorter sort.Interface
switch {
case r.SortTarget == pb.RangeRequest_KEY:
sorter = &kvSortByKey{&kvSort{rr.KVs}}
case r.SortTarget == pb.RangeRequest_VERSION:
sorter = &kvSortByVersion{&kvSort{rr.KVs}}
case r.SortTarget == pb.RangeRequest_CREATE:
sorter = &kvSortByCreate{&kvSort{rr.KVs}}
case r.SortTarget == pb.RangeRequest_MOD:
sorter = &kvSortByMod{&kvSort{rr.KVs}}
case r.SortTarget == pb.RangeRequest_VALUE:
sorter = &kvSortByValue{&kvSort{rr.KVs}}
}
switch {
case sortOrder == pb.RangeRequest_ASCEND:
sort.Sort(sorter)
case sortOrder == pb.RangeRequest_DESCEND:
sort.Sort(sort.Reverse(sorter))
}
}
if r.Limit > 0 && len(rr.KVs) > int(r.Limit) {
rr.KVs = rr.KVs[:r.Limit]
resp.More = true
}
resp.Header.Revision = rr.Rev
resp.Count = int64(rr.Count)
for i := range rr.KVs {
if r.KeysOnly {
rr.KVs[i].Value = nil
}
resp.Kvs = append(resp.Kvs, &rr.KVs[i])
}
return resp, nil
}
func (a *applierV3backend) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
isWrite := !isTxnReadonly(rt)
txn := mvcc.NewReadOnlyTxnWrite(a.s.KV().Read())
reqs, ok := a.compareToOps(txn, rt)
if isWrite {
if err := a.checkRequestPut(txn, reqs); err != nil {
txn.End()
return nil, err
}
}
if err := checkRequestRange(txn, reqs); err != nil {
txn.End()
return nil, err
}
resps := make([]*pb.ResponseOp, len(reqs))
txnResp := &pb.TxnResponse{
Responses: resps,
Succeeded: ok,
Header: &pb.ResponseHeader{},
}
// When executing mutable txn ops, etcd must hold the txn lock so
// readers do not see any intermediate results. Since writes are
// serialized on the raft loop, the revision in the read view will
// be the revision of the write txn.
if isWrite {
txn.End()
txn = a.s.KV().Write()
}
for i := range reqs {
resps[i] = a.applyUnion(txn, reqs[i])
}
rev := txn.Rev()
if len(txn.Changes()) != 0 {
rev++
}
txn.End()
txnResp.Header.Revision = rev
return txnResp, nil
}
func (a *applierV3backend) compareToOps(rv mvcc.ReadView, rt *pb.TxnRequest) ([]*pb.RequestOp, bool) {
for _, c := range rt.Compare {
if !applyCompare(rv, c) {
return rt.Failure, false
}
}
return rt.Success, true
}
// applyCompare applies the compare request.
// If the comparison succeeds, it returns true. Otherwise, returns false.
func applyCompare(rv mvcc.ReadView, c *pb.Compare) bool {
rr, err := rv.Range(c.Key, nil, mvcc.RangeOptions{})
if err != nil {
return false
}
var ckv mvccpb.KeyValue
if len(rr.KVs) != 0 {
ckv = rr.KVs[0]
} else {
// Use the zero value of ckv normally. However...
if c.Target == pb.Compare_VALUE {
// Always fail if we're comparing a value on a key that doesn't exist.
// We can treat non-existence as the empty set explicitly, such that
// even a key with a value of length 0 bytes is still a real key
// that was written that way
return false
}
}
// -1 is less, 0 is equal, 1 is greater
var result int
switch c.Target {
case pb.Compare_VALUE:
tv, _ := c.TargetUnion.(*pb.Compare_Value)
if tv != nil {
result = bytes.Compare(ckv.Value, tv.Value)
}
case pb.Compare_CREATE:
tv, _ := c.TargetUnion.(*pb.Compare_CreateRevision)
if tv != nil {
result = compareInt64(ckv.CreateRevision, tv.CreateRevision)
}
case pb.Compare_MOD:
tv, _ := c.TargetUnion.(*pb.Compare_ModRevision)
if tv != nil {
result = compareInt64(ckv.ModRevision, tv.ModRevision)
}
case pb.Compare_VERSION:
tv, _ := c.TargetUnion.(*pb.Compare_Version)
if tv != nil {
result = compareInt64(ckv.Version, tv.Version)
}
}
switch c.Result {
case pb.Compare_EQUAL:
return result == 0
case pb.Compare_NOT_EQUAL:
return result != 0
case pb.Compare_GREATER:
return result > 0
case pb.Compare_LESS:
return result < 0
}
return true
}
func (a *applierV3backend) applyUnion(txn mvcc.TxnWrite, union *pb.RequestOp) *pb.ResponseOp {
switch tv := union.Request.(type) {
case *pb.RequestOp_RequestRange:
if tv.RequestRange != nil {
resp, err := a.Range(txn, tv.RequestRange)
if err != nil {
plog.Panicf("unexpected error during txn: %v", err)
}
return &pb.ResponseOp{Response: &pb.ResponseOp_ResponseRange{ResponseRange: resp}}
}
case *pb.RequestOp_RequestPut:
if tv.RequestPut != nil {
resp, err := a.Put(txn, tv.RequestPut)
if err != nil {
plog.Panicf("unexpected error during txn: %v", err)
}
return &pb.ResponseOp{Response: &pb.ResponseOp_ResponsePut{ResponsePut: resp}}
}
case *pb.RequestOp_RequestDeleteRange:
if tv.RequestDeleteRange != nil {
resp, err := a.DeleteRange(txn, tv.RequestDeleteRange)
if err != nil {
plog.Panicf("unexpected error during txn: %v", err)
}
return &pb.ResponseOp{Response: &pb.ResponseOp_ResponseDeleteRange{ResponseDeleteRange: resp}}
}
default:
// empty union
return nil
}
return nil
}
func (a *applierV3backend) Compaction(compaction *pb.CompactionRequest) (*pb.CompactionResponse, <-chan struct{}, error) {
resp := &pb.CompactionResponse{}
resp.Header = &pb.ResponseHeader{}
ch, err := a.s.KV().Compact(compaction.Revision)
if err != nil {
return nil, ch, err
}
// get the current revision. which key to get is not important.
rr, _ := a.s.KV().Range([]byte("compaction"), nil, mvcc.RangeOptions{})
resp.Header.Revision = rr.Rev
return resp, ch, err
}
func (a *applierV3backend) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
l, err := a.s.lessor.Grant(lease.LeaseID(lc.ID), lc.TTL)
resp := &pb.LeaseGrantResponse{}
if err == nil {
resp.ID = int64(l.ID)
resp.TTL = l.TTL()
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) LeaseRevoke(lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
err := a.s.lessor.Revoke(lease.LeaseID(lc.ID))
return &pb.LeaseRevokeResponse{Header: newHeader(a.s)}, err
}
func (a *applierV3backend) Alarm(ar *pb.AlarmRequest) (*pb.AlarmResponse, error) {
resp := &pb.AlarmResponse{}
oldCount := len(a.s.alarmStore.Get(ar.Alarm))
switch ar.Action {
case pb.AlarmRequest_GET:
resp.Alarms = a.s.alarmStore.Get(ar.Alarm)
case pb.AlarmRequest_ACTIVATE:
m := a.s.alarmStore.Activate(types.ID(ar.MemberID), ar.Alarm)
if m == nil {
break
}
resp.Alarms = append(resp.Alarms, m)
activated := oldCount == 0 && len(a.s.alarmStore.Get(m.Alarm)) == 1
if !activated {
break
}
switch m.Alarm {
case pb.AlarmType_NOSPACE:
plog.Warningf("alarm raised %+v", m)
a.s.applyV3 = newApplierV3Capped(a)
default:
plog.Errorf("unimplemented alarm activation (%+v)", m)
}
case pb.AlarmRequest_DEACTIVATE:
m := a.s.alarmStore.Deactivate(types.ID(ar.MemberID), ar.Alarm)
if m == nil {
break
}
resp.Alarms = append(resp.Alarms, m)
deactivated := oldCount > 0 && len(a.s.alarmStore.Get(ar.Alarm)) == 0
if !deactivated {
break
}
switch m.Alarm {
case pb.AlarmType_NOSPACE:
plog.Infof("alarm disarmed %+v", ar)
a.s.applyV3 = a.s.newApplierV3()
default:
plog.Errorf("unimplemented alarm deactivation (%+v)", m)
}
default:
return nil, nil
}
return resp, nil
}
type applierV3Capped struct {
applierV3
q backendQuota
}
// newApplierV3Capped creates an applyV3 that will reject Puts and transactions
// with Puts so that the number of keys in the store is capped.
func newApplierV3Capped(base applierV3) applierV3 { return &applierV3Capped{applierV3: base} }
func (a *applierV3Capped) Put(txn mvcc.TxnWrite, p *pb.PutRequest) (*pb.PutResponse, error) {
return nil, ErrNoSpace
}
func (a *applierV3Capped) Txn(r *pb.TxnRequest) (*pb.TxnResponse, error) {
if a.q.Cost(r) > 0 {
return nil, ErrNoSpace
}
return a.applierV3.Txn(r)
}
func (a *applierV3Capped) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
return nil, ErrNoSpace
}
func (a *applierV3backend) AuthEnable() (*pb.AuthEnableResponse, error) {
err := a.s.AuthStore().AuthEnable()
if err != nil {
return nil, err
}
return &pb.AuthEnableResponse{Header: newHeader(a.s)}, nil
}
func (a *applierV3backend) AuthDisable() (*pb.AuthDisableResponse, error) {
a.s.AuthStore().AuthDisable()
return &pb.AuthDisableResponse{Header: newHeader(a.s)}, nil
}
func (a *applierV3backend) Authenticate(r *pb.InternalAuthenticateRequest) (*pb.AuthenticateResponse, error) {
ctx := context.WithValue(context.WithValue(a.s.ctx, "index", a.s.consistIndex.ConsistentIndex()), "simpleToken", r.SimpleToken)
resp, err := a.s.AuthStore().Authenticate(ctx, r.Name, r.Password)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) UserAdd(r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
resp, err := a.s.AuthStore().UserAdd(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) UserDelete(r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error) {
resp, err := a.s.AuthStore().UserDelete(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) UserChangePassword(r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) {
resp, err := a.s.AuthStore().UserChangePassword(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) UserGrantRole(r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
resp, err := a.s.AuthStore().UserGrantRole(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) UserGet(r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
resp, err := a.s.AuthStore().UserGet(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) UserRevokeRole(r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) {
resp, err := a.s.AuthStore().UserRevokeRole(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) RoleAdd(r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
resp, err := a.s.AuthStore().RoleAdd(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) RoleGrantPermission(r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) {
resp, err := a.s.AuthStore().RoleGrantPermission(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) RoleGet(r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
resp, err := a.s.AuthStore().RoleGet(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) RoleRevokePermission(r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
resp, err := a.s.AuthStore().RoleRevokePermission(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) RoleDelete(r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) {
resp, err := a.s.AuthStore().RoleDelete(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) UserList(r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
resp, err := a.s.AuthStore().UserList(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
func (a *applierV3backend) RoleList(r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
resp, err := a.s.AuthStore().RoleList(r)
if resp != nil {
resp.Header = newHeader(a.s)
}
return resp, err
}
type quotaApplierV3 struct {
applierV3
q Quota
}
func newQuotaApplierV3(s *EtcdServer, app applierV3) applierV3 {
return &quotaApplierV3{app, NewBackendQuota(s)}
}
func (a *quotaApplierV3) Put(txn mvcc.TxnWrite, p *pb.PutRequest) (*pb.PutResponse, error) {
ok := a.q.Available(p)
resp, err := a.applierV3.Put(txn, p)
if err == nil && !ok {
err = ErrNoSpace
}
return resp, err
}
func (a *quotaApplierV3) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
ok := a.q.Available(rt)
resp, err := a.applierV3.Txn(rt)
if err == nil && !ok {
err = ErrNoSpace
}
return resp, err
}
func (a *quotaApplierV3) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
ok := a.q.Available(lc)
resp, err := a.applierV3.LeaseGrant(lc)
if err == nil && !ok {
err = ErrNoSpace
}
return resp, err
}
type kvSort struct{ kvs []mvccpb.KeyValue }
func (s *kvSort) Swap(i, j int) {
t := s.kvs[i]
s.kvs[i] = s.kvs[j]
s.kvs[j] = t
}
func (s *kvSort) Len() int { return len(s.kvs) }
type kvSortByKey struct{ *kvSort }
func (s *kvSortByKey) Less(i, j int) bool {
return bytes.Compare(s.kvs[i].Key, s.kvs[j].Key) < 0
}
type kvSortByVersion struct{ *kvSort }
func (s *kvSortByVersion) Less(i, j int) bool {
return (s.kvs[i].Version - s.kvs[j].Version) < 0
}
type kvSortByCreate struct{ *kvSort }
func (s *kvSortByCreate) Less(i, j int) bool {
return (s.kvs[i].CreateRevision - s.kvs[j].CreateRevision) < 0
}
type kvSortByMod struct{ *kvSort }
func (s *kvSortByMod) Less(i, j int) bool {
return (s.kvs[i].ModRevision - s.kvs[j].ModRevision) < 0
}
type kvSortByValue struct{ *kvSort }
func (s *kvSortByValue) Less(i, j int) bool {
return bytes.Compare(s.kvs[i].Value, s.kvs[j].Value) < 0
}
func (a *applierV3backend) checkRequestPut(rv mvcc.ReadView, reqs []*pb.RequestOp) error {
for _, requ := range reqs {
tv, ok := requ.Request.(*pb.RequestOp_RequestPut)
if !ok {
continue
}
preq := tv.RequestPut
if preq == nil {
continue
}
if preq.IgnoreValue || preq.IgnoreLease {
// expects previous key-value, error if not exist
rr, err := rv.Range(preq.Key, nil, mvcc.RangeOptions{})
if err != nil {
return err
}
if rr == nil || len(rr.KVs) == 0 {
return ErrKeyNotFound
}
}
if lease.LeaseID(preq.Lease) == lease.NoLease {
continue
}
if l := a.s.lessor.Lookup(lease.LeaseID(preq.Lease)); l == nil {
return lease.ErrLeaseNotFound
}
}
return nil
}
func checkRequestRange(rv mvcc.ReadView, reqs []*pb.RequestOp) error {
for _, requ := range reqs {
tv, ok := requ.Request.(*pb.RequestOp_RequestRange)
if !ok {
continue
}
greq := tv.RequestRange
if greq == nil || greq.Revision == 0 {
continue
}
if greq.Revision > rv.Rev() {
return mvcc.ErrFutureRev
}
if greq.Revision < rv.FirstRev() {
return mvcc.ErrCompacted
}
}
return nil
}
func compareInt64(a, b int64) int {
switch {
case a < b:
return -1
case a > b:
return 1
default:
return 0
}
}
// isGteRange determines if the range end is a >= range. This works around grpc
// sending empty byte strings as nil; >= is encoded in the range end as '\0'.
func isGteRange(rangeEnd []byte) bool {
return len(rangeEnd) == 1 && rangeEnd[0] == 0
}
func noSideEffect(r *pb.InternalRaftRequest) bool {
return r.Range != nil || r.AuthUserGet != nil || r.AuthRoleGet != nil
}
func removeNeedlessRangeReqs(txn *pb.TxnRequest) {
f := func(ops []*pb.RequestOp) []*pb.RequestOp {
j := 0
for i := 0; i < len(ops); i++ {
if _, ok := ops[i].Request.(*pb.RequestOp_RequestRange); ok {
continue
}
ops[j] = ops[i]
j++
}
return ops[:j]
}
txn.Success = f(txn.Success)
txn.Failure = f(txn.Failure)
}
func pruneKVs(rr *mvcc.RangeResult, isPrunable func(*mvccpb.KeyValue) bool) {
j := 0
for i := range rr.KVs {
rr.KVs[j] = rr.KVs[i]
if !isPrunable(&rr.KVs[i]) {
j++
}
}
rr.KVs = rr.KVs[:j]
}
func newHeader(s *EtcdServer) *pb.ResponseHeader {
return &pb.ResponseHeader{
ClusterId: uint64(s.Cluster().ID()),
MemberId: uint64(s.ID()),
Revision: s.KV().Rev(),
RaftTerm: s.Term(),
}
}

View File

@@ -1,196 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"sync"
"github.com/coreos/etcd/auth"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/mvcc"
)
type authApplierV3 struct {
applierV3
as auth.AuthStore
// mu serializes Apply so that user isn't corrupted and so that
// serialized requests don't leak data from TOCTOU errors
mu sync.Mutex
authInfo auth.AuthInfo
}
func newAuthApplierV3(as auth.AuthStore, base applierV3) *authApplierV3 {
return &authApplierV3{applierV3: base, as: as}
}
func (aa *authApplierV3) Apply(r *pb.InternalRaftRequest) *applyResult {
aa.mu.Lock()
defer aa.mu.Unlock()
if r.Header != nil {
// backward-compatible with pre-3.0 releases when internalRaftRequest
// does not have header field
aa.authInfo.Username = r.Header.Username
aa.authInfo.Revision = r.Header.AuthRevision
}
if needAdminPermission(r) {
if err := aa.as.IsAdminPermitted(&aa.authInfo); err != nil {
aa.authInfo.Username = ""
aa.authInfo.Revision = 0
return &applyResult{err: err}
}
}
ret := aa.applierV3.Apply(r)
aa.authInfo.Username = ""
aa.authInfo.Revision = 0
return ret
}
func (aa *authApplierV3) Put(txn mvcc.TxnWrite, r *pb.PutRequest) (*pb.PutResponse, error) {
if err := aa.as.IsPutPermitted(&aa.authInfo, r.Key); err != nil {
return nil, err
}
if r.PrevKv {
err := aa.as.IsRangePermitted(&aa.authInfo, r.Key, nil)
if err != nil {
return nil, err
}
}
return aa.applierV3.Put(txn, r)
}
func (aa *authApplierV3) Range(txn mvcc.TxnRead, r *pb.RangeRequest) (*pb.RangeResponse, error) {
if err := aa.as.IsRangePermitted(&aa.authInfo, r.Key, r.RangeEnd); err != nil {
return nil, err
}
return aa.applierV3.Range(txn, r)
}
func (aa *authApplierV3) DeleteRange(txn mvcc.TxnWrite, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
if err := aa.as.IsDeleteRangePermitted(&aa.authInfo, r.Key, r.RangeEnd); err != nil {
return nil, err
}
if r.PrevKv {
err := aa.as.IsRangePermitted(&aa.authInfo, r.Key, r.RangeEnd)
if err != nil {
return nil, err
}
}
return aa.applierV3.DeleteRange(txn, r)
}
func checkTxnReqsPermission(as auth.AuthStore, ai *auth.AuthInfo, reqs []*pb.RequestOp) error {
for _, requ := range reqs {
switch tv := requ.Request.(type) {
case *pb.RequestOp_RequestRange:
if tv.RequestRange == nil {
continue
}
if err := as.IsRangePermitted(ai, tv.RequestRange.Key, tv.RequestRange.RangeEnd); err != nil {
return err
}
case *pb.RequestOp_RequestPut:
if tv.RequestPut == nil {
continue
}
if err := as.IsPutPermitted(ai, tv.RequestPut.Key); err != nil {
return err
}
case *pb.RequestOp_RequestDeleteRange:
if tv.RequestDeleteRange == nil {
continue
}
if tv.RequestDeleteRange.PrevKv {
err := as.IsRangePermitted(ai, tv.RequestDeleteRange.Key, tv.RequestDeleteRange.RangeEnd)
if err != nil {
return err
}
}
err := as.IsDeleteRangePermitted(ai, tv.RequestDeleteRange.Key, tv.RequestDeleteRange.RangeEnd)
if err != nil {
return err
}
}
}
return nil
}
func checkTxnAuth(as auth.AuthStore, ai *auth.AuthInfo, rt *pb.TxnRequest) error {
for _, c := range rt.Compare {
if err := as.IsRangePermitted(ai, c.Key, nil); err != nil {
return err
}
}
if err := checkTxnReqsPermission(as, ai, rt.Success); err != nil {
return err
}
if err := checkTxnReqsPermission(as, ai, rt.Failure); err != nil {
return err
}
return nil
}
func (aa *authApplierV3) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
if err := checkTxnAuth(aa.as, &aa.authInfo, rt); err != nil {
return nil, err
}
return aa.applierV3.Txn(rt)
}
func needAdminPermission(r *pb.InternalRaftRequest) bool {
switch {
case r.AuthEnable != nil:
return true
case r.AuthDisable != nil:
return true
case r.AuthUserAdd != nil:
return true
case r.AuthUserDelete != nil:
return true
case r.AuthUserChangePassword != nil:
return true
case r.AuthUserGrantRole != nil:
return true
case r.AuthUserGet != nil:
return true
case r.AuthUserRevokeRole != nil:
return true
case r.AuthRoleAdd != nil:
return true
case r.AuthRoleGrantPermission != nil:
return true
case r.AuthRoleGet != nil:
return true
case r.AuthRoleRevokePermission != nil:
return true
case r.AuthRoleDelete != nil:
return true
case r.AuthUserList != nil:
return true
case r.AuthRoleList != nil:
return true
default:
return false
}
}

View File

@@ -1,140 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"encoding/json"
"path"
"time"
"github.com/coreos/etcd/etcdserver/api"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/pkg/pbutil"
"github.com/coreos/etcd/store"
"github.com/coreos/go-semver/semver"
)
// ApplierV2 is the interface for processing V2 raft messages
type ApplierV2 interface {
Delete(r *pb.Request) Response
Post(r *pb.Request) Response
Put(r *pb.Request) Response
QGet(r *pb.Request) Response
Sync(r *pb.Request) Response
}
func NewApplierV2(s store.Store, c *membership.RaftCluster) ApplierV2 {
return &applierV2store{store: s, cluster: c}
}
type applierV2store struct {
store store.Store
cluster *membership.RaftCluster
}
func (a *applierV2store) Delete(r *pb.Request) Response {
switch {
case r.PrevIndex > 0 || r.PrevValue != "":
return toResponse(a.store.CompareAndDelete(r.Path, r.PrevValue, r.PrevIndex))
default:
return toResponse(a.store.Delete(r.Path, r.Dir, r.Recursive))
}
}
func (a *applierV2store) Post(r *pb.Request) Response {
return toResponse(a.store.Create(r.Path, r.Dir, r.Val, true, toTTLOptions(r)))
}
func (a *applierV2store) Put(r *pb.Request) Response {
ttlOptions := toTTLOptions(r)
exists, existsSet := pbutil.GetBool(r.PrevExist)
switch {
case existsSet:
if exists {
if r.PrevIndex == 0 && r.PrevValue == "" {
return toResponse(a.store.Update(r.Path, r.Val, ttlOptions))
}
return toResponse(a.store.CompareAndSwap(r.Path, r.PrevValue, r.PrevIndex, r.Val, ttlOptions))
}
return toResponse(a.store.Create(r.Path, r.Dir, r.Val, false, ttlOptions))
case r.PrevIndex > 0 || r.PrevValue != "":
return toResponse(a.store.CompareAndSwap(r.Path, r.PrevValue, r.PrevIndex, r.Val, ttlOptions))
default:
if storeMemberAttributeRegexp.MatchString(r.Path) {
id := membership.MustParseMemberIDFromKey(path.Dir(r.Path))
var attr membership.Attributes
if err := json.Unmarshal([]byte(r.Val), &attr); err != nil {
plog.Panicf("unmarshal %s should never fail: %v", r.Val, err)
}
if a.cluster != nil {
a.cluster.UpdateAttributes(id, attr)
}
// return an empty response since there is no consumer.
return Response{}
}
if r.Path == membership.StoreClusterVersionKey() {
if a.cluster != nil {
a.cluster.SetVersion(semver.Must(semver.NewVersion(r.Val)), api.UpdateCapability)
}
// return an empty response since there is no consumer.
return Response{}
}
return toResponse(a.store.Set(r.Path, r.Dir, r.Val, ttlOptions))
}
}
func (a *applierV2store) QGet(r *pb.Request) Response {
return toResponse(a.store.Get(r.Path, r.Recursive, r.Sorted))
}
func (a *applierV2store) Sync(r *pb.Request) Response {
a.store.DeleteExpiredKeys(time.Unix(0, r.Time))
return Response{}
}
// applyV2Request interprets r as a call to store.X and returns a Response interpreted
// from store.Event
func (s *EtcdServer) applyV2Request(r *pb.Request) Response {
toTTLOptions(r)
switch r.Method {
case "POST":
return s.applyV2.Post(r)
case "PUT":
return s.applyV2.Put(r)
case "DELETE":
return s.applyV2.Delete(r)
case "QGET":
return s.applyV2.QGet(r)
case "SYNC":
return s.applyV2.Sync(r)
default:
// This should never be reached, but just in case:
return Response{err: ErrUnknownMethod}
}
}
func toTTLOptions(r *pb.Request) store.TTLOptionSet {
refresh, _ := pbutil.GetBool(r.Refresh)
ttlOptions := store.TTLOptionSet{Refresh: refresh}
if r.Expiration != 0 {
ttlOptions.ExpireTime = time.Unix(0, r.Expiration)
}
return ttlOptions
}
func toResponse(ev *store.Event, err error) Response {
return Response{Event: ev, err: err}
}

View File

@@ -1,81 +0,0 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"fmt"
"os"
"time"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/snap"
)
func newBackend(cfg *ServerConfig) backend.Backend {
bcfg := backend.DefaultBackendConfig()
bcfg.Path = cfg.backendPath()
if cfg.QuotaBackendBytes > 0 && cfg.QuotaBackendBytes != DefaultQuotaBytes {
// permit 10% excess over quota for disarm
bcfg.MmapSize = uint64(cfg.QuotaBackendBytes + cfg.QuotaBackendBytes/10)
}
return backend.New(bcfg)
}
// openSnapshotBackend renames a snapshot db to the current etcd db and opens it.
func openSnapshotBackend(cfg *ServerConfig, ss *snap.Snapshotter, snapshot raftpb.Snapshot) (backend.Backend, error) {
snapPath, err := ss.DBFilePath(snapshot.Metadata.Index)
if err != nil {
return nil, fmt.Errorf("database snapshot file path error: %v", err)
}
if err := os.Rename(snapPath, cfg.backendPath()); err != nil {
return nil, fmt.Errorf("rename snapshot file error: %v", err)
}
return openBackend(cfg), nil
}
// openBackend returns a backend using the current etcd db.
func openBackend(cfg *ServerConfig) backend.Backend {
fn := cfg.backendPath()
beOpened := make(chan backend.Backend)
go func() {
beOpened <- newBackend(cfg)
}()
select {
case be := <-beOpened:
return be
case <-time.After(time.Second):
plog.Warningf("another etcd process is using %q and holds the file lock.", fn)
plog.Warningf("waiting for it to exit before starting...")
}
return <-beOpened
}
// recoverBackendSnapshot recovers the DB from a snapshot in case etcd crashes
// before updating the backend db after persisting raft snapshot to disk,
// violating the invariant snapshot.Metadata.Index < db.consistentIndex. In this
// case, replace the db with the snapshot db sent by the leader.
func recoverSnapshotBackend(cfg *ServerConfig, oldbe backend.Backend, snapshot raftpb.Snapshot) (backend.Backend, error) {
var cIndex consistentIndex
kv := mvcc.New(oldbe, &lease.FakeLessor{}, &cIndex)
defer kv.Close()
if snapshot.Metadata.Index <= kv.ConsistentIndex() {
return oldbe, nil
}
oldbe.Close()
return openSnapshotBackend(cfg, snap.New(cfg.SnapDir()), snapshot)
}

View File

@@ -1,258 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"sort"
"time"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/version"
"github.com/coreos/go-semver/semver"
)
// isMemberBootstrapped tries to check if the given member has been bootstrapped
// in the given cluster.
func isMemberBootstrapped(cl *membership.RaftCluster, member string, rt http.RoundTripper, timeout time.Duration) bool {
rcl, err := getClusterFromRemotePeers(getRemotePeerURLs(cl, member), timeout, false, rt)
if err != nil {
return false
}
id := cl.MemberByName(member).ID
m := rcl.Member(id)
if m == nil {
return false
}
if len(m.ClientURLs) > 0 {
return true
}
return false
}
// GetClusterFromRemotePeers takes a set of URLs representing etcd peers, and
// attempts to construct a Cluster by accessing the members endpoint on one of
// these URLs. The first URL to provide a response is used. If no URLs provide
// a response, or a Cluster cannot be successfully created from a received
// response, an error is returned.
// Each request has a 10-second timeout. Because the upper limit of TTL is 5s,
// 10 second is enough for building connection and finishing request.
func GetClusterFromRemotePeers(urls []string, rt http.RoundTripper) (*membership.RaftCluster, error) {
return getClusterFromRemotePeers(urls, 10*time.Second, true, rt)
}
// If logerr is true, it prints out more error messages.
func getClusterFromRemotePeers(urls []string, timeout time.Duration, logerr bool, rt http.RoundTripper) (*membership.RaftCluster, error) {
cc := &http.Client{
Transport: rt,
Timeout: timeout,
}
for _, u := range urls {
resp, err := cc.Get(u + "/members")
if err != nil {
if logerr {
plog.Warningf("could not get cluster response from %s: %v", u, err)
}
continue
}
b, err := ioutil.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
if logerr {
plog.Warningf("could not read the body of cluster response: %v", err)
}
continue
}
var membs []*membership.Member
if err = json.Unmarshal(b, &membs); err != nil {
if logerr {
plog.Warningf("could not unmarshal cluster response: %v", err)
}
continue
}
id, err := types.IDFromString(resp.Header.Get("X-Etcd-Cluster-ID"))
if err != nil {
if logerr {
plog.Warningf("could not parse the cluster ID from cluster res: %v", err)
}
continue
}
// check the length of membership members
// if the membership members are present then prepare and return raft cluster
// if membership members are not present then the raft cluster formed will be
// an invalid empty cluster hence return failed to get raft cluster member(s) from the given urls error
if len(membs) > 0 {
return membership.NewClusterFromMembers("", id, membs), nil
}
return nil, fmt.Errorf("failed to get raft cluster member(s) from the given urls.")
}
return nil, fmt.Errorf("could not retrieve cluster information from the given urls")
}
// getRemotePeerURLs returns peer urls of remote members in the cluster. The
// returned list is sorted in ascending lexicographical order.
func getRemotePeerURLs(cl *membership.RaftCluster, local string) []string {
us := make([]string, 0)
for _, m := range cl.Members() {
if m.Name == local {
continue
}
us = append(us, m.PeerURLs...)
}
sort.Strings(us)
return us
}
// getVersions returns the versions of the members in the given cluster.
// The key of the returned map is the member's ID. The value of the returned map
// is the semver versions string, including server and cluster.
// If it fails to get the version of a member, the key will be nil.
func getVersions(cl *membership.RaftCluster, local types.ID, rt http.RoundTripper) map[string]*version.Versions {
members := cl.Members()
vers := make(map[string]*version.Versions)
for _, m := range members {
if m.ID == local {
cv := "not_decided"
if cl.Version() != nil {
cv = cl.Version().String()
}
vers[m.ID.String()] = &version.Versions{Server: version.Version, Cluster: cv}
continue
}
ver, err := getVersion(m, rt)
if err != nil {
plog.Warningf("cannot get the version of member %s (%v)", m.ID, err)
vers[m.ID.String()] = nil
} else {
vers[m.ID.String()] = ver
}
}
return vers
}
// decideClusterVersion decides the cluster version based on the versions map.
// The returned version is the min server version in the map, or nil if the min
// version in unknown.
func decideClusterVersion(vers map[string]*version.Versions) *semver.Version {
var cv *semver.Version
lv := semver.Must(semver.NewVersion(version.Version))
for mid, ver := range vers {
if ver == nil {
return nil
}
v, err := semver.NewVersion(ver.Server)
if err != nil {
plog.Errorf("cannot understand the version of member %s (%v)", mid, err)
return nil
}
if lv.LessThan(*v) {
plog.Warningf("the local etcd version %s is not up-to-date", lv.String())
plog.Warningf("member %s has a higher version %s", mid, ver.Server)
}
if cv == nil {
cv = v
} else if v.LessThan(*cv) {
cv = v
}
}
return cv
}
// isCompatibleWithCluster return true if the local member has a compatible version with
// the current running cluster.
// The version is considered as compatible when at least one of the other members in the cluster has a
// cluster version in the range of [MinClusterVersion, Version] and no known members has a cluster version
// out of the range.
// We set this rule since when the local member joins, another member might be offline.
func isCompatibleWithCluster(cl *membership.RaftCluster, local types.ID, rt http.RoundTripper) bool {
vers := getVersions(cl, local, rt)
minV := semver.Must(semver.NewVersion(version.MinClusterVersion))
maxV := semver.Must(semver.NewVersion(version.Version))
maxV = &semver.Version{
Major: maxV.Major,
Minor: maxV.Minor,
}
return isCompatibleWithVers(vers, local, minV, maxV)
}
func isCompatibleWithVers(vers map[string]*version.Versions, local types.ID, minV, maxV *semver.Version) bool {
var ok bool
for id, v := range vers {
// ignore comparison with local version
if id == local.String() {
continue
}
if v == nil {
continue
}
clusterv, err := semver.NewVersion(v.Cluster)
if err != nil {
plog.Errorf("cannot understand the cluster version of member %s (%v)", id, err)
continue
}
if clusterv.LessThan(*minV) {
plog.Warningf("the running cluster version(%v) is lower than the minimal cluster version(%v) supported", clusterv.String(), minV.String())
return false
}
if maxV.LessThan(*clusterv) {
plog.Warningf("the running cluster version(%v) is higher than the maximum cluster version(%v) supported", clusterv.String(), maxV.String())
return false
}
ok = true
}
return ok
}
// getVersion returns the Versions of the given member via its
// peerURLs. Returns the last error if it fails to get the version.
func getVersion(m *membership.Member, rt http.RoundTripper) (*version.Versions, error) {
cc := &http.Client{
Transport: rt,
}
var (
err error
resp *http.Response
)
for _, u := range m.PeerURLs {
resp, err = cc.Get(u + "/version")
if err != nil {
plog.Warningf("failed to reach the peerURL(%s) of member %s (%v)", u, m.ID, err)
continue
}
var b []byte
b, err = ioutil.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
plog.Warningf("failed to read out the response body from the peerURL(%s) of member %s (%v)", u, m.ID, err)
continue
}
var vers version.Versions
if err = json.Unmarshal(b, &vers); err != nil {
plog.Warningf("failed to unmarshal the response body got from the peerURL(%s) of member %s (%v)", u, m.ID, err)
continue
}
return &vers, nil
}
return nil, err
}

View File

@@ -1,204 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"fmt"
"path/filepath"
"sort"
"strings"
"time"
"golang.org/x/net/context"
"github.com/coreos/etcd/pkg/netutil"
"github.com/coreos/etcd/pkg/transport"
"github.com/coreos/etcd/pkg/types"
)
// ServerConfig holds the configuration of etcd as taken from the command line or discovery.
type ServerConfig struct {
Name string
DiscoveryURL string
DiscoveryProxy string
ClientURLs types.URLs
PeerURLs types.URLs
DataDir string
// DedicatedWALDir config will make the etcd to write the WAL to the WALDir
// rather than the dataDir/member/wal.
DedicatedWALDir string
SnapCount uint64
MaxSnapFiles uint
MaxWALFiles uint
InitialPeerURLsMap types.URLsMap
InitialClusterToken string
NewCluster bool
ForceNewCluster bool
PeerTLSInfo transport.TLSInfo
TickMs uint
ElectionTicks int
BootstrapTimeout time.Duration
AutoCompactionRetention int
QuotaBackendBytes int64
StrictReconfigCheck bool
// ClientCertAuthEnabled is true when cert has been signed by the client CA.
ClientCertAuthEnabled bool
AuthToken string
}
// VerifyBootstrap sanity-checks the initial config for bootstrap case
// and returns an error for things that should never happen.
func (c *ServerConfig) VerifyBootstrap() error {
if err := c.hasLocalMember(); err != nil {
return err
}
if err := c.advertiseMatchesCluster(); err != nil {
return err
}
if checkDuplicateURL(c.InitialPeerURLsMap) {
return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap)
}
if c.InitialPeerURLsMap.String() == "" && c.DiscoveryURL == "" {
return fmt.Errorf("initial cluster unset and no discovery URL found")
}
return nil
}
// VerifyJoinExisting sanity-checks the initial config for join existing cluster
// case and returns an error for things that should never happen.
func (c *ServerConfig) VerifyJoinExisting() error {
// The member has announced its peer urls to the cluster before starting; no need to
// set the configuration again.
if err := c.hasLocalMember(); err != nil {
return err
}
if checkDuplicateURL(c.InitialPeerURLsMap) {
return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap)
}
if c.DiscoveryURL != "" {
return fmt.Errorf("discovery URL should not be set when joining existing initial cluster")
}
return nil
}
// hasLocalMember checks that the cluster at least contains the local server.
func (c *ServerConfig) hasLocalMember() error {
if urls := c.InitialPeerURLsMap[c.Name]; urls == nil {
return fmt.Errorf("couldn't find local name %q in the initial cluster configuration", c.Name)
}
return nil
}
// advertiseMatchesCluster confirms peer URLs match those in the cluster peer list.
func (c *ServerConfig) advertiseMatchesCluster() error {
urls, apurls := c.InitialPeerURLsMap[c.Name], c.PeerURLs.StringSlice()
urls.Sort()
sort.Strings(apurls)
ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
defer cancel()
if !netutil.URLStringsEqual(ctx, apurls, urls.StringSlice()) {
umap := map[string]types.URLs{c.Name: c.PeerURLs}
return fmt.Errorf("--initial-cluster must include %s given --initial-advertise-peer-urls=%s", types.URLsMap(umap).String(), strings.Join(apurls, ","))
}
return nil
}
func (c *ServerConfig) MemberDir() string { return filepath.Join(c.DataDir, "member") }
func (c *ServerConfig) WALDir() string {
if c.DedicatedWALDir != "" {
return c.DedicatedWALDir
}
return filepath.Join(c.MemberDir(), "wal")
}
func (c *ServerConfig) SnapDir() string { return filepath.Join(c.MemberDir(), "snap") }
func (c *ServerConfig) ShouldDiscover() bool { return c.DiscoveryURL != "" }
// ReqTimeout returns timeout for request to finish.
func (c *ServerConfig) ReqTimeout() time.Duration {
// 5s for queue waiting, computation and disk IO delay
// + 2 * election timeout for possible leader election
return 5*time.Second + 2*time.Duration(c.ElectionTicks)*time.Duration(c.TickMs)*time.Millisecond
}
func (c *ServerConfig) electionTimeout() time.Duration {
return time.Duration(c.ElectionTicks) * time.Duration(c.TickMs) * time.Millisecond
}
func (c *ServerConfig) peerDialTimeout() time.Duration {
// 1s for queue wait and system delay
// + one RTT, which is smaller than 1/5 election timeout
return time.Second + time.Duration(c.ElectionTicks)*time.Duration(c.TickMs)*time.Millisecond/5
}
func (c *ServerConfig) PrintWithInitial() { c.print(true) }
func (c *ServerConfig) Print() { c.print(false) }
func (c *ServerConfig) print(initial bool) {
plog.Infof("name = %s", c.Name)
if c.ForceNewCluster {
plog.Infof("force new cluster")
}
plog.Infof("data dir = %s", c.DataDir)
plog.Infof("member dir = %s", c.MemberDir())
if c.DedicatedWALDir != "" {
plog.Infof("dedicated WAL dir = %s", c.DedicatedWALDir)
}
plog.Infof("heartbeat = %dms", c.TickMs)
plog.Infof("election = %dms", c.ElectionTicks*int(c.TickMs))
plog.Infof("snapshot count = %d", c.SnapCount)
if len(c.DiscoveryURL) != 0 {
plog.Infof("discovery URL= %s", c.DiscoveryURL)
if len(c.DiscoveryProxy) != 0 {
plog.Infof("discovery proxy = %s", c.DiscoveryProxy)
}
}
plog.Infof("advertise client URLs = %s", c.ClientURLs)
if initial {
plog.Infof("initial advertise peer URLs = %s", c.PeerURLs)
plog.Infof("initial cluster = %s", c.InitialPeerURLsMap)
}
}
func checkDuplicateURL(urlsmap types.URLsMap) bool {
um := make(map[string]bool)
for _, urls := range urlsmap {
for _, url := range urls {
u := url.String()
if um[u] {
return true
}
um[u] = true
}
}
return false
}
func (c *ServerConfig) bootstrapTimeout() time.Duration {
if c.BootstrapTimeout != 0 {
return c.BootstrapTimeout
}
return time.Second
}
func (c *ServerConfig) backendPath() string { return filepath.Join(c.SnapDir(), "db") }

View File

@@ -1,33 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"sync/atomic"
)
// consistentIndex represents the offset of an entry in a consistent replica log.
// It implements the mvcc.ConsistentIndexGetter interface.
// It is always set to the offset of current entry before executing the entry,
// so ConsistentWatchableKV could get the consistent index from it.
type consistentIndex uint64
func (i *consistentIndex) setConsistentIndex(v uint64) {
atomic.StoreUint64((*uint64)(i), v)
}
func (i *consistentIndex) ConsistentIndex() uint64 {
return atomic.LoadUint64((*uint64)(i))
}

View File

@@ -1,16 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package etcdserver defines how etcd servers interact and store their states.
package etcdserver

View File

@@ -1,46 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"errors"
"fmt"
)
var (
ErrUnknownMethod = errors.New("etcdserver: unknown method")
ErrStopped = errors.New("etcdserver: server stopped")
ErrCanceled = errors.New("etcdserver: request cancelled")
ErrTimeout = errors.New("etcdserver: request timed out")
ErrTimeoutDueToLeaderFail = errors.New("etcdserver: request timed out, possibly due to previous leader failure")
ErrTimeoutDueToConnectionLost = errors.New("etcdserver: request timed out, possibly due to connection lost")
ErrTimeoutLeaderTransfer = errors.New("etcdserver: request timed out, leader transfer took too long")
ErrNotEnoughStartedMembers = errors.New("etcdserver: re-configuration failed due to not enough started members")
ErrNoLeader = errors.New("etcdserver: no leader")
ErrRequestTooLarge = errors.New("etcdserver: request is too large")
ErrNoSpace = errors.New("etcdserver: no space")
ErrTooManyRequests = errors.New("etcdserver: too many requests")
ErrUnhealthy = errors.New("etcdserver: unhealthy cluster")
ErrKeyNotFound = errors.New("etcdserver: key not found")
)
type DiscoveryError struct {
Op string
Err error
}
func (e DiscoveryError) Error() string {
return fmt.Sprintf("failed to %s discovery cluster (%v)", e.Op, e.Err)
}

View File

@@ -1,102 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"time"
"github.com/coreos/etcd/pkg/runtime"
"github.com/prometheus/client_golang/prometheus"
)
var (
hasLeader = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "has_leader",
Help: "Whether or not a leader exists. 1 is existence, 0 is not.",
})
leaderChanges = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "leader_changes_seen_total",
Help: "The number of leader changes seen.",
})
proposalsCommitted = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "proposals_committed_total",
Help: "The total number of consensus proposals committed.",
})
proposalsApplied = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "proposals_applied_total",
Help: "The total number of consensus proposals applied.",
})
proposalsPending = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "proposals_pending",
Help: "The current number of pending proposals to commit.",
})
proposalsFailed = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "proposals_failed_total",
Help: "The total number of failed proposals seen.",
})
leaseExpired = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd_debugging",
Subsystem: "server",
Name: "lease_expired_total",
Help: "The total number of expired leases.",
})
)
func init() {
prometheus.MustRegister(hasLeader)
prometheus.MustRegister(leaderChanges)
prometheus.MustRegister(proposalsCommitted)
prometheus.MustRegister(proposalsApplied)
prometheus.MustRegister(proposalsPending)
prometheus.MustRegister(proposalsFailed)
prometheus.MustRegister(leaseExpired)
}
func monitorFileDescriptor(done <-chan struct{}) {
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
for {
used, err := runtime.FDUsage()
if err != nil {
plog.Errorf("cannot monitor file descriptor usage (%v)", err)
return
}
limit, err := runtime.FDLimit()
if err != nil {
plog.Errorf("cannot monitor file descriptor usage (%v)", err)
return
}
if used >= limit/5*4 {
plog.Warningf("80%% of the file descriptor limit is used [used = %d, limit = %d]", used, limit)
}
select {
case <-ticker.C:
case <-done:
return
}
}
}

View File

@@ -1,121 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
)
const (
// DefaultQuotaBytes is the number of bytes the backend Size may
// consume before exceeding the space quota.
DefaultQuotaBytes = int64(2 * 1024 * 1024 * 1024) // 2GB
// MaxQuotaBytes is the maximum number of bytes suggested for a backend
// quota. A larger quota may lead to degraded performance.
MaxQuotaBytes = int64(8 * 1024 * 1024 * 1024) // 8GB
)
// Quota represents an arbitrary quota against arbitrary requests. Each request
// costs some charge; if there is not enough remaining charge, then there are
// too few resources available within the quota to apply the request.
type Quota interface {
// Available judges whether the given request fits within the quota.
Available(req interface{}) bool
// Cost computes the charge against the quota for a given request.
Cost(req interface{}) int
// Remaining is the amount of charge left for the quota.
Remaining() int64
}
type passthroughQuota struct{}
func (*passthroughQuota) Available(interface{}) bool { return true }
func (*passthroughQuota) Cost(interface{}) int { return 0 }
func (*passthroughQuota) Remaining() int64 { return 1 }
type backendQuota struct {
s *EtcdServer
maxBackendBytes int64
}
const (
// leaseOverhead is an estimate for the cost of storing a lease
leaseOverhead = 64
// kvOverhead is an estimate for the cost of storing a key's metadata
kvOverhead = 256
)
func NewBackendQuota(s *EtcdServer) Quota {
if s.Cfg.QuotaBackendBytes < 0 {
// disable quotas if negative
plog.Warningf("disabling backend quota")
return &passthroughQuota{}
}
if s.Cfg.QuotaBackendBytes == 0 {
// use default size if no quota size given
return &backendQuota{s, DefaultQuotaBytes}
}
if s.Cfg.QuotaBackendBytes > MaxQuotaBytes {
plog.Warningf("backend quota %v exceeds maximum recommended quota %v", s.Cfg.QuotaBackendBytes, MaxQuotaBytes)
}
return &backendQuota{s, s.Cfg.QuotaBackendBytes}
}
func (b *backendQuota) Available(v interface{}) bool {
// TODO: maybe optimize backend.Size()
return b.s.Backend().Size()+int64(b.Cost(v)) < b.maxBackendBytes
}
func (b *backendQuota) Cost(v interface{}) int {
switch r := v.(type) {
case *pb.PutRequest:
return costPut(r)
case *pb.TxnRequest:
return costTxn(r)
case *pb.LeaseGrantRequest:
return leaseOverhead
default:
panic("unexpected cost")
}
}
func costPut(r *pb.PutRequest) int { return kvOverhead + len(r.Key) + len(r.Value) }
func costTxnReq(u *pb.RequestOp) int {
r := u.GetRequestPut()
if r == nil {
return 0
}
return costPut(r)
}
func costTxn(r *pb.TxnRequest) int {
sizeSuccess := 0
for _, u := range r.Success {
sizeSuccess += costTxnReq(u)
}
sizeFailure := 0
for _, u := range r.Failure {
sizeFailure += costTxnReq(u)
}
if sizeFailure > sizeSuccess {
return sizeFailure
}
return sizeSuccess
}
func (b *backendQuota) Remaining() int64 {
return b.maxBackendBytes - b.s.Backend().Size()
}

View File

@@ -1,594 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"encoding/json"
"expvar"
"sort"
"sync"
"sync/atomic"
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/pkg/contention"
"github.com/coreos/etcd/pkg/pbutil"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/rafthttp"
"github.com/coreos/etcd/wal"
"github.com/coreos/etcd/wal/walpb"
"github.com/coreos/pkg/capnslog"
)
const (
// Number of entries for slow follower to catch-up after compacting
// the raft storage entries.
// We expect the follower has a millisecond level latency with the leader.
// The max throughput is around 10K. Keep a 5K entries is enough for helping
// follower to catch up.
numberOfCatchUpEntries = 5000
// The max throughput of etcd will not exceed 100MB/s (100K * 1KB value).
// Assuming the RTT is around 10ms, 1MB max size is large enough.
maxSizePerMsg = 1 * 1024 * 1024
// Never overflow the rafthttp buffer, which is 4096.
// TODO: a better const?
maxInflightMsgs = 4096 / 8
)
var (
// protects raftStatus
raftStatusMu sync.Mutex
// indirection for expvar func interface
// expvar panics when publishing duplicate name
// expvar does not support remove a registered name
// so only register a func that calls raftStatus
// and change raftStatus as we need.
raftStatus func() raft.Status
)
func init() {
raft.SetLogger(capnslog.NewPackageLogger("github.com/coreos/etcd", "raft"))
expvar.Publish("raft.status", expvar.Func(func() interface{} {
raftStatusMu.Lock()
defer raftStatusMu.Unlock()
return raftStatus()
}))
}
type RaftTimer interface {
Index() uint64
Term() uint64
}
// apply contains entries, snapshot to be applied. Once
// an apply is consumed, the entries will be persisted to
// to raft storage concurrently; the application must read
// raftDone before assuming the raft messages are stable.
type apply struct {
entries []raftpb.Entry
snapshot raftpb.Snapshot
// notifyc synchronizes etcd server applies with the raft node
notifyc chan struct{}
}
type raftNode struct {
// Cache of the latest raft index and raft term the server has seen.
// These three unit64 fields must be the first elements to keep 64-bit
// alignment for atomic access to the fields.
index uint64
term uint64
lead uint64
raftNodeConfig
// a chan to send/receive snapshot
msgSnapC chan raftpb.Message
// a chan to send out apply
applyc chan apply
// a chan to send out readState
readStateC chan raft.ReadState
// utility
ticker *time.Ticker
// contention detectors for raft heartbeat message
td *contention.TimeoutDetector
stopped chan struct{}
done chan struct{}
}
type raftNodeConfig struct {
// to check if msg receiver is removed from cluster
isIDRemoved func(id uint64) bool
raft.Node
raftStorage *raft.MemoryStorage
storage Storage
heartbeat time.Duration // for logging
// transport specifies the transport to send and receive msgs to members.
// Sending messages MUST NOT block. It is okay to drop messages, since
// clients should timeout and reissue their messages.
// If transport is nil, server will panic.
transport rafthttp.Transporter
}
func newRaftNode(cfg raftNodeConfig) *raftNode {
r := &raftNode{
raftNodeConfig: cfg,
// set up contention detectors for raft heartbeat message.
// expect to send a heartbeat within 2 heartbeat intervals.
td: contention.NewTimeoutDetector(2 * cfg.heartbeat),
readStateC: make(chan raft.ReadState, 1),
msgSnapC: make(chan raftpb.Message, maxInFlightMsgSnap),
applyc: make(chan apply),
stopped: make(chan struct{}),
done: make(chan struct{}),
}
if r.heartbeat == 0 {
r.ticker = &time.Ticker{}
} else {
r.ticker = time.NewTicker(r.heartbeat)
}
return r
}
// start prepares and starts raftNode in a new goroutine. It is no longer safe
// to modify the fields after it has been started.
func (r *raftNode) start(rh *raftReadyHandler) {
internalTimeout := time.Second
go func() {
defer r.onStop()
islead := false
for {
select {
case <-r.ticker.C:
r.Tick()
case rd := <-r.Ready():
if rd.SoftState != nil {
newLeader := rd.SoftState.Lead != raft.None && atomic.LoadUint64(&r.lead) != rd.SoftState.Lead
if newLeader {
leaderChanges.Inc()
}
if rd.SoftState.Lead == raft.None {
hasLeader.Set(0)
} else {
hasLeader.Set(1)
}
atomic.StoreUint64(&r.lead, rd.SoftState.Lead)
islead = rd.RaftState == raft.StateLeader
rh.updateLeadership(newLeader)
r.td.Reset()
}
if len(rd.ReadStates) != 0 {
select {
case r.readStateC <- rd.ReadStates[len(rd.ReadStates)-1]:
case <-time.After(internalTimeout):
plog.Warningf("timed out sending read state")
case <-r.stopped:
return
}
}
notifyc := make(chan struct{}, 1)
ap := apply{
entries: rd.CommittedEntries,
snapshot: rd.Snapshot,
notifyc: notifyc,
}
updateCommittedIndex(&ap, rh)
select {
case r.applyc <- ap:
case <-r.stopped:
return
}
// the leader can write to its disk in parallel with replicating to the followers and them
// writing to their disks.
// For more details, check raft thesis 10.2.1
if islead {
// gofail: var raftBeforeLeaderSend struct{}
r.transport.Send(r.processMessages(rd.Messages))
}
// gofail: var raftBeforeSave struct{}
if err := r.storage.Save(rd.HardState, rd.Entries); err != nil {
plog.Fatalf("raft save state and entries error: %v", err)
}
if !raft.IsEmptyHardState(rd.HardState) {
proposalsCommitted.Set(float64(rd.HardState.Commit))
}
// gofail: var raftAfterSave struct{}
if !raft.IsEmptySnap(rd.Snapshot) {
// gofail: var raftBeforeSaveSnap struct{}
if err := r.storage.SaveSnap(rd.Snapshot); err != nil {
plog.Fatalf("raft save snapshot error: %v", err)
}
// etcdserver now claim the snapshot has been persisted onto the disk
notifyc <- struct{}{}
// gofail: var raftAfterSaveSnap struct{}
r.raftStorage.ApplySnapshot(rd.Snapshot)
plog.Infof("raft applied incoming snapshot at index %d", rd.Snapshot.Metadata.Index)
// gofail: var raftAfterApplySnap struct{}
}
r.raftStorage.Append(rd.Entries)
if !islead {
// finish processing incoming messages before we signal raftdone chan
msgs := r.processMessages(rd.Messages)
// now unblocks 'applyAll' that waits on Raft log disk writes before triggering snapshots
notifyc <- struct{}{}
// Candidate or follower needs to wait for all pending configuration
// changes to be applied before sending messages.
// Otherwise we might incorrectly count votes (e.g. votes from removed members).
// Also slow machine's follower raft-layer could proceed to become the leader
// on its own single-node cluster, before apply-layer applies the config change.
// We simply wait for ALL pending entries to be applied for now.
// We might improve this later on if it causes unnecessary long blocking issues.
waitApply := false
for _, ent := range rd.CommittedEntries {
if ent.Type == raftpb.EntryConfChange {
waitApply = true
break
}
}
if waitApply {
// blocks until 'applyAll' calls 'applyWait.Trigger'
// to be in sync with scheduled config-change job
// (assume notifyc has cap of 1)
select {
case notifyc <- struct{}{}:
case <-r.stopped:
return
}
}
// gofail: var raftBeforeFollowerSend struct{}
r.transport.Send(msgs)
} else {
// leader already processed 'MsgSnap' and signaled
notifyc <- struct{}{}
}
r.Advance()
case <-r.stopped:
return
}
}
}()
}
func updateCommittedIndex(ap *apply, rh *raftReadyHandler) {
var ci uint64
if len(ap.entries) != 0 {
ci = ap.entries[len(ap.entries)-1].Index
}
if ap.snapshot.Metadata.Index > ci {
ci = ap.snapshot.Metadata.Index
}
if ci != 0 {
rh.updateCommittedIndex(ci)
}
}
func (r *raftNode) processMessages(ms []raftpb.Message) []raftpb.Message {
sentAppResp := false
for i := len(ms) - 1; i >= 0; i-- {
if r.isIDRemoved(ms[i].To) {
ms[i].To = 0
}
if ms[i].Type == raftpb.MsgAppResp {
if sentAppResp {
ms[i].To = 0
} else {
sentAppResp = true
}
}
if ms[i].Type == raftpb.MsgSnap {
// There are two separate data store: the store for v2, and the KV for v3.
// The msgSnap only contains the most recent snapshot of store without KV.
// So we need to redirect the msgSnap to etcd server main loop for merging in the
// current store snapshot and KV snapshot.
select {
case r.msgSnapC <- ms[i]:
default:
// drop msgSnap if the inflight chan if full.
}
ms[i].To = 0
}
if ms[i].Type == raftpb.MsgHeartbeat {
ok, exceed := r.td.Observe(ms[i].To)
if !ok {
// TODO: limit request rate.
plog.Warningf("failed to send out heartbeat on time (exceeded the %v timeout for %v)", r.heartbeat, exceed)
plog.Warningf("server is likely overloaded")
}
}
}
return ms
}
func (r *raftNode) apply() chan apply {
return r.applyc
}
func (r *raftNode) stop() {
r.stopped <- struct{}{}
<-r.done
}
func (r *raftNode) onStop() {
r.Stop()
r.ticker.Stop()
r.transport.Stop()
if err := r.storage.Close(); err != nil {
plog.Panicf("raft close storage error: %v", err)
}
close(r.done)
}
// for testing
func (r *raftNode) pauseSending() {
p := r.transport.(rafthttp.Pausable)
p.Pause()
}
func (r *raftNode) resumeSending() {
p := r.transport.(rafthttp.Pausable)
p.Resume()
}
// advanceTicksForElection advances ticks to the node for fast election.
// This reduces the time to wait for first leader election if bootstrapping the whole
// cluster, while leaving at least 1 heartbeat for possible existing leader
// to contact it.
func advanceTicksForElection(n raft.Node, electionTicks int) {
for i := 0; i < electionTicks-1; i++ {
n.Tick()
}
}
func startNode(cfg *ServerConfig, cl *membership.RaftCluster, ids []types.ID) (id types.ID, n raft.Node, s *raft.MemoryStorage, w *wal.WAL) {
var err error
member := cl.MemberByName(cfg.Name)
metadata := pbutil.MustMarshal(
&pb.Metadata{
NodeID: uint64(member.ID),
ClusterID: uint64(cl.ID()),
},
)
if w, err = wal.Create(cfg.WALDir(), metadata); err != nil {
plog.Fatalf("create wal error: %v", err)
}
peers := make([]raft.Peer, len(ids))
for i, id := range ids {
ctx, err := json.Marshal((*cl).Member(id))
if err != nil {
plog.Panicf("marshal member should never fail: %v", err)
}
peers[i] = raft.Peer{ID: uint64(id), Context: ctx}
}
id = member.ID
plog.Infof("starting member %s in cluster %s", id, cl.ID())
s = raft.NewMemoryStorage()
c := &raft.Config{
ID: uint64(id),
ElectionTick: cfg.ElectionTicks,
HeartbeatTick: 1,
Storage: s,
MaxSizePerMsg: maxSizePerMsg,
MaxInflightMsgs: maxInflightMsgs,
CheckQuorum: true,
}
n = raft.StartNode(c, peers)
raftStatusMu.Lock()
raftStatus = n.Status
raftStatusMu.Unlock()
advanceTicksForElection(n, c.ElectionTick)
return
}
func restartNode(cfg *ServerConfig, snapshot *raftpb.Snapshot) (types.ID, *membership.RaftCluster, raft.Node, *raft.MemoryStorage, *wal.WAL) {
var walsnap walpb.Snapshot
if snapshot != nil {
walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term
}
w, id, cid, st, ents := readWAL(cfg.WALDir(), walsnap)
plog.Infof("restarting member %s in cluster %s at commit index %d", id, cid, st.Commit)
cl := membership.NewCluster("")
cl.SetID(cid)
s := raft.NewMemoryStorage()
if snapshot != nil {
s.ApplySnapshot(*snapshot)
}
s.SetHardState(st)
s.Append(ents)
c := &raft.Config{
ID: uint64(id),
ElectionTick: cfg.ElectionTicks,
HeartbeatTick: 1,
Storage: s,
MaxSizePerMsg: maxSizePerMsg,
MaxInflightMsgs: maxInflightMsgs,
CheckQuorum: true,
}
n := raft.RestartNode(c)
raftStatusMu.Lock()
raftStatus = n.Status
raftStatusMu.Unlock()
advanceTicksForElection(n, c.ElectionTick)
return id, cl, n, s, w
}
func restartAsStandaloneNode(cfg *ServerConfig, snapshot *raftpb.Snapshot) (types.ID, *membership.RaftCluster, raft.Node, *raft.MemoryStorage, *wal.WAL) {
var walsnap walpb.Snapshot
if snapshot != nil {
walsnap.Index, walsnap.Term = snapshot.Metadata.Index, snapshot.Metadata.Term
}
w, id, cid, st, ents := readWAL(cfg.WALDir(), walsnap)
// discard the previously uncommitted entries
for i, ent := range ents {
if ent.Index > st.Commit {
plog.Infof("discarding %d uncommitted WAL entries ", len(ents)-i)
ents = ents[:i]
break
}
}
// force append the configuration change entries
toAppEnts := createConfigChangeEnts(getIDs(snapshot, ents), uint64(id), st.Term, st.Commit)
ents = append(ents, toAppEnts...)
// force commit newly appended entries
err := w.Save(raftpb.HardState{}, toAppEnts)
if err != nil {
plog.Fatalf("%v", err)
}
if len(ents) != 0 {
st.Commit = ents[len(ents)-1].Index
}
plog.Printf("forcing restart of member %s in cluster %s at commit index %d", id, cid, st.Commit)
cl := membership.NewCluster("")
cl.SetID(cid)
s := raft.NewMemoryStorage()
if snapshot != nil {
s.ApplySnapshot(*snapshot)
}
s.SetHardState(st)
s.Append(ents)
c := &raft.Config{
ID: uint64(id),
ElectionTick: cfg.ElectionTicks,
HeartbeatTick: 1,
Storage: s,
MaxSizePerMsg: maxSizePerMsg,
MaxInflightMsgs: maxInflightMsgs,
}
n := raft.RestartNode(c)
raftStatus = n.Status
return id, cl, n, s, w
}
// getIDs returns an ordered set of IDs included in the given snapshot and
// the entries. The given snapshot/entries can contain two kinds of
// ID-related entry:
// - ConfChangeAddNode, in which case the contained ID will be added into the set.
// - ConfChangeRemoveNode, in which case the contained ID will be removed from the set.
func getIDs(snap *raftpb.Snapshot, ents []raftpb.Entry) []uint64 {
ids := make(map[uint64]bool)
if snap != nil {
for _, id := range snap.Metadata.ConfState.Nodes {
ids[id] = true
}
}
for _, e := range ents {
if e.Type != raftpb.EntryConfChange {
continue
}
var cc raftpb.ConfChange
pbutil.MustUnmarshal(&cc, e.Data)
switch cc.Type {
case raftpb.ConfChangeAddNode:
ids[cc.NodeID] = true
case raftpb.ConfChangeRemoveNode:
delete(ids, cc.NodeID)
case raftpb.ConfChangeUpdateNode:
// do nothing
default:
plog.Panicf("ConfChange Type should be either ConfChangeAddNode or ConfChangeRemoveNode!")
}
}
sids := make(types.Uint64Slice, 0, len(ids))
for id := range ids {
sids = append(sids, id)
}
sort.Sort(sids)
return []uint64(sids)
}
// createConfigChangeEnts creates a series of Raft entries (i.e.
// EntryConfChange) to remove the set of given IDs from the cluster. The ID
// `self` is _not_ removed, even if present in the set.
// If `self` is not inside the given ids, it creates a Raft entry to add a
// default member with the given `self`.
func createConfigChangeEnts(ids []uint64, self uint64, term, index uint64) []raftpb.Entry {
ents := make([]raftpb.Entry, 0)
next := index + 1
found := false
for _, id := range ids {
if id == self {
found = true
continue
}
cc := &raftpb.ConfChange{
Type: raftpb.ConfChangeRemoveNode,
NodeID: id,
}
e := raftpb.Entry{
Type: raftpb.EntryConfChange,
Data: pbutil.MustMarshal(cc),
Term: term,
Index: next,
}
ents = append(ents, e)
next++
}
if !found {
m := membership.Member{
ID: types.ID(self),
RaftAttributes: membership.RaftAttributes{PeerURLs: []string{"http://localhost:2380"}},
}
ctx, err := json.Marshal(m)
if err != nil {
plog.Panicf("marshal member should never fail: %v", err)
}
cc := &raftpb.ConfChange{
Type: raftpb.ConfChangeAddNode,
NodeID: self,
Context: ctx,
}
e := raftpb.Entry{
Type: raftpb.EntryConfChange,
Data: pbutil.MustMarshal(cc),
Term: term,
Index: next,
}
ents = append(ents, e)
}
return ents
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,73 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"io"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/snap"
)
// createMergedSnapshotMessage creates a snapshot message that contains: raft status (term, conf),
// a snapshot of v2 store inside raft.Snapshot as []byte, a snapshot of v3 KV in the top level message
// as ReadCloser.
func (s *EtcdServer) createMergedSnapshotMessage(m raftpb.Message, snapt, snapi uint64, confState raftpb.ConfState) snap.Message {
// get a snapshot of v2 store as []byte
clone := s.store.Clone()
d, err := clone.SaveNoCopy()
if err != nil {
plog.Panicf("store save should never fail: %v", err)
}
// commit kv to write metadata(for example: consistent index).
s.KV().Commit()
dbsnap := s.be.Snapshot()
// get a snapshot of v3 KV as readCloser
rc := newSnapshotReaderCloser(dbsnap)
// put the []byte snapshot of store into raft snapshot and return the merged snapshot with
// KV readCloser snapshot.
snapshot := raftpb.Snapshot{
Metadata: raftpb.SnapshotMetadata{
Index: snapi,
Term: snapt,
ConfState: confState,
},
Data: d,
}
m.Snapshot = snapshot
return *snap.NewMessage(m, rc, dbsnap.Size())
}
func newSnapshotReaderCloser(snapshot backend.Snapshot) io.ReadCloser {
pr, pw := io.Pipe()
go func() {
n, err := snapshot.WriteTo(pw)
if err == nil {
plog.Infof("wrote database snapshot out [total bytes: %d]", n)
} else {
plog.Warningf("failed to write database snapshot out [written bytes: %d]: %v", n, err)
}
pw.CloseWithError(err)
err = snapshot.Close()
if err != nil {
plog.Panicf("failed to close database snapshot: %v", err)
}
}()
return pr
}

View File

@@ -1,98 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"io"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/pkg/pbutil"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/snap"
"github.com/coreos/etcd/wal"
"github.com/coreos/etcd/wal/walpb"
)
type Storage interface {
// Save function saves ents and state to the underlying stable storage.
// Save MUST block until st and ents are on stable storage.
Save(st raftpb.HardState, ents []raftpb.Entry) error
// SaveSnap function saves snapshot to the underlying stable storage.
SaveSnap(snap raftpb.Snapshot) error
// Close closes the Storage and performs finalization.
Close() error
}
type storage struct {
*wal.WAL
*snap.Snapshotter
}
func NewStorage(w *wal.WAL, s *snap.Snapshotter) Storage {
return &storage{w, s}
}
// SaveSnap saves the snapshot to disk and release the locked
// wal files since they will not be used.
func (st *storage) SaveSnap(snap raftpb.Snapshot) error {
walsnap := walpb.Snapshot{
Index: snap.Metadata.Index,
Term: snap.Metadata.Term,
}
err := st.WAL.SaveSnapshot(walsnap)
if err != nil {
return err
}
err = st.Snapshotter.SaveSnap(snap)
if err != nil {
return err
}
return st.WAL.ReleaseLockTo(snap.Metadata.Index)
}
func readWAL(waldir string, snap walpb.Snapshot) (w *wal.WAL, id, cid types.ID, st raftpb.HardState, ents []raftpb.Entry) {
var (
err error
wmetadata []byte
)
repaired := false
for {
if w, err = wal.Open(waldir, snap); err != nil {
plog.Fatalf("open wal error: %v", err)
}
if wmetadata, st, ents, err = w.ReadAll(); err != nil {
w.Close()
// we can only repair ErrUnexpectedEOF and we never repair twice.
if repaired || err != io.ErrUnexpectedEOF {
plog.Fatalf("read wal error (%v) and cannot be repaired", err)
}
if !wal.Repair(waldir) {
plog.Fatalf("WAL error (%v) cannot be repaired", err)
} else {
plog.Infof("repaired WAL error (%v)", err)
repaired = true
}
continue
}
break
}
var metadata pb.Metadata
pbutil.MustUnmarshal(&metadata, wmetadata)
id = types.ID(metadata.NodeID)
cid = types.ID(metadata.ClusterID)
return
}

View File

@@ -1,97 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"time"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/rafthttp"
)
// isConnectedToQuorumSince checks whether the local member is connected to the
// quorum of the cluster since the given time.
func isConnectedToQuorumSince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool {
return numConnectedSince(transport, since, self, members) >= (len(members)/2)+1
}
// isConnectedSince checks whether the local member is connected to the
// remote member since the given time.
func isConnectedSince(transport rafthttp.Transporter, since time.Time, remote types.ID) bool {
t := transport.ActiveSince(remote)
return !t.IsZero() && t.Before(since)
}
// isConnectedFullySince checks whether the local member is connected to all
// members in the cluster since the given time.
func isConnectedFullySince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) bool {
return numConnectedSince(transport, since, self, members) == len(members)
}
// numConnectedSince counts how many members are connected to the local member
// since the given time.
func numConnectedSince(transport rafthttp.Transporter, since time.Time, self types.ID, members []*membership.Member) int {
connectedNum := 0
for _, m := range members {
if m.ID == self || isConnectedSince(transport, since, m.ID) {
connectedNum++
}
}
return connectedNum
}
// longestConnected chooses the member with longest active-since-time.
// It returns false, if nothing is active.
func longestConnected(tp rafthttp.Transporter, membs []types.ID) (types.ID, bool) {
var longest types.ID
var oldest time.Time
for _, id := range membs {
tm := tp.ActiveSince(id)
if tm.IsZero() { // inactive
continue
}
if oldest.IsZero() { // first longest candidate
oldest = tm
longest = id
}
if tm.Before(oldest) {
oldest = tm
longest = id
}
}
if uint64(longest) == 0 {
return longest, false
}
return longest, true
}
type notifier struct {
c chan struct{}
err error
}
func newNotifier() *notifier {
return &notifier{
c: make(chan struct{}),
}
}
func (nc *notifier) notify(err error) {
nc.err = err
close(nc.c)
}

View File

@@ -1,125 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"golang.org/x/net/context"
)
type v2API interface {
Post(ctx context.Context, r *pb.Request) (Response, error)
Put(ctx context.Context, r *pb.Request) (Response, error)
Delete(ctx context.Context, r *pb.Request) (Response, error)
QGet(ctx context.Context, r *pb.Request) (Response, error)
Get(ctx context.Context, r *pb.Request) (Response, error)
Head(ctx context.Context, r *pb.Request) (Response, error)
}
type v2apiStore struct{ s *EtcdServer }
func (a *v2apiStore) Post(ctx context.Context, r *pb.Request) (Response, error) {
return a.processRaftRequest(ctx, r)
}
func (a *v2apiStore) Put(ctx context.Context, r *pb.Request) (Response, error) {
return a.processRaftRequest(ctx, r)
}
func (a *v2apiStore) Delete(ctx context.Context, r *pb.Request) (Response, error) {
return a.processRaftRequest(ctx, r)
}
func (a *v2apiStore) QGet(ctx context.Context, r *pb.Request) (Response, error) {
return a.processRaftRequest(ctx, r)
}
func (a *v2apiStore) processRaftRequest(ctx context.Context, r *pb.Request) (Response, error) {
data, err := r.Marshal()
if err != nil {
return Response{}, err
}
ch := a.s.w.Register(r.ID)
start := time.Now()
a.s.r.Propose(ctx, data)
proposalsPending.Inc()
defer proposalsPending.Dec()
select {
case x := <-ch:
resp := x.(Response)
return resp, resp.err
case <-ctx.Done():
proposalsFailed.Inc()
a.s.w.Trigger(r.ID, nil) // GC wait
return Response{}, a.s.parseProposeCtxErr(ctx.Err(), start)
case <-a.s.stopping:
}
return Response{}, ErrStopped
}
func (a *v2apiStore) Get(ctx context.Context, r *pb.Request) (Response, error) {
if r.Wait {
wc, err := a.s.store.Watch(r.Path, r.Recursive, r.Stream, r.Since)
if err != nil {
return Response{}, err
}
return Response{Watcher: wc}, nil
}
ev, err := a.s.store.Get(r.Path, r.Recursive, r.Sorted)
if err != nil {
return Response{}, err
}
return Response{Event: ev}, nil
}
func (a *v2apiStore) Head(ctx context.Context, r *pb.Request) (Response, error) {
ev, err := a.s.store.Get(r.Path, r.Recursive, r.Sorted)
if err != nil {
return Response{}, err
}
return Response{Event: ev}, nil
}
// Do interprets r and performs an operation on s.store according to r.Method
// and other fields. If r.Method is "POST", "PUT", "DELETE", or a "GET" with
// Quorum == true, r will be sent through consensus before performing its
// respective operation. Do will block until an action is performed or there is
// an error.
func (s *EtcdServer) Do(ctx context.Context, r pb.Request) (Response, error) {
r.ID = s.reqIDGen.Next()
if r.Method == "GET" && r.Quorum {
r.Method = "QGET"
}
v2api := (v2API)(&v2apiStore{s})
switch r.Method {
case "POST":
return v2api.Post(ctx, &r)
case "PUT":
return v2api.Put(ctx, &r)
case "DELETE":
return v2api.Delete(ctx, &r)
case "QGET":
return v2api.QGet(ctx, &r)
case "GET":
return v2api.Get(ctx, &r)
case "HEAD":
return v2api.Head(ctx, &r)
}
return Response{}, ErrUnknownMethod
}

View File

@@ -1,692 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"bytes"
"encoding/binary"
"time"
"github.com/gogo/protobuf/proto"
"github.com/coreos/etcd/auth"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/lease/leasehttp"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/raft"
"golang.org/x/net/context"
)
const (
// the max request size that raft accepts.
// TODO: make this a flag? But we probably do not want to
// accept large request which might block raft stream. User
// specify a large value might end up with shooting in the foot.
maxRequestBytes = 1.5 * 1024 * 1024
// In the health case, there might be a small gap (10s of entries) between
// the applied index and committed index.
// However, if the committed entries are very heavy to apply, the gap might grow.
// We should stop accepting new proposals if the gap growing to a certain point.
maxGapBetweenApplyAndCommitIndex = 5000
)
type RaftKV interface {
Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error)
Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error)
DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error)
Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error)
Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error)
}
type Lessor interface {
// LeaseGrant sends LeaseGrant request to raft and apply it after committed.
LeaseGrant(ctx context.Context, r *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error)
// LeaseRevoke sends LeaseRevoke request to raft and apply it after committed.
LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error)
// LeaseRenew renews the lease with given ID. The renewed TTL is returned. Or an error
// is returned.
LeaseRenew(ctx context.Context, id lease.LeaseID) (int64, error)
// LeaseTimeToLive retrieves lease information.
LeaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveRequest) (*pb.LeaseTimeToLiveResponse, error)
}
type Authenticator interface {
AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error)
AuthDisable(ctx context.Context, r *pb.AuthDisableRequest) (*pb.AuthDisableResponse, error)
Authenticate(ctx context.Context, r *pb.AuthenticateRequest) (*pb.AuthenticateResponse, error)
UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error)
UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error)
UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error)
UserGrantRole(ctx context.Context, r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error)
UserGet(ctx context.Context, r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error)
UserRevokeRole(ctx context.Context, r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error)
RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error)
RoleGrantPermission(ctx context.Context, r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error)
RoleGet(ctx context.Context, r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error)
RoleRevokePermission(ctx context.Context, r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error)
RoleDelete(ctx context.Context, r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error)
UserList(ctx context.Context, r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error)
RoleList(ctx context.Context, r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error)
}
func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) {
if !r.Serializable {
err := s.linearizableReadNotify(ctx)
if err != nil {
return nil, err
}
}
var resp *pb.RangeResponse
var err error
chk := func(ai *auth.AuthInfo) error {
return s.authStore.IsRangePermitted(ai, r.Key, r.RangeEnd)
}
get := func() { resp, err = s.applyV3Base.Range(nil, r) }
if serr := s.doSerialize(ctx, chk, get); serr != nil {
return nil, serr
}
return resp, err
}
func (s *EtcdServer) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{Put: r})
if err != nil {
return nil, err
}
return resp.(*pb.PutResponse), nil
}
func (s *EtcdServer) DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{DeleteRange: r})
if err != nil {
return nil, err
}
return resp.(*pb.DeleteRangeResponse), nil
}
func (s *EtcdServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
if isTxnReadonly(r) {
if !isTxnSerializable(r) {
err := s.linearizableReadNotify(ctx)
if err != nil {
return nil, err
}
}
var resp *pb.TxnResponse
var err error
chk := func(ai *auth.AuthInfo) error {
return checkTxnAuth(s.authStore, ai, r)
}
get := func() { resp, err = s.applyV3Base.Txn(r) }
if serr := s.doSerialize(ctx, chk, get); serr != nil {
return nil, serr
}
return resp, err
}
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{Txn: r})
if err != nil {
return nil, err
}
return resp.(*pb.TxnResponse), nil
}
func isTxnSerializable(r *pb.TxnRequest) bool {
for _, u := range r.Success {
if r := u.GetRequestRange(); r == nil || !r.Serializable {
return false
}
}
for _, u := range r.Failure {
if r := u.GetRequestRange(); r == nil || !r.Serializable {
return false
}
}
return true
}
func isTxnReadonly(r *pb.TxnRequest) bool {
for _, u := range r.Success {
if r := u.GetRequestRange(); r == nil {
return false
}
}
for _, u := range r.Failure {
if r := u.GetRequestRange(); r == nil {
return false
}
}
return true
}
func (s *EtcdServer) Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error) {
result, err := s.processInternalRaftRequestOnce(ctx, pb.InternalRaftRequest{Compaction: r})
if r.Physical && result != nil && result.physc != nil {
<-result.physc
// The compaction is done deleting keys; the hash is now settled
// but the data is not necessarily committed. If there's a crash,
// the hash may revert to a hash prior to compaction completing
// if the compaction resumes. Force the finished compaction to
// commit so it won't resume following a crash.
s.be.ForceCommit()
}
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
resp := result.resp.(*pb.CompactionResponse)
if resp == nil {
resp = &pb.CompactionResponse{}
}
if resp.Header == nil {
resp.Header = &pb.ResponseHeader{}
}
resp.Header.Revision = s.kv.Rev()
return resp, nil
}
func (s *EtcdServer) LeaseGrant(ctx context.Context, r *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
// no id given? choose one
for r.ID == int64(lease.NoLease) {
// only use positive int64 id's
r.ID = int64(s.reqIDGen.Next() & ((1 << 63) - 1))
}
resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{LeaseGrant: r})
if err != nil {
return nil, err
}
return resp.(*pb.LeaseGrantResponse), nil
}
func (s *EtcdServer) LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{LeaseRevoke: r})
if err != nil {
return nil, err
}
return resp.(*pb.LeaseRevokeResponse), nil
}
func (s *EtcdServer) LeaseRenew(ctx context.Context, id lease.LeaseID) (int64, error) {
ttl, err := s.lessor.Renew(id)
if err == nil { // already requested to primary lessor(leader)
return ttl, nil
}
if err != lease.ErrNotPrimary {
return -1, err
}
cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
defer cancel()
// renewals don't go through raft; forward to leader manually
for cctx.Err() == nil && err != nil {
leader, lerr := s.waitLeader(cctx)
if lerr != nil {
return -1, lerr
}
for _, url := range leader.PeerURLs {
lurl := url + leasehttp.LeasePrefix
ttl, err = leasehttp.RenewHTTP(cctx, id, lurl, s.peerRt)
if err == nil || err == lease.ErrLeaseNotFound {
return ttl, err
}
}
}
return -1, ErrTimeout
}
func (s *EtcdServer) LeaseTimeToLive(ctx context.Context, r *pb.LeaseTimeToLiveRequest) (*pb.LeaseTimeToLiveResponse, error) {
if s.Leader() == s.ID() {
// primary; timetolive directly from leader
le := s.lessor.Lookup(lease.LeaseID(r.ID))
if le == nil {
return nil, lease.ErrLeaseNotFound
}
// TODO: fill out ResponseHeader
resp := &pb.LeaseTimeToLiveResponse{Header: &pb.ResponseHeader{}, ID: r.ID, TTL: int64(le.Remaining().Seconds()), GrantedTTL: le.TTL()}
if r.Keys {
ks := le.Keys()
kbs := make([][]byte, len(ks))
for i := range ks {
kbs[i] = []byte(ks[i])
}
resp.Keys = kbs
}
return resp, nil
}
cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
defer cancel()
// forward to leader
for cctx.Err() == nil {
leader, err := s.waitLeader(cctx)
if err != nil {
return nil, err
}
for _, url := range leader.PeerURLs {
lurl := url + leasehttp.LeaseInternalPrefix
resp, err := leasehttp.TimeToLiveHTTP(cctx, lease.LeaseID(r.ID), r.Keys, lurl, s.peerRt)
if err == nil {
return resp.LeaseTimeToLiveResponse, nil
}
if err == lease.ErrLeaseNotFound {
return nil, err
}
}
}
return nil, ErrTimeout
}
func (s *EtcdServer) waitLeader(ctx context.Context) (*membership.Member, error) {
leader := s.cluster.Member(s.Leader())
for leader == nil {
// wait an election
dur := time.Duration(s.Cfg.ElectionTicks) * time.Duration(s.Cfg.TickMs) * time.Millisecond
select {
case <-time.After(dur):
leader = s.cluster.Member(s.Leader())
case <-s.stopping:
return nil, ErrStopped
case <-ctx.Done():
return nil, ErrNoLeader
}
}
if leader == nil || len(leader.PeerURLs) == 0 {
return nil, ErrNoLeader
}
return leader, nil
}
func (s *EtcdServer) Alarm(ctx context.Context, r *pb.AlarmRequest) (*pb.AlarmResponse, error) {
resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{Alarm: r})
if err != nil {
return nil, err
}
return resp.(*pb.AlarmResponse), nil
}
func (s *EtcdServer) AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error) {
resp, err := s.raftRequestOnce(ctx, pb.InternalRaftRequest{AuthEnable: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthEnableResponse), nil
}
func (s *EtcdServer) AuthDisable(ctx context.Context, r *pb.AuthDisableRequest) (*pb.AuthDisableResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthDisable: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthDisableResponse), nil
}
func (s *EtcdServer) Authenticate(ctx context.Context, r *pb.AuthenticateRequest) (*pb.AuthenticateResponse, error) {
if err := s.linearizableReadNotify(ctx); err != nil {
return nil, err
}
var resp proto.Message
for {
checkedRevision, err := s.AuthStore().CheckPassword(r.Name, r.Password)
if err != nil {
if err != auth.ErrAuthNotEnabled {
plog.Errorf("invalid authentication request to user %s was issued", r.Name)
}
return nil, err
}
st, err := s.AuthStore().GenTokenPrefix()
if err != nil {
return nil, err
}
internalReq := &pb.InternalAuthenticateRequest{
Name: r.Name,
Password: r.Password,
SimpleToken: st,
}
resp, err = s.raftRequestOnce(ctx, pb.InternalRaftRequest{Authenticate: internalReq})
if err != nil {
return nil, err
}
if checkedRevision == s.AuthStore().Revision() {
break
}
plog.Infof("revision when password checked is obsolete, retrying")
}
return resp.(*pb.AuthenticateResponse), nil
}
func (s *EtcdServer) UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserAdd: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthUserAddResponse), nil
}
func (s *EtcdServer) UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserDelete: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthUserDeleteResponse), nil
}
func (s *EtcdServer) UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserChangePassword: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthUserChangePasswordResponse), nil
}
func (s *EtcdServer) UserGrantRole(ctx context.Context, r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserGrantRole: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthUserGrantRoleResponse), nil
}
func (s *EtcdServer) UserGet(ctx context.Context, r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserGet: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthUserGetResponse), nil
}
func (s *EtcdServer) UserList(ctx context.Context, r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserList: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthUserListResponse), nil
}
func (s *EtcdServer) UserRevokeRole(ctx context.Context, r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthUserRevokeRole: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthUserRevokeRoleResponse), nil
}
func (s *EtcdServer) RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleAdd: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthRoleAddResponse), nil
}
func (s *EtcdServer) RoleGrantPermission(ctx context.Context, r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleGrantPermission: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthRoleGrantPermissionResponse), nil
}
func (s *EtcdServer) RoleGet(ctx context.Context, r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleGet: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthRoleGetResponse), nil
}
func (s *EtcdServer) RoleList(ctx context.Context, r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleList: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthRoleListResponse), nil
}
func (s *EtcdServer) RoleRevokePermission(ctx context.Context, r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleRevokePermission: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthRoleRevokePermissionResponse), nil
}
func (s *EtcdServer) RoleDelete(ctx context.Context, r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) {
resp, err := s.raftRequest(ctx, pb.InternalRaftRequest{AuthRoleDelete: r})
if err != nil {
return nil, err
}
return resp.(*pb.AuthRoleDeleteResponse), nil
}
func (s *EtcdServer) raftRequestOnce(ctx context.Context, r pb.InternalRaftRequest) (proto.Message, error) {
result, err := s.processInternalRaftRequestOnce(ctx, r)
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp, nil
}
func (s *EtcdServer) raftRequest(ctx context.Context, r pb.InternalRaftRequest) (proto.Message, error) {
for {
resp, err := s.raftRequestOnce(ctx, r)
if err != auth.ErrAuthOldRevision {
return resp, err
}
}
}
// doSerialize handles the auth logic, with permissions checked by "chk", for a serialized request "get". Returns a non-nil error on authentication failure.
func (s *EtcdServer) doSerialize(ctx context.Context, chk func(*auth.AuthInfo) error, get func()) error {
for {
ai, err := s.AuthInfoFromCtx(ctx)
if err != nil {
return err
}
if ai == nil {
// chk expects non-nil AuthInfo; use empty credentials
ai = &auth.AuthInfo{}
}
if err = chk(ai); err != nil {
if err == auth.ErrAuthOldRevision {
continue
}
return err
}
// fetch response for serialized request
get()
// empty credentials or current auth info means no need to retry
if ai.Revision == 0 || ai.Revision == s.authStore.Revision() {
return nil
}
// avoid TOCTOU error, retry of the request is required.
}
}
func (s *EtcdServer) processInternalRaftRequestOnce(ctx context.Context, r pb.InternalRaftRequest) (*applyResult, error) {
ai := s.getAppliedIndex()
ci := s.getCommittedIndex()
if ci > ai+maxGapBetweenApplyAndCommitIndex {
return nil, ErrTooManyRequests
}
r.Header = &pb.RequestHeader{
ID: s.reqIDGen.Next(),
}
authInfo, err := s.AuthInfoFromCtx(ctx)
if err != nil {
return nil, err
}
if authInfo != nil {
r.Header.Username = authInfo.Username
r.Header.AuthRevision = authInfo.Revision
}
data, err := r.Marshal()
if err != nil {
return nil, err
}
if len(data) > maxRequestBytes {
return nil, ErrRequestTooLarge
}
id := r.ID
if id == 0 {
id = r.Header.ID
}
ch := s.w.Register(id)
cctx, cancel := context.WithTimeout(ctx, s.Cfg.ReqTimeout())
defer cancel()
start := time.Now()
s.r.Propose(cctx, data)
proposalsPending.Inc()
defer proposalsPending.Dec()
select {
case x := <-ch:
return x.(*applyResult), nil
case <-cctx.Done():
proposalsFailed.Inc()
s.w.Trigger(id, nil) // GC wait
return nil, s.parseProposeCtxErr(cctx.Err(), start)
case <-s.done:
return nil, ErrStopped
}
}
// Watchable returns a watchable interface attached to the etcdserver.
func (s *EtcdServer) Watchable() mvcc.WatchableKV { return s.KV() }
func (s *EtcdServer) linearizableReadLoop() {
var rs raft.ReadState
for {
ctx := make([]byte, 8)
binary.BigEndian.PutUint64(ctx, s.reqIDGen.Next())
select {
case <-s.readwaitc:
case <-s.stopping:
return
}
nextnr := newNotifier()
s.readMu.Lock()
nr := s.readNotifier
s.readNotifier = nextnr
s.readMu.Unlock()
cctx, cancel := context.WithTimeout(context.Background(), s.Cfg.ReqTimeout())
if err := s.r.ReadIndex(cctx, ctx); err != nil {
cancel()
if err == raft.ErrStopped {
return
}
plog.Errorf("failed to get read index from raft: %v", err)
nr.notify(err)
continue
}
cancel()
var (
timeout bool
done bool
)
for !timeout && !done {
select {
case rs = <-s.r.readStateC:
done = bytes.Equal(rs.RequestCtx, ctx)
if !done {
// a previous request might time out. now we should ignore the response of it and
// continue waiting for the response of the current requests.
plog.Warningf("ignored out-of-date read index response (want %v, got %v)", rs.RequestCtx, ctx)
}
case <-time.After(s.Cfg.ReqTimeout()):
plog.Warningf("timed out waiting for read index response")
nr.notify(ErrTimeout)
timeout = true
case <-s.stopping:
return
}
}
if !done {
continue
}
if ai := s.getAppliedIndex(); ai < rs.Index {
select {
case <-s.applyWait.Wait(rs.Index):
case <-s.stopping:
return
}
}
// unblock all l-reads requested at indices before rs.Index
nr.notify(nil)
}
}
func (s *EtcdServer) linearizableReadNotify(ctx context.Context) error {
s.readMu.RLock()
nc := s.readNotifier
s.readMu.RUnlock()
// signal linearizable loop for current notify if it hasn't been already
select {
case s.readwaitc <- struct{}{}:
default:
}
// wait for read state notification
select {
case <-nc.c:
return nc.err
case <-ctx.Done():
return ctx.Err()
case <-s.done:
return ErrStopped
}
}
func (s *EtcdServer) AuthInfoFromCtx(ctx context.Context) (*auth.AuthInfo, error) {
if s.Cfg.ClientCertAuthEnabled {
authInfo := s.AuthStore().AuthInfoFromTLS(ctx)
if authInfo != nil {
return authInfo, nil
}
}
return s.AuthStore().AuthInfoFromCtx(ctx)
}

View File

@@ -1,29 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package main is a simple wrapper of the real etcd entrypoint package
// (located at github.com/coreos/etcd/etcdmain) to ensure that etcd is still
// "go getable"; e.g. `go get github.com/coreos/etcd` works as expected and
// builds a binary in $GOBIN/etcd
//
// This package should NOT be extended or modified in any way; to modify the
// etcd binary, work in the `github.com/coreos/etcd/etcdmain` package.
//
package main
import "github.com/coreos/etcd/etcdmain"
func main() {
etcdmain.Main()
}

View File

@@ -1,16 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package mvcc defines etcd's stable MVCC storage.
package mvcc

View File

@@ -1,219 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"sort"
"sync"
"github.com/google/btree"
)
type index interface {
Get(key []byte, atRev int64) (rev, created revision, ver int64, err error)
Range(key, end []byte, atRev int64) ([][]byte, []revision)
Put(key []byte, rev revision)
Tombstone(key []byte, rev revision) error
RangeSince(key, end []byte, rev int64) []revision
Compact(rev int64) map[revision]struct{}
Equal(b index) bool
Insert(ki *keyIndex)
KeyIndex(ki *keyIndex) *keyIndex
}
type treeIndex struct {
sync.RWMutex
tree *btree.BTree
}
func newTreeIndex() index {
return &treeIndex{
tree: btree.New(32),
}
}
func (ti *treeIndex) Put(key []byte, rev revision) {
keyi := &keyIndex{key: key}
ti.Lock()
defer ti.Unlock()
item := ti.tree.Get(keyi)
if item == nil {
keyi.put(rev.main, rev.sub)
ti.tree.ReplaceOrInsert(keyi)
return
}
okeyi := item.(*keyIndex)
okeyi.put(rev.main, rev.sub)
}
func (ti *treeIndex) Get(key []byte, atRev int64) (modified, created revision, ver int64, err error) {
keyi := &keyIndex{key: key}
ti.RLock()
defer ti.RUnlock()
if keyi = ti.keyIndex(keyi); keyi == nil {
return revision{}, revision{}, 0, ErrRevisionNotFound
}
return keyi.get(atRev)
}
func (ti *treeIndex) KeyIndex(keyi *keyIndex) *keyIndex {
ti.RLock()
defer ti.RUnlock()
return ti.keyIndex(keyi)
}
func (ti *treeIndex) keyIndex(keyi *keyIndex) *keyIndex {
if item := ti.tree.Get(keyi); item != nil {
return item.(*keyIndex)
}
return nil
}
func (ti *treeIndex) Range(key, end []byte, atRev int64) (keys [][]byte, revs []revision) {
if end == nil {
rev, _, _, err := ti.Get(key, atRev)
if err != nil {
return nil, nil
}
return [][]byte{key}, []revision{rev}
}
keyi := &keyIndex{key: key}
endi := &keyIndex{key: end}
ti.RLock()
defer ti.RUnlock()
ti.tree.AscendGreaterOrEqual(keyi, func(item btree.Item) bool {
if len(endi.key) > 0 && !item.Less(endi) {
return false
}
curKeyi := item.(*keyIndex)
rev, _, _, err := curKeyi.get(atRev)
if err != nil {
return true
}
revs = append(revs, rev)
keys = append(keys, curKeyi.key)
return true
})
return keys, revs
}
func (ti *treeIndex) Tombstone(key []byte, rev revision) error {
keyi := &keyIndex{key: key}
ti.Lock()
defer ti.Unlock()
item := ti.tree.Get(keyi)
if item == nil {
return ErrRevisionNotFound
}
ki := item.(*keyIndex)
return ki.tombstone(rev.main, rev.sub)
}
// RangeSince returns all revisions from key(including) to end(excluding)
// at or after the given rev. The returned slice is sorted in the order
// of revision.
func (ti *treeIndex) RangeSince(key, end []byte, rev int64) []revision {
ti.RLock()
defer ti.RUnlock()
keyi := &keyIndex{key: key}
if end == nil {
item := ti.tree.Get(keyi)
if item == nil {
return nil
}
keyi = item.(*keyIndex)
return keyi.since(rev)
}
endi := &keyIndex{key: end}
var revs []revision
ti.tree.AscendGreaterOrEqual(keyi, func(item btree.Item) bool {
if len(endi.key) > 0 && !item.Less(endi) {
return false
}
curKeyi := item.(*keyIndex)
revs = append(revs, curKeyi.since(rev)...)
return true
})
sort.Sort(revisions(revs))
return revs
}
func (ti *treeIndex) Compact(rev int64) map[revision]struct{} {
available := make(map[revision]struct{})
var emptyki []*keyIndex
plog.Printf("store.index: compact %d", rev)
// TODO: do not hold the lock for long time?
// This is probably OK. Compacting 10M keys takes O(10ms).
ti.Lock()
defer ti.Unlock()
ti.tree.Ascend(compactIndex(rev, available, &emptyki))
for _, ki := range emptyki {
item := ti.tree.Delete(ki)
if item == nil {
plog.Panic("store.index: unexpected delete failure during compaction")
}
}
return available
}
func compactIndex(rev int64, available map[revision]struct{}, emptyki *[]*keyIndex) func(i btree.Item) bool {
return func(i btree.Item) bool {
keyi := i.(*keyIndex)
keyi.compact(rev, available)
if keyi.isEmpty() {
*emptyki = append(*emptyki, keyi)
}
return true
}
}
func (a *treeIndex) Equal(bi index) bool {
b := bi.(*treeIndex)
if a.tree.Len() != b.tree.Len() {
return false
}
equal := true
a.tree.Ascend(func(item btree.Item) bool {
aki := item.(*keyIndex)
bki := b.tree.Get(item).(*keyIndex)
if !aki.equal(bki) {
equal = false
return false
}
return true
})
return equal
}
func (ti *treeIndex) Insert(ki *keyIndex) {
ti.Lock()
defer ti.Unlock()
ti.tree.ReplaceOrInsert(ki)
}

View File

@@ -1,332 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"bytes"
"errors"
"fmt"
"github.com/google/btree"
)
var (
ErrRevisionNotFound = errors.New("mvcc: revision not found")
)
// keyIndex stores the revisions of a key in the backend.
// Each keyIndex has at least one key generation.
// Each generation might have several key versions.
// Tombstone on a key appends an tombstone version at the end
// of the current generation and creates a new empty generation.
// Each version of a key has an index pointing to the backend.
//
// For example: put(1.0);put(2.0);tombstone(3.0);put(4.0);tombstone(5.0) on key "foo"
// generate a keyIndex:
// key: "foo"
// rev: 5
// generations:
// {empty}
// {4.0, 5.0(t)}
// {1.0, 2.0, 3.0(t)}
//
// Compact a keyIndex removes the versions with smaller or equal to
// rev except the largest one. If the generation becomes empty
// during compaction, it will be removed. if all the generations get
// removed, the keyIndex should be removed.
// For example:
// compact(2) on the previous example
// generations:
// {empty}
// {4.0, 5.0(t)}
// {2.0, 3.0(t)}
//
// compact(4)
// generations:
// {empty}
// {4.0, 5.0(t)}
//
// compact(5):
// generations:
// {empty} -> key SHOULD be removed.
//
// compact(6):
// generations:
// {empty} -> key SHOULD be removed.
type keyIndex struct {
key []byte
modified revision // the main rev of the last modification
generations []generation
}
// put puts a revision to the keyIndex.
func (ki *keyIndex) put(main int64, sub int64) {
rev := revision{main: main, sub: sub}
if !rev.GreaterThan(ki.modified) {
plog.Panicf("store.keyindex: put with unexpected smaller revision [%v / %v]", rev, ki.modified)
}
if len(ki.generations) == 0 {
ki.generations = append(ki.generations, generation{})
}
g := &ki.generations[len(ki.generations)-1]
if len(g.revs) == 0 { // create a new key
keysGauge.Inc()
g.created = rev
}
g.revs = append(g.revs, rev)
g.ver++
ki.modified = rev
}
func (ki *keyIndex) restore(created, modified revision, ver int64) {
if len(ki.generations) != 0 {
plog.Panicf("store.keyindex: cannot restore non-empty keyIndex")
}
ki.modified = modified
g := generation{created: created, ver: ver, revs: []revision{modified}}
ki.generations = append(ki.generations, g)
keysGauge.Inc()
}
// tombstone puts a revision, pointing to a tombstone, to the keyIndex.
// It also creates a new empty generation in the keyIndex.
// It returns ErrRevisionNotFound when tombstone on an empty generation.
func (ki *keyIndex) tombstone(main int64, sub int64) error {
if ki.isEmpty() {
plog.Panicf("store.keyindex: unexpected tombstone on empty keyIndex %s", string(ki.key))
}
if ki.generations[len(ki.generations)-1].isEmpty() {
return ErrRevisionNotFound
}
ki.put(main, sub)
ki.generations = append(ki.generations, generation{})
keysGauge.Dec()
return nil
}
// get gets the modified, created revision and version of the key that satisfies the given atRev.
// Rev must be higher than or equal to the given atRev.
func (ki *keyIndex) get(atRev int64) (modified, created revision, ver int64, err error) {
if ki.isEmpty() {
plog.Panicf("store.keyindex: unexpected get on empty keyIndex %s", string(ki.key))
}
g := ki.findGeneration(atRev)
if g.isEmpty() {
return revision{}, revision{}, 0, ErrRevisionNotFound
}
n := g.walk(func(rev revision) bool { return rev.main > atRev })
if n != -1 {
return g.revs[n], g.created, g.ver - int64(len(g.revs)-n-1), nil
}
return revision{}, revision{}, 0, ErrRevisionNotFound
}
// since returns revisions since the given rev. Only the revision with the
// largest sub revision will be returned if multiple revisions have the same
// main revision.
func (ki *keyIndex) since(rev int64) []revision {
if ki.isEmpty() {
plog.Panicf("store.keyindex: unexpected get on empty keyIndex %s", string(ki.key))
}
since := revision{rev, 0}
var gi int
// find the generations to start checking
for gi = len(ki.generations) - 1; gi > 0; gi-- {
g := ki.generations[gi]
if g.isEmpty() {
continue
}
if since.GreaterThan(g.created) {
break
}
}
var revs []revision
var last int64
for ; gi < len(ki.generations); gi++ {
for _, r := range ki.generations[gi].revs {
if since.GreaterThan(r) {
continue
}
if r.main == last {
// replace the revision with a new one that has higher sub value,
// because the original one should not be seen by external
revs[len(revs)-1] = r
continue
}
revs = append(revs, r)
last = r.main
}
}
return revs
}
// compact compacts a keyIndex by removing the versions with smaller or equal
// revision than the given atRev except the largest one (If the largest one is
// a tombstone, it will not be kept).
// If a generation becomes empty during compaction, it will be removed.
func (ki *keyIndex) compact(atRev int64, available map[revision]struct{}) {
if ki.isEmpty() {
plog.Panicf("store.keyindex: unexpected compact on empty keyIndex %s", string(ki.key))
}
// walk until reaching the first revision that has an revision smaller or equal to
// the atRev.
// add it to the available map
f := func(rev revision) bool {
if rev.main <= atRev {
available[rev] = struct{}{}
return false
}
return true
}
i, g := 0, &ki.generations[0]
// find first generation includes atRev or created after atRev
for i < len(ki.generations)-1 {
if tomb := g.revs[len(g.revs)-1].main; tomb > atRev {
break
}
i++
g = &ki.generations[i]
}
if !g.isEmpty() {
n := g.walk(f)
// remove the previous contents.
if n != -1 {
g.revs = g.revs[n:]
}
// remove any tombstone
if len(g.revs) == 1 && i != len(ki.generations)-1 {
delete(available, g.revs[0])
i++
}
}
// remove the previous generations.
ki.generations = ki.generations[i:]
}
func (ki *keyIndex) isEmpty() bool {
return len(ki.generations) == 1 && ki.generations[0].isEmpty()
}
// findGeneration finds out the generation of the keyIndex that the
// given rev belongs to. If the given rev is at the gap of two generations,
// which means that the key does not exist at the given rev, it returns nil.
func (ki *keyIndex) findGeneration(rev int64) *generation {
lastg := len(ki.generations) - 1
cg := lastg
for cg >= 0 {
if len(ki.generations[cg].revs) == 0 {
cg--
continue
}
g := ki.generations[cg]
if cg != lastg {
if tomb := g.revs[len(g.revs)-1].main; tomb <= rev {
return nil
}
}
if g.revs[0].main <= rev {
return &ki.generations[cg]
}
cg--
}
return nil
}
func (a *keyIndex) Less(b btree.Item) bool {
return bytes.Compare(a.key, b.(*keyIndex).key) == -1
}
func (a *keyIndex) equal(b *keyIndex) bool {
if !bytes.Equal(a.key, b.key) {
return false
}
if a.modified != b.modified {
return false
}
if len(a.generations) != len(b.generations) {
return false
}
for i := range a.generations {
ag, bg := a.generations[i], b.generations[i]
if !ag.equal(bg) {
return false
}
}
return true
}
func (ki *keyIndex) String() string {
var s string
for _, g := range ki.generations {
s += g.String()
}
return s
}
// generation contains multiple revisions of a key.
type generation struct {
ver int64
created revision // when the generation is created (put in first revision).
revs []revision
}
func (g *generation) isEmpty() bool { return g == nil || len(g.revs) == 0 }
// walk walks through the revisions in the generation in descending order.
// It passes the revision to the given function.
// walk returns until: 1. it finishes walking all pairs 2. the function returns false.
// walk returns the position at where it stopped. If it stopped after
// finishing walking, -1 will be returned.
func (g *generation) walk(f func(rev revision) bool) int {
l := len(g.revs)
for i := range g.revs {
ok := f(g.revs[l-i-1])
if !ok {
return l - i - 1
}
}
return -1
}
func (g *generation) String() string {
return fmt.Sprintf("g: created[%d] ver[%d], revs %#v\n", g.created, g.ver, g.revs)
}
func (a generation) equal(b generation) bool {
if a.ver != b.ver {
return false
}
if len(a.revs) != len(b.revs) {
return false
}
for i := range a.revs {
ar, br := a.revs[i], b.revs[i]
if ar != br {
return false
}
}
return true
}

View File

@@ -1,147 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/mvcc/mvccpb"
)
type RangeOptions struct {
Limit int64
Rev int64
Count bool
}
type RangeResult struct {
KVs []mvccpb.KeyValue
Rev int64
Count int
}
type ReadView interface {
// FirstRev returns the first KV revision at the time of opening the txn.
// After a compaction, the first revision increases to the compaction
// revision.
FirstRev() int64
// Rev returns the revision of the KV at the time of opening the txn.
Rev() int64
// Range gets the keys in the range at rangeRev.
// The returned rev is the current revision of the KV when the operation is executed.
// If rangeRev <=0, range gets the keys at currentRev.
// If `end` is nil, the request returns the key.
// If `end` is not nil and not empty, it gets the keys in range [key, range_end).
// If `end` is not nil and empty, it gets the keys greater than or equal to key.
// Limit limits the number of keys returned.
// If the required rev is compacted, ErrCompacted will be returned.
Range(key, end []byte, ro RangeOptions) (r *RangeResult, err error)
}
// TxnRead represents a read-only transaction with operations that will not
// block other read transactions.
type TxnRead interface {
ReadView
// End marks the transaction is complete and ready to commit.
End()
}
type WriteView interface {
// DeleteRange deletes the given range from the store.
// A deleteRange increases the rev of the store if any key in the range exists.
// The number of key deleted will be returned.
// The returned rev is the current revision of the KV when the operation is executed.
// It also generates one event for each key delete in the event history.
// if the `end` is nil, deleteRange deletes the key.
// if the `end` is not nil, deleteRange deletes the keys in range [key, range_end).
DeleteRange(key, end []byte) (n, rev int64)
// Put puts the given key, value into the store. Put also takes additional argument lease to
// attach a lease to a key-value pair as meta-data. KV implementation does not validate the lease
// id.
// A put also increases the rev of the store, and generates one event in the event history.
// The returned rev is the current revision of the KV when the operation is executed.
Put(key, value []byte, lease lease.LeaseID) (rev int64)
}
// TxnWrite represents a transaction that can modify the store.
type TxnWrite interface {
TxnRead
WriteView
// Changes gets the changes made since opening the write txn.
Changes() []mvccpb.KeyValue
}
// txnReadWrite coerces a read txn to a write, panicking on any write operation.
type txnReadWrite struct{ TxnRead }
func (trw *txnReadWrite) DeleteRange(key, end []byte) (n, rev int64) { panic("unexpected DeleteRange") }
func (trw *txnReadWrite) Put(key, value []byte, lease lease.LeaseID) (rev int64) {
panic("unexpected Put")
}
func (trw *txnReadWrite) Changes() []mvccpb.KeyValue { return nil }
func NewReadOnlyTxnWrite(txn TxnRead) TxnWrite { return &txnReadWrite{txn} }
type KV interface {
ReadView
WriteView
// Read creates a read transaction.
Read() TxnRead
// Write creates a write transaction.
Write() TxnWrite
// Hash retrieves the hash of KV state and revision.
// This method is designed for consistency checking purposes.
Hash() (hash uint32, revision int64, err error)
// Compact frees all superseded keys with revisions less than rev.
Compact(rev int64) (<-chan struct{}, error)
// Commit commits outstanding txns into the underlying backend.
Commit()
// Restore restores the KV store from a backend.
Restore(b backend.Backend) error
Close() error
}
// WatchableKV is a KV that can be watched.
type WatchableKV interface {
KV
Watchable
}
// Watchable is the interface that wraps the NewWatchStream function.
type Watchable interface {
// NewWatchStream returns a WatchStream that can be used to
// watch events happened or happening on the KV.
NewWatchStream() WatchStream
}
// ConsistentWatchableKV is a WatchableKV that understands the consistency
// algorithm and consistent index.
// If the consistent index of executing entry is not larger than the
// consistent index of ConsistentWatchableKV, all operations in
// this entry are skipped and return empty response.
type ConsistentWatchableKV interface {
WatchableKV
// ConsistentIndex returns the current consistent index of the KV.
ConsistentIndex() uint64
}

View File

@@ -1,53 +0,0 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"github.com/coreos/etcd/lease"
)
type readView struct{ kv KV }
func (rv *readView) FirstRev() int64 {
tr := rv.kv.Read()
defer tr.End()
return tr.FirstRev()
}
func (rv *readView) Rev() int64 {
tr := rv.kv.Read()
defer tr.End()
return tr.Rev()
}
func (rv *readView) Range(key, end []byte, ro RangeOptions) (r *RangeResult, err error) {
tr := rv.kv.Read()
defer tr.End()
return tr.Range(key, end, ro)
}
type writeView struct{ kv KV }
func (wv *writeView) DeleteRange(key, end []byte) (n, rev int64) {
tw := wv.kv.Write()
defer tw.End()
return tw.DeleteRange(key, end)
}
func (wv *writeView) Put(key, value []byte, lease lease.LeaseID) (rev int64) {
tw := wv.kv.Write()
defer tw.End()
return tw.Put(key, value, lease)
}

View File

@@ -1,459 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"encoding/binary"
"errors"
"math"
"sync"
"time"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/mvcc/mvccpb"
"github.com/coreos/etcd/pkg/schedule"
"github.com/coreos/pkg/capnslog"
"golang.org/x/net/context"
)
var (
keyBucketName = []byte("key")
metaBucketName = []byte("meta")
consistentIndexKeyName = []byte("consistent_index")
scheduledCompactKeyName = []byte("scheduledCompactRev")
finishedCompactKeyName = []byte("finishedCompactRev")
ErrCompacted = errors.New("mvcc: required revision has been compacted")
ErrFutureRev = errors.New("mvcc: required revision is a future revision")
ErrCanceled = errors.New("mvcc: watcher is canceled")
ErrClosed = errors.New("mvcc: closed")
plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "mvcc")
)
const (
// markedRevBytesLen is the byte length of marked revision.
// The first `revBytesLen` bytes represents a normal revision. The last
// one byte is the mark.
markedRevBytesLen = revBytesLen + 1
markBytePosition = markedRevBytesLen - 1
markTombstone byte = 't'
)
var restoreChunkKeys = 10000 // non-const for testing
// ConsistentIndexGetter is an interface that wraps the Get method.
// Consistent index is the offset of an entry in a consistent replicated log.
type ConsistentIndexGetter interface {
// ConsistentIndex returns the consistent index of current executing entry.
ConsistentIndex() uint64
}
type store struct {
ReadView
WriteView
// mu read locks for txns and write locks for non-txn store changes.
mu sync.RWMutex
ig ConsistentIndexGetter
b backend.Backend
kvindex index
le lease.Lessor
// revMuLock protects currentRev and compactMainRev.
// Locked at end of write txn and released after write txn unlock lock.
// Locked before locking read txn and released after locking.
revMu sync.RWMutex
// currentRev is the revision of the last completed transaction.
currentRev int64
// compactMainRev is the main revision of the last compaction.
compactMainRev int64
// bytesBuf8 is a byte slice of length 8
// to avoid a repetitive allocation in saveIndex.
bytesBuf8 []byte
fifoSched schedule.Scheduler
stopc chan struct{}
}
// NewStore returns a new store. It is useful to create a store inside
// mvcc pkg. It should only be used for testing externally.
func NewStore(b backend.Backend, le lease.Lessor, ig ConsistentIndexGetter) *store {
s := &store{
b: b,
ig: ig,
kvindex: newTreeIndex(),
le: le,
currentRev: 1,
compactMainRev: -1,
bytesBuf8: make([]byte, 8),
fifoSched: schedule.NewFIFOScheduler(),
stopc: make(chan struct{}),
}
s.ReadView = &readView{s}
s.WriteView = &writeView{s}
if s.le != nil {
s.le.SetRangeDeleter(func() lease.TxnDelete { return s.Write() })
}
tx := s.b.BatchTx()
tx.Lock()
tx.UnsafeCreateBucket(keyBucketName)
tx.UnsafeCreateBucket(metaBucketName)
tx.Unlock()
s.b.ForceCommit()
if err := s.restore(); err != nil {
// TODO: return the error instead of panic here?
panic("failed to recover store from backend")
}
return s
}
func (s *store) compactBarrier(ctx context.Context, ch chan struct{}) {
if ctx == nil || ctx.Err() != nil {
s.mu.Lock()
select {
case <-s.stopc:
default:
f := func(ctx context.Context) { s.compactBarrier(ctx, ch) }
s.fifoSched.Schedule(f)
}
s.mu.Unlock()
return
}
close(ch)
}
func (s *store) Hash() (hash uint32, revision int64, err error) {
s.b.ForceCommit()
h, err := s.b.Hash(DefaultIgnores)
return h, s.currentRev, err
}
func (s *store) Compact(rev int64) (<-chan struct{}, error) {
s.mu.Lock()
defer s.mu.Unlock()
s.revMu.Lock()
defer s.revMu.Unlock()
if rev <= s.compactMainRev {
ch := make(chan struct{})
f := func(ctx context.Context) { s.compactBarrier(ctx, ch) }
s.fifoSched.Schedule(f)
return ch, ErrCompacted
}
if rev > s.currentRev {
return nil, ErrFutureRev
}
start := time.Now()
s.compactMainRev = rev
rbytes := newRevBytes()
revToBytes(revision{main: rev}, rbytes)
tx := s.b.BatchTx()
tx.Lock()
tx.UnsafePut(metaBucketName, scheduledCompactKeyName, rbytes)
tx.Unlock()
// ensure that desired compaction is persisted
s.b.ForceCommit()
keep := s.kvindex.Compact(rev)
ch := make(chan struct{})
var j = func(ctx context.Context) {
if ctx.Err() != nil {
s.compactBarrier(ctx, ch)
return
}
if !s.scheduleCompaction(rev, keep) {
s.compactBarrier(nil, ch)
return
}
close(ch)
}
s.fifoSched.Schedule(j)
indexCompactionPauseDurations.Observe(float64(time.Since(start) / time.Millisecond))
return ch, nil
}
// DefaultIgnores is a map of keys to ignore in hash checking.
var DefaultIgnores map[backend.IgnoreKey]struct{}
func init() {
DefaultIgnores = map[backend.IgnoreKey]struct{}{
// consistent index might be changed due to v2 internal sync, which
// is not controllable by the user.
{Bucket: string(metaBucketName), Key: string(consistentIndexKeyName)}: {},
}
}
func (s *store) Commit() {
s.mu.Lock()
defer s.mu.Unlock()
tx := s.b.BatchTx()
tx.Lock()
s.saveIndex(tx)
tx.Unlock()
s.b.ForceCommit()
}
func (s *store) Restore(b backend.Backend) error {
s.mu.Lock()
defer s.mu.Unlock()
close(s.stopc)
s.fifoSched.Stop()
s.b = b
s.kvindex = newTreeIndex()
s.currentRev = 1
s.compactMainRev = -1
s.fifoSched = schedule.NewFIFOScheduler()
s.stopc = make(chan struct{})
return s.restore()
}
func (s *store) restore() error {
reportDbTotalSizeInBytesMu.Lock()
b := s.b
reportDbTotalSizeInBytes = func() float64 { return float64(b.Size()) }
reportDbTotalSizeInBytesMu.Unlock()
min, max := newRevBytes(), newRevBytes()
revToBytes(revision{main: 1}, min)
revToBytes(revision{main: math.MaxInt64, sub: math.MaxInt64}, max)
keyToLease := make(map[string]lease.LeaseID)
// restore index
tx := s.b.BatchTx()
tx.Lock()
_, finishedCompactBytes := tx.UnsafeRange(metaBucketName, finishedCompactKeyName, nil, 0)
if len(finishedCompactBytes) != 0 {
s.compactMainRev = bytesToRev(finishedCompactBytes[0]).main
plog.Printf("restore compact to %d", s.compactMainRev)
}
_, scheduledCompactBytes := tx.UnsafeRange(metaBucketName, scheduledCompactKeyName, nil, 0)
scheduledCompact := int64(0)
if len(scheduledCompactBytes) != 0 {
scheduledCompact = bytesToRev(scheduledCompactBytes[0]).main
}
// index keys concurrently as they're loaded in from tx
keysGauge.Set(0)
rkvc, revc := restoreIntoIndex(s.kvindex)
for {
keys, vals := tx.UnsafeRange(keyBucketName, min, max, int64(restoreChunkKeys))
if len(keys) == 0 {
break
}
// rkvc blocks if the total pending keys exceeds the restore
// chunk size to keep keys from consuming too much memory.
restoreChunk(rkvc, keys, vals, keyToLease)
if len(keys) < restoreChunkKeys {
// partial set implies final set
break
}
// next set begins after where this one ended
newMin := bytesToRev(keys[len(keys)-1][:revBytesLen])
newMin.sub++
revToBytes(newMin, min)
}
close(rkvc)
s.currentRev = <-revc
// keys in the range [compacted revision -N, compaction] might all be deleted due to compaction.
// the correct revision should be set to compaction revision in the case, not the largest revision
// we have seen.
if s.currentRev < s.compactMainRev {
s.currentRev = s.compactMainRev
}
if scheduledCompact <= s.compactMainRev {
scheduledCompact = 0
}
for key, lid := range keyToLease {
if s.le == nil {
panic("no lessor to attach lease")
}
err := s.le.Attach(lid, []lease.LeaseItem{{Key: key}})
if err != nil {
plog.Errorf("unexpected Attach error: %v", err)
}
}
tx.Unlock()
if scheduledCompact != 0 {
s.Compact(scheduledCompact)
plog.Printf("resume scheduled compaction at %d", scheduledCompact)
}
return nil
}
type revKeyValue struct {
key []byte
kv mvccpb.KeyValue
kstr string
}
func restoreIntoIndex(idx index) (chan<- revKeyValue, <-chan int64) {
rkvc, revc := make(chan revKeyValue, restoreChunkKeys), make(chan int64, 1)
go func() {
currentRev := int64(1)
defer func() { revc <- currentRev }()
// restore the tree index from streaming the unordered index.
kiCache := make(map[string]*keyIndex, restoreChunkKeys)
for rkv := range rkvc {
ki, ok := kiCache[rkv.kstr]
// purge kiCache if many keys but still missing in the cache
if !ok && len(kiCache) >= restoreChunkKeys {
i := 10
for k := range kiCache {
delete(kiCache, k)
if i--; i == 0 {
break
}
}
}
// cache miss, fetch from tree index if there
if !ok {
ki = &keyIndex{key: rkv.kv.Key}
if idxKey := idx.KeyIndex(ki); idxKey != nil {
kiCache[rkv.kstr], ki = idxKey, idxKey
ok = true
}
}
rev := bytesToRev(rkv.key)
currentRev = rev.main
if ok {
if isTombstone(rkv.key) {
ki.tombstone(rev.main, rev.sub)
continue
}
ki.put(rev.main, rev.sub)
} else if !isTombstone(rkv.key) {
ki.restore(revision{rkv.kv.CreateRevision, 0}, rev, rkv.kv.Version)
idx.Insert(ki)
kiCache[rkv.kstr] = ki
}
}
}()
return rkvc, revc
}
func restoreChunk(kvc chan<- revKeyValue, keys, vals [][]byte, keyToLease map[string]lease.LeaseID) {
for i, key := range keys {
rkv := revKeyValue{key: key}
if err := rkv.kv.Unmarshal(vals[i]); err != nil {
plog.Fatalf("cannot unmarshal event: %v", err)
}
rkv.kstr = string(rkv.kv.Key)
if isTombstone(key) {
delete(keyToLease, rkv.kstr)
} else if lid := lease.LeaseID(rkv.kv.Lease); lid != lease.NoLease {
keyToLease[rkv.kstr] = lid
} else {
delete(keyToLease, rkv.kstr)
}
kvc <- rkv
}
}
func (s *store) Close() error {
close(s.stopc)
s.fifoSched.Stop()
return nil
}
func (a *store) Equal(b *store) bool {
if a.currentRev != b.currentRev {
return false
}
if a.compactMainRev != b.compactMainRev {
return false
}
return a.kvindex.Equal(b.kvindex)
}
func (s *store) saveIndex(tx backend.BatchTx) {
if s.ig == nil {
return
}
bs := s.bytesBuf8
binary.BigEndian.PutUint64(bs, s.ig.ConsistentIndex())
// put the index into the underlying backend
// tx has been locked in TxnBegin, so there is no need to lock it again
tx.UnsafePut(metaBucketName, consistentIndexKeyName, bs)
}
func (s *store) ConsistentIndex() uint64 {
// TODO: cache index in a uint64 field?
tx := s.b.BatchTx()
tx.Lock()
defer tx.Unlock()
_, vs := tx.UnsafeRange(metaBucketName, consistentIndexKeyName, nil, 0)
if len(vs) == 0 {
return 0
}
return binary.BigEndian.Uint64(vs[0])
}
// appendMarkTombstone appends tombstone mark to normal revision bytes.
func appendMarkTombstone(b []byte) []byte {
if len(b) != revBytesLen {
plog.Panicf("cannot append mark to non normal revision bytes")
}
return append(b, markTombstone)
}
// isTombstone checks whether the revision bytes is a tombstone.
func isTombstone(b []byte) bool {
return len(b) == markedRevBytesLen && b[markBytePosition] == markTombstone
}
// revBytesRange returns the range of revision bytes at
// the given revision.
func revBytesRange(rev revision) (start, end []byte) {
start = newRevBytes()
revToBytes(rev, start)
end = newRevBytes()
endRev := revision{main: rev.main, sub: rev.sub + 1}
revToBytes(endRev, end)
return start, end
}

View File

@@ -1,66 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"encoding/binary"
"time"
)
func (s *store) scheduleCompaction(compactMainRev int64, keep map[revision]struct{}) bool {
totalStart := time.Now()
defer dbCompactionTotalDurations.Observe(float64(time.Since(totalStart) / time.Millisecond))
end := make([]byte, 8)
binary.BigEndian.PutUint64(end, uint64(compactMainRev+1))
batchsize := int64(10000)
last := make([]byte, 8+1+8)
for {
var rev revision
start := time.Now()
tx := s.b.BatchTx()
tx.Lock()
keys, _ := tx.UnsafeRange(keyBucketName, last, end, batchsize)
for _, key := range keys {
rev = bytesToRev(key)
if _, ok := keep[rev]; !ok {
tx.UnsafeDelete(keyBucketName, key)
}
}
if len(keys) < int(batchsize) {
rbytes := make([]byte, 8+1+8)
revToBytes(revision{main: compactMainRev}, rbytes)
tx.UnsafePut(metaBucketName, finishedCompactKeyName, rbytes)
tx.Unlock()
plog.Printf("finished scheduled compaction at %d (took %v)", compactMainRev, time.Since(totalStart))
return true
}
// update last
revToBytes(revision{main: rev.main, sub: rev.sub + 1}, last)
tx.Unlock()
dbCompactionPauseDurations.Observe(float64(time.Since(start) / time.Millisecond))
select {
case <-time.After(100 * time.Millisecond):
case <-s.stopc:
return false
}
}
}

View File

@@ -1,253 +0,0 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/mvcc/mvccpb"
)
type storeTxnRead struct {
s *store
tx backend.ReadTx
firstRev int64
rev int64
}
func (s *store) Read() TxnRead {
s.mu.RLock()
tx := s.b.ReadTx()
s.revMu.RLock()
tx.Lock()
firstRev, rev := s.compactMainRev, s.currentRev
s.revMu.RUnlock()
return newMetricsTxnRead(&storeTxnRead{s, tx, firstRev, rev})
}
func (tr *storeTxnRead) FirstRev() int64 { return tr.firstRev }
func (tr *storeTxnRead) Rev() int64 { return tr.rev }
func (tr *storeTxnRead) Range(key, end []byte, ro RangeOptions) (r *RangeResult, err error) {
return tr.rangeKeys(key, end, tr.Rev(), ro)
}
func (tr *storeTxnRead) End() {
tr.tx.Unlock()
tr.s.mu.RUnlock()
}
type storeTxnWrite struct {
*storeTxnRead
tx backend.BatchTx
// beginRev is the revision where the txn begins; it will write to the next revision.
beginRev int64
changes []mvccpb.KeyValue
}
func (s *store) Write() TxnWrite {
s.mu.RLock()
tx := s.b.BatchTx()
tx.Lock()
tw := &storeTxnWrite{
storeTxnRead: &storeTxnRead{s, tx, 0, 0},
tx: tx,
beginRev: s.currentRev,
changes: make([]mvccpb.KeyValue, 0, 4),
}
return newMetricsTxnWrite(tw)
}
func (tw *storeTxnWrite) Rev() int64 { return tw.beginRev }
func (tw *storeTxnWrite) Range(key, end []byte, ro RangeOptions) (r *RangeResult, err error) {
rev := tw.beginRev
if len(tw.changes) > 0 {
rev++
}
return tw.rangeKeys(key, end, rev, ro)
}
func (tw *storeTxnWrite) DeleteRange(key, end []byte) (int64, int64) {
if n := tw.deleteRange(key, end); n != 0 || len(tw.changes) > 0 {
return n, int64(tw.beginRev + 1)
}
return 0, int64(tw.beginRev)
}
func (tw *storeTxnWrite) Put(key, value []byte, lease lease.LeaseID) int64 {
tw.put(key, value, lease)
return int64(tw.beginRev + 1)
}
func (tw *storeTxnWrite) End() {
// only update index if the txn modifies the mvcc state.
if len(tw.changes) != 0 {
tw.s.saveIndex(tw.tx)
// hold revMu lock to prevent new read txns from opening until writeback.
tw.s.revMu.Lock()
tw.s.currentRev++
}
tw.tx.Unlock()
if len(tw.changes) != 0 {
tw.s.revMu.Unlock()
}
tw.s.mu.RUnlock()
}
func (tr *storeTxnRead) rangeKeys(key, end []byte, curRev int64, ro RangeOptions) (*RangeResult, error) {
rev := ro.Rev
if rev > curRev {
return &RangeResult{KVs: nil, Count: -1, Rev: curRev}, ErrFutureRev
}
if rev <= 0 {
rev = curRev
}
if rev < tr.s.compactMainRev {
return &RangeResult{KVs: nil, Count: -1, Rev: 0}, ErrCompacted
}
_, revpairs := tr.s.kvindex.Range(key, end, int64(rev))
if len(revpairs) == 0 {
return &RangeResult{KVs: nil, Count: 0, Rev: curRev}, nil
}
if ro.Count {
return &RangeResult{KVs: nil, Count: len(revpairs), Rev: curRev}, nil
}
var kvs []mvccpb.KeyValue
for _, revpair := range revpairs {
start, end := revBytesRange(revpair)
_, vs := tr.tx.UnsafeRange(keyBucketName, start, end, 0)
if len(vs) != 1 {
plog.Fatalf("range cannot find rev (%d,%d)", revpair.main, revpair.sub)
}
var kv mvccpb.KeyValue
if err := kv.Unmarshal(vs[0]); err != nil {
plog.Fatalf("cannot unmarshal event: %v", err)
}
kvs = append(kvs, kv)
if ro.Limit > 0 && len(kvs) >= int(ro.Limit) {
break
}
}
return &RangeResult{KVs: kvs, Count: len(revpairs), Rev: curRev}, nil
}
func (tw *storeTxnWrite) put(key, value []byte, leaseID lease.LeaseID) {
rev := tw.beginRev + 1
c := rev
oldLease := lease.NoLease
// if the key exists before, use its previous created and
// get its previous leaseID
_, created, ver, err := tw.s.kvindex.Get(key, rev)
if err == nil {
c = created.main
oldLease = tw.s.le.GetLease(lease.LeaseItem{Key: string(key)})
}
ibytes := newRevBytes()
idxRev := revision{main: rev, sub: int64(len(tw.changes))}
revToBytes(idxRev, ibytes)
ver = ver + 1
kv := mvccpb.KeyValue{
Key: key,
Value: value,
CreateRevision: c,
ModRevision: rev,
Version: ver,
Lease: int64(leaseID),
}
d, err := kv.Marshal()
if err != nil {
plog.Fatalf("cannot marshal event: %v", err)
}
tw.tx.UnsafeSeqPut(keyBucketName, ibytes, d)
tw.s.kvindex.Put(key, idxRev)
tw.changes = append(tw.changes, kv)
if oldLease != lease.NoLease {
if tw.s.le == nil {
panic("no lessor to detach lease")
}
err = tw.s.le.Detach(oldLease, []lease.LeaseItem{{Key: string(key)}})
if err != nil {
plog.Errorf("unexpected error from lease detach: %v", err)
}
}
if leaseID != lease.NoLease {
if tw.s.le == nil {
panic("no lessor to attach lease")
}
err = tw.s.le.Attach(leaseID, []lease.LeaseItem{{Key: string(key)}})
if err != nil {
panic("unexpected error from lease Attach")
}
}
}
func (tw *storeTxnWrite) deleteRange(key, end []byte) int64 {
rrev := tw.beginRev
if len(tw.changes) > 0 {
rrev += 1
}
keys, revs := tw.s.kvindex.Range(key, end, rrev)
if len(keys) == 0 {
return 0
}
for i, key := range keys {
tw.delete(key, revs[i])
}
return int64(len(keys))
}
func (tw *storeTxnWrite) delete(key []byte, rev revision) {
ibytes := newRevBytes()
idxRev := revision{main: tw.beginRev + 1, sub: int64(len(tw.changes))}
revToBytes(idxRev, ibytes)
ibytes = appendMarkTombstone(ibytes)
kv := mvccpb.KeyValue{Key: key}
d, err := kv.Marshal()
if err != nil {
plog.Fatalf("cannot marshal event: %v", err)
}
tw.tx.UnsafeSeqPut(keyBucketName, ibytes, d)
err = tw.s.kvindex.Tombstone(key, idxRev)
if err != nil {
plog.Fatalf("cannot tombstone an existing key (%s): %v", string(key), err)
}
tw.changes = append(tw.changes, kv)
item := lease.LeaseItem{Key: string(key)}
leaseID := tw.s.le.GetLease(item)
if leaseID != lease.NoLease {
err = tw.s.le.Detach(leaseID, []lease.LeaseItem{item})
if err != nil {
plog.Errorf("cannot detach %v", err)
}
}
}
func (tw *storeTxnWrite) Changes() []mvccpb.KeyValue { return tw.changes }

View File

@@ -1,174 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"sync"
"github.com/prometheus/client_golang/prometheus"
)
var (
rangeCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "range_total",
Help: "Total number of ranges seen by this member.",
})
putCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "put_total",
Help: "Total number of puts seen by this member.",
})
deleteCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "delete_total",
Help: "Total number of deletes seen by this member.",
})
txnCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "txn_total",
Help: "Total number of txns seen by this member.",
})
keysGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "keys_total",
Help: "Total number of keys.",
})
watchStreamGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "watch_stream_total",
Help: "Total number of watch streams.",
})
watcherGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "watcher_total",
Help: "Total number of watchers.",
})
slowWatcherGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "slow_watcher_total",
Help: "Total number of unsynced slow watchers.",
})
totalEventsCounter = prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "events_total",
Help: "Total number of events sent by this member.",
})
pendingEventsGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "pending_events_total",
Help: "Total number of pending events to be sent.",
})
indexCompactionPauseDurations = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "index_compaction_pause_duration_milliseconds",
Help: "Bucketed histogram of index compaction pause duration.",
// 0.5ms -> 1second
Buckets: prometheus.ExponentialBuckets(0.5, 2, 12),
})
dbCompactionPauseDurations = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "db_compaction_pause_duration_milliseconds",
Help: "Bucketed histogram of db compaction pause duration.",
// 1ms -> 4second
Buckets: prometheus.ExponentialBuckets(1, 2, 13),
})
dbCompactionTotalDurations = prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "db_compaction_total_duration_milliseconds",
Help: "Bucketed histogram of db compaction total duration.",
// 100ms -> 800second
Buckets: prometheus.ExponentialBuckets(100, 2, 14),
})
dbTotalSize = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "db_total_size_in_bytes",
Help: "Total size of the underlying database in bytes.",
},
func() float64 {
reportDbTotalSizeInBytesMu.RLock()
defer reportDbTotalSizeInBytesMu.RUnlock()
return reportDbTotalSizeInBytes()
},
)
// overridden by mvcc initialization
reportDbTotalSizeInBytesMu sync.RWMutex
reportDbTotalSizeInBytes func() float64 = func() float64 { return 0 }
)
func init() {
prometheus.MustRegister(rangeCounter)
prometheus.MustRegister(putCounter)
prometheus.MustRegister(deleteCounter)
prometheus.MustRegister(txnCounter)
prometheus.MustRegister(keysGauge)
prometheus.MustRegister(watchStreamGauge)
prometheus.MustRegister(watcherGauge)
prometheus.MustRegister(slowWatcherGauge)
prometheus.MustRegister(totalEventsCounter)
prometheus.MustRegister(pendingEventsGauge)
prometheus.MustRegister(indexCompactionPauseDurations)
prometheus.MustRegister(dbCompactionPauseDurations)
prometheus.MustRegister(dbCompactionTotalDurations)
prometheus.MustRegister(dbTotalSize)
}
// ReportEventReceived reports that an event is received.
// This function should be called when the external systems received an
// event from mvcc.Watcher.
func ReportEventReceived(n int) {
pendingEventsGauge.Sub(float64(n))
totalEventsCounter.Add(float64(n))
}

View File

@@ -1,67 +0,0 @@
// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"github.com/coreos/etcd/lease"
)
type metricsTxnWrite struct {
TxnWrite
ranges uint
puts uint
deletes uint
}
func newMetricsTxnRead(tr TxnRead) TxnRead {
return &metricsTxnWrite{&txnReadWrite{tr}, 0, 0, 0}
}
func newMetricsTxnWrite(tw TxnWrite) TxnWrite {
return &metricsTxnWrite{tw, 0, 0, 0}
}
func (tw *metricsTxnWrite) Range(key, end []byte, ro RangeOptions) (*RangeResult, error) {
tw.ranges++
return tw.TxnWrite.Range(key, end, ro)
}
func (tw *metricsTxnWrite) DeleteRange(key, end []byte) (n, rev int64) {
tw.deletes++
return tw.TxnWrite.DeleteRange(key, end)
}
func (tw *metricsTxnWrite) Put(key, value []byte, lease lease.LeaseID) (rev int64) {
tw.puts++
return tw.TxnWrite.Put(key, value, lease)
}
func (tw *metricsTxnWrite) End() {
defer tw.TxnWrite.End()
if sum := tw.ranges + tw.puts + tw.deletes; sum != 1 {
if sum > 1 {
txnCounter.Inc()
}
return
}
switch {
case tw.ranges == 1:
rangeCounter.Inc()
case tw.puts == 1:
putCounter.Inc()
case tw.deletes == 1:
deleteCounter.Inc()
}
}

View File

@@ -1,67 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import "encoding/binary"
// revBytesLen is the byte length of a normal revision.
// First 8 bytes is the revision.main in big-endian format. The 9th byte
// is a '_'. The last 8 bytes is the revision.sub in big-endian format.
const revBytesLen = 8 + 1 + 8
// A revision indicates modification of the key-value space.
// The set of changes that share same main revision changes the key-value space atomically.
type revision struct {
// main is the main revision of a set of changes that happen atomically.
main int64
// sub is the the sub revision of a change in a set of changes that happen
// atomically. Each change has different increasing sub revision in that
// set.
sub int64
}
func (a revision) GreaterThan(b revision) bool {
if a.main > b.main {
return true
}
if a.main < b.main {
return false
}
return a.sub > b.sub
}
func newRevBytes() []byte {
return make([]byte, revBytesLen, markedRevBytesLen)
}
func revToBytes(rev revision, bytes []byte) {
binary.BigEndian.PutUint64(bytes, uint64(rev.main))
bytes[8] = '_'
binary.BigEndian.PutUint64(bytes[9:], uint64(rev.sub))
}
func bytesToRev(bytes []byte) revision {
return revision{
main: int64(binary.BigEndian.Uint64(bytes[0:8])),
sub: int64(binary.BigEndian.Uint64(bytes[9:])),
}
}
type revisions []revision
func (a revisions) Len() int { return len(a) }
func (a revisions) Less(i, j int) bool { return a[j].GreaterThan(a[i]) }
func (a revisions) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

View File

@@ -1,56 +0,0 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"encoding/binary"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/mvcc/mvccpb"
)
func UpdateConsistentIndex(be backend.Backend, index uint64) {
tx := be.BatchTx()
tx.Lock()
defer tx.Unlock()
var oldi uint64
_, vs := tx.UnsafeRange(metaBucketName, consistentIndexKeyName, nil, 0)
if len(vs) != 0 {
oldi = binary.BigEndian.Uint64(vs[0])
}
if index <= oldi {
return
}
bs := make([]byte, 8)
binary.BigEndian.PutUint64(bs, index)
tx.UnsafePut(metaBucketName, consistentIndexKeyName, bs)
}
func WriteKV(be backend.Backend, kv mvccpb.KeyValue) {
ibytes := newRevBytes()
revToBytes(revision{main: kv.ModRevision}, ibytes)
d, err := kv.Marshal()
if err != nil {
plog.Fatalf("cannot marshal event: %v", err)
}
be.BatchTx().Lock()
be.BatchTx().UnsafePut(keyBucketName, ibytes, d)
be.BatchTx().Unlock()
}

View File

@@ -1,522 +0,0 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mvcc
import (
"sync"
"time"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/mvcc/mvccpb"
)
const (
// chanBufLen is the length of the buffered chan
// for sending out watched events.
// TODO: find a good buf value. 1024 is just a random one that
// seems to be reasonable.
chanBufLen = 1024
// maxWatchersPerSync is the number of watchers to sync in a single batch
maxWatchersPerSync = 512
)
type watchable interface {
watch(key, end []byte, startRev int64, id WatchID, ch chan<- WatchResponse, fcs ...FilterFunc) (*watcher, cancelFunc)
progress(w *watcher)
rev() int64
}
type watchableStore struct {
*store
// mu protects watcher groups and batches. It should never be locked
// before locking store.mu to avoid deadlock.
mu sync.RWMutex
// victims are watcher batches that were blocked on the watch channel
victims []watcherBatch
victimc chan struct{}
// contains all unsynced watchers that needs to sync with events that have happened
unsynced watcherGroup
// contains all synced watchers that are in sync with the progress of the store.
// The key of the map is the key that the watcher watches on.
synced watcherGroup
stopc chan struct{}
wg sync.WaitGroup
}
// cancelFunc updates unsynced and synced maps when running
// cancel operations.
type cancelFunc func()
func New(b backend.Backend, le lease.Lessor, ig ConsistentIndexGetter) ConsistentWatchableKV {
return newWatchableStore(b, le, ig)
}
func newWatchableStore(b backend.Backend, le lease.Lessor, ig ConsistentIndexGetter) *watchableStore {
s := &watchableStore{
store: NewStore(b, le, ig),
victimc: make(chan struct{}, 1),
unsynced: newWatcherGroup(),
synced: newWatcherGroup(),
stopc: make(chan struct{}),
}
s.store.ReadView = &readView{s}
s.store.WriteView = &writeView{s}
if s.le != nil {
// use this store as the deleter so revokes trigger watch events
s.le.SetRangeDeleter(func() lease.TxnDelete { return s.Write() })
}
s.wg.Add(2)
go s.syncWatchersLoop()
go s.syncVictimsLoop()
return s
}
func (s *watchableStore) Close() error {
close(s.stopc)
s.wg.Wait()
return s.store.Close()
}
func (s *watchableStore) NewWatchStream() WatchStream {
watchStreamGauge.Inc()
return &watchStream{
watchable: s,
ch: make(chan WatchResponse, chanBufLen),
cancels: make(map[WatchID]cancelFunc),
watchers: make(map[WatchID]*watcher),
}
}
func (s *watchableStore) watch(key, end []byte, startRev int64, id WatchID, ch chan<- WatchResponse, fcs ...FilterFunc) (*watcher, cancelFunc) {
wa := &watcher{
key: key,
end: end,
minRev: startRev,
id: id,
ch: ch,
fcs: fcs,
}
s.mu.Lock()
s.revMu.RLock()
synced := startRev > s.store.currentRev || startRev == 0
if synced {
wa.minRev = s.store.currentRev + 1
if startRev > wa.minRev {
wa.minRev = startRev
}
}
if synced {
s.synced.add(wa)
} else {
slowWatcherGauge.Inc()
s.unsynced.add(wa)
}
s.revMu.RUnlock()
s.mu.Unlock()
watcherGauge.Inc()
return wa, func() { s.cancelWatcher(wa) }
}
// cancelWatcher removes references of the watcher from the watchableStore
func (s *watchableStore) cancelWatcher(wa *watcher) {
for {
s.mu.Lock()
if s.unsynced.delete(wa) {
slowWatcherGauge.Dec()
break
} else if s.synced.delete(wa) {
break
} else if wa.compacted {
break
}
if !wa.victim {
panic("watcher not victim but not in watch groups")
}
var victimBatch watcherBatch
for _, wb := range s.victims {
if wb[wa] != nil {
victimBatch = wb
break
}
}
if victimBatch != nil {
slowWatcherGauge.Dec()
delete(victimBatch, wa)
break
}
// victim being processed so not accessible; retry
s.mu.Unlock()
time.Sleep(time.Millisecond)
}
watcherGauge.Dec()
s.mu.Unlock()
}
func (s *watchableStore) Restore(b backend.Backend) error {
s.mu.Lock()
defer s.mu.Unlock()
err := s.store.Restore(b)
if err != nil {
return err
}
for wa := range s.synced.watchers {
s.unsynced.watchers.add(wa)
}
s.synced = newWatcherGroup()
return nil
}
// syncWatchersLoop syncs the watcher in the unsynced map every 100ms.
func (s *watchableStore) syncWatchersLoop() {
defer s.wg.Done()
for {
s.mu.RLock()
st := time.Now()
lastUnsyncedWatchers := s.unsynced.size()
s.mu.RUnlock()
unsyncedWatchers := 0
if lastUnsyncedWatchers > 0 {
unsyncedWatchers = s.syncWatchers()
}
syncDuration := time.Since(st)
waitDuration := 100 * time.Millisecond
// more work pending?
if unsyncedWatchers != 0 && lastUnsyncedWatchers > unsyncedWatchers {
// be fair to other store operations by yielding time taken
waitDuration = syncDuration
}
select {
case <-time.After(waitDuration):
case <-s.stopc:
return
}
}
}
// syncVictimsLoop tries to write precomputed watcher responses to
// watchers that had a blocked watcher channel
func (s *watchableStore) syncVictimsLoop() {
defer s.wg.Done()
for {
for s.moveVictims() != 0 {
// try to update all victim watchers
}
s.mu.RLock()
isEmpty := len(s.victims) == 0
s.mu.RUnlock()
var tickc <-chan time.Time
if !isEmpty {
tickc = time.After(10 * time.Millisecond)
}
select {
case <-tickc:
case <-s.victimc:
case <-s.stopc:
return
}
}
}
// moveVictims tries to update watches with already pending event data
func (s *watchableStore) moveVictims() (moved int) {
s.mu.Lock()
victims := s.victims
s.victims = nil
s.mu.Unlock()
var newVictim watcherBatch
for _, wb := range victims {
// try to send responses again
for w, eb := range wb {
// watcher has observed the store up to, but not including, w.minRev
rev := w.minRev - 1
if w.send(WatchResponse{WatchID: w.id, Events: eb.evs, Revision: rev}) {
pendingEventsGauge.Add(float64(len(eb.evs)))
} else {
if newVictim == nil {
newVictim = make(watcherBatch)
}
newVictim[w] = eb
continue
}
moved++
}
// assign completed victim watchers to unsync/sync
s.mu.Lock()
s.store.revMu.RLock()
curRev := s.store.currentRev
for w, eb := range wb {
if newVictim != nil && newVictim[w] != nil {
// couldn't send watch response; stays victim
continue
}
w.victim = false
if eb.moreRev != 0 {
w.minRev = eb.moreRev
}
if w.minRev <= curRev {
s.unsynced.add(w)
} else {
slowWatcherGauge.Dec()
s.synced.add(w)
}
}
s.store.revMu.RUnlock()
s.mu.Unlock()
}
if len(newVictim) > 0 {
s.mu.Lock()
s.victims = append(s.victims, newVictim)
s.mu.Unlock()
}
return moved
}
// syncWatchers syncs unsynced watchers by:
// 1. choose a set of watchers from the unsynced watcher group
// 2. iterate over the set to get the minimum revision and remove compacted watchers
// 3. use minimum revision to get all key-value pairs and send those events to watchers
// 4. remove synced watchers in set from unsynced group and move to synced group
func (s *watchableStore) syncWatchers() int {
s.mu.Lock()
defer s.mu.Unlock()
if s.unsynced.size() == 0 {
return 0
}
s.store.revMu.RLock()
defer s.store.revMu.RUnlock()
// in order to find key-value pairs from unsynced watchers, we need to
// find min revision index, and these revisions can be used to
// query the backend store of key-value pairs
curRev := s.store.currentRev
compactionRev := s.store.compactMainRev
wg, minRev := s.unsynced.choose(maxWatchersPerSync, curRev, compactionRev)
minBytes, maxBytes := newRevBytes(), newRevBytes()
revToBytes(revision{main: minRev}, minBytes)
revToBytes(revision{main: curRev + 1}, maxBytes)
// UnsafeRange returns keys and values. And in boltdb, keys are revisions.
// values are actual key-value pairs in backend.
tx := s.store.b.ReadTx()
tx.Lock()
revs, vs := tx.UnsafeRange(keyBucketName, minBytes, maxBytes, 0)
evs := kvsToEvents(wg, revs, vs)
tx.Unlock()
var victims watcherBatch
wb := newWatcherBatch(wg, evs)
for w := range wg.watchers {
w.minRev = curRev + 1
eb, ok := wb[w]
if !ok {
// bring un-notified watcher to synced
s.synced.add(w)
s.unsynced.delete(w)
continue
}
if eb.moreRev != 0 {
w.minRev = eb.moreRev
}
if w.send(WatchResponse{WatchID: w.id, Events: eb.evs, Revision: curRev}) {
pendingEventsGauge.Add(float64(len(eb.evs)))
} else {
if victims == nil {
victims = make(watcherBatch)
}
w.victim = true
}
if w.victim {
victims[w] = eb
} else {
if eb.moreRev != 0 {
// stay unsynced; more to read
continue
}
s.synced.add(w)
}
s.unsynced.delete(w)
}
s.addVictim(victims)
vsz := 0
for _, v := range s.victims {
vsz += len(v)
}
slowWatcherGauge.Set(float64(s.unsynced.size() + vsz))
return s.unsynced.size()
}
// kvsToEvents gets all events for the watchers from all key-value pairs
func kvsToEvents(wg *watcherGroup, revs, vals [][]byte) (evs []mvccpb.Event) {
for i, v := range vals {
var kv mvccpb.KeyValue
if err := kv.Unmarshal(v); err != nil {
plog.Panicf("cannot unmarshal event: %v", err)
}
if !wg.contains(string(kv.Key)) {
continue
}
ty := mvccpb.PUT
if isTombstone(revs[i]) {
ty = mvccpb.DELETE
// patch in mod revision so watchers won't skip
kv.ModRevision = bytesToRev(revs[i]).main
}
evs = append(evs, mvccpb.Event{Kv: &kv, Type: ty})
}
return evs
}
// notify notifies the fact that given event at the given rev just happened to
// watchers that watch on the key of the event.
func (s *watchableStore) notify(rev int64, evs []mvccpb.Event) {
var victim watcherBatch
for w, eb := range newWatcherBatch(&s.synced, evs) {
if eb.revs != 1 {
plog.Panicf("unexpected multiple revisions in notification")
}
if w.send(WatchResponse{WatchID: w.id, Events: eb.evs, Revision: rev}) {
pendingEventsGauge.Add(float64(len(eb.evs)))
} else {
// move slow watcher to victims
w.minRev = rev + 1
if victim == nil {
victim = make(watcherBatch)
}
w.victim = true
victim[w] = eb
s.synced.delete(w)
slowWatcherGauge.Inc()
}
}
s.addVictim(victim)
}
func (s *watchableStore) addVictim(victim watcherBatch) {
if victim == nil {
return
}
s.victims = append(s.victims, victim)
select {
case s.victimc <- struct{}{}:
default:
}
}
func (s *watchableStore) rev() int64 { return s.store.Rev() }
func (s *watchableStore) progress(w *watcher) {
s.mu.RLock()
defer s.mu.RUnlock()
if _, ok := s.synced.watchers[w]; ok {
w.send(WatchResponse{WatchID: w.id, Revision: s.rev()})
// If the ch is full, this watcher is receiving events.
// We do not need to send progress at all.
}
}
type watcher struct {
// the watcher key
key []byte
// end indicates the end of the range to watch.
// If end is set, the watcher is on a range.
end []byte
// victim is set when ch is blocked and undergoing victim processing
victim bool
// compacted is set when the watcher is removed because of compaction
compacted bool
// minRev is the minimum revision update the watcher will accept
minRev int64
id WatchID
fcs []FilterFunc
// a chan to send out the watch response.
// The chan might be shared with other watchers.
ch chan<- WatchResponse
}
func (w *watcher) send(wr WatchResponse) bool {
progressEvent := len(wr.Events) == 0
if len(w.fcs) != 0 {
ne := make([]mvccpb.Event, 0, len(wr.Events))
for i := range wr.Events {
filtered := false
for _, filter := range w.fcs {
if filter(wr.Events[i]) {
filtered = true
break
}
}
if !filtered {
ne = append(ne, wr.Events[i])
}
}
wr.Events = ne
}
// if all events are filtered out, we should send nothing.
if !progressEvent && len(wr.Events) == 0 {
return true
}
select {
case w.ch <- wr:
return true
default:
return false
}
}

Some files were not shown because too many files have changed in this diff Show More