about summary refs log tree commit diff stats
path: root/cmd/swarm
diff options
context:
space:
mode:
author    Elad <theman@elad.im> 2019-03-06 00:43:05 +0800
committer Anton Evangelatov <anton.evangelatov@gmail.com> 2019-03-06 00:43:05 +0800
commit   34c85def3eec09ee50ea2499824aeaa48705eae2 (patch)
tree     4f00f54edc49c8b286fa467c3826510568b4c1a6 /cmd/swarm
parent   81ed70015704737b3e5329314a62a3a5aaa74e8a (diff)
download go-tangerine-34c85def3eec09ee50ea2499824aeaa48705eae2.tar
go-tangerine-34c85def3eec09ee50ea2499824aeaa48705eae2.tar.gz
go-tangerine-34c85def3eec09ee50ea2499824aeaa48705eae2.tar.bz2
go-tangerine-34c85def3eec09ee50ea2499824aeaa48705eae2.tar.lz
go-tangerine-34c85def3eec09ee50ea2499824aeaa48705eae2.tar.xz
go-tangerine-34c85def3eec09ee50ea2499824aeaa48705eae2.tar.zst
go-tangerine-34c85def3eec09ee50ea2499824aeaa48705eae2.zip
cmd/swarm/swarm-smoke: sliding window test should not time out (#19152)
Diffstat (limited to 'cmd/swarm')
-rw-r--r--  cmd/swarm/flags.go                      3
-rw-r--r--  cmd/swarm/swarm-smoke/sliding_window.go 72
2 files changed, 45 insertions, 30 deletions
diff --git a/cmd/swarm/flags.go b/cmd/swarm/flags.go
index 39a273d87..5e1ada632 100644
--- a/cmd/swarm/flags.go
+++ b/cmd/swarm/flags.go
@@ -149,8 +149,9 @@ var (
}
SwarmStoreCacheCapacity = cli.UintFlag{
Name: "store.cache.size",
- Usage: "Number of recent chunks cached in memory (default 5000)",
+ Usage: "Number of recent chunks cached in memory",
EnvVar: SwarmEnvStoreCacheCapacity,
+ Value: 10000,
}
SwarmCompressedFlag = cli.BoolFlag{
Name: "compressed",
diff --git a/cmd/swarm/swarm-smoke/sliding_window.go b/cmd/swarm/swarm-smoke/sliding_window.go
index 750d96939..d589124bd 100644
--- a/cmd/swarm/swarm-smoke/sliding_window.go
+++ b/cmd/swarm/swarm-smoke/sliding_window.go
@@ -42,23 +42,16 @@ func slidingWindowCmd(ctx *cli.Context, tuid string) error {
errc <- slidingWindow(ctx, tuid)
}()
- select {
- case err := <-errc:
- if err != nil {
- metrics.GetOrRegisterCounter(fmt.Sprintf("%s.fail", commandName), nil).Inc(1)
- }
- return err
- case <-time.After(time.Duration(timeout) * time.Second):
- metrics.GetOrRegisterCounter(fmt.Sprintf("%s.timeout", commandName), nil).Inc(1)
-
- return fmt.Errorf("timeout after %v sec", timeout)
+ err := <-errc
+ if err != nil {
+ metrics.GetOrRegisterCounter(fmt.Sprintf("%s.fail", commandName), nil).Inc(1)
}
+ return err
}
func slidingWindow(ctx *cli.Context, tuid string) error {
var hashes []uploadResult //swarm hashes of the uploads
nodes := len(hosts)
- const iterationTimeout = 30 * time.Second
log.Info("sliding window test started", "tuid", tuid, "nodes", nodes, "filesize(kb)", filesize, "timeout", timeout)
uploadedBytes := 0
networkDepth := 0
@@ -66,6 +59,7 @@ func slidingWindow(ctx *cli.Context, tuid string) error {
outer:
for {
+ seed = int(time.Now().UTC().UnixNano())
log.Info("uploading to "+httpEndpoint(hosts[0])+" and syncing", "seed", seed)
t1 := time.Now()
@@ -79,6 +73,7 @@ outer:
}
metrics.GetOrRegisterResettingTimer("sliding-window.upload-time", nil).UpdateSince(t1)
+ metrics.GetOrRegisterGauge("sliding-window.upload-depth", nil).Update(int64(len(hashes)))
fhash, err := digest(bytes.NewReader(randomBytes))
if err != nil {
@@ -90,37 +85,56 @@ outer:
hashes = append(hashes, uploadResult{hash: hash, digest: fhash})
time.Sleep(time.Duration(syncDelay) * time.Second)
uploadedBytes += filesize * 1000
-
+ q := make(chan struct{}, 1)
+ d := make(chan struct{})
+ defer close(q)
+ defer close(d)
for i, v := range hashes {
- timeout := time.After(time.Duration(timeout) * time.Second)
+ timeoutC := time.After(time.Duration(timeout) * time.Second)
errored = false
- inner:
+ task:
for {
select {
- case <-timeout:
+ case q <- struct{}{}:
+ go func() {
+ var start time.Time
+ done := false
+ for !done {
+ log.Info("trying to retrieve hash", "hash", v.hash)
+ idx := 1 + rand.Intn(len(hosts)-1)
+ ruid := uuid.New()[:8]
+ start = time.Now()
+ // fetch hangs when swarm dies out, so we have to jump through a bit more hoops to actually
+ // catch the timeout, but also allow this retry logic
+ err := fetch(v.hash, httpEndpoint(hosts[idx]), v.digest, ruid, "")
+ if err != nil {
+ log.Error("error fetching hash", "err", err)
+ continue
+ }
+ done = true
+ }
+ metrics.GetOrRegisterResettingTimer("sliding-window.single.fetch-time", nil).UpdateSince(start)
+ d <- struct{}{}
+ }()
+ case <-d:
+ <-q
+ break task
+ case <-timeoutC:
errored = true
- log.Error("error retrieving hash. timeout", "hash idx", i, "err", err)
+ log.Error("error retrieving hash. timeout", "hash idx", i)
metrics.GetOrRegisterCounter("sliding-window.single.error", nil).Inc(1)
- break inner
+ break outer
default:
- idx := 1 + rand.Intn(len(hosts)-1)
- ruid := uuid.New()[:8]
- start := time.Now()
- err := fetch(v.hash, httpEndpoint(hosts[idx]), v.digest, ruid, "")
- if err != nil {
- continue inner
- }
- metrics.GetOrRegisterResettingTimer("sliding-window.single.fetch-time", nil).UpdateSince(start)
- break inner
}
}
- if errored {
- break outer
- }
networkDepth = i
metrics.GetOrRegisterGauge("sliding-window.network-depth", nil).Update(int64(networkDepth))
+ log.Info("sliding window test successfully fetched file", "currentDepth", networkDepth)
+ // this test might take a long time to finish - but we'd like to see metrics while they accumulate and not just when
+ // the test finishes. therefore emit the metrics on each iteration
+ emitMetrics(ctx)
}
}