Suppress test failures due to logs after tests complete (cadence-work…

…flow#6067) "Fixes" test failures like this, by sweeping them under the rug: ``` 2024/05/28 21:39:29 ----- Done ----- 2024/05/28 21:39:29 Schema setup complete PASS coverage: 27.3% of statements in github.com/uber/cadence/client/..., github.com/uber/cadence/common/..., github.com/uber/cadence/host/..., github.com/uber/cadence/service/..., github.com/uber/cadence/tools/... panic: Log in goroutine after TestIntegrationSuite has completed: 2024-05-28T21:39:39.501Z DEBUG Selected default store shard for tasklist {"store-shard": "NonShardedStore", "wf-task-list-name": "9985f719-4b6a-4f0a-97c7-41a9e00d2414", "logging-call-at": "sharding_policy.go:100"} goroutine 72245 [running]: testing.(*common).logDepth(0xc0016ee680, {0xc00695ae00, 0xd5}, 0x3) /usr/local/go/src/testing/testing.go:1028 +0x6d4 testing.(*common).log(...) /usr/local/go/src/testing/testing.go:1010 testing.(*common).Logf(0xc0016ee680, {0x4a884b4, 0x2}, {0xc0021324b0, 0x1, 0x1}) /usr/local/go/src/testing/testing.go:1061 +0xa5 go.uber.org/zap/zaptest.testingWriter.Write({{0x662be10?, 0xc0016ee680?}, 0xa0?}, {0xc003ecf400, 0xd6, 0x400}) /go/pkg/mod/go.uber.org/[email protected]/zaptest/logger.go:130 +0x11e go.uber.org/zap/zapcore.(*ioCore).Write(0xc0030ad920, {0xff, {0xc18db1a6dddeef12, 0xa3b5c069bd, 0x8268f40}, {0x0, 0x0}, {0x4af6670, 0x29}, {0x0, ...}, ...}, ...) ... ``` When the test completes, it will simply log to stderr rather than the test logger: ``` ❯ go test -count 1 -v ./... === RUN TestLoggerShouldNotFailIfLoggedLate --- PASS: TestLoggerShouldNotFailIfLoggedLate (0.00s) PASS 2024-05-29T16:20:50.742-0500 INFO COULD FAIL TEST "TestLoggerShouldNotFailIfLoggedLate", logged too late: too late, orig{"logging-call-at": "testlogger_test.go:41", "log_stack": "github.com/uber/cadence/common/log/testlogger.(*fallbackTestCore).Write\n\t/User... 2024-05-29T16:20:50.742-0500 INFO COULD FAIL TEST "TestLoggerShouldNotFailIfLoggedLate", logged too late: too late, with{"actor-id": "testing", "logging-call-at": "testlogger_test.go:42", "log_stack": "github.com/uber/cadence/common/log/testlogger.(*fallbackTestCore).Write\n\t/User... ok github.com/uber/cadence/common/log/testlogger 0.586s ``` Ignoring the correctness part of the problem, this gives us the best of both worlds: - most logs are grouped by the test that produced them - logs produced due to incomplete shutdowns are still produced (as long as `-v` or some other test fails), but will not directly fail the test(s) I am not overly fond of this, but some of our largest tests are quite flaky due to these logs, and that really isn't helping anything. Seems like a reasonable tradeoff, and we can move to an opt-in model eventually instead of applying it everywhere like this PR does.
xinghui · May 31, 2024 · 4edac98 · 4edac98
1 parent 795b1ec
commit 4edac98
Show file tree

Hide file tree

Showing 6 changed files with 234 additions and 12 deletions.
diff --git a/canary/workflow_test.go b/canary/workflow_test.go
@@ -31,7 +31,8 @@ import (
 	"go.uber.org/cadence/mocks"
 	"go.uber.org/cadence/testsuite"
 	"go.uber.org/cadence/worker"
-	"go.uber.org/zap/zaptest"
+
+	"github.com/uber/cadence/common/log/testlogger"
 )
 
 type (
@@ -52,7 +53,7 @@ func TestWorkflowTestSuite(t *testing.T) {
 }
 
 func (s *workflowTestSuite) SetupTest() {
-	s.SetLogger(zaptest.NewLogger(s.T()))
+	s.SetLogger(testlogger.NewZap(s.T()))
 	s.env = s.NewTestWorkflowEnvironment()
 	s.env.Test(s.T())
 }

diff --git a/common/log/testlogger/testlogger.go b/common/log/testlogger/testlogger.go
@@ -23,14 +23,122 @@
 package testlogger
 
 import (
+	"fmt"
+	"slices"
+	"strings"
+
+	"github.com/stretchr/testify/require"
+	"go.uber.org/atomic"
+	"go.uber.org/zap"
+	"go.uber.org/zap/zapcore"
 	"go.uber.org/zap/zaptest"
 
 	"github.com/uber/cadence/common/log"
 	"github.com/uber/cadence/common/log/loggerimpl"
 )
 
+type TestingT interface {
+	zaptest.TestingT
+	Cleanup(func()) // not currently part of zaptest.TestingT
+}
+
 // New is a helper to create new development logger in unit test
-func New(t zaptest.TestingT) log.Logger {
+func New(t TestingT) log.Logger {
 	// test logger that emits all logs (none dropped / sample func never returns false)
-	return loggerimpl.NewLogger(zaptest.NewLogger(t), loggerimpl.WithSampleFunc(func(int) bool { return true }))
+	return loggerimpl.NewLogger(NewZap(t), loggerimpl.WithSampleFunc(func(int) bool { return true }))
+}
+
+// NewZap makes a new test-oriented logger that prevents bad-lifecycle logs from failing tests.
+func NewZap(t TestingT) *zap.Logger {
+	/*
+		HORRIBLE HACK due to async shutdown, both in our code and in libraries (e.g. gocql):
+		normally, logs produced after a test finishes will *intentionally* fail the test and/or
+		cause data to race on the test's internal `t.done` field.
+
+		that's a good thing, it reveals possibly-dangerously-flawed lifecycle management.
+
+		unfortunately some of our code and some libraries do not have good lifecycle management,
+		and this cannot easily be patched from the outside.
+
+		so this logger cheats: after a test completes, it logs to stderr rather than TestingT.
+		EVERY ONE of these logs is bad and we should not produce them, but it's causing many
+		otherwise-useful tests to be flaky, and that's a larger interruption than is useful.
+	*/
+	logAfterComplete, err := zap.NewDevelopment()
+	require.NoError(t, err, "could not build a fallback zap logger")
+	replaced := &fallbackTestCore{
+		t:         t,
+		fallback:  logAfterComplete.Core(),
+		testing:   zaptest.NewLogger(t).Core(),
+		completed: &atomic.Bool{},
+	}
+
+	t.Cleanup(replaced.UseFallback) // switch to fallback before ending the test
+
+	return zap.New(replaced)
 }
+
+type fallbackTestCore struct {
+	t         TestingT
+	fallback  zapcore.Core
+	testing   zapcore.Core
+	completed *atomic.Bool
+}
+
+var _ zapcore.Core = (*fallbackTestCore)(nil)
+
+func (f *fallbackTestCore) UseFallback() {
+	f.completed.Store(true)
+}
+
+func (f *fallbackTestCore) Enabled(level zapcore.Level) bool {
+	if f.completed.Load() {
+		return f.fallback.Enabled(level)
+	}
+	return f.testing.Enabled(level)
+}
+
+func (f *fallbackTestCore) With(fields []zapcore.Field) zapcore.Core {
+	// need to copy and defer, else the returned core will be used at an
+	// arbitrarily later point in time, possibly after the test has completed.
+	return &fallbackTestCore{
+		t:         f.t,
+		fallback:  f.fallback.With(fields),
+		testing:   f.testing.With(fields),
+		completed: f.completed,
+	}
+}
+
+func (f *fallbackTestCore) Check(entry zapcore.Entry, checked *zapcore.CheckedEntry) *zapcore.CheckedEntry {
+	// see other Check impls, all look similar.
+	// this defers the "where to log" decision to Write, as `f` is the core that will write.
+	if f.fallback.Enabled(entry.Level) {
+		return checked.AddCore(entry, f)
+	}
+	return checked // do not add any cores
+}
+
+func (f *fallbackTestCore) Write(entry zapcore.Entry, fields []zapcore.Field) error {
+	if f.completed.Load() {
+		entry.Message = fmt.Sprintf("COULD FAIL TEST %q, logged too late: %v", f.t.Name(), entry.Message)
+
+		hasStack := slices.ContainsFunc(fields, func(field zapcore.Field) bool {
+			// no specific stack-trace type, so just look for probable fields.
+			return strings.Contains(strings.ToLower(field.Key), "stack")
+		})
+		if !hasStack {
+			fields = append(fields, zap.Stack("log_stack"))
+		}
+		return f.fallback.Write(entry, fields)
+	}
+	return f.testing.Write(entry, fields)
+}
+
+func (f *fallbackTestCore) Sync() error {
+	if f.completed.Load() {
+		return f.fallback.Sync()
+	}
+	return f.testing.Sync()
+}
+
+var _ zapcore.Core = (*fallbackTestCore)(nil)
diff --git a/common/log/testlogger/testlogger_test.go b/common/log/testlogger/testlogger_test.go
@@ -0,0 +1,116 @@
+// The MIT License (MIT)
+
+// Copyright (c) 2017-2020 Uber Technologies Inc.
+
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+package testlogger
+
+import (
+	"fmt"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/uber/cadence/common/log"
+	"github.com/uber/cadence/common/log/tag"
+)
+
+var (
+	done   = make(chan struct{})
+	logged = make(chan struct{})
+)
+
+func TestMain(m *testing.M) {
+	code := m.Run()
+	// ensure synchronization between t.done and t.logf, else this test is extremely flaky.
+	// for details see: https://github.com/golang/go/issues/67701
+	close(done)
+	select {
+	case <-logged:
+		os.Exit(code)
+	case <-time.After(time.Second): // should be MUCH faster
+		_, _ = fmt.Fprintln(os.Stderr, "timed out waiting for test to log")
+		os.Exit(1)
+	}
+}
+
+// Unfortunately a moderate hack, to work around our faulty lifecycle management,
+// and some libraries with issues as well.
+// Ideally this test WOULD fail, but that's much harder to assert "safely".
+func TestLoggerShouldNotFailIfLoggedLate(t *testing.T) {
+	origLogger := New(t)
+	// if With does not defer core selection, this will fail the test
+	// by sending the logs to t.Logf
+	withLogger := origLogger.WithTags(tag.ActorID("testing")) // literally any tag
+	origLogger.Info("before is fine, orig")
+	withLogger.Info("before is fine, with")
+	go func() {
+		<-done
+		origLogger.Info("too late, orig")
+		withLogger.Info("too late, with")
+		close(logged)
+	}()
+}
+
+func TestSubtestShouldNotFail(t *testing.T) {
+	// when complete, a subtest's too-late logs just get pushed to the parent,
+	// and do not fail any tests.  they only fail when no running parent exists.
+	//
+	// if Go changes this behavior, this test could fail, otherwise AFAICT it
+	// should be stable.
+	assertDoesNotFail := func(name string, setup, log func(t *testing.T)) {
+		// need to wrap in something that will out-live the "real" test,
+		// to ensure there is a running parent test to push logs toward.
+		t.Run(name, func(t *testing.T) {
+			// same setup as TestMain but contained within this sub-test
+			var (
+				done   = make(chan struct{})
+				logged = make(chan struct{})
+			)
+			t.Run("inner", func(t *testing.T) {
+				setup(t)
+				go func() {
+					<-done
+					// despite being too late, the parent test is still running
+					// so this does not fail the test.
+					log(t)
+					close(logged)
+				}()
+				time.AfterFunc(10*time.Millisecond, func() {
+					close(done)
+				})
+			})
+			<-logged
+		})
+	}
+
+	assertDoesNotFail("real", func(t *testing.T) {
+		// no setup needed
+	}, func(t *testing.T) {
+		t.Logf("too late but allowed")
+	})
+
+	var l log.Logger
+	assertDoesNotFail("wrapped", func(t *testing.T) {
+		l = New(t)
+	}, func(t *testing.T) {
+		l.Info("too late but allowed")
+	})
+}
diff --git a/common/reconciliation/invariant/staleWorkflow_test.go b/common/reconciliation/invariant/staleWorkflow_test.go
@@ -31,9 +31,9 @@ import (
 	"github.com/golang/mock/gomock"
 	"github.com/google/uuid"
 	"github.com/stretchr/testify/assert"
-	"go.uber.org/zap/zaptest"
 
 	"github.com/uber/cadence/common"
+	"github.com/uber/cadence/common/log/testlogger"
 	"github.com/uber/cadence/common/persistence"
 	"github.com/uber/cadence/common/types"
 )
@@ -44,7 +44,7 @@ func setup(t *testing.T, retentionDays int) (pr *persistence.MockRetryer, impl *
 	impl = &staleWorkflowCheck{
 		pr:       m,
 		dc:       nil, // not used by these tests
-		log:      zaptest.NewLogger(t),
+		log:      testlogger.NewZap(t),
 		testable: &mocked{},
 	}
 	withDomain(retentionDays, impl)

diff --git a/service/worker/archiver/workflow_test.go b/service/worker/archiver/workflow_test.go
@@ -28,7 +28,6 @@ import (
 	"go.uber.org/cadence/testsuite"
 	"go.uber.org/cadence/worker"
 	"go.uber.org/cadence/workflow"
-	"go.uber.org/zap/zaptest"
 
 	"github.com/uber/cadence/common/dynamicconfig"
 	"github.com/uber/cadence/common/log"
@@ -136,7 +135,7 @@ func (s *workflowSuite) TestArchivalWorkflow_Success() {
 }
 
 func (s *workflowSuite) TestReplayArchiveHistoryWorkflow() {
-	logger := zaptest.NewLogger(s.T())
+	logger := testlogger.NewZap(s.T())
 	globalLogger = workflowTestLogger
 	globalMetricsClient = metrics.NewClient(tally.NewTestScope("replay", nil), metrics.Worker)
 	globalConfig = &Config{

diff --git a/service/worker/indexer/esProcessor_test.go b/service/worker/indexer/esProcessor_test.go
@@ -28,7 +28,6 @@ import (
 
 	"github.com/stretchr/testify/mock"
 	"github.com/stretchr/testify/suite"
-	"go.uber.org/zap/zaptest"
 
 	"github.com/uber/cadence/.gen/go/indexer"
 	"github.com/uber/cadence/common"
@@ -38,7 +37,7 @@ import (
 	"github.com/uber/cadence/common/elasticsearch/bulk"
 	mocks2 "github.com/uber/cadence/common/elasticsearch/bulk/mocks"
 	esMocks "github.com/uber/cadence/common/elasticsearch/mocks"
-	"github.com/uber/cadence/common/log/loggerimpl"
+	"github.com/uber/cadence/common/log/testlogger"
 	msgMocks "github.com/uber/cadence/common/messaging/mocks"
 	"github.com/uber/cadence/common/metrics"
 	"github.com/uber/cadence/common/metrics/mocks"
@@ -78,12 +77,11 @@ func (s *esProcessorSuite) SetupTest() {
 		ESProcessorFlushInterval: dynamicconfig.GetDurationPropertyFn(1 * time.Minute),
 	}
 	s.mockBulkProcessor = &mocks2.GenericBulkProcessor{}
-	zapLogger := zaptest.NewLogger(s.T())
 	s.mockScope = &mocks.Scope{}
 
 	p := &ESProcessorImpl{
 		config:     config,
-		logger:     loggerimpl.NewLogger(zapLogger),
+		logger:     testlogger.New(s.T()),
 		scope:      s.mockScope,
 		msgEncoder: defaultEncoder,
 	}