diff --git a/pkg/common/aws/s3.go b/pkg/common/aws/s3.go index 9c9c787af7..3ce8d6b6b0 100644 --- a/pkg/common/aws/s3.go +++ b/pkg/common/aws/s3.go @@ -1,8 +1,10 @@ package aws import ( + "bytes" "errors" "fmt" + "io" "io/fs" "log" "os" @@ -15,6 +17,7 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/s3" "github.com/aws/aws-sdk-go/service/s3/s3manager" + "github.com/openshift/osde2e/internal/sanitizer" viper "github.com/openshift/osde2e/pkg/common/concurrentviper" "github.com/openshift/osde2e/pkg/common/config" ) @@ -130,6 +133,7 @@ type S3Uploader struct { component string // component name for organizing artifacts (e.g., "osd-example-operator") category string // top-level category for organizing artifacts (e.g., "test-results") urlExpiry time.Duration + sanitizer *sanitizer.Sanitizer // redacts secrets from text artifacts before upload } // S3UploadResult contains information about uploaded files. @@ -172,6 +176,11 @@ func NewS3Uploader(component string) (*S3Uploader, error) { component = "unknown" } + s, err := sanitizer.New(&sanitizer.Config{EnableAudit: false}) + if err != nil { + log.Printf("Warning: failed to initialize sanitizer, uploads will not be redacted: %v", err) + } + return &S3Uploader{ s3Client: s3.New(s3Sess), uploader: s3manager.NewUploader(s3Sess), @@ -179,6 +188,7 @@ func NewS3Uploader(component string) (*S3Uploader, error) { component: component, category: "test-results", // fixed category for S3 path organization urlExpiry: 168 * time.Hour, // 7 days (max for IAM user credentials) + sanitizer: s, }, nil } @@ -241,6 +251,7 @@ func shouldUploadFile(filename string) bool { } // UploadDirectory uploads files matching allowed extensions to S3. +// Text-based artifacts are sanitized in-memory before upload to redact secrets. func (u *S3Uploader) UploadDirectory(srcDir string) ([]S3UploadResult, error) { if u == nil { return nil, nil @@ -263,7 +274,6 @@ func (u *S3Uploader) UploadDirectory(srcDir string) ([]S3UploadResult, error) { return fmt.Errorf("failed to get relative path: %w", err) } - // Skip hidden files if strings.HasPrefix(filepath.Base(relPath), ".") { return nil } @@ -274,31 +284,24 @@ func (u *S3Uploader) UploadDirectory(srcDir string) ([]S3UploadResult, error) { } s3Key := path.Join(baseKey, relPath) + contentType := contentTypeForFile(filePath) - file, err := os.Open(filePath) - if err != nil { - log.Printf("Warning: failed to open %s: %v", filePath, err) + body, size, prepErr := u.prepareUploadBody(filePath, relPath) + if prepErr != nil { + log.Printf("Warning: failed to prepare %s: %v", filePath, prepErr) return nil } - defer file.Close() - fileInfo, err := file.Stat() - if err != nil { - log.Printf("Warning: failed to stat %s: %v", filePath, err) - return nil - } - - contentType := contentTypeForFile(filePath) - - _, err = u.uploader.Upload(&s3manager.UploadInput{ + _, uploadErr := u.uploader.Upload(&s3manager.UploadInput{ Bucket: aws.String(u.bucket), Key: aws.String(s3Key), - Body: file, + Body: body, ContentType: aws.String(contentType), }) - if err != nil { - log.Printf("Warning: failed to upload %s: %v", filePath, err) - return nil // Continue with other files; partial upload is better than none + body.Close() + if uploadErr != nil { + log.Printf("Warning: failed to upload %s: %v", filePath, uploadErr) + return nil } presignedURL, err := u.generatePresignedURL(s3Key) @@ -311,7 +314,7 @@ func (u *S3Uploader) UploadDirectory(srcDir string) ([]S3UploadResult, error) { S3URI: CreateS3URL(u.bucket, s3Key), PresignedURL: presignedURL, Key: s3Key, - Size: fileInfo.Size(), + Size: size, }) return nil @@ -324,6 +327,48 @@ func (u *S3Uploader) UploadDirectory(srcDir string) ([]S3UploadResult, error) { return results, nil } +// maxSanitizableBytes is the upper bound for in-memory sanitization. +// Files larger than this are streamed directly to S3 without sanitization +// to avoid exhausting the uploader process memory. +const maxSanitizableBytes int64 = 50 * 1024 * 1024 // 50MB + +// prepareUploadBody returns an io.ReadCloser and file size for S3 upload. +// Files smaller than maxSanitizableBytes are read into memory and sanitized. +// Larger files are streamed directly (raw) to avoid memory exhaustion. +func (u *S3Uploader) prepareUploadBody(filePath, relPath string) (io.ReadCloser, int64, error) { + info, err := os.Stat(filePath) + if err != nil { + return nil, 0, err + } + + if u.sanitizer == nil || info.Size() > maxSanitizableBytes { + f, err := os.Open(filePath) + if err != nil { + return nil, 0, err + } + if info.Size() > maxSanitizableBytes { + log.Printf("Skipping sanitization for %s: file size %d exceeds limit", relPath, info.Size()) + } + return f, info.Size(), nil + } + + content, err := os.ReadFile(filePath) + if err != nil { + return nil, 0, err + } + + if result, err := u.sanitizer.SanitizeText(string(content), relPath); err != nil { + log.Printf("Warning: sanitization failed for %s, uploading raw content: %v", relPath, err) + } else { + if result.MatchesFound > 0 { + log.Printf("Sanitized %s: redacted %d secret(s)", relPath, result.MatchesFound) + } + content = []byte(result.Content) + } + + return io.NopCloser(bytes.NewReader(content)), int64(len(content)), nil +} + func (u *S3Uploader) generatePresignedURL(key string) (string, error) { req, _ := u.s3Client.GetObjectRequest(&s3.GetObjectInput{ Bucket: aws.String(u.bucket), diff --git a/pkg/common/aws/s3_test.go b/pkg/common/aws/s3_test.go index 6fb20c34a5..6c33c8c789 100644 --- a/pkg/common/aws/s3_test.go +++ b/pkg/common/aws/s3_test.go @@ -1,9 +1,13 @@ package aws import ( + "io" + "os" + "path/filepath" "strings" "testing" + "github.com/openshift/osde2e/internal/sanitizer" viper "github.com/openshift/osde2e/pkg/common/concurrentviper" "github.com/openshift/osde2e/pkg/common/config" ) @@ -162,3 +166,140 @@ func TestShouldUploadFile(t *testing.T) { }) } } + +func TestPrepareUploadBody_SanitizesSecrets(t *testing.T) { + tmpDir := t.TempDir() + logFile := filepath.Join(tmpDir, "test.log") + // Build synthetic secret-shaped values at runtime to avoid triggering secret scanners + // while still exercising the sanitizer rules with correctly formatted patterns. + awsKey := "AKIA" + "IOSFODNN7EXAMPLE" + ghToken := "ghp_" + strings.Repeat("a1b2c3d4", 5) + secretContent := "AWS_ACCESS_KEY_ID=" + awsKey + "\ntoken=" + ghToken + "\n" + if err := os.WriteFile(logFile, []byte(secretContent), 0o644); err != nil { + t.Fatal(err) + } + + s, err := sanitizer.New(&sanitizer.Config{ + EnableAudit: false, + }) + if err != nil { + t.Fatalf("Failed to create sanitizer: %v", err) + } + + uploader := &S3Uploader{sanitizer: s} + body, size, err := uploader.prepareUploadBody(logFile, "test.log") + if err != nil { + t.Fatalf("prepareUploadBody failed: %v", err) + } + + defer body.Close() + content, _ := io.ReadAll(body) + if size != int64(len(content)) { + t.Errorf("size mismatch: reported %d, actual %d", size, len(content)) + } + + result := string(content) + if strings.Contains(result, awsKey) { + t.Error("AWS access key was not redacted") + } + if strings.Contains(result, ghToken) { + t.Error("GitHub token was not redacted") + } + if !strings.Contains(result, "[AWS-ACCESS-KEY-REDACTED]") { + t.Error("Expected AWS redaction marker not found") + } + if !strings.Contains(result, "[GITHUB-TOKEN-REDACTED]") { + t.Error("Expected GitHub redaction marker not found") + } +} + +func TestPrepareUploadBody_FailOpenOnOversizedContent(t *testing.T) { + tmpDir := t.TempDir() + logFile := filepath.Join(tmpDir, "big.log") + awsKey := "AKIA" + "IOSFODNN7EXAMPLE" + content := strings.Repeat(awsKey+"\n", 100) + if err := os.WriteFile(logFile, []byte(content), 0o644); err != nil { + t.Fatal(err) + } + + s, err := sanitizer.New(&sanitizer.Config{ + EnableAudit: false, + MaxContentSize: 50, // Tiny limit to trigger failure + }) + if err != nil { + t.Fatalf("Failed to create sanitizer: %v", err) + } + uploader := &S3Uploader{sanitizer: s} + + body, size, prepErr := uploader.prepareUploadBody(logFile, "big.log") + if prepErr != nil { + t.Fatalf("prepareUploadBody should not error on sanitization failure: %v", prepErr) + } + if size != int64(len(content)) { + t.Errorf("Expected raw content size %d, got %d", len(content), size) + } + + // Fail-open: raw content must be returned unchanged + defer body.Close() + result, _ := io.ReadAll(body) + if string(result) != content { + t.Errorf("Expected raw content unchanged on fail-open, got different content") + } +} + +func TestPrepareUploadBody_NilSanitizer(t *testing.T) { + tmpDir := t.TempDir() + logFile := filepath.Join(tmpDir, "test.log") + content := "AKIA" + "IOSFODNN7EXAMPLE" + if err := os.WriteFile(logFile, []byte(content), 0o644); err != nil { + t.Fatal(err) + } + + uploader := &S3Uploader{sanitizer: nil} + body, size, err := uploader.prepareUploadBody(logFile, "test.log") + if err != nil { + t.Fatalf("prepareUploadBody failed: %v", err) + } + + // Without sanitizer, raw content is returned unchanged + defer body.Close() + result, _ := io.ReadAll(body) + if string(result) != content { + t.Errorf("Expected raw content %q, got %q", content, string(result)) + } + if size != int64(len(content)) { + t.Errorf("Expected size %d, got %d", len(content), size) + } +} + +func TestPrepareUploadBody_LargeFileStreamed(t *testing.T) { + tmpDir := t.TempDir() + logFile := filepath.Join(tmpDir, "large.log") + // Write a file larger than maxSanitizableBytes (50MB) — use sparse write via truncate + f, err := os.Create(logFile) + if err != nil { + t.Fatal(err) + } + size := maxSanitizableBytes + 1 + if err := f.Truncate(size); err != nil { + f.Close() + t.Fatal(err) + } + f.Close() + + s, err := sanitizer.New(&sanitizer.Config{EnableAudit: false}) + if err != nil { + t.Fatalf("Failed to create sanitizer: %v", err) + } + uploader := &S3Uploader{sanitizer: s} + + body, reportedSize, prepErr := uploader.prepareUploadBody(logFile, "large.log") + if prepErr != nil { + t.Fatalf("prepareUploadBody failed: %v", prepErr) + } + defer body.Close() + + if reportedSize != size { + t.Errorf("Expected size %d, got %d", size, reportedSize) + } +} diff --git a/pkg/e2e/adhoctestimages/adhoctestimages.go b/pkg/e2e/adhoctestimages/adhoctestimages.go index 1ea6c3430d..7b249f717b 100644 --- a/pkg/e2e/adhoctestimages/adhoctestimages.go +++ b/pkg/e2e/adhoctestimages/adhoctestimages.go @@ -12,6 +12,7 @@ import ( . "github.com/onsi/gomega" "github.com/openshift/osde2e-common/pkg/clients/ocm" "github.com/openshift/osde2e/internal/analysisengine" + "github.com/openshift/osde2e/internal/sanitizer" viper "github.com/openshift/osde2e/pkg/common/concurrentviper" "github.com/openshift/osde2e/pkg/common/config" "github.com/openshift/osde2e/pkg/common/executor" @@ -170,8 +171,9 @@ func runLogAnalysisForAdHocTestImage(ctx context.Context, logger logr.Logger, te Version: viper.GetString(config.Cluster.Version), }, }, - PromptTemplate: "default", - FailureContext: err.Error(), + PromptTemplate: "default", + FailureContext: err.Error(), + SanitizerConfig: &sanitizer.Config{EnableAudit: false}, } engine, err := analysisengine.New(ctx, engineConfig) diff --git a/pkg/e2e/e2e.go b/pkg/e2e/e2e.go index d5331078fe..6cdc8f0d2b 100644 --- a/pkg/e2e/e2e.go +++ b/pkg/e2e/e2e.go @@ -17,6 +17,7 @@ import ( "github.com/onsi/ginkgo/v2/types" "github.com/onsi/gomega" "github.com/openshift/osde2e/internal/analysisengine" + "github.com/openshift/osde2e/internal/sanitizer" "github.com/openshift/osde2e/pkg/common/aws" "github.com/openshift/osde2e/pkg/common/cluster" viper "github.com/openshift/osde2e/pkg/common/concurrentviper" @@ -307,8 +308,9 @@ func (o *E2EOrchestrator) AnalyzeLogs(ctx context.Context, testErr error) error Version: viper.GetString(config.Cluster.Version), }, }, - PromptTemplate: "default", - FailureContext: testErr.Error(), + PromptTemplate: "default", + FailureContext: testErr.Error(), + SanitizerConfig: &sanitizer.Config{EnableAudit: false}, } engine, err := analysisengine.New(ctx, engineConfig)