From 2515341f4cb3de589e3bc5ce1fc4e927d408cf3c Mon Sep 17 00:00:00 2001 From: = Date: Mon, 8 Jun 2026 11:56:40 +0200 Subject: [PATCH 1/6] Add necessary config --- chart/templates/configmap.yaml | 7 +++++++ chart/values.yaml | 10 ++++++++++ src/configs/config.yaml | 7 +++++++ src/internal/analysis/patterns.go | 30 ++++++++++++++++++++++++++++++ src/internal/config/config.go | 30 ++++++++++++++++++++++-------- 5 files changed, 76 insertions(+), 8 deletions(-) create mode 100644 src/internal/analysis/patterns.go diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml index 184f04b..1d83491 100644 --- a/chart/templates/configmap.yaml +++ b/chart/templates/configmap.yaml @@ -38,4 +38,11 @@ data: logging: level: "info" format: "json" + + static_analysis: + enabled: {{ .Values.staticAnalysis.enabled }} + check_network: {{ .Values.staticAnalysis.checkNetwork }} + check_exec: {{ .Values.staticAnalysis.checkExec }} + check_reflection: {{ .Values.staticAnalysis.checkReflection }} + check_filesystem: {{ .Values.staticAnalysis.checkFilesystem }} {{- end }} diff --git a/chart/values.yaml b/chart/values.yaml index 011e0c7..7846fe5 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -33,6 +33,16 @@ tls: # Kubernetes secret name containing tls.crt and tls.key secretName: "" +# ============================================================ +# Static Code Analysis (Java JARs) +# ============================================================ +staticAnalysis: + enabled: false + checkNetwork: false + checkExec: false + checkReflection: false + checkFilesystem: false + # ============================================================ # Gateway API (HTTPRoutes only) # ============================================================ diff --git a/src/configs/config.yaml b/src/configs/config.yaml index 223e24e..b841e16 100644 --- a/src/configs/config.yaml +++ b/src/configs/config.yaml @@ -25,3 +25,10 @@ metrics: logging: level: "info" format: "json" + +static_analysis: + 
enabled: false + check_network: true + check_exec: true + check_reflection: false + check_filesystem: false diff --git a/src/internal/analysis/patterns.go b/src/internal/analysis/patterns.go new file mode 100644 index 0000000..17c6096 --- /dev/null +++ b/src/internal/analysis/patterns.go @@ -0,0 +1,30 @@ +var networkPatterns = []string{ + "java/net/Socket", + "java/net/ServerSocket", + "java/net/DatagramSocket", + "java/net/URL", + "java/net/HttpURLConnection", + "java/net/InetAddress", + "java/net/URLConnection", +} + +var execPatterns = []string{ + "java/lang/Runtime", + "java/lang/ProcessBuilder", + "java/lang/Process", +} + +var reflectionPatterns = []string{ + "java/lang/reflect/Method", + "java/lang/reflect/Field", + "java/lang/ClassLoader", + "sun/misc/Unsafe", +} + +var filesystemPatterns = []string{ + "java/io/FileOutputStream", + "java/io/FileWriter", + "java/io/RandomAccessFile", + "java/nio/file/Files", + "java/nio/channels/FileChannel", +} \ No newline at end of file diff --git a/src/internal/config/config.go b/src/internal/config/config.go index 5079439..6a777bc 100644 --- a/src/internal/config/config.go +++ b/src/internal/config/config.go @@ -9,13 +9,14 @@ import ( ) type Config struct { - Server ServerConfig `mapstructure:"server"` - Storage StorageConfig `mapstructure:"storage"` - Cache CacheConfig `mapstructure:"cache"` - Auth AuthConfig `mapstructure:"auth"` - Metrics MetricsConfig `mapstructure:"metrics"` - Logging LoggingConfig `mapstructure:"logging"` - Sentry SentryConfig `mapstructure:"sentry"` + Server ServerConfig `mapstructure:"server"` + Storage StorageConfig `mapstructure:"storage"` + Cache CacheConfig `mapstructure:"cache"` + Auth AuthConfig `mapstructure:"auth"` + Metrics MetricsConfig `mapstructure:"metrics"` + Logging LoggingConfig `mapstructure:"logging"` + Sentry SentryConfig `mapstructure:"sentry"` + StaticAnalysis StaticAnalysisConfig `mapstructure:"static_analysis"` } type ServerConfig struct { @@ -67,6 +68,14 @@ type 
SentryConfig struct { Enabled bool `mapstructure:"enabled"` } +type StaticAnalysisConfig struct { + Enabled bool `mapstructure:"enabled"` + CheckNetwork bool `mapstructure:"check_network"` + CheckExec bool `mapstructure:"check_exec"` + CheckReflection bool `mapstructure:"check_reflection"` + CheckFilesystem bool `mapstructure:"check_filesystem"` +} + func Load(configPath string) (*Config, error) { v := viper.New() @@ -94,6 +103,12 @@ func Load(configPath string) (*Config, error) { v.SetDefault("logging.level", "info") v.SetDefault("logging.format", "json") + v.SetDefault("static_analysis.enabled", false) + v.SetDefault("static_analysis.check_network", false) + v.SetDefault("static_analysis.check_exec", false) + v.SetDefault("static_analysis.check_reflection", false) + v.SetDefault("static_analysis.check_filesystem", false) + // Read from config file if provided if configPath != "" { v.SetConfigFile(configPath) @@ -148,4 +163,3 @@ func (c *Config) Validate() error { func (c *Config) MaxEntrySizeBytes() int64 { return c.Cache.MaxEntrySizeMB * 1024 * 1024 } - From 4a9fc6f6ef0b79f55263650304d2719d4203df62 Mon Sep 17 00:00:00 2001 From: = Date: Mon, 8 Jun 2026 12:21:29 +0200 Subject: [PATCH 2/6] Add first version static code analysis --- src/go.mod | 2 +- src/internal/analysis/analyzer.go | 117 +++++++++++++++++++++++++++++ src/internal/analysis/classfile.go | 77 +++++++++++++++++++ src/internal/analysis/patterns.go | 20 ++++- src/internal/handler/cache_put.go | 56 ++++++++------ src/internal/handler/handler.go | 9 ++- src/internal/server/server.go | 18 ++++- 7 files changed, 270 insertions(+), 29 deletions(-) create mode 100644 src/internal/analysis/analyzer.go create mode 100644 src/internal/analysis/classfile.go diff --git a/src/go.mod b/src/go.mod index 312b502..44ef77c 100644 --- a/src/go.mod +++ b/src/go.mod @@ -24,7 +24,7 @@ require ( github.com/bytedance/sonic/loader v0.5.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cloudwego/base64x 
v0.1.6 // indirect - github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect + github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirectrelo github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/gabriel-vasile/mimetype v1.4.13 // indirect github.com/gin-contrib/sse v1.1.0 // indirect diff --git a/src/internal/analysis/analyzer.go b/src/internal/analysis/analyzer.go new file mode 100644 index 0000000..76c9ea3 --- /dev/null +++ b/src/internal/analysis/analyzer.go @@ -0,0 +1,117 @@ +package analysis + +import ( + "archive/zip" + "bytes" + "fmt" + "io" + "strings" + + "github.com/kevingruber/gradle-cache/internal/config" +) + +// Violation describes a single forbidden API reference found in a class file. +type Violation struct { + Class string `json:"class"` // e.g. "com/student/Homework" + Matched string `json:"matched"` // e.g. "java/net/Socket" + Category string `json:"category"` // e.g. "network" +} + +// Analyzer inspects JAR bytecode for forbidden API usage. +type Analyzer struct { + rules []rule +} + +type rule struct { + category string + patterns []string +} + +// New creates an Analyzer whose active rules are determined by cfg. +func New(cfg config.StaticAnalysisConfig) *Analyzer { + a := &Analyzer{} + if cfg.CheckNetwork { + a.rules = append(a.rules, rule{"network", networkPatterns}) + } + if cfg.CheckExec { + a.rules = append(a.rules, rule{"exec", execPatterns}) + } + if cfg.CheckReflection { + a.rules = append(a.rules, rule{"reflection", reflectionPatterns}) + } + if cfg.CheckFilesystem { + a.rules = append(a.rules, rule{"filesystem", filesystemPatterns}) + } + return a +} + +// Check inspects data as a JAR file and returns any forbidden API violations. +// Returns nil, nil when data is not a JAR (so non-JAR artifacts pass silently). 
+func (a *Analyzer) Check(data []byte) ([]Violation, error) { + if !isJAR(data) { + return nil, nil + } + + zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + return nil, fmt.Errorf("failed to open JAR: %w", err) + } + + seen := make(map[string]struct{}) + var violations []Violation + + for _, f := range zr.File { + if !strings.HasSuffix(f.Name, ".class") { + continue + } + + rc, err := f.Open() + if err != nil { + // Skip unreadable entries rather than failing the whole check. + continue + } + classData, err := io.ReadAll(rc) + rc.Close() + if err != nil { + continue + } + + poolStrings, err := extractConstantPoolStrings(classData) + if err != nil { + // Malformed class file — skip it. + continue + } + + className := strings.TrimSuffix(f.Name, ".class") + + for _, s := range poolStrings { + for _, r := range a.rules { + for _, pattern := range r.patterns { + if !strings.HasPrefix(s, pattern) { + continue + } + key := className + "|" + s + "|" + r.category + if _, dup := seen[key]; dup { + continue + } + seen[key] = struct{}{} + violations = append(violations, Violation{ + Class: className, + Matched: s, + Category: r.category, + }) + } + } + } + } + + return violations, nil +} + +// isJAR returns true when data begins with the ZIP local file header signature. +// JARs are ZIP files, so this signature (PK\x03\x04) is the correct check. +func isJAR(data []byte) bool { + return len(data) >= 4 && + data[0] == 0x50 && data[1] == 0x4B && + data[2] == 0x03 && data[3] == 0x04 +} diff --git a/src/internal/analysis/classfile.go b/src/internal/analysis/classfile.go new file mode 100644 index 0000000..f106f3b --- /dev/null +++ b/src/internal/analysis/classfile.go @@ -0,0 +1,77 @@ +package analysis + +import ( + "encoding/binary" + "fmt" +) + +// extractConstantPoolStrings parses a Java class file and returns every UTF-8 +// string from its constant pool. 
Class names, method names, field names, and +// type descriptors all appear there verbatim, so scanning these strings is +// sufficient to detect forbidden API references without executing any code. +func extractConstantPoolStrings(data []byte) ([]string, error) { + if len(data) < 10 { + return nil, fmt.Errorf("class file too short (%d bytes)", len(data)) + } + + // Validate magic bytes: 0xCAFEBABE + if data[0] != 0xCA || data[1] != 0xFE || data[2] != 0xBA || data[3] != 0xBE { + return nil, fmt.Errorf("missing class file magic bytes") + } + + // Skip magic (4) + minor_version (2) + major_version (2) + pos := 8 + + if pos+2 > len(data) { + return nil, fmt.Errorf("truncated before constant_pool_count") + } + cpCount := int(binary.BigEndian.Uint16(data[pos : pos+2])) + pos += 2 + + var result []string + + // Pool indices run from 1 to cpCount-1. + for i := 1; i < cpCount; i++ { + if pos >= len(data) { + return nil, fmt.Errorf("unexpected end of constant pool at entry %d", i) + } + + tag := data[pos] + pos++ + + switch tag { + case 1: // CONSTANT_Utf8 — variable length string + if pos+2 > len(data) { + return nil, fmt.Errorf("truncated Utf8 length at entry %d", i) + } + length := int(binary.BigEndian.Uint16(data[pos : pos+2])) + pos += 2 + if pos+length > len(data) { + return nil, fmt.Errorf("truncated Utf8 data at entry %d", i) + } + result = append(result, string(data[pos:pos+length])) + pos += length + + case 3, 4: // CONSTANT_Integer, CONSTANT_Float + pos += 4 + + case 5, 6: // CONSTANT_Long, CONSTANT_Double — consume two pool slots + pos += 8 + i++ + + case 7, 8, 16, 19, 20: // Class, String, MethodType, Module, Package + pos += 2 + + case 9, 10, 11, 12, 17, 18: // Fieldref, Methodref, InterfaceMethodref, NameAndType, Dynamic, InvokeDynamic + pos += 4 + + case 15: // CONSTANT_MethodHandle + pos += 3 + + default: + return nil, fmt.Errorf("unknown constant pool tag %d at entry %d", tag, i) + } + } + + return result, nil +} diff --git 
a/src/internal/analysis/patterns.go b/src/internal/analysis/patterns.go index 17c6096..194370e 100644 --- a/src/internal/analysis/patterns.go +++ b/src/internal/analysis/patterns.go @@ -1,11 +1,24 @@ +package analysis + +// Forbidden API prefix lists, grouped by category. +// HasPrefix matching is used so e.g. "java/net/Socket" also covers +// SocketInputStream, SocketOutputStream, etc. A trailing slash like "sun/net/" +// catches the entire package subtree. + var networkPatterns = []string{ "java/net/Socket", "java/net/ServerSocket", "java/net/DatagramSocket", + "java/net/MulticastSocket", "java/net/URL", "java/net/HttpURLConnection", "java/net/InetAddress", + "java/net/InetSocketAddress", "java/net/URLConnection", + "java/nio/channels/SocketChannel", + "java/nio/channels/ServerSocketChannel", + "java/nio/channels/DatagramChannel", + "sun/net/", } var execPatterns = []string{ @@ -17,14 +30,19 @@ var execPatterns = []string{ var reflectionPatterns = []string{ "java/lang/reflect/Method", "java/lang/reflect/Field", + "java/lang/reflect/Constructor", "java/lang/ClassLoader", + "java/lang/invoke/MethodHandle", "sun/misc/Unsafe", } var filesystemPatterns = []string{ "java/io/FileOutputStream", + "java/io/FileInputStream", "java/io/FileWriter", + "java/io/FileReader", "java/io/RandomAccessFile", "java/nio/file/Files", + "java/nio/file/Path", "java/nio/channels/FileChannel", -} \ No newline at end of file +} diff --git a/src/internal/handler/cache_put.go b/src/internal/handler/cache_put.go index f2fa8fc..726726c 100644 --- a/src/internal/handler/cache_put.go +++ b/src/internal/handler/cache_put.go @@ -1,13 +1,17 @@ package handler import ( - "github.com/gin-gonic/gin" + "bytes" "io" "net/http" + + "github.com/gin-gonic/gin" ) // Put handles PUT requests to store cache entries. // Gradle expects: 2xx on success, 413 if too large. +// If static analysis is enabled and the artifact is a JAR, forbidden API usage +// causes a 403 and the artifact is not stored. 
func (h *CacheHandler) Put(c *gin.Context) { key := c.Param("key") if key == "" { @@ -15,7 +19,7 @@ func (h *CacheHandler) Put(c *gin.Context) { return } - // Check Content-Length header for size validation + // Reject early if Content-Length already exceeds the limit. contentLength := c.Request.ContentLength if contentLength > h.maxEntrySize { h.logger.Warn(). @@ -27,32 +31,40 @@ func (h *CacheHandler) Put(c *gin.Context) { return } - // Handle Expect: 100-continue - // Gin/Go handles this automatically, but we validate size first + // Always buffer the full body so analysis can inspect it before storage. + // The +1 lets us detect an over-limit chunked body after reading. + data, err := io.ReadAll(io.LimitReader(c.Request.Body, h.maxEntrySize+1)) + if err != nil { + h.logger.Error().Err(err).Str("key", key).Msg("failed to read request body") + c.Status(http.StatusInternalServerError) + return + } - // For chunked transfers or unknown size, we need to handle differently - if contentLength < 0 { - // Read with size limit - limitedReader := io.LimitReader(c.Request.Body, h.maxEntrySize+1) - data, err := io.ReadAll(limitedReader) - if err != nil { - h.logger.Error().Err(err).Str("key", key).Msg("failed to read request body") - c.Status(http.StatusInternalServerError) - return - } + if int64(len(data)) > h.maxEntrySize { + c.Status(http.StatusRequestEntityTooLarge) + return + } - if int64(len(data)) > h.maxEntrySize { - c.Status(http.StatusRequestEntityTooLarge) + // Run static analysis when enabled. Analysis errors are non-fatal: a broken + // parser should not block a legitimate upload, so we log and continue. + if h.analyzer != nil { + violations, err := h.analyzer.Check(data) + if err != nil { + h.logger.Warn().Err(err).Str("key", key).Msg("static analysis failed, skipping") + } else if len(violations) > 0 { + h.logger.Warn(). + Str("key", key). + Int("violations", len(violations)). 
+ Msg("rejected artifact: forbidden API usage detected") + c.JSON(http.StatusForbidden, gin.H{ + "error": "artifact contains forbidden API usage", + "violations": violations, + }) return } - - contentLength = int64(len(data)) - c.Request.Body = io.NopCloser(io.NewSectionReader( - &bytesReaderAt{data: data}, 0, contentLength, - )) } - err := h.storage.Put(c.Request.Context(), key, c.Request.Body, contentLength) + err = h.storage.Put(c.Request.Context(), key, bytes.NewReader(data), int64(len(data))) if err != nil { h.logger.Error().Err(err).Str("key", key).Msg("failed to store cache entry") c.Status(http.StatusInternalServerError) diff --git a/src/internal/handler/handler.go b/src/internal/handler/handler.go index 0b7b42e..08514a9 100644 --- a/src/internal/handler/handler.go +++ b/src/internal/handler/handler.go @@ -1,9 +1,11 @@ package handler import ( + "io" + + "github.com/kevingruber/gradle-cache/internal/analysis" "github.com/kevingruber/gradle-cache/internal/storage" "github.com/rs/zerolog" - "io" ) // CacheHandler handles Gradle build cache HTTP requests. @@ -12,10 +14,12 @@ type CacheHandler struct { maxEntrySize int64 logger zerolog.Logger metrics *Metrics + analyzer *analysis.Analyzer // nil when static analysis is disabled } // NewCacheHandler creates a new cache handler. -func NewCacheHandler(store storage.Storage, maxEntrySize int64, logger zerolog.Logger) (*CacheHandler, error) { +// Pass a nil analyzer to disable static analysis. 
+func NewCacheHandler(store storage.Storage, maxEntrySize int64, logger zerolog.Logger, analyzer *analysis.Analyzer) (*CacheHandler, error) { metrics, err := NewMetrics() if err != nil { return nil, err @@ -26,6 +30,7 @@ func NewCacheHandler(store storage.Storage, maxEntrySize int64, logger zerolog.L maxEntrySize: maxEntrySize, logger: logger, metrics: metrics, + analyzer: analyzer, }, nil } diff --git a/src/internal/server/server.go b/src/internal/server/server.go index 7edd1b3..aa67bf0 100644 --- a/src/internal/server/server.go +++ b/src/internal/server/server.go @@ -7,6 +7,7 @@ import ( "time" "github.com/gin-gonic/gin" + "github.com/kevingruber/gradle-cache/internal/analysis" "github.com/kevingruber/gradle-cache/internal/config" "github.com/kevingruber/gradle-cache/internal/handler" "github.com/kevingruber/gradle-cache/internal/middleware" @@ -75,15 +76,26 @@ func (s *Server) setupRoutes() { s.router.GET("/metrics", gin.WrapH(promhttp.Handler())) } - // Cache endpoints + // Build an analyzer when static analysis is enabled; nil disables it. + var analyzer *analysis.Analyzer + if s.cfg.StaticAnalysis.Enabled { + analyzer = analysis.New(s.cfg.StaticAnalysis) + s.logger.Info(). + Bool("check_network", s.cfg.StaticAnalysis.CheckNetwork). + Bool("check_exec", s.cfg.StaticAnalysis.CheckExec). + Bool("check_reflection", s.cfg.StaticAnalysis.CheckReflection). + Bool("check_filesystem", s.cfg.StaticAnalysis.CheckFilesystem). 
+ Msg("static analysis enabled") + } + cacheHandler, err := handler.NewCacheHandler( s.storage, s.cfg.MaxEntrySizeBytes(), s.logger, + analyzer, ) - if err != nil { - s.logger.Fatal().Err(err).Msg("Failed to initialize cache") + s.logger.Fatal().Err(err).Msg("Failed to initialize cache handler") } // Gradle cache endpoints From 93302cf0cb3502d12ebf82f6a25df2f1905f1db6 Mon Sep 17 00:00:00 2001 From: = Date: Mon, 8 Jun 2026 12:23:51 +0200 Subject: [PATCH 3/6] Bump chart version --- chart/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 7c98c0f..3049086 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -5,7 +5,7 @@ description: A Gradle Build Cache server with Redis backend for EduIDE deploymen type: application # Bump this version on every release — also used as the Docker image tag -version: 0.5.2 +version: 0.5.3 dependencies: - name: reposilite From 945994a603f1bc80986a8063c19f3d3c08ef3a30 Mon Sep 17 00:00:00 2001 From: = Date: Mon, 8 Jun 2026 14:01:27 +0200 Subject: [PATCH 4/6] fix typo, and adjust config --- src/configs/config.yaml | 4 ++-- src/go.mod | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/configs/config.yaml b/src/configs/config.yaml index b841e16..184fd53 100644 --- a/src/configs/config.yaml +++ b/src/configs/config.yaml @@ -28,7 +28,7 @@ logging: static_analysis: enabled: false - check_network: true - check_exec: true + check_network: false + check_exec: false check_reflection: false check_filesystem: false diff --git a/src/go.mod b/src/go.mod index 44ef77c..312b502 100644 --- a/src/go.mod +++ b/src/go.mod @@ -24,7 +24,7 @@ require ( github.com/bytedance/sonic/loader v0.5.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cloudwego/base64x v0.1.6 // indirect - github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirectrelo + github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect 
github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/gabriel-vasile/mimetype v1.4.13 // indirect github.com/gin-contrib/sse v1.1.0 // indirect From 64c84bad50ba2559c707c0d0161760ad2cbb9bd3 Mon Sep 17 00:00:00 2001 From: = Date: Mon, 8 Jun 2026 15:01:13 +0200 Subject: [PATCH 5/6] add temp debug log --- src/internal/handler/cache_put.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/internal/handler/cache_put.go b/src/internal/handler/cache_put.go index 726726c..87edd6e 100644 --- a/src/internal/handler/cache_put.go +++ b/src/internal/handler/cache_put.go @@ -2,6 +2,7 @@ package handler import ( "bytes" + "fmt" "io" "net/http" @@ -40,6 +41,12 @@ func (h *CacheHandler) Put(c *gin.Context) { return } + h.logger.Debug(). + Str("key", key). + Int("size", len(data)). + Str("magic", fmt.Sprintf("%x", data[:min(4, len(data))])). + Msg("artifact received") + if int64(len(data)) > h.maxEntrySize { c.Status(http.StatusRequestEntityTooLarge) return From 8ebb9751ecc5b1d7ff8b722e5a76cce43063abb8 Mon Sep 17 00:00:00 2001 From: = Date: Mon, 8 Jun 2026 15:13:06 +0200 Subject: [PATCH 6/6] Move to handling Gzip --- src/internal/analysis/analyzer.go | 138 +++++++++++++++++++++++------- 1 file changed, 105 insertions(+), 33 deletions(-) diff --git a/src/internal/analysis/analyzer.go b/src/internal/analysis/analyzer.go index 76c9ea3..230bdda 100644 --- a/src/internal/analysis/analyzer.go +++ b/src/internal/analysis/analyzer.go @@ -1,8 +1,10 @@ package analysis import ( + "archive/tar" "archive/zip" "bytes" + "compress/gzip" "fmt" "io" "strings" @@ -17,7 +19,7 @@ type Violation struct { Category string `json:"category"` // e.g. "network" } -// Analyzer inspects JAR bytecode for forbidden API usage. +// Analyzer inspects Java bytecode for forbidden API usage. type Analyzer struct { rules []rule } @@ -45,21 +47,79 @@ func New(cfg config.StaticAnalysisConfig) *Analyzer { return a } -// Check inspects data as a JAR file and returns any forbidden API violations. 
-// Returns nil, nil when data is not a JAR (so non-JAR artifacts pass silently). +// Check inspects data for forbidden API usage. +// Gradle cache entries are gzip-compressed tar archives — those are handled first. +// Plain ZIP/JAR files are also supported. +// Any other format is passed through silently (returns nil, nil). func (a *Analyzer) Check(data []byte) ([]Violation, error) { - if !isJAR(data) { + switch { + case isGzip(data): + return a.checkGzipTar(data) + case isZIP(data): + seen := make(map[string]struct{}) + return a.checkZIP(data, seen) + default: return nil, nil } +} - zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) +// checkGzipTar decompresses a gzip stream and scans the tar entries inside. +// .class files are scanned directly; .jar files inside the tar are opened as +// ZIPs and their .class files scanned one level deep. +func (a *Analyzer) checkGzipTar(data []byte) ([]Violation, error) { + gr, err := gzip.NewReader(bytes.NewReader(data)) if err != nil { - return nil, fmt.Errorf("failed to open JAR: %w", err) + return nil, fmt.Errorf("failed to open gzip stream: %w", err) } + defer gr.Close() + tr := tar.NewReader(gr) seen := make(map[string]struct{}) var violations []Violation + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, fmt.Errorf("failed to read tar entry: %w", err) + } + + entryData, err := io.ReadAll(tr) + if err != nil { + continue + } + + switch { + case strings.HasSuffix(hdr.Name, ".class"): + // Direct class file — common for compileJava task output. + className := strings.TrimSuffix(hdr.Name, ".class") + v := a.scanClassBytes(className, entryData, seen) + violations = append(violations, v...) + + case strings.HasSuffix(hdr.Name, ".jar"): + // JAR inside the tar — common for the jar task output. + v, err := a.checkZIP(entryData, seen) + if err != nil { + continue + } + violations = append(violations, v...) 
+ } + } + + return violations, nil +} + +// checkZIP opens data as a ZIP archive and scans every .class file inside. +func (a *Analyzer) checkZIP(data []byte, seen map[string]struct{}) ([]Violation, error) { + zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + return nil, fmt.Errorf("failed to open ZIP: %w", err) + } + + var violations []Violation + for _, f := range zr.File { if !strings.HasSuffix(f.Name, ".class") { continue @@ -67,7 +127,6 @@ func (a *Analyzer) Check(data []byte) ([]Violation, error) { rc, err := f.Open() if err != nil { - // Skip unreadable entries rather than failing the whole check. continue } classData, err := io.ReadAll(rc) @@ -76,41 +135,54 @@ func (a *Analyzer) Check(data []byte) ([]Violation, error) { continue } - poolStrings, err := extractConstantPoolStrings(classData) - if err != nil { - // Malformed class file — skip it. - continue - } - className := strings.TrimSuffix(f.Name, ".class") + v := a.scanClassBytes(className, classData, seen) + violations = append(violations, v...) + } + + return violations, nil +} + +// scanClassBytes parses a single .class file and returns any violations. +// seen deduplicates across multiple calls (shared within one Check invocation). 
+func (a *Analyzer) scanClassBytes(className string, data []byte, seen map[string]struct{}) []Violation { + poolStrings, err := extractConstantPoolStrings(data) + if err != nil { + return nil + } + + var violations []Violation - for _, s := range poolStrings { - for _, r := range a.rules { - for _, pattern := range r.patterns { - if !strings.HasPrefix(s, pattern) { - continue - } - key := className + "|" + s + "|" + r.category - if _, dup := seen[key]; dup { - continue - } - seen[key] = struct{}{} - violations = append(violations, Violation{ - Class: className, - Matched: s, - Category: r.category, - }) + for _, s := range poolStrings { + for _, r := range a.rules { + for _, pattern := range r.patterns { + if !strings.HasPrefix(s, pattern) { + continue + } + key := className + "|" + s + "|" + r.category + if _, dup := seen[key]; dup { + continue } + seen[key] = struct{}{} + violations = append(violations, Violation{ + Class: className, + Matched: s, + Category: r.category, + }) } } } - return violations, nil + return violations +} + +// isGzip returns true when data begins with the gzip magic bytes. +func isGzip(data []byte) bool { + return len(data) >= 2 && data[0] == 0x1f && data[1] == 0x8b } -// isJAR returns true when data begins with the ZIP local file header signature. -// JARs are ZIP files, so this signature (PK\x03\x04) is the correct check. -func isJAR(data []byte) bool { +// isZIP returns true when data begins with the ZIP local file header signature. +func isZIP(data []byte) bool { return len(data) >= 4 && data[0] == 0x50 && data[1] == 0x4B && data[2] == 0x03 && data[3] == 0x04