diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 7c98c0f..3049086 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -5,7 +5,7 @@ description: A Gradle Build Cache server with Redis backend for EduIDE deploymen type: application # Bump this version on every release — also used as the Docker image tag -version: 0.5.2 +version: 0.5.3 dependencies: - name: reposilite diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml index 184f04b..1d83491 100644 --- a/chart/templates/configmap.yaml +++ b/chart/templates/configmap.yaml @@ -38,4 +38,11 @@ data: logging: level: "info" format: "json" + + static_analysis: + enabled: {{ .Values.staticAnalysis.enabled }} + check_network: {{ .Values.staticAnalysis.checkNetwork }} + check_exec: {{ .Values.staticAnalysis.checkExec }} + check_reflection: {{ .Values.staticAnalysis.checkReflection }} + check_filesystem: {{ .Values.staticAnalysis.checkFilesystem }} {{- end }} diff --git a/chart/values.yaml b/chart/values.yaml index 011e0c7..7846fe5 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -33,6 +33,16 @@ tls: # Kubernetes secret name containing tls.crt and tls.key secretName: "" +# ============================================================ +# Static Code Analysis (Java JARs) +# ============================================================ +staticAnalysis: + enabled: false + checkNetwork: false + checkExec: false + checkReflection: false + checkFilesystem: false + # ============================================================ # Gateway API (HTTPRoutes only) # ============================================================ diff --git a/src/configs/config.yaml b/src/configs/config.yaml index 223e24e..184fd53 100644 --- a/src/configs/config.yaml +++ b/src/configs/config.yaml @@ -25,3 +25,10 @@ metrics: logging: level: "info" format: "json" + +static_analysis: + enabled: false + check_network: false + check_exec: false + check_reflection: false + check_filesystem: false diff --git 
a/src/internal/analysis/analyzer.go b/src/internal/analysis/analyzer.go new file mode 100644 index 0000000..230bdda --- /dev/null +++ b/src/internal/analysis/analyzer.go @@ -0,0 +1,189 @@ +package analysis + +import ( + "archive/tar" + "archive/zip" + "bytes" + "compress/gzip" + "fmt" + "io" + "strings" + + "github.com/kevingruber/gradle-cache/internal/config" +) + +// Violation describes a single forbidden API reference found in a class file. +type Violation struct { + Class string `json:"class"` // e.g. "com/student/Homework" + Matched string `json:"matched"` // e.g. "java/net/Socket" + Category string `json:"category"` // e.g. "network" +} + +// Analyzer inspects Java bytecode for forbidden API usage. +type Analyzer struct { + rules []rule +} + +type rule struct { + category string + patterns []string +} + +// New creates an Analyzer whose active rules are determined by cfg. +func New(cfg config.StaticAnalysisConfig) *Analyzer { + a := &Analyzer{} + if cfg.CheckNetwork { + a.rules = append(a.rules, rule{"network", networkPatterns}) + } + if cfg.CheckExec { + a.rules = append(a.rules, rule{"exec", execPatterns}) + } + if cfg.CheckReflection { + a.rules = append(a.rules, rule{"reflection", reflectionPatterns}) + } + if cfg.CheckFilesystem { + a.rules = append(a.rules, rule{"filesystem", filesystemPatterns}) + } + return a +} + +// Check inspects data for forbidden API usage. +// Gradle cache entries are gzip-compressed tar archives — those are handled first. +// Plain ZIP/JAR files are also supported. +// Any other format is passed through silently (returns nil, nil). +func (a *Analyzer) Check(data []byte) ([]Violation, error) { + switch { + case isGzip(data): + return a.checkGzipTar(data) + case isZIP(data): + seen := make(map[string]struct{}) + return a.checkZIP(data, seen) + default: + return nil, nil + } +} + +// checkGzipTar decompresses a gzip stream and scans the tar entries inside. 
+// .class files are scanned directly; .jar files inside the tar are opened as +// ZIPs and their .class files scanned one level deep. +func (a *Analyzer) checkGzipTar(data []byte) ([]Violation, error) { + gr, err := gzip.NewReader(bytes.NewReader(data)) + if err != nil { + return nil, fmt.Errorf("failed to open gzip stream: %w", err) + } + defer gr.Close() + + tr := tar.NewReader(gr) + seen := make(map[string]struct{}) + var violations []Violation + + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, fmt.Errorf("failed to read tar entry: %w", err) + } + + entryData, err := io.ReadAll(tr) + if err != nil { + continue + } + + switch { + case strings.HasSuffix(hdr.Name, ".class"): + // Direct class file — common for compileJava task output. + className := strings.TrimSuffix(hdr.Name, ".class") + v := a.scanClassBytes(className, entryData, seen) + violations = append(violations, v...) + + case strings.HasSuffix(hdr.Name, ".jar"): + // JAR inside the tar — common for the jar task output. + v, err := a.checkZIP(entryData, seen) + if err != nil { + continue + } + violations = append(violations, v...) + } + } + + return violations, nil +} + +// checkZIP opens data as a ZIP archive and scans every .class file inside. +func (a *Analyzer) checkZIP(data []byte, seen map[string]struct{}) ([]Violation, error) { + zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + return nil, fmt.Errorf("failed to open ZIP: %w", err) + } + + var violations []Violation + + for _, f := range zr.File { + if !strings.HasSuffix(f.Name, ".class") { + continue + } + + rc, err := f.Open() + if err != nil { + continue + } + classData, err := io.ReadAll(rc) + rc.Close() + if err != nil { + continue + } + + className := strings.TrimSuffix(f.Name, ".class") + v := a.scanClassBytes(className, classData, seen) + violations = append(violations, v...) 
+ } + + return violations, nil +} + +// scanClassBytes parses a single .class file and returns any violations. +// seen deduplicates across multiple calls (shared within one Check invocation). +func (a *Analyzer) scanClassBytes(className string, data []byte, seen map[string]struct{}) []Violation { + poolStrings, err := extractConstantPoolStrings(data) + if err != nil { + return nil + } + + var violations []Violation + + for _, s := range poolStrings { + for _, r := range a.rules { + for _, pattern := range r.patterns { + if !strings.HasPrefix(s, pattern) { + continue + } + key := className + "|" + s + "|" + r.category + if _, dup := seen[key]; dup { + continue + } + seen[key] = struct{}{} + violations = append(violations, Violation{ + Class: className, + Matched: s, + Category: r.category, + }) + } + } + } + + return violations +} + +// isGzip returns true when data begins with the gzip magic bytes. +func isGzip(data []byte) bool { + return len(data) >= 2 && data[0] == 0x1f && data[1] == 0x8b +} + +// isZIP returns true when data begins with the ZIP local file header signature. +func isZIP(data []byte) bool { + return len(data) >= 4 && + data[0] == 0x50 && data[1] == 0x4B && + data[2] == 0x03 && data[3] == 0x04 +} diff --git a/src/internal/analysis/classfile.go b/src/internal/analysis/classfile.go new file mode 100644 index 0000000..f106f3b --- /dev/null +++ b/src/internal/analysis/classfile.go @@ -0,0 +1,77 @@ +package analysis + +import ( + "encoding/binary" + "fmt" +) + +// extractConstantPoolStrings parses a Java class file and returns every UTF-8 +// string from its constant pool. Class names, method names, field names, and +// type descriptors all appear there verbatim, so scanning these strings is +// sufficient to detect forbidden API references without executing any code. 
// extractConstantPoolStrings parses the header and constant pool of a Java
// class file and returns the contents of every CONSTANT_Utf8 entry, in pool
// order. Nothing after the constant pool is read.
//
// The bytes of each entry are converted with string(...) as-is; they are not
// decoded from the JVM's modified UTF-8.
//
// An error is returned when data is shorter than the fixed header, does not
// start with the 0xCAFEBABE magic, contains an unknown constant tag, or ends
// before the constant pool is complete — including inside a fixed-width entry.
func extractConstantPoolStrings(data []byte) ([]string, error) {
	// magic (4) + minor_version (2) + major_version (2) + constant_pool_count (2)
	const headerLen = 10

	if len(data) < headerLen {
		return nil, fmt.Errorf("class file too short (%d bytes)", len(data))
	}
	if binary.BigEndian.Uint32(data[0:4]) != 0xCAFEBABE {
		return nil, fmt.Errorf("missing class file magic bytes")
	}

	cpCount := int(binary.BigEndian.Uint16(data[8:headerLen]))
	pos := headerLen

	var result []string

	// Pool indices run from 1 to cpCount-1.
	for i := 1; i < cpCount; i++ {
		if pos >= len(data) {
			return nil, fmt.Errorf("unexpected end of constant pool at entry %d", i)
		}

		entry := i
		tag := data[pos]
		pos++

		var width int // payload size of a fixed-width entry
		switch tag {
		case 1: // CONSTANT_Utf8 — u2 length followed by that many bytes
			if pos+2 > len(data) {
				return nil, fmt.Errorf("truncated Utf8 length at entry %d", entry)
			}
			length := int(binary.BigEndian.Uint16(data[pos : pos+2]))
			pos += 2
			if pos+length > len(data) {
				return nil, fmt.Errorf("truncated Utf8 data at entry %d", entry)
			}
			result = append(result, string(data[pos:pos+length]))
			pos += length
			continue

		case 3, 4: // CONSTANT_Integer, CONSTANT_Float
			width = 4

		case 5, 6: // CONSTANT_Long, CONSTANT_Double — consume two pool slots
			width = 8
			i++

		case 7, 8, 16, 19, 20: // Class, String, MethodType, Module, Package
			width = 2

		case 9, 10, 11, 12, 17, 18: // Fieldref, Methodref, InterfaceMethodref, NameAndType, Dynamic, InvokeDynamic
			width = 4

		case 15: // CONSTANT_MethodHandle
			width = 3

		default:
			return nil, fmt.Errorf("unknown constant pool tag %d at entry %d", tag, entry)
		}

		// Without this check a pool cut off inside its last fixed-width entry
		// would be reported as valid.
		if pos+width > len(data) {
			return nil, fmt.Errorf("truncated constant pool entry %d (tag %d)", entry, tag)
		}
		pos += width
	}

	return result, nil
}
// Forbidden API patterns, grouped by category.
//
// Every pattern is compared against the raw constant-pool strings of a class
// with strings.HasPrefix. A class-name pattern such as "java/net/Socket"
// therefore also covers SocketInputStream, SocketOutputStream, etc., and a
// trailing slash like "sun/net/" catches the entire package subtree.
//
// Prefix matching cannot express "exactly this class". Where a class name is
// itself a prefix of common, harmless names, the pattern is a method
// descriptor instead (descriptors are constant-pool strings too), chosen so
// that it only appears when the dangerous entry point is actually called.

var networkPatterns = []string{
	"java/net/Socket",
	"java/net/ServerSocket",
	"java/net/DatagramSocket",
	"java/net/MulticastSocket",
	// NOTE(review): as a prefix this also matches java/net/URLEncoder and
	// java/net/URLDecoder, which do no network I/O — confirm that is intended.
	"java/net/URL",
	"java/net/HttpURLConnection",
	"java/net/InetAddress",
	"java/net/InetSocketAddress",
	"java/net/URLConnection",
	"java/nio/channels/SocketChannel",
	"java/nio/channels/ServerSocketChannel",
	"java/nio/channels/DatagramChannel",
	"sun/net/",
}

var execPatterns = []string{
	// Descriptor of Runtime.getRuntime(). The class name "java/lang/Runtime"
	// cannot be used: as a prefix it also matches java/lang/RuntimeException
	// and java/lang/RuntimePermission, so nearly every class would be flagged.
	"()Ljava/lang/Runtime;",
	"java/lang/ProcessBuilder",
	"java/lang/Process",
}

var reflectionPatterns = []string{
	"java/lang/reflect/Method",
	"java/lang/reflect/Field",
	"java/lang/reflect/Constructor",
	"java/lang/ClassLoader",
	// Descriptor of MethodHandles.lookup() / publicLookup(). The class name
	// "java/lang/invoke/MethodHandle" cannot be used: as a prefix it also
	// matches java/lang/invoke/MethodHandles$Lookup, which javac records in
	// classes that merely use lambdas or invokedynamic string concatenation.
	"()Ljava/lang/invoke/MethodHandles$Lookup;",
	"sun/misc/Unsafe",
}

var filesystemPatterns = []string{
	"java/io/FileOutputStream",
	"java/io/FileInputStream",
	"java/io/FileWriter",
	"java/io/FileReader",
	"java/io/RandomAccessFile",
	"java/nio/file/Files",
	"java/nio/file/Path",
	"java/nio/channels/FileChannel",
}
+ Logging LoggingConfig `mapstructure:"logging"` + Sentry SentryConfig `mapstructure:"sentry"` + StaticAnalysis StaticAnalysisConfig `mapstructure:"static_analysis"` } type ServerConfig struct { @@ -67,6 +68,14 @@ type SentryConfig struct { Enabled bool `mapstructure:"enabled"` } +type StaticAnalysisConfig struct { + Enabled bool `mapstructure:"enabled"` + CheckNetwork bool `mapstructure:"check_network"` + CheckExec bool `mapstructure:"check_exec"` + CheckReflection bool `mapstructure:"check_reflection"` + CheckFilesystem bool `mapstructure:"check_filesystem"` +} + func Load(configPath string) (*Config, error) { v := viper.New() @@ -94,6 +103,12 @@ func Load(configPath string) (*Config, error) { v.SetDefault("logging.level", "info") v.SetDefault("logging.format", "json") + v.SetDefault("static_analysis.enabled", false) + v.SetDefault("static_analysis.check_network", false) + v.SetDefault("static_analysis.check_exec", false) + v.SetDefault("static_analysis.check_reflection", false) + v.SetDefault("static_analysis.check_filesystem", false) + // Read from config file if provided if configPath != "" { v.SetConfigFile(configPath) @@ -148,4 +163,3 @@ func (c *Config) Validate() error { func (c *Config) MaxEntrySizeBytes() int64 { return c.Cache.MaxEntrySizeMB * 1024 * 1024 } - diff --git a/src/internal/handler/cache_put.go b/src/internal/handler/cache_put.go index f2fa8fc..87edd6e 100644 --- a/src/internal/handler/cache_put.go +++ b/src/internal/handler/cache_put.go @@ -1,13 +1,18 @@ package handler import ( - "github.com/gin-gonic/gin" + "bytes" + "fmt" "io" "net/http" + + "github.com/gin-gonic/gin" ) // Put handles PUT requests to store cache entries. // Gradle expects: 2xx on success, 413 if too large. +// If static analysis is enabled and the artifact is a JAR, forbidden API usage +// causes a 403 and the artifact is not stored. 
func (h *CacheHandler) Put(c *gin.Context) { key := c.Param("key") if key == "" { @@ -15,7 +20,7 @@ func (h *CacheHandler) Put(c *gin.Context) { return } - // Check Content-Length header for size validation + // Reject early if Content-Length already exceeds the limit. contentLength := c.Request.ContentLength if contentLength > h.maxEntrySize { h.logger.Warn(). @@ -27,32 +32,46 @@ func (h *CacheHandler) Put(c *gin.Context) { return } - // Handle Expect: 100-continue - // Gin/Go handles this automatically, but we validate size first + // Always buffer the full body so analysis can inspect it before storage. + // The +1 lets us detect an over-limit chunked body after reading. + data, err := io.ReadAll(io.LimitReader(c.Request.Body, h.maxEntrySize+1)) + if err != nil { + h.logger.Error().Err(err).Str("key", key).Msg("failed to read request body") + c.Status(http.StatusInternalServerError) + return + } - // For chunked transfers or unknown size, we need to handle differently - if contentLength < 0 { - // Read with size limit - limitedReader := io.LimitReader(c.Request.Body, h.maxEntrySize+1) - data, err := io.ReadAll(limitedReader) - if err != nil { - h.logger.Error().Err(err).Str("key", key).Msg("failed to read request body") - c.Status(http.StatusInternalServerError) - return - } + h.logger.Debug(). + Str("key", key). + Int("size", len(data)). + Str("magic", fmt.Sprintf("%x", data[:min(4, len(data))])). + Msg("artifact received") + + if int64(len(data)) > h.maxEntrySize { + c.Status(http.StatusRequestEntityTooLarge) + return + } - if int64(len(data)) > h.maxEntrySize { - c.Status(http.StatusRequestEntityTooLarge) + // Run static analysis when enabled. Analysis errors are non-fatal: a broken + // parser should not block a legitimate upload, so we log and continue. 
+ if h.analyzer != nil { + violations, err := h.analyzer.Check(data) + if err != nil { + h.logger.Warn().Err(err).Str("key", key).Msg("static analysis failed, skipping") + } else if len(violations) > 0 { + h.logger.Warn(). + Str("key", key). + Int("violations", len(violations)). + Msg("rejected artifact: forbidden API usage detected") + c.JSON(http.StatusForbidden, gin.H{ + "error": "artifact contains forbidden API usage", + "violations": violations, + }) return } - - contentLength = int64(len(data)) - c.Request.Body = io.NopCloser(io.NewSectionReader( - &bytesReaderAt{data: data}, 0, contentLength, - )) } - err := h.storage.Put(c.Request.Context(), key, c.Request.Body, contentLength) + err = h.storage.Put(c.Request.Context(), key, bytes.NewReader(data), int64(len(data))) if err != nil { h.logger.Error().Err(err).Str("key", key).Msg("failed to store cache entry") c.Status(http.StatusInternalServerError) diff --git a/src/internal/handler/handler.go b/src/internal/handler/handler.go index 0b7b42e..08514a9 100644 --- a/src/internal/handler/handler.go +++ b/src/internal/handler/handler.go @@ -1,9 +1,11 @@ package handler import ( + "io" + + "github.com/kevingruber/gradle-cache/internal/analysis" "github.com/kevingruber/gradle-cache/internal/storage" "github.com/rs/zerolog" - "io" ) // CacheHandler handles Gradle build cache HTTP requests. @@ -12,10 +14,12 @@ type CacheHandler struct { maxEntrySize int64 logger zerolog.Logger metrics *Metrics + analyzer *analysis.Analyzer // nil when static analysis is disabled } // NewCacheHandler creates a new cache handler. -func NewCacheHandler(store storage.Storage, maxEntrySize int64, logger zerolog.Logger) (*CacheHandler, error) { +// Pass a nil analyzer to disable static analysis. 
+func NewCacheHandler(store storage.Storage, maxEntrySize int64, logger zerolog.Logger, analyzer *analysis.Analyzer) (*CacheHandler, error) { metrics, err := NewMetrics() if err != nil { return nil, err @@ -26,6 +30,7 @@ func NewCacheHandler(store storage.Storage, maxEntrySize int64, logger zerolog.L maxEntrySize: maxEntrySize, logger: logger, metrics: metrics, + analyzer: analyzer, }, nil } diff --git a/src/internal/server/server.go b/src/internal/server/server.go index 7edd1b3..aa67bf0 100644 --- a/src/internal/server/server.go +++ b/src/internal/server/server.go @@ -7,6 +7,7 @@ import ( "time" "github.com/gin-gonic/gin" + "github.com/kevingruber/gradle-cache/internal/analysis" "github.com/kevingruber/gradle-cache/internal/config" "github.com/kevingruber/gradle-cache/internal/handler" "github.com/kevingruber/gradle-cache/internal/middleware" @@ -75,15 +76,26 @@ func (s *Server) setupRoutes() { s.router.GET("/metrics", gin.WrapH(promhttp.Handler())) } - // Cache endpoints + // Build an analyzer when static analysis is enabled; nil disables it. + var analyzer *analysis.Analyzer + if s.cfg.StaticAnalysis.Enabled { + analyzer = analysis.New(s.cfg.StaticAnalysis) + s.logger.Info(). + Bool("check_network", s.cfg.StaticAnalysis.CheckNetwork). + Bool("check_exec", s.cfg.StaticAnalysis.CheckExec). + Bool("check_reflection", s.cfg.StaticAnalysis.CheckReflection). + Bool("check_filesystem", s.cfg.StaticAnalysis.CheckFilesystem). + Msg("static analysis enabled") + } + cacheHandler, err := handler.NewCacheHandler( s.storage, s.cfg.MaxEntrySizeBytes(), s.logger, + analyzer, ) - if err != nil { - s.logger.Fatal().Err(err).Msg("Failed to initialize cache") + s.logger.Fatal().Err(err).Msg("Failed to initialize cache handler") } // Gradle cache endpoints