diff --git a/README.md b/README.md index ef5fa12..cdbe73b 100644 --- a/README.md +++ b/README.md @@ -65,11 +65,12 @@ Set VM options "--module-path "...\JDKs\javafx-sdk-24.0.1\lib" --add-modules ja # Reserved key 'VERSION' is used for users notifications only, may be skipped VERSION=1.1 -# All key names may be in 3 formats +# All key names may be in 5 formats # KEY_NAME=VALUE - means the ferret will search for VALUE-string case-insensitive, the VALUE-string will be converted to RegExp pattern '\bVALUE\b'. Note: all spaces inside will be replaced with '\\s+', all special chars (&, -, +) will be escaped by '\\' # KEY_NAME(regexp)=VALUE - means you have finally defined RegExp pattern, and it will be used as is # KEY_NAME(allowed)=VALUE - means you have defined exact string - which may be found during scanned, but must be treated as allowed. Actually no matter what key name will be used - the value is a global string. # KEY_NAME(exclude-ext)=VALUE1,VALUE2,etc.. - list of file extentions to be ignored for the "KEY_NAME" signature +# BINARY_ARTIFACTS(binary-exclude)=PATTERN1,PATTERN2,etc.. - comma-separated list of regex patterns, each matched against the whole file name (not the path), for binary files to exclude from detection (e.g., gradle-wrapper\\.jar) # Notes: all key names must be unique Examples @@ -93,4 +94,9 @@ PASSW-003=qwerty123 IP-ADDR(regexp)=((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?) IP-ADDR-1(allowed)=0.0.0.0 IP-ADDR-2(allowed)=127.0.0.1 + +# Binary Artifacts Detection +# The tool automatically detects binary files (files containing a NUL (0x00) byte in the first 16 KiB); UTF-16 text and supported image formats are not reported as binary artifacts +# Use binary-exclude to whitelist binaries that are committed on purpose, e.g. 
the Gradle and Maven wrapper jars +BINARY_ARTIFACTS(binary-exclude)=gradle-wrapper\\.jar,maven-wrapper\\.jar ``` diff --git a/cli/src/main/java/com/github/exadmin/cyberferret/CyberFerretCLI.java b/cli/src/main/java/com/github/exadmin/cyberferret/CyberFerretCLI.java index 8162532..4dfa93f 100644 --- a/cli/src/main/java/com/github/exadmin/cyberferret/CyberFerretCLI.java +++ b/cli/src/main/java/com/github/exadmin/cyberferret/CyberFerretCLI.java @@ -139,6 +139,7 @@ private static void _main(String[] args) { runnableScanner.setSignaturesMap(sigsLoader.getSignaturesMap()); runnableScanner.setAllowedSignaturesMap(sigsLoader.getAllowedSignaturesMap()); runnableScanner.setExcludeExtMap(sigsLoader.getExcludeExtsMap()); + runnableScanner.setBinaryExcludePatterns(sigsLoader.getBinaryExcludePatterns()); runnableScanner.setDirToScan(repoPathToScan.toString()); runnableScanner.setStagedFiles(stagedFiles); runnableScanner.run(); diff --git a/common/src/main/java/com/github/exadmin/cyberferret/async/RunnableScanner.java b/common/src/main/java/com/github/exadmin/cyberferret/async/RunnableScanner.java index 008ef8a..988b896 100644 --- a/common/src/main/java/com/github/exadmin/cyberferret/async/RunnableScanner.java +++ b/common/src/main/java/com/github/exadmin/cyberferret/async/RunnableScanner.java @@ -6,6 +6,7 @@ import com.github.exadmin.cyberferret.model.FoundPathItem; import com.github.exadmin.cyberferret.model.ItemType; import com.github.exadmin.cyberferret.utils.FileUtils; +import com.github.exadmin.cyberferret.utils.ImgUtils; import com.github.exadmin.cyberferret.utils.MiscUtils; import java.io.File; @@ -29,6 +30,7 @@ public class RunnableScanner extends ARunnable { private FxCallback fxCallback = (type, message) -> logInfo(message); private boolean isAnySignatureFound = false; private List stagedFiles; + private List binaryExcludePatterns = null; public RunnableScanner(boolean isCLIMode) { super(isCLIMode); @@ -46,6 +48,10 @@ public void setExcludeExtMap(Map> excludeExtMap) { 
this.excludeExtMap = excludeExtMap; } + public void setBinaryExcludePatterns(List binaryExcludePatterns) { + this.binaryExcludePatterns = binaryExcludePatterns; + } + public void setDirToScan(String dirToScan) { this.dirToScan = dirToScan; } @@ -256,6 +262,41 @@ private void scan(FoundPathItem pathItem, Path rootDir, ExcludeFileModel exclude if (pathItem.getType() == ItemType.DIRECTORY || pathItem.getType() == ItemType.SIGNATURE) return; Path filePath = pathItem.getFilePath(); + + // Binary files are usually committed by accident, so report them as artifacts instead of + // text-scanning them. Supported images are the exception: readFile() extracts their metadata + // (e.g. EXIF) and scans that text, so they must fall through to the normal path below. + boolean isBinary = false; + try { + isBinary = FileUtils.isBinaryFile(filePath); + } catch (IOException ex) { + logWarn("Could not determine if file '{}' is binary. Treating as text.", filePath); + } + + String extension = FileUtils.getFileExtensionAsString(filePath); + if (isBinary && !ImgUtils.isSupportedImageFormat(extension)) { + if (FileUtils.matchesAnyPattern(filePath, binaryExcludePatterns)) { + logDebug("Binary file '{}' is excluded from detection", filePath); + return; + } + + FoundPathItem binaryItem = new FoundPathItem(filePath, ItemType.SIGNATURE, pathItem); + binaryItem.setVisualName("BINARY_ARTIFACT"); + binaryItem.setLineNumber(0); + binaryItem.setDisplayText("Binary file detected (contains zero bytes in first 16KiB)"); + binaryItem.setFoundString(filePath.getFileName().toString()); + + calculateIgnoreFlagState(binaryItem, pathItem, rootDir, excludeFileModel); + foundItemsContainer.addItem(binaryItem); + + // a non-ignored artifact must fail the scan so the CLI/pre-commit hook exits non-zero + if (!binaryItem.isIgnored()) { + isAnySignatureFound = true; + logWarn("Binary artifact detected: {}", filePath); + } + return; + } + String fileBody; try { logTrace("Reading file {}", filePath); diff --git 
a/common/src/main/java/com/github/exadmin/cyberferret/async/RunnableSigsLoader.java b/common/src/main/java/com/github/exadmin/cyberferret/async/RunnableSigsLoader.java index f222980..89a3922 100644 --- a/common/src/main/java/com/github/exadmin/cyberferret/async/RunnableSigsLoader.java +++ b/common/src/main/java/com/github/exadmin/cyberferret/async/RunnableSigsLoader.java @@ -15,6 +15,7 @@ public class RunnableSigsLoader extends ARunnable { private Map signaturesMap; // map of signatures private Map allowedSignaturesMap; // effectively the list of exact strings which are allowed when capturing private Map> excludeExtsMap; // signature -> List of file extensions to ignore + private List binaryExcludePatterns; // list of file name patterns to exclude from binary detection private String dictionaryVersion = "undefined"; private InputStream inputStream; @@ -38,6 +39,10 @@ public Map> getExcludeExtsMap() { return excludeExtsMap; } + public List getBinaryExcludePatterns() { + return binaryExcludePatterns; + } + public boolean isReady() { return isReady.get(); } @@ -56,6 +61,7 @@ public void _run() { Map allowedSignaturesTmpMap = new HashMap<>(); Map> includeExt = new HashMap<>(); Map> excludeExtTmpMap = new HashMap<>(); + List binaryExcludePatternsTmp = new ArrayList<>(); for (Object key : properties.keySet()) { String sigId = key.toString(); @@ -81,6 +87,25 @@ public void _run() { continue; } + // load binary file exclusion patterns + if (sigId.endsWith("(binary-exclude)")) { + String[] patterns = expression.split(","); + for (String patternStr : patterns) { + patternStr = patternStr.trim(); + if (!patternStr.isEmpty()) { + try { + Pattern pattern = Pattern.compile(patternStr); + binaryExcludePatternsTmp.add(pattern); + logInfo("Binary exclusion pattern loaded: '{}'", patternStr); + } catch (PatternSyntaxException pse) { + logError("Error while compiling binary exclusion pattern '{}'", patternStr, pse); + } + } + } + + continue; + } + if (sigId.endsWith("(allowed)")) { 
sigId = sigId.substring(0, sigId.length() - 9); @@ -94,9 +119,11 @@ public void _run() { signaturesMap = Collections.unmodifiableMap(regExpTmpMap); allowedSignaturesMap = Collections.unmodifiableMap(allowedSignaturesTmpMap); excludeExtsMap = Collections.unmodifiableMap(excludeExtTmpMap); + binaryExcludePatterns = Collections.unmodifiableList(binaryExcludePatternsTmp); logInfo("Signatures are loaded successfully, number of signatures is {}", signaturesMap.size()); logInfo("Number of allowed signatures is {}", allowedSignaturesMap.size()); + logInfo("Number of binary exclusion patterns is {}", binaryExcludePatterns.size()); logInfo("Dictionary version is {}", dictionaryVersion); isReady.set(true); diff --git a/common/src/main/java/com/github/exadmin/cyberferret/utils/FileUtils.java b/common/src/main/java/com/github/exadmin/cyberferret/utils/FileUtils.java index 24c0312..ac02031 100644 --- a/common/src/main/java/com/github/exadmin/cyberferret/utils/FileUtils.java +++ b/common/src/main/java/com/github/exadmin/cyberferret/utils/FileUtils.java @@ -8,6 +8,9 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.Arrays; +import java.util.List; +import java.util.regex.Pattern; public class FileUtils { public static String readFile(String filePath) throws IOException { @@ -63,6 +66,51 @@ public static InputStream toFileInputStream(Path path) { } } + public static boolean isBinaryFile(Path path) throws IOException { + if (!Files.isRegularFile(path)) { + return false; + } + + byte[] buffer = new byte[16 * 1024]; + int bytesRead; + try (InputStream is = Files.newInputStream(path)) { + bytesRead = is.readNBytes(buffer, 0, buffer.length); // read(byte[]) may return early; readNBytes fills the buffer or stops at EOF + } + if (bytesRead <= 0) { + return false; + } + + byte[] head = bytesRead == buffer.length ? buffer : Arrays.copyOf(buffer, bytesRead); + + // UTF-16 text legitimately contains NUL bytes, so classify the head the same way + // readFile does and skip the NUL check when it is recognised as UTF-16. 
+ Charset charset = decodeText(head).charset(); + if (charset == StandardCharsets.UTF_16LE || charset == StandardCharsets.UTF_16BE) { + return false; + } + + for (byte b : head) { + if (b == 0) { + return true; + } + } + return false; + } + + public static boolean matchesAnyPattern(Path path, List patterns) { + if (path == null || path.getFileName() == null || patterns == null || patterns.isEmpty()) { // getFileName() is null for a root path + return false; + } + + String fileName = path.getFileName().toString(); + for (Pattern pattern : patterns) { + if (pattern.matcher(fileName).matches()) { + return true; + } + } + return false; + } + private static DecodedText decodeText(byte[] bytes) { if (startsWith(bytes, (byte) 0xEF, (byte) 0xBB, (byte) 0xBF)) { return new DecodedText(StandardCharsets.UTF_8, 3); diff --git a/common/src/test/java/com/github/exadmin/cyberferret/async/RunnableScannerTests.java b/common/src/test/java/com/github/exadmin/cyberferret/async/RunnableScannerTests.java index abff0ee..d77bba0 100644 --- a/common/src/test/java/com/github/exadmin/cyberferret/async/RunnableScannerTests.java +++ b/common/src/test/java/com/github/exadmin/cyberferret/async/RunnableScannerTests.java @@ -21,6 +21,9 @@ import static org.junit.jupiter.api.Assertions.assertTrue; public class RunnableScannerTests { + // PNG magic header: a real binary signature used as a stand-in for a committed binary file + private static final byte[] PNG_HEADER = {(byte) 0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D}; + @TempDir Path tempDir; @@ -100,6 +103,88 @@ public void cliMode_findsSignatureInUtf16LeFileCreatedByWindowsPowershell() thro assertTrue(runnableScanner.isAnySignatureFound()); } + @Test + public void cliMode_flagsBinaryFileAsArtifact() throws IOException { + Path repoRoot = tempDir.resolve("repo"); + Files.createDirectories(repoRoot.resolve(".git")); + Files.writeString(repoRoot.resolve(".git/config"), "[core]", StandardCharsets.UTF_8); + + Path stagedFile = repoRoot.resolve("blob.bin"); + Files.write(stagedFile, PNG_HEADER); 
+ + FoundItemsContainer foundItemsContainer = new FoundItemsContainer(); + RunnableScanner runnableScanner = new RunnableScanner(true); + runnableScanner.setDirToScan(repoRoot.toString()); + runnableScanner.setFoundItemsContainer(foundItemsContainer); + runnableScanner.setSignaturesMap(Map.of("test", Pattern.compile("secret"))); + runnableScanner.setAllowedSignaturesMap(Map.of()); + runnableScanner.setExcludeExtMap(Map.of()); + runnableScanner.setStagedFiles(List.of(stagedFile)); + + runnableScanner.run(); + + List artifacts = foundItemsContainer.getFoundItemsCopy().stream() + .filter(item -> "BINARY_ARTIFACT".equals(item.getVisualName())) + .toList(); + assertEquals(1, artifacts.size()); + assertEquals(stagedFile.toAbsolutePath().normalize(), artifacts.getFirst().getFilePath()); + // a binary artifact must fail the scan so the CLI/pre-commit hook exits non-zero + assertTrue(runnableScanner.isAnySignatureFound()); + } + + @Test + public void cliMode_skipsBinaryFileMatchingExcludePattern() throws IOException { + Path repoRoot = tempDir.resolve("repo"); + Files.createDirectories(repoRoot.resolve(".git")); + Files.writeString(repoRoot.resolve(".git/config"), "[core]", StandardCharsets.UTF_8); + + Path stagedFile = repoRoot.resolve("archive.dat"); + Files.write(stagedFile, PNG_HEADER); + + FoundItemsContainer foundItemsContainer = new FoundItemsContainer(); + RunnableScanner runnableScanner = new RunnableScanner(true); + runnableScanner.setDirToScan(repoRoot.toString()); + runnableScanner.setFoundItemsContainer(foundItemsContainer); + runnableScanner.setSignaturesMap(Map.of("test", Pattern.compile("secret"))); + runnableScanner.setAllowedSignaturesMap(Map.of()); + runnableScanner.setExcludeExtMap(Map.of()); + runnableScanner.setBinaryExcludePatterns(List.of(Pattern.compile(".*\\.dat"))); + runnableScanner.setStagedFiles(List.of(stagedFile)); + + runnableScanner.run(); + + boolean anyArtifact = foundItemsContainer.getFoundItemsCopy().stream() + .anyMatch(item -> 
"BINARY_ARTIFACT".equals(item.getVisualName())); + assertFalse(anyArtifact); + assertFalse(runnableScanner.isAnySignatureFound()); + } + + @Test + public void cliMode_doesNotFlagSupportedImageAsBinaryArtifact() throws IOException { + Path repoRoot = tempDir.resolve("repo"); + Files.createDirectories(repoRoot.resolve(".git")); + Files.writeString(repoRoot.resolve(".git/config"), "[core]", StandardCharsets.UTF_8); + + // images are binary, but their metadata is scanned for signatures, so they must not be flagged + Path stagedFile = repoRoot.resolve("logo.png"); + Files.write(stagedFile, PNG_HEADER); + + FoundItemsContainer foundItemsContainer = new FoundItemsContainer(); + RunnableScanner runnableScanner = new RunnableScanner(true); + runnableScanner.setDirToScan(repoRoot.toString()); + runnableScanner.setFoundItemsContainer(foundItemsContainer); + runnableScanner.setSignaturesMap(Map.of("test", Pattern.compile("secret"))); + runnableScanner.setAllowedSignaturesMap(Map.of()); + runnableScanner.setExcludeExtMap(Map.of()); + runnableScanner.setStagedFiles(List.of(stagedFile)); + + runnableScanner.run(); + + boolean anyArtifact = foundItemsContainer.getFoundItemsCopy().stream() + .anyMatch(item -> "BINARY_ARTIFACT".equals(item.getVisualName())); + assertFalse(anyArtifact); + } + @Test public void cliMode_handlesAbsoluteStagedFileWhenScanDirectoryIsRelative() throws IOException { Path repoRoot = tempDir.resolve("repo"); diff --git a/common/src/test/java/com/github/exadmin/cyberferret/utils/FileUtilsTests.java b/common/src/test/java/com/github/exadmin/cyberferret/utils/FileUtilsTests.java new file mode 100644 index 0000000..ee31afc --- /dev/null +++ b/common/src/test/java/com/github/exadmin/cyberferret/utils/FileUtilsTests.java @@ -0,0 +1,105 @@ +package com.github.exadmin.cyberferret.utils; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import 
java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.regex.Pattern; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class FileUtilsTests { + // PNG magic header: a real binary signature that is not valid text in any encoding + private static final byte[] PNG_HEADER = {(byte) 0x89, 'P', 'N', 'G', 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D}; + + @TempDir + Path tempDir; + + @Test + public void isBinaryFile_detectsNulByteAsBinary() throws IOException { + Path file = tempDir.resolve("blob.bin"); + Files.write(file, PNG_HEADER); + + assertTrue(FileUtils.isBinaryFile(file)); + } + + @Test + public void isBinaryFile_treatsPlainTextAsText() throws IOException { + Path file = tempDir.resolve("notes.txt"); + Files.writeString(file, "just some text", StandardCharsets.UTF_8); + + assertFalse(FileUtils.isBinaryFile(file)); + } + + @Test + public void isBinaryFile_treatsUtf16TextAsText() throws IOException { + // UTF-16 contains NUL bytes by design, yet it is still text and must be scanned + Path file = tempDir.resolve("notes-utf16.txt"); + Files.write(file, utf16LeWithBom("hack hack\r\n")); + + assertFalse(FileUtils.isBinaryFile(file)); + } + + @Test + public void isBinaryFile_treatsEmptyFileAsText() throws IOException { + Path file = tempDir.resolve("empty.txt"); + Files.write(file, new byte[0]); + + assertFalse(FileUtils.isBinaryFile(file)); + } + + @Test + public void isBinaryFile_returnsFalseForDirectory() throws IOException { + assertFalse(FileUtils.isBinaryFile(tempDir)); + } + + @Test + public void matchesAnyPattern_matchesByFileName() { + Path file = Path.of("lib", "app.jar"); + List patterns = List.of(Pattern.compile(".*\\.png"), Pattern.compile(".*\\.jar")); + + assertTrue(FileUtils.matchesAnyPattern(file, patterns)); + } + + @Test + public void matchesAnyPattern_returnsFalseWhenNothingMatches() { + 
Path file = Path.of("lib", "app.jar"); + List patterns = List.of(Pattern.compile(".*\\.png")); + + assertFalse(FileUtils.matchesAnyPattern(file, patterns)); + } + + @Test + public void matchesAnyPattern_returnsFalseForEmptyOrNullPatterns() { + Path file = Path.of("app.jar"); + + assertFalse(FileUtils.matchesAnyPattern(file, List.of())); + assertFalse(FileUtils.matchesAnyPattern(file, null)); + } + + @Test + public void matchesAnyPattern_whitelistsWrapperJarsButNotOtherJars() { + // the default binary-exclude patterns shipped in README.md + List defaults = List.of(Pattern.compile("gradle-wrapper\\.jar"), Pattern.compile("maven-wrapper\\.jar")); + + assertTrue(FileUtils.matchesAnyPattern(Path.of("gradle", "wrapper", "gradle-wrapper.jar"), defaults)); + assertTrue(FileUtils.matchesAnyPattern(Path.of(".mvn", "wrapper", "maven-wrapper.jar"), defaults)); + assertFalse(FileUtils.matchesAnyPattern(Path.of("build", "libs", "app.jar"), defaults)); + } + + private static byte[] utf16LeWithBom(String value) { + byte[] content = value.getBytes(StandardCharsets.UTF_16LE); + ByteBuffer buffer = ByteBuffer.allocate(content.length + 2).order(ByteOrder.LITTLE_ENDIAN); + buffer.put((byte) 0xFF); + buffer.put((byte) 0xFE); + buffer.put(content); + return buffer.array(); + } +} diff --git a/fx/src/main/java/com/github/exadmin/cyberferret/fxui/SceneBuilder.java b/fx/src/main/java/com/github/exadmin/cyberferret/fxui/SceneBuilder.java index 55b50d5..c06dae1 100644 --- a/fx/src/main/java/com/github/exadmin/cyberferret/fxui/SceneBuilder.java +++ b/fx/src/main/java/com/github/exadmin/cyberferret/fxui/SceneBuilder.java @@ -174,6 +174,7 @@ protected TitledPane createOfflineDictionaryPane() { runnableScanner.setSignaturesMap(runnableSigsLoader.getSignaturesMap()); runnableScanner.setAllowedSignaturesMap(runnableSigsLoader.getAllowedSignaturesMap()); runnableScanner.setExcludeExtMap(runnableSigsLoader.getExcludeExtsMap()); + 
runnableScanner.setBinaryExcludePatterns(runnableSigsLoader.getBinaryExcludePatterns()); btnLoadSigs.setDisable(false); });