From ffdd6072f1d464cf8feace963816246589ee4f98 Mon Sep 17 00:00:00 2001 From: laststylebender14 Date: Fri, 17 Apr 2026 14:34:46 +0530 Subject: [PATCH 1/3] feat(fd): filter discovered files using server ignore patterns --- crates/forge_domain/src/repo.rs | 17 ++++ crates/forge_repo/proto/forge.proto | 11 +++ crates/forge_repo/src/forge_repo.rs | 18 +++- crates/forge_repo/src/ignore_patterns.rs | 42 ++++++++ crates/forge_repo/src/lib.rs | 1 + crates/forge_services/src/fd.rs | 101 +++++++++++++------- crates/forge_services/src/forge_services.rs | 13 ++- 7 files changed, 161 insertions(+), 42 deletions(-) create mode 100644 crates/forge_repo/src/ignore_patterns.rs diff --git a/crates/forge_domain/src/repo.rs b/crates/forge_domain/src/repo.rs index 4d6205f575..427432d0f6 100644 --- a/crates/forge_domain/src/repo.rs +++ b/crates/forge_domain/src/repo.rs @@ -232,3 +232,20 @@ pub trait FuzzySearchRepository: Send + Sync { search_all: bool, ) -> Result>; } + +/// Repository for fetching server-side ignore patterns. +/// +/// The backend owns the canonical list of gitignore-style patterns used to +/// decide which files are indexable. Clients (e.g. the CLI) fetch the raw +/// patterns through this repository so they can filter files identically to +/// the server without duplicating the rules. +#[async_trait::async_trait] +pub trait IgnorePatternsRepository: Send + Sync { + /// Returns the raw contents of the server's ignore_patterns file + /// (gitignore syntax). + /// + /// # Errors + /// Returns an error if the server cannot be reached or returns an invalid + /// response. + async fn list_ignore_patterns(&self) -> Result; +} diff --git a/crates/forge_repo/proto/forge.proto b/crates/forge_repo/proto/forge.proto index 5ea339a85d..39e48f5328 100644 --- a/crates/forge_repo/proto/forge.proto +++ b/crates/forge_repo/proto/forge.proto @@ -47,6 +47,10 @@ service ForgeService { // Searches for needle in haystack using fuzzy search rpc FuzzySearch(FuzzySearchRequest) returns (FuzzySearchResponse); + + // Returns the raw server-side ignore patterns (gitignore syntax) so + // clients (e.g. the CLI) can filter files identically to the server. + rpc ListIgnorePatterns(ListIgnorePatternsRequest) returns (ListIgnorePatternsResponse); } // Node types @@ -360,3 +364,10 @@ message SearchMatch { uint32 start_line = 1; uint32 end_line = 2; } + +message ListIgnorePatternsRequest {} + +message ListIgnorePatternsResponse { + // Raw contents of the server's ignore_patterns file (gitignore syntax). + string patterns = 1; +} diff --git a/crates/forge_repo/src/forge_repo.rs b/crates/forge_repo/src/forge_repo.rs index 34d1bb8498..30a6457994 100644 --- a/crates/forge_repo/src/forge_repo.rs +++ b/crates/forge_repo/src/forge_repo.rs @@ -12,9 +12,9 @@ use forge_config::ForgeConfig; use forge_domain::{ AnyProvider, AuthCredential, ChatCompletionMessage, ChatRepository, CommandOutput, Context, Conversation, ConversationId, ConversationRepository, Environment, FileInfo, - FuzzySearchRepository, McpServerConfig, MigrationResult, Model, ModelId, Provider, ProviderId, - ProviderRepository, ResultStream, SearchMatch, Skill, SkillRepository, Snapshot, - SnapshotRepository, + FuzzySearchRepository, IgnorePatternsRepository, McpServerConfig, MigrationResult, Model, + ModelId, Provider, ProviderId, ProviderRepository, ResultStream, SearchMatch, Skill, + SkillRepository, Snapshot, SnapshotRepository, }; // Re-export CacacheStorage from forge_infra pub use forge_infra::CacacheStorage; @@ -29,6 +29,7 @@ use crate::conversation::ConversationRepositoryImpl; use crate::database::{DatabasePool, PoolConfig}; use crate::fs_snap::ForgeFileSnapshotService; use crate::fuzzy_search::ForgeFuzzySearchRepository; +use crate::ignore_patterns::ForgeIgnorePatternsRepository; use crate::provider::{ForgeChatRepository, ForgeProviderRepository}; use crate::skill::ForgeSkillRepository; use crate::validation::ForgeValidationRepository; @@ -50,6 +51,7 @@ pub struct ForgeRepo { skill_repository: Arc>, validation_repository: Arc>, fuzzy_search_repository: Arc>, + ignore_patterns_repository: Arc>, } impl< @@ -83,6 +85,8 @@ impl< let skill_repository = Arc::new(ForgeSkillRepository::new(infra.clone())); let validation_repository = Arc::new(ForgeValidationRepository::new(infra.clone())); let fuzzy_search_repository = Arc::new(ForgeFuzzySearchRepository::new(infra.clone())); + let ignore_patterns_repository = + Arc::new(ForgeIgnorePatternsRepository::new(infra.clone())); Self { infra, file_snapshot_service, @@ -95,6 +99,7 @@ impl< skill_repository, validation_repository, fuzzy_search_repository, + ignore_patterns_repository, } } } @@ -628,6 +633,13 @@ impl FuzzySearchRepository for ForgeRepo { } } +#[async_trait::async_trait] +impl IgnorePatternsRepository for ForgeRepo { + async fn list_ignore_patterns(&self) -> anyhow::Result { + self.ignore_patterns_repository.list_ignore_patterns().await + } +} + impl GrpcInfra for ForgeRepo { fn channel(&self) -> anyhow::Result { self.infra.channel() diff --git a/crates/forge_repo/src/ignore_patterns.rs b/crates/forge_repo/src/ignore_patterns.rs new file mode 100644 index 0000000000..e6c9e4710b --- /dev/null +++ b/crates/forge_repo/src/ignore_patterns.rs @@ -0,0 +1,42 @@ +use std::sync::Arc; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use forge_app::GrpcInfra; +use forge_domain::IgnorePatternsRepository; + +use crate::proto_generated::ListIgnorePatternsRequest; +use crate::proto_generated::forge_service_client::ForgeServiceClient; + +/// gRPC implementation of [`IgnorePatternsRepository`]. +/// +/// Fetches the raw server-side ignore patterns file so the CLI can apply the +/// same filtering rules as the server without re-implementing them locally. +pub struct ForgeIgnorePatternsRepository { + infra: Arc, +} + +impl ForgeIgnorePatternsRepository { + /// Create a new repository backed by the provided gRPC infrastructure. + /// + /// # Arguments + /// * `infra` - Infrastructure that provides the gRPC channel. + pub fn new(infra: Arc) -> Self { + Self { infra } + } +} + +#[async_trait] +impl IgnorePatternsRepository for ForgeIgnorePatternsRepository { + async fn list_ignore_patterns(&self) -> Result { + let channel = self.infra.channel()?; + let mut client = ForgeServiceClient::new(channel); + let response = client + .list_ignore_patterns(tonic::Request::new(ListIgnorePatternsRequest {})) + .await + .context("Failed to call ListIgnorePatterns gRPC")? + .into_inner(); + + Ok(response.patterns) + } +} diff --git a/crates/forge_repo/src/lib.rs b/crates/forge_repo/src/lib.rs index d489072371..d14072f02c 100644 --- a/crates/forge_repo/src/lib.rs +++ b/crates/forge_repo/src/lib.rs @@ -6,6 +6,7 @@ mod database; mod forge_repo; mod fs_snap; mod fuzzy_search; +mod ignore_patterns; mod provider; mod skill; mod validation; diff --git a/crates/forge_services/src/fd.rs b/crates/forge_services/src/fd.rs index a86e3ac171..d06e480543 100644 --- a/crates/forge_services/src/fd.rs +++ b/crates/forge_services/src/fd.rs @@ -2,9 +2,12 @@ use std::collections::HashSet; use std::path::{Path, PathBuf}; use std::sync::{Arc, LazyLock}; +use anyhow::Context; use async_trait::async_trait; use forge_app::{CommandInfra, WalkerInfra}; -use forge_domain::WorkspaceId; +use forge_domain::{IgnorePatternsRepository, WorkspaceId}; +use ignore::gitignore::{Gitignore, GitignoreBuilder}; +use tokio::sync::OnceCell; use tracing::{info, warn}; use crate::error::Error as ServiceError; @@ -30,31 +33,6 @@ pub(crate) fn has_allowed_extension(path: &Path) -> bool { } } -/// Returns `true` if the file at `path` should be excluded based on its name, -/// regardless of extension. This covers lock files and other generated -/// dependency manifest files that are not useful to index. -fn is_ignored_by_name(path: &Path) -> bool { - let Some(name) = path.file_name().and_then(|n| n.to_str()) else { - return false; - }; - let name_lower = name.to_lowercase(); - - // Lock files: *-lock.json, *.lock, *.lockb, *.lock.json, etc. - if name_lower.ends_with(".lock") - || name_lower.ends_with(".lockb") - || name_lower.ends_with("-lock.json") - || name_lower.ends_with("-lock.yaml") - || name_lower.ends_with("-lock.yml") - || name_lower.ends_with(".lock.json") - || name_lower.ends_with(".lockfile") - || name == "Package.resolved" - { - return true; - } - - false -} - /// Returns `true` if `path` is a symlink (does not follow the link). fn is_symlink(path: &Path) -> bool { path.symlink_metadata() @@ -78,7 +56,6 @@ pub(crate) fn filter_and_resolve( .into_iter() .map(|p| dir_path.join(&p)) .filter(|p| !is_symlink(p)) - .filter(|p| !is_ignored_by_name(p)) .filter(|p| has_allowed_extension(p)) .collect(); @@ -128,29 +105,83 @@ pub async fn discover_sync_file_paths( /// It first attempts git-based discovery. If git is unavailable, returns no /// files, or fails for any reason it transparently falls back to the filesystem /// walker so that workspaces without git history are still indexed correctly. +/// +/// After the strategy returns, `FdDefault` applies the server's gitignore +/// patterns (fetched on first use via [`IgnorePatternsRepository`] and cached +/// for the process lifetime) to the result. When the server is unreachable or +/// the response cannot be compiled the filter is skipped and a warning is +/// logged, so discovery keeps working offline. pub struct FdDefault { + infra: Arc, git: FsGit, walker: FdWalker, + matcher: OnceCell>, } impl FdDefault { - /// Creates a new `RoutingFileDiscovery` using the provided infrastructure - /// for both the git and walker strategies. + /// Creates a new `FdDefault` using the provided infrastructure for both + /// the git and walker strategies. pub fn new(infra: Arc) -> Self { - Self { git: FsGit::new(infra.clone()), walker: FdWalker::new(infra) } + Self { + git: FsGit::new(infra.clone()), + walker: FdWalker::new(infra.clone()), + infra, + matcher: OnceCell::new(), + } } } +/// Compiles a [`Gitignore`] from the raw contents of the server's +/// `ignore_patterns.txt` using the same semantics as the server: builder root +/// `/` (non-anchored globs match absolute and relative paths alike), blank / +/// `#`-prefixed lines skipped. +fn build_matcher(contents: &str) -> anyhow::Result { + let mut builder = GitignoreBuilder::new("/"); + for line in contents.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + builder + .add_line(None, line) + .with_context(|| format!("invalid ignore pattern: {line}"))?; + } + builder.build().context("failed to build ignore matcher") +} + #[async_trait] -impl FileDiscovery for FdDefault { +impl FileDiscovery + for FdDefault +{ async fn discover(&self, dir_path: &Path) -> anyhow::Result> { - match self.git.discover(dir_path).await { - Ok(files) => Ok(files), + let files = match self.git.discover(dir_path).await { + Ok(files) => files, Err(err) => { warn!(error = ?err, "git-based file discovery failed, falling back to walker"); - self.walker.discover(dir_path).await + self.walker.discover(dir_path).await? } - } + }; + + let Some(matcher) = self + .matcher + .get_or_init(|| async { + match self.infra.list_ignore_patterns().await.and_then(|contents| build_matcher(&contents)) { + Ok(gi) => Some(gi), + Err(err) => { + warn!(error = ?err, "failed to load server ignore patterns; continuing without"); + None + } + } + }) + .await + else { + return Ok(files); + }; + + Ok(files + .into_iter() + .filter(|p| !matcher.matched_path_or_any_parents(p, p.is_dir()).is_ignore()) + .collect()) } } diff --git a/crates/forge_services/src/forge_services.rs b/crates/forge_services/src/forge_services.rs index 7ff1d1a2fb..f7a114c590 100644 --- a/crates/forge_services/src/forge_services.rs +++ b/crates/forge_services/src/forge_services.rs @@ -6,8 +6,9 @@ use forge_app::{ McpServerInfra, Services, StrategyFactory, UserInfra, WalkerInfra, }; use forge_domain::{ - ChatRepository, ConversationRepository, FuzzySearchRepository, ProviderRepository, - SkillRepository, SnapshotRepository, ValidationRepository, WorkspaceIndexRepository, + ChatRepository, ConversationRepository, FuzzySearchRepository, IgnorePatternsRepository, + ProviderRepository, SkillRepository, SnapshotRepository, ValidationRepository, + WorkspaceIndexRepository, }; use crate::ForgeProviderAuthService; @@ -53,7 +54,8 @@ pub struct ForgeServices< + WorkspaceIndexRepository + AgentRepository + SkillRepository - + ValidationRepository, + + ValidationRepository + + IgnorePatternsRepository, > { chat_service: Arc>, config_service: Arc>, @@ -104,7 +106,8 @@ impl< + WorkspaceIndexRepository + AgentRepository + SkillRepository - + ValidationRepository, + + ValidationRepository + + IgnorePatternsRepository, > ForgeServices { pub fn new(infra: Arc) -> Self { @@ -199,6 +202,7 @@ impl< + WorkspaceIndexRepository + ValidationRepository + FuzzySearchRepository + + IgnorePatternsRepository + Clone + 'static, > Services for ForgeServices @@ -353,6 +357,7 @@ impl< + AgentRepository + SkillRepository + ValidationRepository + + IgnorePatternsRepository + Send + Sync, > forge_app::EnvironmentInfra for ForgeServices From 1303603bdade8c44f7548348a980c97d7629929e Mon Sep 17 00:00:00 2001 From: laststylebender14 Date: Fri, 17 Apr 2026 14:37:48 +0530 Subject: [PATCH 2/3] feat(tests): add tests for filtering files based on server ignore patterns --- crates/forge_services/src/fd.rs | 108 ++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/crates/forge_services/src/fd.rs b/crates/forge_services/src/fd.rs index d06e480543..0ad3d0a1c1 100644 --- a/crates/forge_services/src/fd.rs +++ b/crates/forge_services/src/fd.rs @@ -190,11 +190,82 @@ mod tests { use std::fs::{self, File}; use std::io::Write; + use forge_app::{WalkedFile, Walker}; + use forge_domain::CommandOutput; use pretty_assertions::assert_eq; use tempfile::tempdir; use super::*; + /// Test double that answers the three infra traits `FdDefault` depends on. + /// + /// * `WalkerInfra::walk` returns `files` verbatim so tests can control the + /// post-filter input. + /// * `CommandInfra::execute_command` always fails, forcing `FdDefault` to + /// fall back to the walker path. + /// * `IgnorePatternsRepository::list_ignore_patterns` returns `patterns`. + struct MockInfra { + files: Vec, + patterns: String, + } + + impl MockInfra { + fn new(files: Vec, patterns: &str) -> Self { + Self { files, patterns: patterns.to_string() } + } + } + + fn walked(path: &str) -> WalkedFile { + WalkedFile { + path: path.to_string(), + file_name: Path::new(path) + .file_name() + .map(|n| n.to_string_lossy().to_string()), + size: 0, + } + } + + #[async_trait] + impl WalkerInfra for MockInfra { + async fn walk(&self, _config: Walker) -> anyhow::Result> { + Ok(self.files.clone()) + } + } + + #[async_trait] + impl CommandInfra for MockInfra { + async fn execute_command( + &self, + command: String, + _working_dir: PathBuf, + _silent: bool, + _env_vars: Option>, + ) -> anyhow::Result { + Ok(CommandOutput { + command, + stdout: String::new(), + stderr: "not a git repo".to_string(), + exit_code: Some(128), + }) + } + + async fn execute_command_raw( + &self, + _command: &str, + _working_dir: PathBuf, + _env_vars: Option>, + ) -> anyhow::Result { + unreachable!("not used by FdDefault discovery") + } + } + + #[async_trait] + impl IgnorePatternsRepository for MockInfra { + async fn list_ignore_patterns(&self) -> anyhow::Result { + Ok(self.patterns.clone()) + } + } + #[test] fn test_filter_and_resolve_excludes_symlinks() { let dir = tempdir().unwrap(); @@ -262,4 +333,41 @@ mod tests { let expected = vec![base.join("src/main.rs")]; assert_eq!(actual, expected); } + + #[tokio::test] + async fn test_discover_filters_files_matching_server_ignore_patterns() { + let dir = tempdir().unwrap(); + let base = dir.path(); + + // Create every candidate on disk so `is_symlink` returns false and + // `has_allowed_extension` sees a real extension. + for rel in [ + "main.rs", + "lib.rs", + "node_modules/pkg/index.rs", + "package-lock.json", + ] { + let path = base.join(rel); + fs::create_dir_all(path.parent().unwrap()).unwrap(); + File::create(&path).unwrap(); + } + + let fixture = FdDefault::new(Arc::new(MockInfra::new( + vec![ + walked("main.rs"), + walked("lib.rs"), + walked("node_modules/pkg/index.rs"), + walked("package-lock.json"), + ], + "node_modules\npackage-lock.json\n", + ))); + + let mut actual = fixture.discover(base).await.unwrap(); + actual.sort(); + + let mut expected = vec![base.join("lib.rs"), base.join("main.rs")]; + expected.sort(); + + assert_eq!(actual, expected); + } } From da75b16f5001f05178e06a4972ba723368dd2267 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 09:11:38 +0000 Subject: [PATCH 3/3] [autofix.ci] apply automated fixes --- crates/forge_services/src/fd.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/forge_services/src/fd.rs b/crates/forge_services/src/fd.rs index 0ad3d0a1c1..f0d3926498 100644 --- a/crates/forge_services/src/fd.rs +++ b/crates/forge_services/src/fd.rs @@ -180,7 +180,11 @@ impl FileDis Ok(files .into_iter() - .filter(|p| !matcher.matched_path_or_any_parents(p, p.is_dir()).is_ignore()) + .filter(|p| { + !matcher + .matched_path_or_any_parents(p, p.is_dir()) + .is_ignore() + }) .collect()) } }