Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 37 additions & 2 deletions src/cli/commands/index_parallel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};

use crate::IndexPersistence;
use crate::config::Settings;
use crate::indexing::facade::{build_embedding_backend, resolve_remote_model_name};
use crate::indexing::pipeline::{IncrementalStats, Phase2Stats, Pipeline, PipelineConfig};
Expand Down Expand Up @@ -121,12 +122,37 @@ pub fn run(args: IndexParallelArgs, settings: &Settings) {
}
}

let persistence = IndexPersistence::new(settings.index_path.clone());
if let Err(e) = persistence.save_document_index_metadata(index.as_ref(), paths_to_index.clone())
{
tracing::error!(target: "pipeline", "Failed to persist index metadata: {e}");
std::process::exit(1);
}

tracing::info!(target: "pipeline", "Index saved to: {}", index_path.display());
if semantic.is_some() {
tracing::info!(target: "pipeline", "Embeddings saved to: {}", semantic_path.display());
}
}

/// Print a one-line "semantic search enabled" status message to stderr.
///
/// The backend is treated as remote when the `CODANNA_EMBED_URL` environment
/// variable is readable or when `settings.semantic_search.remote_url` is set.
/// For a remote backend the model name shown comes from
/// `resolve_remote_model_name`; otherwise the configured
/// `semantic_search.model` is shown. The threshold is printed in both cases.
fn emit_semantic_status(settings: &Settings) {
    let semantic = &settings.semantic_search;
    let env_override_present = std::env::var("CODANNA_EMBED_URL").is_ok();
    let uses_remote_backend = env_override_present || semantic.remote_url.is_some();

    // Guard clause: the local backend reports the configured model name as-is.
    if !uses_remote_backend {
        eprintln!(
            "Semantic search enabled (model: {}, threshold: {})",
            semantic.model, semantic.threshold
        );
        return;
    }

    eprintln!(
        "Semantic search enabled (backend: remote, model: {}, threshold: {})",
        resolve_remote_model_name(semantic),
        semantic.threshold
    );
}

/// Create semantic search instance and embedding backend if enabled in settings.
///
/// Returns `(semantic, backend)` where:
Expand Down Expand Up @@ -157,6 +183,11 @@ fn create_semantic_search(
};

let model = &settings.semantic_search.model;
let effective_model = if is_remote {
resolve_remote_model_name(&settings.semantic_search)
} else {
model.clone()
};

// Load existing embeddings or create fresh instance.
// After loading, verify dimensions match the backend so we don't silently
Expand Down Expand Up @@ -207,14 +238,18 @@ fn create_semantic_search(
let new_result = if is_remote {
Ok(SimpleSemanticSearch::new_empty(
backend.dimensions(),
&resolve_remote_model_name(&settings.semantic_search),
&effective_model,
))
} else {
SimpleSemanticSearch::from_model_name(model)
};
match new_result {
Ok(s) => {
tracing::debug!(target: "pipeline", "Created new semantic search with model: {model}");
tracing::debug!(
target: "pipeline",
"Created new semantic search with model: {effective_model}"
);
emit_semantic_status(settings);
Some(Arc::new(Mutex::new(s)))
}
Err(e) => {
Expand Down
26 changes: 26 additions & 0 deletions src/indexing/facade.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ pub struct IndexFacade {
/// Set to true when load_semantic_search fails with DimensionMismatch so
/// hot-reload and other callers do not retry on every reload cycle.
semantic_incompatible: bool,

/// Persisted semantic metadata for status/reporting when semantic search
/// is not loaded into memory (for example, lite facade loads).
semantic_metadata_snapshot: Option<crate::semantic::SemanticMetadata>,
}

impl IndexFacade {
Expand Down Expand Up @@ -126,6 +130,7 @@ impl IndexFacade {
indexed_paths: HashSet::new(),
index_base,
semantic_incompatible: false,
semantic_metadata_snapshot: None,
})
}

Expand All @@ -151,6 +156,7 @@ impl IndexFacade {
indexed_paths: HashSet::new(),
index_base,
semantic_incompatible: false,
semantic_metadata_snapshot: None,
}
}

Expand Down Expand Up @@ -201,6 +207,7 @@ impl IndexFacade {
};

self.semantic_search = Some(Arc::new(Mutex::new(semantic)));
self.semantic_metadata_snapshot = self.get_semantic_metadata();
self.embedding_pool = Some(backend);

Ok(())
Expand Down Expand Up @@ -289,6 +296,7 @@ impl IndexFacade {
}

self.semantic_search = Some(Arc::new(Mutex::new(semantic)));
self.semantic_metadata_snapshot = self.get_semantic_metadata();
return Ok(true);
}
Err(SemanticSearchError::DimensionMismatch {
Expand Down Expand Up @@ -324,6 +332,18 @@ impl IndexFacade {
Ok(false)
}

/// Refresh the cached semantic metadata snapshot from disk without
/// initializing the semantic backend.
///
/// Checks for `metadata.json` inside `path`. When the file is absent the
/// snapshot is cleared and `Ok(false)` is returned. When it is present the
/// metadata is loaded via `SemanticMetadata::load(path)`, stored as the
/// snapshot, and `Ok(true)` is returned.
///
/// # Errors
///
/// Propagates any error from `SemanticMetadata::load`; in that case the
/// previously stored snapshot is left untouched.
pub fn load_semantic_metadata_snapshot(&mut self, path: &Path) -> FacadeResult<bool> {
    let metadata_file = path.join("metadata.json");

    // `?` returns before the assignment happens, so a failed load keeps the old snapshot.
    self.semantic_metadata_snapshot = if metadata_file.exists() {
        Some(crate::semantic::SemanticMetadata::load(path)?)
    } else {
        None
    };

    Ok(self.semantic_metadata_snapshot.is_some())
}

/// Ensure embedding backend is initialized for generating new embeddings.
///
/// Called lazily by methods that need to compute embeddings (reindexing, watcher).
Expand All @@ -343,6 +363,11 @@ impl IndexFacade {
self.semantic_search
.as_ref()
.map(|s| s.lock().map(|sem| sem.embedding_count()).unwrap_or(0))
.or_else(|| {
self.semantic_metadata_snapshot
.as_ref()
.map(|m| m.embedding_count)
})
.unwrap_or(0)
}

Expand All @@ -351,6 +376,7 @@ impl IndexFacade {
self.semantic_search
.as_ref()
.and_then(|s| s.lock().ok().and_then(|sem| sem.metadata().cloned()))
.or_else(|| self.semantic_metadata_snapshot.clone())
}

// =========================================================================
Expand Down
20 changes: 15 additions & 5 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

use clap::Parser;
use codanna::cli::{Cli, Commands, RetrieveQuery};
use codanna::indexing::facade::IndexFacade;
use codanna::indexing::facade::{IndexFacade, resolve_remote_model_name};
use codanna::project_resolver::{
providers::{
csharp::CSharpProvider, go::GoProvider, java::JavaProvider, javascript::JavaScriptProvider,
Expand Down Expand Up @@ -414,10 +414,20 @@ async fn main() {
if let Err(e) = idx.enable_semantic_search() {
eprintln!("Warning: Failed to enable semantic search: {e}");
} else {
eprintln!(
"Semantic search enabled (model: {}, threshold: {})",
config.semantic_search.model, config.semantic_search.threshold
);
let is_remote = config.semantic_search.remote_url.is_some()
|| std::env::var("CODANNA_EMBED_URL").is_ok();
if is_remote {
eprintln!(
"Semantic search enabled (backend: remote, model: {}, threshold: {})",
resolve_remote_model_name(&config.semantic_search),
config.semantic_search.threshold
);
} else {
eprintln!(
"Semantic search enabled (model: {}, threshold: {})",
config.semantic_search.model, config.semantic_search.threshold
);
}
}
}
}
Expand Down
17 changes: 7 additions & 10 deletions src/parsing/java/behavior.rs
Original file line number Diff line number Diff line change
Expand Up @@ -963,16 +963,13 @@ mod tests {
fs::write(&pom_path, pom_content).unwrap();

// Create settings and build provider cache
let settings_content = format!(
r#"
[languages.java]
enabled = true
config_files = ["{}"]
"#,
pom_path.display()
);

let settings: crate::config::Settings = toml::from_str(&settings_content).unwrap();
let mut settings = crate::config::Settings::default();
let java_settings = settings
.languages
.get_mut("java")
.expect("java language config should exist");
java_settings.enabled = true;
java_settings.config_files = vec![pom_path.clone()];

// Save original directory to restore later
let original_dir = std::env::current_dir().unwrap();
Expand Down
13 changes: 7 additions & 6 deletions src/parsing/lua/behavior.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ impl LanguageBehavior for LuaBehavior {
mod tests {
use super::*;
use crate::Visibility;
use std::path::Path;
use tempfile::TempDir;

#[test]
fn test_module_separator() {
Expand All @@ -261,18 +261,19 @@ mod tests {
#[test]
fn test_module_path_from_file() {
let behavior = LuaBehavior::new();
let project_root = Path::new("/home/user/project");
let temp_dir = TempDir::new().unwrap();
let project_root = temp_dir.path();
let extensions = &["lua"];

let file_path = Path::new("/home/user/project/lib/utils.lua");
let file_path = project_root.join("lib/utils.lua");
assert_eq!(
behavior.module_path_from_file(file_path, project_root, extensions),
behavior.module_path_from_file(&file_path, project_root, extensions),
Some("lib.utils".to_string())
);

let file_path = Path::new("/home/user/project/main.lua");
let file_path = project_root.join("main.lua");
assert_eq!(
behavior.module_path_from_file(file_path, project_root, extensions),
behavior.module_path_from_file(&file_path, project_root, extensions),
Some("main".to_string())
);
}
Expand Down
11 changes: 7 additions & 4 deletions src/parsing/paths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,13 @@ pub fn strip_extension<'a>(path_str: &'a str, extensions: &[&str]) -> &'a str {
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;

#[test]
fn test_normalize_relative_path() {
let file_path = Path::new("src/foo/bar.rs");
let workspace_root = Path::new("/home/user/workspace");
let temp_dir = TempDir::new().unwrap();
let workspace_root = temp_dir.path();

let result = normalize_for_module_path(file_path, workspace_root);

Expand All @@ -88,10 +90,11 @@ mod tests {

#[test]
fn test_normalize_absolute_path() {
let file_path = Path::new("/home/user/workspace/src/foo/bar.rs");
let workspace_root = Path::new("/home/user/workspace");
let temp_dir = TempDir::new().unwrap();
let workspace_root = temp_dir.path();
let file_path = workspace_root.join("src/foo/bar.rs");

let result = normalize_for_module_path(file_path, workspace_root);
let result = normalize_for_module_path(&file_path, workspace_root);

assert_eq!(result, file_path);
}
Expand Down
Loading
Loading