Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20260219013931620740.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "Add filtering, timestamp explosion, insert/count/remove/update operations to vector store API. Add top-level vector_size config to VectorStoreConfig."
}
4 changes: 4 additions & 0 deletions .semversioner/next-release/patch-20260220214632816094.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"type": "patch",
"description": "Add streaming support to generate_text_embeddings."
}
2 changes: 2 additions & 0 deletions dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ dtypes
ints
genid
isinstance
ismatch
ftype

# Azure
abfs
Expand Down
39 changes: 9 additions & 30 deletions docs/examples_notebooks/index_migration_to_v1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -205,44 +205,23 @@
"metadata": {},
"outputs": [],
"source": [
"from graphrag.cache.factory import CacheFactory\n",
"from graphrag.callbacks.noop_workflow_callbacks import NoopWorkflowCallbacks\n",
"from graphrag.index.workflows.generate_text_embeddings import generate_text_embeddings\n",
"from graphrag.language_model.manager import ModelManager\n",
"from graphrag.tokenizer.get_tokenizer import get_tokenizer\n",
"from graphrag_cache import create_cache\n",
"\n",
"# We only need to re-run the embeddings workflow, to ensure that embeddings for all required search fields are in place\n",
"# We'll construct the context and run this function flow directly to avoid everything else\n",
"# We only need to re-run the embeddings workflow, to ensure that embeddings\n",
"# for all required search fields are in place.\n",
"# We pass in the table_provider created earlier so that generate_text_embeddings\n",
"# reads the migrated tables we just wrote.\n",
"\n",
"model_config = config.get_language_model_config(config.embed_text.model_id)\n",
"callbacks = NoopWorkflowCallbacks()\n",
"cache_config = config.cache.model_dump() # type: ignore\n",
"cache = CacheFactory().create_cache(\n",
" cache_type=cache_config[\"type\"], # type: ignore\n",
" **cache_config,\n",
")\n",
"model = ModelManager().get_or_create_embedding_model(\n",
" name=\"text_embedding\",\n",
" model_type=model_config.type,\n",
" config=model_config,\n",
" callbacks=callbacks,\n",
" cache=cache,\n",
")\n",
"\n",
"tokenizer = get_tokenizer(model_config)\n",
"cache = create_cache(config.cache)\n",
"\n",
"await generate_text_embeddings(\n",
" text_units=final_text_units,\n",
" entities=final_entities,\n",
" community_reports=final_community_reports,\n",
" config=config,\n",
" table_provider=table_provider,\n",
" cache=cache,\n",
" callbacks=callbacks,\n",
" model=model,\n",
" tokenizer=tokenizer,\n",
" batch_size=config.embed_text.batch_size,\n",
" batch_max_tokens=config.embed_text.batch_max_tokens,\n",
" num_threads=model_config.concurrent_requests,\n",
" vector_store_config=config.vector_store,\n",
" embedded_fields=config.embed_text.names,\n",
")"
]
}
Expand Down
Loading
Loading