-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstore_codebase_in_memory.py
More file actions
184 lines (150 loc) · 6.24 KB
/
store_codebase_in_memory.py
File metadata and controls
184 lines (150 loc) · 6.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/usr/bin/env python3
"""
Store Bob's codebase in pg_singularity for self-analysis
This allows Bob to understand his own architecture deeply
"""
import asyncio
import os
from pathlib import Path
from typing import List, Dict
import mimetypes
from src.memory.postgres_client import postgres_memory
class CodebaseMemorizer:
    """Store codebase files in Bob's memory for analysis.

    Walks a directory tree, reads UTF-8 text files matching a set of glob
    patterns, and persists each one via ``postgres_memory.think`` so the
    agent can later query and reason about its own source code.
    """

    def __init__(self, base_path: Path, max_file_size: int = 100000):
        """
        Args:
            base_path: Root directory of the codebase to memorize.
            max_file_size: Files whose decoded text exceeds this many
                characters are skipped. Defaults to 100000 (~100 KB).
        """
        self.base_path = base_path
        self.max_file_size = max_file_size
        # Running counters reported at the end of memorize_codebase().
        self.stats = {
            'files_processed': 0,
            'files_stored': 0,
            'errors': 0,
            'total_size': 0
        }

    async def store_file(self, file_path: Path) -> bool:
        """Store a single file in memory.

        Returns:
            True if the file was stored; False if it was skipped
            (binary, too large) or storing failed.
        """
        try:
            # Concept names use the path relative to the codebase root.
            rel_path = file_path.relative_to(self.base_path)

            try:
                content = file_path.read_text(encoding='utf-8')
            except UnicodeDecodeError:
                # Not valid UTF-8 text -> treat as binary and skip quietly.
                return False

            # Skip very large files to keep memory entries manageable.
            if len(content) > self.max_file_size:
                print(f"⚠️ Skipping large file: {rel_path}")
                return False

            # Classify by extension; anything unrecognized counts as code.
            mime_type, _ = mimetypes.guess_type(str(file_path))
            file_type = 'code'
            if file_path.suffix == '.md':
                file_type = 'documentation'
            elif file_path.suffix in ('.json', '.yml', '.yaml'):
                # Merged: JSON and YAML are both configuration.
                file_type = 'configuration'

            # Store in memory (return value intentionally unused).
            await postgres_memory.think(
                concept=f"Codebase file: {rel_path}",
                context=content,
                importance=0.6,  # Medium importance for codebase files
                metadata={
                    'type': 'codebase_file',
                    'file_type': file_type,
                    'file_path': str(rel_path),
                    'file_size': len(content),
                    'mime_type': mime_type or 'text/plain',
                    'kb_source': 'codebase_analysis'
                }
            )
            self.stats['files_stored'] += 1
            self.stats['total_size'] += len(content)
            print(f"✓ Stored: {rel_path} ({len(content)} bytes)")
            return True
        except Exception as e:
            # Best-effort pipeline: log, count the failure, keep going.
            print(f"✗ Error storing {file_path}: {e}")
            self.stats['errors'] += 1
            return False

    async def scan_directory(self, directory: Path, patterns: List[str]) -> List[Path]:
        """Recursively collect files under *directory* matching *patterns*.

        Skips well-known build/VCS/cache directories and binary or
        transient file extensions.

        Returns:
            A sorted, de-duplicated list of matching file paths.
        """
        # Directories whose contents should never be memorized.
        skip_dirs = {
            '__pycache__', '.git', 'node_modules', 'venv', 'env',
            '.pytest_cache', '.mypy_cache', 'htmlcov', 'dist', 'build'
        }
        # Extensions of binary or ephemeral files.
        skip_suffixes = {
            '.pyc', '.pyo', '.pyd', '.so', '.dylib', '.dll',
            '.log', '.pid', '.lock', '.tmp'
        }

        # Set, not list: overlapping patterns must not yield duplicates.
        files = set()
        for pattern in patterns:
            for file_path in directory.rglob(pattern):
                # Skip anything inside an ignored directory.
                if any(part in skip_dirs for part in file_path.parts):
                    continue
                # O(1) set membership instead of scanning skip_suffixes.
                if file_path.suffix in skip_suffixes:
                    continue
                if file_path.is_file():
                    files.add(file_path)
        return sorted(files)

    async def memorize_codebase(self):
        """Store entire codebase in memory and record a summary thought."""
        print(f"\n🧠 Storing codebase from: {self.base_path}")
        print("=" * 60)

        # Define file patterns to include
        patterns = [
            "*.py",    # Python files
            "*.md",    # Documentation
            "*.json",  # Configuration
            "*.yml",   # YAML configs
            "*.yaml",  # YAML configs
            "*.sh",    # Shell scripts
            "*.sql",   # SQL files
            "*.txt",   # Requirements, etc
            "*.toml",  # Config files
        ]

        # Scan for files
        print("\n📂 Scanning for files...")
        files = await self.scan_directory(self.base_path, patterns)
        print(f"Found {len(files)} files to process")

        # Process files
        print("\n💾 Storing files in memory...")
        for file_path in files:
            self.stats['files_processed'] += 1
            await self.store_file(file_path)

        # Print summary
        print("\n" + "=" * 60)
        print("📊 Summary:")
        print(f" Files processed: {self.stats['files_processed']}")
        print(f" Files stored: {self.stats['files_stored']}")
        print(f" Errors: {self.stats['errors']}")
        print(f" Total size: {self.stats['total_size']:,} bytes")

        # Record a high-importance summary thought about the run itself.
        await postgres_memory.think(
            concept="Codebase memorization complete",
            context=f"Stored {self.stats['files_stored']} files from my codebase in memory. "
            f"Total size: {self.stats['total_size']:,} bytes. "
            f"This allows me to deeply understand my own architecture and suggest improvements.",
            importance=0.8,
            metadata={
                'type': 'codebase_analysis',
                'stats': self.stats,
                'base_path': str(self.base_path)
            }
        )
        print("\n✨ Codebase stored in memory!")
async def main():
    """Store Bob's codebase in memory"""
    # Bring the memory backend online before doing anything else.
    await postgres_memory.initialize()

    # The codebase to memorize is the directory this script lives in.
    script_dir = Path(__file__).parent
    await CodebaseMemorizer(script_dir).memorize_codebase()

    # Shut the memory connection down cleanly.
    await postgres_memory.close()

    print("\n💭 Honest review incoming...")
    print(" Now I can analyze my own codebase from memory!")


if __name__ == "__main__":
    asyncio.run(main())