-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathpyproject.toml
More file actions
227 lines (202 loc) · 6.86 KB
/
Copy pathpyproject.toml
File metadata and controls
227 lines (202 loc) · 6.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# PEP 517 build configuration: setuptools is the backend and Cython is a
# build-time requirement.
# NOTE(review): setuptools' documentation advises listing "wheel" only when
# setup.py imports it directly — confirm whether it is still needed here.
[build-system]
build-backend = "setuptools.build_meta"
requires = [
    "setuptools>=68.0",
    "wheel",
    "Cython>=3.0.0",
]
# Core package metadata (PEP 621).
[project]
name = "myspellchecker"
version = "1.8.0"
description = "Myanmar (Burmese) text intelligence library — spell checking, grammar validation, dictionary building, and AI model training"
readme = "README.md"
# The code base relies on Python 3.10+ syntax, e.g. PEP 604 unions (`str | None`).
requires-python = ">=3.10"
license = { file = "LICENSE" }
authors = [
    { name = "Thet Twe Aung", email = "thettweaung@gmail.com" },
]
keywords = [
    "myanmar",
    "burmese",
    "spell-checker",
    "nlp",
    "language-processing",
]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Topic :: Text Processing :: Linguistic",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
]
dependencies = [
    # Runtime requirements for core spell checking.
    "numpy>=1.22.0",  # 1.22.0 carries the fix for CVE-2021-41496
    "python-crfsuite>=0.9.7",
    "rich>=13.0.0",  # terminal formatting, colors, and progress bars
    "pyyaml>=6.0.0",  # required: parses the YAML grammar rules
    # Configuration validation, limited to the Pydantic v2 series.
    "pydantic>=2.0.0,<3.0.0",  # type-safe configuration with rich validation
    # Zawgyi detection and conversion (required for Myanmar language support).
    "myanmartools>=1.2.1",  # Google's statistical Zawgyi detector (~95% accuracy)
    "python-myanmar>=1.0.0,<2.0.0",  # Zawgyi-to-Unicode conversion
]

# Console entry point: the `myspellchecker` command runs myspellchecker.cli:main.
[project.scripts]
myspellchecker = "myspellchecker.cli:main"
# Optional feature sets ("extras"). Several extras pull in other extras of this
# same package via the `myspellchecker[...]` self-reference.
# pyyaml does not appear here: it is a core dependency (grammar rules need it).
[project.optional-dependencies]
# Dictionary building pipeline (`myspellchecker build`).
build = [
    "pyarrow>=14.0.1",  # 14.0.1 carries the fix for CVE-2023-47248
    "duckdb>=1.0.0",  # fast OLAP database for build-pipeline aggregations
    "xxhash>=3.0.0",  # fast non-cryptographic hashing for dedup
    "tqdm>=4.65.0",  # progress bars for the data pipeline
    "cached-path>=1.2.0",  # resource downloading and caching
]
# AI features: semantic context checking.
ai = [
    "onnxruntime>=1.17.0",
    "tokenizers>=0.13.0",
]
# Optional transformer-based POS tagging.
transformers = [
    "transformers>=4.38.0",
    "torch>=2.6.0",  # 2.6.0 carries the fix for the CVE-2025-32434 RCE
]
# Complete AI stack: semantic checking plus transformer POS tagging.
ai-full = [
    "myspellchecker[ai,transformers]",
]
# Custom model training. The training pipeline exports to ONNX, so the ONNX
# export dependencies are included as well.
train = [
    "myspellchecker[ai,transformers]",
    "datasets>=2.14.0",
    "accelerate>=0.21.0",
    "onnx>=1.14.0",  # ONNX model format (required by onnxruntime.quantization)
    "onnxscript>=0.1.0",  # required by torch.onnx.export
]
# Contributor tooling.
dev = [
    # Testing
    "pytest>=8.0.0",
    "pytest-cov>=4.0.0",
    "pytest-benchmark>=4.0.0",
    "pytest-xdist>=3.0.0",  # parallel test execution
    "pytest-timeout>=2.2.0",  # abort hanging tests
    "hypothesis>=6.0.0",  # property-based testing
    # Type checking and linting
    "mypy>=1.0.0",
    "types-PyYAML>=6.0.12",
    "ruff>=0.3.0",
    # AI training stack: only needed for model development, not for usage.
    "myspellchecker[train]",
]
# Project links published with the package metadata.
[project.urls]
Homepage = "https://github.com/thettwe/myspellchecker"
Documentation = "https://docs.myspellchecker.com/"
Repository = "https://github.com/thettwe/myspellchecker"
# Quoted because the key contains a space.
"Bug Tracker" = "https://github.com/thettwe/myspellchecker/issues"
# Package discovery: look for import packages under src/.
[tool.setuptools.packages.find]
where = [
    "src",
]

# Non-Python files shipped inside the myspellchecker package.
[tool.setuptools.package-data]
myspellchecker = [
    "py.typed",
    # Cython sources and declaration files, matched recursively.
    "**/*.pyx",
    "**/*.pxd",
    # Bundled data files.
    "data/*.onnx",
    "data/*.json",
    "data/models/*.mmap",
    "rules/*.yaml",
    "schemas/*.json",
]
# pytest settings.
# `testpaths` is deliberately left unset; pass the target explicitly on the CLI:
#   pytest tests/              → full suite (tests marked `slow` are deselected by addopts)
#   pytest tests/test_foo.py   → single file
#   pytest tests/ -m unit      → unit tests only
# Coverage is opt-in:
#   pytest --cov=myspellchecker --cov-report=term-missing --cov-fail-under=75
[tool.pytest.ini_options]
# Test discovery patterns.
python_files = "test_*.py"
python_classes = "Test*"
python_functions = "test_*"
norecursedirs = [
    "scripts",
    ".git",
    "venv",
    "*.egg-info",
    "__pycache__",
    ".tox",
    "dist",
    "build",
]
addopts = [
    "--strict-markers",
    # Slow tests are skipped by default; run them with: pytest -m slow
    "-m",
    "not slow",
]
# Every marker used by the suite must be declared here because of --strict-markers.
markers = [
    "slow: marks tests as slow",
    "integration: marks tests as integration tests",
    "unit: marks tests as unit tests",
    "e2e: marks tests as end-to-end tests",
    "benchmark: marks tests as benchmarks/performance checks",
]
# Suppress the SwigPyPacked / SwigPyObject DeprecationWarnings.
filterwarnings = [
    "ignore:builtin type SwigPyPacked:DeprecationWarning",
    "ignore:builtin type SwigPyObject:DeprecationWarning",
]
# coverage.py measurement settings.
[tool.coverage.run]
source = [
    "src/myspellchecker",
]
omit = [
    "*/tests/*",
    "*/__pycache__/*",
    # Hard to test: these modules need optional dependencies (torch, transformers).
    "src/myspellchecker/training/trainer.py",
    "src/myspellchecker/algorithms/pos_tagger_transformer.py",
    "src/myspellchecker/text/ner_model.py",
]
# coverage.py reporting settings.
[tool.coverage.report]
# Same file list as the `omit` setting in [tool.coverage.run].
omit = [
    "*/tests/*",
    "*/__pycache__/*",
    "src/myspellchecker/training/trainer.py",
    "src/myspellchecker/algorithms/pos_tagger_transformer.py",
    "src/myspellchecker/text/ner_model.py",
]
# Regexes for lines excluded from the report.
exclude_lines = [
    "pragma: no cover",
    "raise NotImplementedError",
    "if TYPE_CHECKING:",
    # The dots stand in for the quote characters around __main__.
    "if __name__ == .__main__.:",
]
# Ruff settings shared by the linter and the formatter.
[tool.ruff]
target-version = "py310"
line-length = 100
# Directories skipped in addition to Ruff's default exclusions.
extend-exclude = [
    "scripts",
    "plan",
]
# Ruff lint rule selection.
[tool.ruff.lint]
select = [
    "E",  # pycodestyle errors
    "F",  # Pyflakes
    "W",  # pycodestyle warnings
    "I",  # isort
    "B",  # bugbear
]
ignore = []
# Underscore-prefixed names count as dummy variables, so they may go unused.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

# These test modules call pytest.importorskip() at module level, so their
# remaining imports have to come after the skip guard (E402).
[tool.ruff.lint.per-file-ignores]
"tests/test_frequency_builder.py" = ["E402"]
"tests/test_data_pipeline.py" = ["E402"]
"tests/test_db_build_and_usage.py" = ["E402"]
"tests/test_incremental_build.py" = ["E402"]
"tests/test_pos_backward_compat.py" = ["E402"]
"tests/test_disk_space.py" = ["E402"]
# Ruff formatter settings.
[tool.ruff.format]
indent-style = "space"
quote-style = "double"
line-ending = "auto"
# false: a trailing comma written in the source is not skipped by the formatter.
skip-magic-trailing-comma = false
# mypy settings. Untyped and partially typed definitions are allowed, but the
# bodies of untyped functions are still checked.
[tool.mypy]
python_version = "3.10"
check_untyped_defs = true
disallow_untyped_defs = false
disallow_incomplete_defs = false
warn_return_any = true
warn_unused_configs = true
# Applies to every module, not just the ones named in the overrides table.
ignore_missing_imports = true
# Regexes for paths left out of type checking.
exclude = [
    "^tests/",
    "^benchmarks/",
    "^venv",
]
# Per-module override: unresolved imports from these third-party packages are ignored.
# NOTE(review): [tool.mypy] already sets ignore_missing_imports = true globally,
# so this override looks redundant — confirm before removing it.
[[tool.mypy.overrides]]
ignore_missing_imports = true
module = [
    "pycrfsuite.*",
    "myanmartools.*",
    "duckdb.*",
    "pyarrow.*",
    "onnxruntime.*",
    "transformers.*",
    "torch.*",
    "cached_path.*",
]