diff --git a/Makefile b/Makefile index 4f99b2b..537d29c 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,6 @@ check: test: uv run --group tests --locked pytest - uvx --with defusedxml genbadge coverage --input-file coverage.xml --silent doc: uv run --group docs --locked sphinx-build -M html docs/source docs/build diff --git a/README.md b/README.md index e03907b..a35b328 100644 --- a/README.md +++ b/README.md @@ -8,28 +8,21 @@ [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](LICENSE) [![Contributions Welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/jxmorris12/language_tool_python/pulls) -`language_tool_python` is a Python interface/wrapper to [LanguageTool](https://languagetool.org), an open-source grammar, style, and spell checker. +`language_tool_python` is a Python wrapper for [LanguageTool](https://github.com/languagetool-org/languagetool), +a free, multilingual, non-AI, open-source grammar, style, and spell checker. This Python wrapper lets you detect and fix errors from +a Python script or from the command line, against a local Java server, the public +LanguageTool API, or your own remote server. -It can: -- run a local LanguageTool Java server, -- call LanguageTool public API, -- call your own remote LanguageTool server, -- be used from Python code and from a CLI. - -Default local download target: LanguageTool `6.8`. - -## Documentation - -- Docs: -- Changelog: [CHANGELOG.md](CHANGELOG.md) -- Contributing: [CONTRIBUTING.md](CONTRIBUTING.md) +

+ <img src="docs/assets/video/language_tool_python_demo.gif" alt="Demo"> +

## Requirements - Python `>=3.10` (tested up to 3.15) -- Java (to run local LanguageTool server): - - LanguageTool `< 6.6`: Java `>=9` - - LanguageTool `>= 6.6` (default): Java `>=17` +- Java `>=17` to run a local LanguageTool server (default download: `6.8`). See the + [installation docs](https://language-tool-python.readthedocs.io/en/latest/references/installation.html) + for full Java version details. ## Installation @@ -45,378 +38,77 @@ pip install --upgrade language_tool_python import language_tool_python with language_tool_python.LanguageTool("en-US") as tool: - text = "A sentence with a error in the Hitchhiker's Guide tot he Galaxy" - matches = tool.check(text) - print(matches) - print(tool.correct(text)) -``` - -### Public LanguageTool API - -```python -import language_tool_python + matches = tool.check("A sentence with a error in the Hitchhiker's Guide tot he Galaxy") -with language_tool_python.LanguageToolPublicAPI("es") as tool: - matches = tool.check("Se a hecho un esfuerzo.") - print(matches) +print(len(matches)) +# → 2 +print(matches[0].message) +# → 'Use "an" instead of "a" if the following word starts with a vowel sound' +print(matches[0].replacements) +# → ['an'] +print(matches[0].offset) +# → 16 ``` -#### Public LanguageTool API with an API key - -If you have a LanguageTool API key, set `premium_key` before calling `check()`: +### Public LanguageTool API ```python -import os - import language_tool_python with language_tool_python.LanguageToolPublicAPI("en-US") as tool: - tool.premium_key = os.environ["LANGUAGETOOL_API_KEY"] - matches = tool.check("This are bad.") - print(matches) -``` - -The key is sent to the public API as `apiKey` for each request. 
- -### Your own remote LanguageTool server - -```python -import language_tool_python - -with language_tool_python.LanguageTool( - "en-US", - remote_server="https://your-lt-server.example.com", -) as tool: - print(tool.check("This are bad.")) -``` - -## Constructor Parameters Worth Knowing - -### `language_tool_download_version` (local server only) - -Use this parameter to force which LanguageTool package is used when running a local server. - -```python -import language_tool_python + matches = tool.check("This are wrong.") -with language_tool_python.LanguageTool( - "en-US", - language_tool_download_version="6.7", -) as tool: - print(tool.check("This are bad.")) +print(len(matches)) +# → 2 ``` -Accepted formats: -- `latest`: latest snapshot available from the snapshot server -- `YYYYMMDD`: snapshot by date (example: `20260201`) -- `X.Y`: release version (default: `6.8`. Examples: `6.7`, `4.0`) - -Notes: -- Only relevant when using a local server (no `remote_server`). -- Versions below `4.0` are not supported. - -### `proxies` (remote server only) - -Use this parameter to pass proxy settings to `requests` when calling a remote LanguageTool server. +### Your own remote LanguageTool server ```python import language_tool_python with language_tool_python.LanguageTool( "en-US", - remote_server="https://your-lt-server.example.com", - proxies={ - "http": "http://proxy.example.com:8080", - "https": "http://proxy.example.com:8080", - }, + remote_server="http://my-languagetool-server:8081", ) as tool: - print(tool.check("This are bad.")) -``` - -Notes: -- `proxies` works only with `remote_server`. -- Passing `proxies` without `remote_server` raises `ValueError`. - -## Core Python API - -### Check text - -```python -matches = tool.check("This is noot okay.") + print(tool.correct("I has a problem.")) + # → I have a problem. 
``` -Each item is a `Match` object with these fields: -- `rule_id` -- `message` -- `replacements` -- `offset_in_context`, `context`, `offset`, `error_length` -- `category`, `rule_issue_type` -- `sentence` - -### Auto-correct - -```python -corrected = tool.correct("This is noot okay.") -# Uses first suggestion for each match -``` - -### Apply only selected matches - -```python -text = "There is a bok on the table." -matches = tool.check(text) - -# Keep a specific suggestion for first match -matches[0].select_replacement(2) - -patched = language_tool_python.utils.correct(text, matches) -``` - -### Check only parts matching a regex - -```python -matches = tool.check_matching_regions( - 'He said "I has a problem" but she replied "It are fine".', - r'"[^"]*"', -) -``` - -### Classify result quality - -```python -from language_tool_python.utils import classify_matches - -status = classify_matches(tool.check("This is a cats.")) -# TextStatus.CORRECT / TextStatus.FAULTY / TextStatus.GARBAGE -``` - -## Rule and Language Controls - -You can tune checks per instance: - -```python -tool.language = "en" # Can also be set from constructor (`LanguageTool("en")`) -tool.mother_tongue = "fr" # Can also be set from constructor (`LanguageTool("en", mother_tongue="fr")`) - -tool.disabled_rules.update({"MORFOLOGIK_RULE_EN_US"}) -tool.enabled_rules.update({"EN_A_VS_AN"}) -tool.enabled_rules_only = False - -tool.disabled_categories.update({"CASING"}) -tool.enabled_categories.update({"GRAMMAR"}) - -tool.preferred_variants.update({"en-GB"}) -tool.picky = True -``` - -Spellchecking control: - -```python -tool.disable_spellchecking() -tool.enable_spellchecking() - -# Equivalent to: -tool.disabled_categories.update({"TYPOS"}) -tool.disabled_categories.difference_update({"TYPOS"}) -``` - -## Custom Spellings - -You can register domain-specific words: - -```python -with language_tool_python.LanguageTool( - "en-US", - new_spellings=["my_product_name", "my_team_term"], - 
new_spellings_persist=False, -) as tool: - print(tool.check("my_product_name is released")) -``` - -- `new_spellings_persist=True` (default): keeps words in the local LT spelling file. -- `new_spellings_persist=False`: session-only, words are removed on `close()`. - -## Local Server Configuration (`config=`) - -For local servers only, pass a config dictionary. Example: - -```python -with language_tool_python.LanguageTool( - "en-US", - config={ - "cacheSize": 1000, - "pipelineCaching": True, - "maxTextLength": 50000, - }, -) as tool: - print(tool.check("Text to inspect")) -``` - -Supported keys: -- `maxTextLength`, `maxTextHardLength`, `maxCheckTimeMillis` -- `maxErrorsPerWordRate`, `maxSpellingSuggestions`, `maxCheckThreads` -- `cacheSize`, `cacheTTLSeconds` -- `requestLimit`, `requestLimitInBytes`, `timeoutRequestLimit`, `requestLimitPeriodInSeconds` -- `languageModel`, `fasttextModel`, `fasttextBinary` -- `maxWorkQueueSize`, `rulesFile`, `blockedReferrers` -- `premiumOnly`, `disabledRuleIds` -- `pipelineCaching`, `maxPipelinePoolSize`, `pipelineExpireTimeInSeconds`, `pipelinePrewarming` -- `trustXForwardForHeader`, `suggestionsEnabled` -- spellcheck-only language keys: - - `lang-` - - `lang--dictPath` - -Notes: -- `remote_server` and `config` cannot be used together. -- `proxies` can only be used with `remote_server`. - -## CLI - -Entry point: +### CLI ```bash -language_tool_python [OPTIONS] FILE [FILE ...] -``` - -Use `-` as file to read from stdin. - -Examples: - -```bash -# Check a file -language_tool_python -l en-US README.md - -# Check stdin echo "This are bad." 
| language_tool_python -l en-US - - -# Auto-apply suggestions language_tool_python -l en-US --apply input.txt - -# Use only selected rules -language_tool_python -l en-US --enabled-only --enable MORFOLOGIK_RULE_EN_US input.txt - -# Use remote LT server -language_tool_python -l en-US --remote-host 127.0.0.1 --remote-port 8081 input.txt -``` - -Main options: -- `-l, --language CODE` -- `-m, --mother-tongue CODE` -- `-d, --disable RULES` -- `-e, --enable RULES` -- `--enabled-only` -- `-p, --picky` -- `-a, --apply` -- `-s, --spell-check-off` -- `--ignore-lines REGEX` -- `--remote-host HOST`, `--remote-port PORT` -- `-c, --encoding` -- `--verbose` -- `--version` - -Exit codes: -- `0`: no issues -- `2`: issues found - -## Environment Variables - -- `LTP_PATH`: directory used to store downloaded LanguageTool packages. - - default: `~/.cache/language_tool_python/` -- `LTP_JAR_DIR_PATH`: use an existing local LanguageTool directory (skip download). -- `LTP_DOWNLOAD_HOST_SNAPSHOT`: override snapshot download host. - - default: `https://internal1.languagetool.org/snapshots/` -- `LTP_DOWNLOAD_HOST_NEW_RELEASES`: override release download host for LanguageTool `>= 6.7`. - - default: `https://github.com/jxmorris12/language_tool_python/releases/download/LanguageTool-{version}/` -- `LTP_DOWNLOAD_HOST_RELEASE`: override release download host for LanguageTool `6.0` to `6.6`. - - default: `https://languagetool.org/download/` -- `LTP_DOWNLOAD_HOST_ARCHIVE`: override archive download host for LanguageTool `4.0` to `5.9`. - - default: `https://languagetool.org/download/archive/` -- `LTP_DOWNLOAD_SHA256_`: version-specific expected SHA-256 for the downloaded LanguageTool archive, for example `LTP_DOWNLOAD_SHA256_6_9_SNAPSHOT`. -- `LTP_DOWNLOAD_SHA256`: fallback expected SHA-256 for the downloaded LanguageTool archive. -- `LTP_BYPASS_VERIFIED_DOWNLOADS`: set to `true` to skip SHA-256 verification. -- `LTP_MAX_DOWNLOAD_BYTES`: maximum downloaded ZIP size in bytes. 
- - default: `536870912` (512 MiB) -- `LTP_SAFE_ZIP_MAX_ARCHIVE_BYTES`: maximum total compressed member size in bytes. - - default: `536870912` (512 MiB) -- `LTP_SAFE_ZIP_MAX_EXTRACTED_BYTES`: maximum total extracted size in bytes. - - default: `805306368` (768 MiB) -- `LTP_SAFE_ZIP_MAX_MEMBERS`: maximum ZIP member count. - - default: `5000` -- `LTP_SAFE_ZIP_MAX_MEMBER_EXTRACTED_BYTES`: maximum extracted size for a single ZIP member in bytes. - - default: `134217728` (128 MiB) -- `LTP_SAFE_ZIP_MAX_MEMBER_COMPRESSION_RATIO`: maximum compression ratio for a single ZIP member. - - default: `100.0` -- `LTP_SAFE_ZIP_MAX_TOTAL_COMPRESSION_RATIO`: maximum compression ratio for the whole ZIP archive. - - default: `10.0` - -Downloaded zips are verified with SHA-256 when a checksum is available. Checksums are resolved in this order: -1. `LTP_DOWNLOAD_SHA256_`, where non-alphanumeric characters in the version are replaced with `_` and the name is uppercased. -2. `LTP_DOWNLOAD_SHA256`. -3. The bundled `language_tool_python/_ressources/integrity.toml` manifest. - -The bundled manifest covers release/archive downloads. Snapshots are not stable, so provide `LTP_DOWNLOAD_SHA256_` or `LTP_DOWNLOAD_SHA256` if you want to verify a snapshot. If no checksum is available, the download proceeds without SHA-256 verification. - -Example: - -```bash -export LTP_PATH=/path/to/cache -export LTP_JAR_DIR_PATH=/path/to/LanguageTool-6.8 -export LTP_DOWNLOAD_SHA256_6_8= -# export LTP_BYPASS_VERIFIED_DOWNLOADS=true ``` -## Resource Management - -When using a local server, prefer a context manager or explicit `close()`: - -```python -with language_tool_python.LanguageTool("en-US") as tool: - ... - -# or -tool = language_tool_python.LanguageTool("en-US") -... -tool.close() -``` +For the full API reference, configuration options, CLI usage, and more examples, see the +[documentation](https://language-tool-python.readthedocs.io/en/latest/). 
-## Client/Server Pattern +## Documentation -You can run LT on one process/host and connect from another client: +- **Docs**: <https://language-tool-python.readthedocs.io/en/latest/> +- **Changelog**: [CHANGELOG.md](CHANGELOG.md) +- **Contributing**: [CONTRIBUTING.md](CONTRIBUTING.md) +- **Security**: [SECURITY.md](SECURITY.md) -```python -# Server side -server_tool = language_tool_python.LanguageTool("en-US") +## Versioning -# Client side -client_tool = language_tool_python.LanguageTool( - "en-US", - remote_server=f"http://127.0.0.1:{server_tool.port}", -) -``` +This project follows [Semantic Versioning](https://semver.org/) (`MAJOR.MINOR.PATCH`): -## Error Types +- **PATCH** - backwards-compatible bug fixes. +- **MINOR** - new backwards-compatible features. +- **MAJOR** - breaking changes. Deprecated APIs are removed at the next major version. -Main exceptions in `language_tool_python.exceptions`: -- `LanguageToolError` - - `ServerError` - - `JavaError` - - `PathError` - - `RateLimitError` +Versions are tagged in the git repository and can be found on [PyPI](https://pypi.org/project/language-tool-python/#history). 
## Development ```bash -# Install dev dependencies -make install - -# Format code -make format - -# Lint / format / types -make check - -# Tests -make test +make install # Install dev dependencies +make format # Format code +make check # Lint / format / types +make test # Tests ``` ## License diff --git a/docs/assets/video/language_tool_python_demo.gif b/docs/assets/video/language_tool_python_demo.gif new file mode 100644 index 0000000..ecfee12 Binary files /dev/null and b/docs/assets/video/language_tool_python_demo.gif differ diff --git a/docs/assets/video/language_tool_python_demo.tape b/docs/assets/video/language_tool_python_demo.tape new file mode 100644 index 0000000..40b1d72 --- /dev/null +++ b/docs/assets/video/language_tool_python_demo.tape @@ -0,0 +1,73 @@ +Output language_tool_python_demo.gif + +Set Shell "bash" +Set FontSize 18 +Set Width 1200 +Set Height 750 +Set Theme "Dracula" +Set Padding 20 +Set TypingSpeed 50ms +Set PlaybackSpeed 1.0 + +Hide +# Need a venv with language_tool_python in current dir +Type "source venv/bin/activate" +Enter + +Type "clear" +Enter + +Type "python3" +Enter +Wait@30s />>>\s*$/ +Show + +Type "import language_tool_python" +Enter +Wait@30s />>>\s*$/ + +Type `tool = language_tool_python.LanguageTool("en-US")` +Enter +Wait@30s />>>\s*$/ + +Type `text = "I wentt to new york last week."` +Enter +Wait@30s />>>\s*$/ + +Type "tool.check(text)" +Enter +Wait@30s />>>\s*$/ + +Type "tool.correct(text)" +Enter +Wait@30s />>>\s*$/ + +Type "tool.close()" +Enter +Wait@30s />>>\s*$/ + +Type `tool_fr = language_tool_python.LanguageTool("fr")` +Enter +Wait@30s />>>\s*$/ + +Type `tool_fr.correct("Je suis aller au marché hier.")` +Enter +Wait@60s />>>\s*$/ +Sleep 10s + +Hide +Type "exit()" +Enter + +Type "clear" +Enter +Show + +Type "cat sample.txt" +Enter +Wait@60s />\s*$/ + +Type "language_tool_python -l en-US --apply sample.txt" +Enter +Wait@60s />\s*$/ +Sleep 10s diff --git a/docs/assets/video/sample.txt b/docs/assets/video/sample.txt new 
file mode 100644 index 0000000..e21f77b --- /dev/null +++ b/docs/assets/video/sample.txt @@ -0,0 +1,3 @@ +This are a sentence with a error in it. +She don't likes pizza. +He go to school every days. \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index c048b02..5971f4d 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -7,24 +7,32 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information project = "language_tool_python" -copyright = "2026, jxmorris12" # Keep in sync with LICENSE +copyright = "2026, jxmorris12" author = "jxmorris12" release = "3.4.0" # Keep in sync with pyproject.toml # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = ["sphinx.ext.autodoc", "sphinx_design"] +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", + "sphinx_design", +] templates_path = ["_templates"] -exclude_patterns = [] +exclude_patterns: list[str] = [] +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), +} # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output html_theme = "furo" -html_static_path = ["_static"] +html_static_path: list[str] = [] # -- Options for autodoc ----------------------------------------------------- @@ -34,3 +42,5 @@ "private-members": False, "show-inheritance": True, } + +autodoc_member_order = "bysource" diff --git a/docs/source/index.rst b/docs/source/index.rst index 87e1afd..d9a4024 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,21 +1,24 @@ -.. language_tool_python documentation master file, created by - sphinx-quickstart on Wed Nov 12 18:23:43 2025. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. 
- language_tool_python documentation -================================== +==================================== -`language_tool_python` is a Python wrapper for `LanguageTool `_. LanguageTool is an open-source grammar tool, also known as the spellchecker for OpenOffice. This library allows you to detect grammar errors and spelling mistakes through a Python script or through a command-line interface. +`language_tool_python` is a Python wrapper for `LanguageTool <https://github.com/languagetool-org/languagetool>`_, +a free, multilingual, non-AI, open-source grammar, style, and spell checker. This Python wrapper lets you detect and fix errors from +a Python script or from the command line, against a local Java server, the public +LanguageTool API, or your own remote server. -In this documentation ---------------------- +.. image:: ../assets/video/language_tool_python_demo.gif + :align: center + :width: 100% + :alt: Demo -* :doc:`References <./references/language_tool_python>` -* :doc:`Modules <./references/modules>` +- `PyPI <https://pypi.org/project/language-tool-python/>`_ +- `GitHub <https://github.com/jxmorris12/language_tool_python>`_ +- `Changelog <https://github.com/jxmorris12/language_tool_python/blob/master/CHANGELOG.md>`_ +In this documentation +--------------------- .. toctree:: - :maxdepth: 2 - :caption: Contents: + :maxdepth: 3 + references/modules diff --git a/docs/source/references/advanced.rst b/docs/source/references/advanced.rst new file mode 100644 index 0000000..1a74c01 --- /dev/null +++ b/docs/source/references/advanced.rst @@ -0,0 +1,144 @@ +Advanced usage +============== + +This page covers advanced usage patterns that go beyond the basic check/correct +workflow. + +.. _pinning-lt-version: + +Pinning the LanguageTool version +--------------------------------- + +By default, ``language_tool_python`` downloads LanguageTool +:data:`~language_tool_python.download_lt.LTP_DOWNLOAD_VERSION`. Use the +``language_tool_download_version`` parameter to force a specific version, useful for +reproducible results or testing against a particular release: + +.. 
code-block:: python + + import language_tool_python + + with language_tool_python.LanguageTool( + "en-US", + language_tool_download_version="6.7", + ) as tool: + matches = tool.check("A sentence with a error in the Hitchhiker's Guide tot he Galaxy") + + print(matches[0].message) + # → Use “an” instead of ‘a’ if the following word starts with a vowel sound, e.g. ‘an article’, ‘an hour’. + +Accepted formats: + +- ``"X.Y"`` - a release version (e.g. ``"6.7"``, ``"4.0"``). Versions below ``4.0`` + are not supported. +- ``"YYYYMMDD"`` - a snapshot identified by date (e.g. ``"20260201"``). +- ``"latest"`` - the most recent snapshot. + +Authenticating with a premium API key +-------------------------------------- + +If you have a LanguageTool premium key, assign it to +:attr:`~language_tool_python.server.LanguageTool.premium_key` before calling +:meth:`~language_tool_python.server.LanguageTool.check`. The key is forwarded to the +public API as the ``apiKey`` parameter: + +.. code-block:: python + + import os + import language_tool_python + + with language_tool_python.LanguageToolPublicAPI("en-US") as tool: + tool.premium_key = os.environ["LANGUAGETOOL_API_KEY"] + print(tool.correct("A sentence with a error in the Hitchhiker's Guide tot he Galaxy")) + # → A sentence with an error in the Hitchhiker's Guide to the Galaxy + +Client/server pattern +---------------------- + +You can start the LanguageTool Java process once and connect to it from multiple client +instances, avoiding the per-check startup overhead: + +.. 
code-block:: python + + import language_tool_python + + # Start the server once, this launches the Java process + server_tool = language_tool_python.LanguageTool("en-US") + + # Connect as a lightweight client using the server's port + client_tool = language_tool_python.LanguageTool( + "en-US", + remote_server=f"http://127.0.0.1:{server_tool.port}", + ) + + print(client_tool.correct("A sentence with a error in the Hitchhiker's Guide tot he Galaxy")) + # → A sentence with an error in the Hitchhiker's Guide to the Galaxy + server_tool.close() + +This pattern is also useful to share a single server across multiple threads or +processes. + +Resource management +-------------------- + +When using a local server, the LanguageTool Java process must be terminated explicitly. +The recommended approach is a context manager, which calls +:meth:`~language_tool_python.server.LanguageTool.close` automatically on exit: + +.. code-block:: python + + import language_tool_python + + with language_tool_python.LanguageTool("en-US") as tool: + print(tool.correct("A sentence with a error in the Hitchhiker's Guide tot he Galaxy")) + # → A sentence with an error in the Hitchhiker's Guide to the Galaxy + # Java process is terminated here + +For longer-lived instances, call +:meth:`~language_tool_python.server.LanguageTool.close` explicitly: + +.. code-block:: python + + import language_tool_python + + tool = language_tool_python.LanguageTool("en-US") + try: + print(tool.correct("A sentence with a error in the Hitchhiker's Guide tot he Galaxy")) + # → A sentence with an error in the Hitchhiker's Guide to the Galaxy + finally: + tool.close() + +.. warning:: + + Forgetting to call ``close()`` (or not using a context manager) leaves the Java + process running until the Python interpreter exits. 
+ +Error handling +-------------- + +All library exceptions inherit from +:class:`~language_tool_python.exceptions.LanguageToolError`, so a single ``except`` +clause is enough to catch any library error: + +.. code-block:: python + + import language_tool_python + from language_tool_python.exceptions import LanguageToolError + + try: + with language_tool_python.LanguageTool("en-US") as tool: + print(tool.correct("A sentence with a error in the Hitchhiker's Guide tot he Galaxy")) + # → A sentence with an error in the Hitchhiker's Guide to the Galaxy + except LanguageToolError as exc: + print(f"LanguageTool error: {exc}") + +More specific exception classes (all in :mod:`language_tool_python.exceptions`): + +- :class:`~language_tool_python.exceptions.ServerError` - the Java server failed to + start or crashed. +- :class:`~language_tool_python.exceptions.JavaError` - Java is not installed or the + version is incompatible. +- :class:`~language_tool_python.exceptions.PathError` - a path-like config value does + not point to an existing file. +- :class:`~language_tool_python.exceptions.RateLimitError` - the public API rate limit + was exceeded. diff --git a/docs/source/references/cli.rst b/docs/source/references/cli.rst new file mode 100644 index 0000000..f88ac73 --- /dev/null +++ b/docs/source/references/cli.rst @@ -0,0 +1,101 @@ +Command-line interface +====================== + +``language_tool_python`` can be invoked directly from the command line, without writing +any Python code. + +Usage +----- + +.. code-block:: text + + language_tool_python [OPTIONS] FILE [FILE ...] + +Use ``-`` as the file argument to read from stdin. + +Options +------- + +.. list-table:: + :header-rows: 1 + :widths: 35 65 + + * - Option + - Description + * - ``FILE [FILE ...]`` + - One or more plain-text files to check. Use ``-`` to read from stdin. + * - ``-c, --encoding ENCODING`` + - Input file encoding (default: system locale). 
+ * - ``-l, --language CODE`` + - BCP 47 language code to use (e.g. ``en-US``, ``de-DE``). Pass ``auto`` for + automatic language detection. + * - ``-m, --mother-tongue CODE`` + - First-language code. Enables *false-friend* detection between the target language + and the mother tongue. + * - ``-d, --disable RULES`` + - Comma-separated list of rule IDs to disable. + * - ``-e, --enable RULES`` + - Comma-separated list of rule IDs to enable. + * - ``--enabled-only`` + - Run only the rules listed with ``--enable``, ignoring all others. + * - ``-p, --picky`` + - Enable stricter (picky) checking mode. + * - ``-a, --apply`` + - Automatically apply the first suggestion for each match and print the corrected + text. + * - ``-s, --spell-check-off`` + - Disable all spell-checking rules. + * - ``--ignore-lines REGEX`` + - Skip lines that match the given regular expression. + * - ``--remote-host HOST`` + - Hostname of a remote LanguageTool server to connect to instead of starting a + local one. + * - ``--remote-port PORT`` + - Port of the remote LanguageTool server. + * - ``--verbose`` + - Enable debug logging. + * - ``--version`` + - Print the ``language_tool_python`` version and exit. + +Exit codes +---------- + +.. list-table:: + :header-rows: 1 + :widths: 15 85 + + * - Code + - Meaning + * - ``0`` + - No issues found. + * - ``2`` + - At least one issue was found. + +Examples +-------- + +.. code-block:: bash + + # Check a file + language_tool_python -l en-US README.md + + # Check stdin + echo "This are bad." 
| language_tool_python -l en-US - + + # Auto-apply suggestions and print the result + language_tool_python -l en-US --apply input.txt + + # Disable spell checking + language_tool_python -l en-US --spell-check-off input.txt + + # Disable specific rules + language_tool_python -l en-US -d RULE_ID1,RULE_ID2 input.txt + + # Run only one specific rule + language_tool_python -l en-US --enabled-only -e MORFOLOGIK_RULE_EN_US input.txt + + # Use a remote LanguageTool server + language_tool_python -l en-US --remote-host 127.0.0.1 --remote-port 8081 input.txt + + # Picky mode with mother-tongue detection + language_tool_python -l de-DE -m en --picky input.txt diff --git a/docs/source/references/config.rst b/docs/source/references/config.rst new file mode 100644 index 0000000..eddcd7d --- /dev/null +++ b/docs/source/references/config.rst @@ -0,0 +1,222 @@ +Local server configuration +========================== + +When using a local LanguageTool server you can tune its behaviour by passing a ``config`` +dictionary to :class:`~language_tool_python.server.LanguageTool`. Internally, the +dictionary is validated and written to a temporary ``*.cfg`` file that is passed to the +Java process via ``--config``. + +.. note:: + + ``config`` is only available for local servers. Combining ``config`` with + ``remote_server`` raises ``ValueError``. + +Quick example +------------- + +.. code-block:: python + + import language_tool_python + + with language_tool_python.LanguageTool( + "en-US", + config={ + "cacheSize": 1000, + "cacheTTLSeconds": 300, + "maxTextLength": 100_000, + "pipelineCaching": True, + }, + ) as tool: + print(tool.correct("A sentence with a error in the Hitchhiker's Guide tot he Galaxy")) + # → A sentence with an error in the Hitchhiker's Guide to the Galaxy + +Accepted keys +------------- + +Limits +~~~~~~ + +.. 
list-table:: + :header-rows: 1 + :widths: 35 15 50 + + * - Key + - Type + - Description + * - ``maxTextLength`` + - ``int`` + - Maximum number of characters accepted per request. Requests exceeding this limit + are rejected. + * - ``maxTextHardLength`` + - ``int`` + - Hard character limit that applies even to privileged users with a special token. + Requests exceeding this limit are rejected. + * - ``maxCheckTimeMillis`` + - ``int`` + - Maximum time in milliseconds allowed for a single check request. + * - ``maxErrorsPerWordRate`` + - ``int | float`` + - If the ratio of errors to words exceeds this value, the check is aborted. + * - ``maxSpellingSuggestions`` + - ``int`` + - Maximum number of spelling suggestions returned per error. Applies to + Hunspell-based languages only. + * - ``maxCheckThreads`` + - ``int`` + - Maximum number of threads used concurrently for checking. + * - ``maxWorkQueueSize`` + - ``int`` + - Maximum number of requests that can queue up before new requests are rejected. + +Rate limiting +~~~~~~~~~~~~~ + +.. list-table:: + :header-rows: 1 + :widths: 35 15 50 + + * - Key + - Type + - Description + * - ``requestLimit`` + - ``int`` + - Maximum number of requests allowed within ``requestLimitPeriodInSeconds``. + * - ``requestLimitInBytes`` + - ``int`` + - Maximum total request body size in bytes within the rate-limit window. + * - ``timeoutRequestLimit`` + - ``int`` + - Maximum number of timed-out requests before the server starts rejecting new ones. + * - ``requestLimitPeriodInSeconds`` + - ``int`` + - Duration of the rate-limiting window in seconds. + +Pipeline caching +~~~~~~~~~~~~~~~~ + +.. list-table:: + :header-rows: 1 + :widths: 35 15 50 + + * - Key + - Type + - Description + * - ``cacheSize`` + - ``int`` + - Number of sentences to keep in the internal cache (default: 0, disabled). + * - ``cacheTTLSeconds`` + - ``int`` + - How many seconds sentences are kept in the cache (default: 300 if ``cacheSize`` + is set). 
+ * - ``pipelineCaching`` + - ``bool | int`` + - Enable internal pipeline caching for faster repeated checks. + * - ``maxPipelinePoolSize`` + - ``int`` + - Maximum number of cached pipelines. + * - ``pipelineExpireTimeInSeconds`` + - ``int`` + - Expiry time for cached pipelines in seconds. + * - ``pipelinePrewarming`` + - ``bool | int`` + - Fill the pipeline cache on startup to reduce first-request latency. Can + significantly slow down server start. + +External models +~~~~~~~~~~~~~~~ + +All path values must point to existing files or directories; each path is validated when +:class:`~language_tool_python.config_file.LanguageToolConfig` is instantiated. + +.. list-table:: + :header-rows: 1 + :widths: 35 15 50 + + * - Key + - Type + - Description + * - ``languageModel`` + - ``str | Path`` + - Path to a directory containing ``1grams``, ``2grams``, and ``3grams`` + sub-directories (one Lucene index each) per language. Activates the confusion + rule for supported languages. + * - ``fasttextModel`` + - ``str | Path`` + - Path to a fastText language-identification model file. + * - ``fasttextBinary`` + - ``str | Path`` + - Path to the fastText binary executable. + * - ``rulesFile`` + - ``str | Path`` + - Path to an XML file containing custom rules. + +Access control +~~~~~~~~~~~~~~ + +.. list-table:: + :header-rows: 1 + :widths: 35 15 50 + + * - Key + - Type + - Description + * - ``blockedReferrers`` + - ``str | list | tuple | set`` + - Comma-separated referrer URLs (or a collection) that are blocked from using the + server. + * - ``premiumOnly`` + - ``bool | int`` + - Activate only the premium rules, ignoring all free rules. + * - ``trustXForwardForHeader`` + - ``bool | int`` + - Trust the ``X-Forwarded-For`` header for rate limiting (use only behind a + trusted reverse proxy). + +Miscellaneous +~~~~~~~~~~~~~ + +.. 
list-table:: + :header-rows: 1 + :widths: 35 15 50 + + * - Key + - Type + - Description + * - ``disabledRuleIds`` + - ``str | list | tuple | set`` + - Comma-separated rule IDs (or a collection) that are disabled globally for all + requests. + * - ``suggestionsEnabled`` + - ``bool | int`` + - Whether to compute replacement suggestions. Disabling this speeds up checking + when suggestions are not needed. + +Language-specific keys +---------------------- + +In addition to the keys above, you can configure per-language spell-checking by using +keys of the form ``lang-<code>`` or ``lang-<code>-dictPath``: + +.. list-table:: + :header-rows: 1 + :widths: 35 15 50 + + * - Key pattern + - Type + - Description + * - ``lang-<code>`` + - ``str`` + - Display name of the language (e.g. ``lang-tr=Turkish``). Registers a + spellcheck-only language that LT does not natively support. + * - ``lang-<code>-dictPath`` + - ``str | Path`` + - Absolute path to the Hunspell ``.dic`` file for the given language code + (e.g. ``lang-tr-dictPath``). The same directory must also contain a + ``common_words.txt`` file listing the 10,000 most common words (used for + language detection). The path must point to an existing file. + +API reference +------------- + +See :class:`~language_tool_python.config_file.LanguageToolConfig` for the full class +documentation. diff --git a/docs/source/references/env_vars.rst b/docs/source/references/env_vars.rst new file mode 100644 index 0000000..08c66fd --- /dev/null +++ b/docs/source/references/env_vars.rst @@ -0,0 +1,110 @@ +Environment variables +===================== + +The following environment variables control runtime behaviour without requiring code +changes. + +Download and cache +------------------ + +.. list-table:: + :header-rows: 1 + :widths: 45 55 + + * - Variable + - Description + * - ``LTP_PATH`` + - Directory used to store downloaded LanguageTool packages. + Default: ``~/.cache/language_tool_python/``. 
+ * - ``LTP_JAR_DIR_PATH`` + - Path to an existing local LanguageTool installation directory. When set, the + automatic download is skipped entirely. + * - ``LTP_DOWNLOAD_HOST_SNAPSHOT`` + - Override the snapshot download host. + Default: ``https://internal1.languagetool.org/snapshots/``. + * - ``LTP_DOWNLOAD_HOST_NEW_RELEASES`` + - Override the release download host for LanguageTool ≥ 6.7. + Default: ``https://github.com/jxmorris12/language_tool_python/releases/download/LanguageTool-{version}/``. + * - ``LTP_DOWNLOAD_HOST_RELEASE`` + - Override the release download host for LanguageTool 6.0–6.6. + Default: ``https://languagetool.org/download/``. + * - ``LTP_DOWNLOAD_HOST_ARCHIVE`` + - Override the archive download host for LanguageTool 4.0–5.9. + Default: ``https://languagetool.org/download/archive/``. + * - ``LTP_MAX_DOWNLOAD_BYTES`` + - Maximum ZIP download size in bytes. + Default: ``536870912`` (512 MiB). + +Integrity verification +---------------------- + +Downloaded ZIPs are verified with SHA-256 when a checksum is available. Checksums are +resolved in this order: + +1. ``LTP_DOWNLOAD_SHA256_{VERSION}`` - version-specific checksum. Non-alphanumeric + characters in the version string are replaced with ``_`` and the result is uppercased + (e.g. ``LTP_DOWNLOAD_SHA256_6_8`` for version ``6.8``). +2. ``LTP_DOWNLOAD_SHA256`` - fallback checksum applied to any version. +3. The bundled ``language_tool_python/_ressources/integrity.toml`` manifest, which + covers release and archive downloads. Snapshots are not included. + +If none of the above resolves to a checksum, the download proceeds without verification. + +.. list-table:: + :header-rows: 1 + :widths: 45 55 + + * - Variable + - Description + * - ``LTP_DOWNLOAD_SHA256_{VERSION}`` + - Expected SHA-256 for a specific LanguageTool version + (e.g. ``LTP_DOWNLOAD_SHA256_6_8`` for version ``6.8``). + * - ``LTP_DOWNLOAD_SHA256`` + - Fallback SHA-256 for any downloaded archive. 
+ * - ``LTP_BYPASS_VERIFIED_DOWNLOADS`` + - Set to ``true`` to skip SHA-256 verification entirely. + +Safe ZIP extraction limits +-------------------------- + +.. list-table:: + :header-rows: 1 + :widths: 45 55 + + * - Variable + - Description + * - ``LTP_SAFE_ZIP_MAX_ARCHIVE_BYTES`` + - Maximum total compressed size in bytes. + Default: ``536870912`` (512 MiB). + * - ``LTP_SAFE_ZIP_MAX_EXTRACTED_BYTES`` + - Maximum total extracted size in bytes. + Default: ``805306368`` (768 MiB). + * - ``LTP_SAFE_ZIP_MAX_MEMBERS`` + - Maximum number of members in the ZIP archive. + Default: ``5000``. + * - ``LTP_SAFE_ZIP_MAX_MEMBER_EXTRACTED_BYTES`` + - Maximum extracted size for a single member in bytes. + Default: ``134217728`` (128 MiB). + * - ``LTP_SAFE_ZIP_MAX_MEMBER_COMPRESSION_RATIO`` + - Maximum compression ratio for a single member. + Default: ``100.0``. + * - ``LTP_SAFE_ZIP_MAX_TOTAL_COMPRESSION_RATIO`` + - Maximum compression ratio for the whole archive. + Default: ``10.0``. + +Example +------- + +.. code-block:: bash + + # Use a custom cache directory + export LTP_PATH=/path/to/cache + + # Skip download and use an existing installation + export LTP_JAR_DIR_PATH=/path/to/LanguageTool-6.8 + + # Verify a specific release + export LTP_DOWNLOAD_SHA256_6_8= + + # Or bypass verification entirely (not recommended) + export LTP_BYPASS_VERIFIED_DOWNLOADS=true diff --git a/docs/source/references/examples.rst b/docs/source/references/examples.rst new file mode 100644 index 0000000..0e4ae95 --- /dev/null +++ b/docs/source/references/examples.rst @@ -0,0 +1,446 @@ +Examples +======== + +This page provides practical examples for using ``language_tool_python``. +For advanced patterns (resource management, client/server, error handling, pinning the +LT version) see :doc:`advanced`. For CLI usage see :doc:`cli`. For environment variables +see :doc:`env_vars`. For local server configuration see :doc:`config`. 
+ +Basic usage +----------- + +Checking text for errors +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use :class:`~language_tool_python.server.LanguageTool` to check a piece of text. The +:meth:`~language_tool_python.server.LanguageTool.check` method returns a list of +:class:`~language_tool_python.match.Match` objects, each describing a detected issue. + +.. code-block:: python + + import language_tool_python + + with language_tool_python.LanguageTool("en-US") as tool: + matches = tool.check("A sentence with a error in the Hitchhiker's Guide tot he Galaxy") + + print(len(matches)) + # → 2 + print(matches[0].message) + # → 'Use "an" instead of "a" if the following word starts with a vowel sound' + print(matches[0].replacements) + # → ['an'] + print(matches[0].offset) + # → 16 + +Correcting text automatically +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`~language_tool_python.server.LanguageTool.correct` applies the first suggestion +for each detected issue and returns the fixed text. + +.. code-block:: python + + import language_tool_python + + with language_tool_python.LanguageTool("en-US") as tool: + corrected = tool.correct("A sentence with a error in the Hitchhiker's Guide tot he Galaxy") + + print(corrected) + # → A sentence with an error in the Hitchhiker's Guide to the Galaxy + +You can also call :func:`~language_tool_python.utils.correct` directly with a custom +list of :class:`~language_tool_python.match.Match` objects if you need to filter them first. + +.. code-block:: python + + import language_tool_python + from language_tool_python.utils import correct + + with language_tool_python.LanguageTool("en-US") as tool: + text = "This are wrong." + matches = tool.check(text) + + print([match.rule_id for match in matches]) + # → ['THIS_NNS', 'THAT_SOUND_GREAT'] + print(correct(text, matches)) + # → These is wrong. + + matches = [m for m in matches if m.rule_id != "THIS_NNS"] + print(correct(text, matches)) + # → This is wrong. 
+ # Ignore the first match + +Applying a specific suggestion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:meth:`~language_tool_python.server.LanguageTool.correct` always picks the first +suggestion. To choose a different one, call +:meth:`~language_tool_python.match.Match.select_replacement` before passing the match to +:func:`~language_tool_python.utils.correct`: + +.. code-block:: python + + import language_tool_python + from language_tool_python.utils import correct + + text = "There is a bok on the table." + with language_tool_python.LanguageTool("en-US") as tool: + matches = tool.check(text) + + print(matches[0].replacements) + # → ['BOK', 'OK', 'book', 'box', 'boy', 'Bob', 'bow', 'beak', ...] + matches[0].select_replacement(2) # pick the third suggestion instead of the first + patched = correct(text, matches) + print(patched) + # → There is a book on the table. + +Using the public API (no local server) +--------------------------------------- + +:class:`~language_tool_python.server.LanguageToolPublicAPI` connects to the hosted +LanguageTool service instead of starting a local Java server. No Java installation is +required, but requests are subject to rate limits. + +.. code-block:: python + + import language_tool_python + + with language_tool_python.LanguageToolPublicAPI("en-US") as tool: + matches = tool.check("This are wrong.") + + print(len(matches)) + # → 2 + +.. note:: + + The public API is subject to rate limits. If you need to check multiple texts or + large documents, consider using :class:`~language_tool_python.server.LanguageTool` + with a local server instead, or authenticating with a premium key (see + :doc:`advanced`). + +Checking only specific regions of text +--------------------------------------- + +:meth:`~language_tool_python.server.LanguageTool.check_matching_regions` restricts +checking to the parts of the text that match a regular expression. This is useful when +the text contains markup, code blocks, or other sections that should be skipped. 
+ +.. code-block:: python + + import language_tool_python + from language_tool_python.utils import correct + + text = 'He seid "I has a problem" but she replied "It are fine".' + + with language_tool_python.LanguageTool("en-US") as tool: + matches = tool.check_matching_regions( + text, + r'"[^"]*"', # only check text inside double quotes + ) + + print(correct(text, matches)) + # → He seid "I have a problem" but she replied "It is fine". + # "seid" is not corrected because it is outside the quoted regions. + + +Controlling rules +----------------- + +Disabling specific rules +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Pass rule IDs to :attr:`~language_tool_python.server.LanguageTool.disabled_rules` to +suppress individual rules, or call +:meth:`~language_tool_python.server.LanguageTool.disable_spellchecking` to turn off +all spell-check categories at once. + +.. code-block:: python + + import language_tool_python + + text = "Thiss is false." + + with language_tool_python.LanguageTool("en-US") as tool: + matches = tool.check(text) + print(len(matches)) + # → 1 + + with language_tool_python.LanguageTool("en-US") as tool: + tool.disabled_rules = {"MORFOLOGIK_RULE_EN_US"} + matches = tool.check(text) + print(len(matches)) + # → 0 + # The rule "MORFOLOGIK_RULE_EN_US" is disabled, so the spelling error is ignored. + + with language_tool_python.LanguageTool("en-US") as tool: + tool.disable_spellchecking() + matches = tool.check(text) + print(len(matches)) + # → 0 + # The spellchecking is disabled, so the spelling error is ignored. + +Enabling only specific rules +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Set :attr:`~language_tool_python.server.LanguageTool.enabled_rules_only` to ``True`` +to run exclusively the rules listed in +:attr:`~language_tool_python.server.LanguageTool.enabled_rules`. + +.. code-block:: python + + import language_tool_python + + text = "This are wrong." 
+ + with language_tool_python.LanguageTool("en-US") as tool: + matches = tool.check(text) + print([match.rule_id for match in matches]) + # → ['THIS_NNS', 'THAT_SOUND_GREAT'] + print(tool.correct(text)) + # → These is wrong. + + with language_tool_python.LanguageTool("en-US") as tool: + tool.enabled_rules = {"THIS_NNS"} + tool.enabled_rules_only = True + print(tool.correct(text)) + # → These are wrong. + # Only the THIS_NNS rule is applied + +Controlling categories +~~~~~~~~~~~~~~~~~~~~~~~ + +Use :attr:`~language_tool_python.server.LanguageTool.disabled_categories` and +:attr:`~language_tool_python.server.LanguageTool.enabled_categories` to enable or +disable entire rule categories at once. `Here is a list of all categories`_. + +.. _`Here is a list of all categories`: https://github.com/languagetool-org/languagetool/blob/master/languagetool-core/src/main/java/org/languagetool/rules/Categories.java + +.. code-block:: python + + import language_tool_python + + text = "I wentt to new york last week." + + with language_tool_python.LanguageTool("en-US") as tool: + matches = tool.check(text) + print([match.category for match in matches]) + # → ['TYPOS', 'CASING'] + print(tool.correct(text)) + # → I went to New York last week. + + with language_tool_python.LanguageTool("en-US") as tool: + tool.enabled_categories = {"CASING"} + tool.enabled_rules_only = True + matches = tool.check(text) + print([match.category for match in matches]) + # → ['CASING'] + print(tool.correct(text)) + # → I wentt to New York last week. + +Picky mode (stricter checking) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Enable :attr:`~language_tool_python.server.LanguageTool.picky` for additional style +rules that are too strict for casual writing. + +Preferred language variants +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:attr:`~language_tool_python.server.LanguageTool.preferred_variants` lets you specify +which dialect to prefer when you use the "auto" language. 
LanguageTool can detect the +language that is used in the text, but you have to specify variants in case +a language with multiple dialects (e.g. English) is detected. + +.. code-block:: python + + import language_tool_python + + text = "The colour of the sky." + + with language_tool_python.LanguageTool("auto") as tool: + tool.preferred_variants = {"en-US"} + print(tool.correct(text)) + # → The color of the sky. + + with language_tool_python.LanguageTool("auto") as tool: + tool.preferred_variants = {"en-GB"} + print(tool.correct(text)) + # → The colour of the sky. + +Mother tongue detection +------------------------ + +Setting :attr:`~language_tool_python.server.LanguageTool.mother_tongue` helps +LanguageTool detect *false friends* (words that look similar across two languages but +carry different meanings). It works only with ngrams data installed (see :doc:`config`). + +.. code-block:: python + + import language_tool_python + + config = { + "languageModel": "/path/to/ngrams" + } + + with language_tool_python.LanguageTool("en-US", config=config) as tool: + matches = tool.check("My handy is broken.") + print(matches) + # → [] + + with language_tool_python.LanguageTool("en-US", mother_tongue="de", config=config) as tool: + matches = tool.check("My handy is broken.") + print(matches[0].message) + # → “handy” (English) means “praktisch”, “handlich” (German). Did you maybe mean “cell phone”, “mobile phone”? + +Classifying matches +-------------------- + +:func:`~language_tool_python.utils.classify_matches` categorises a list of matches as: + +- :attr:`~language_tool_python.utils.TextStatus.CORRECT` - no matches found. +- :attr:`~language_tool_python.utils.TextStatus.FAULTY` - at least one match has replacement suggestions. +- :attr:`~language_tool_python.utils.TextStatus.GARBAGE` - matches exist but none have suggestions (unrecognisable input). + +.. 
code-block:: python + + import language_tool_python + from language_tool_python.utils import classify_matches + + with language_tool_python.LanguageTool("en-US") as tool: + matches = tool.check("This sentence is correct.") + print(classify_matches(matches)) + # → TextStatus.CORRECT + matches = tool.check("This are wrong.") + print(classify_matches(matches)) + # → TextStatus.FAULTY + matches = tool.check("fnekknfzn") + print(classify_matches(matches)) + # → TextStatus.GARBAGE + +Custom spellings +---------------- + +Pass a list of words via ``new_spellings`` to add them to the local LanguageTool +dictionary. By default they persist across sessions (``new_spellings_persist=True``). + +.. code-block:: python + + import language_tool_python + + with language_tool_python.LanguageTool( + "en-US", + ) as tool: + matches = tool.check("Welcome to the compani.") + print(len(matches)) + # → 1 + # "compani" is not a known word, so LanguageTool suggests "company" as a correction + + with language_tool_python.LanguageTool( + "en-US", + new_spellings=["compani"], + new_spellings_persist=False, + ) as tool: + matches = tool.check("Welcome to the compani.") + print(len(matches)) + # → 0 + # "compani" is now a known word, so LanguageTool does not suggest any corrections + +Pass ``new_spellings_persist=False`` to keep the words for the current session only; +they are removed when :meth:`~language_tool_python.server.LanguageTool.close` is called. + +Using a remote LanguageTool server +------------------------------------ + +Point :class:`~language_tool_python.server.LanguageTool` at a self-hosted LanguageTool +server with the ``remote_server`` parameter. + +.. code-block:: python + + import language_tool_python + + with language_tool_python.LanguageTool( + "en-US", + remote_server="http://my-languagetool-server:8081", + ) as tool: + print(tool.correct("I has a problem.")) + # → I have a problem. + +You can also route requests through an HTTP proxy: + +.. 
code-block:: python + + import language_tool_python + + with language_tool_python.LanguageTool( + "en-US", + remote_server="http://my-languagetool-server:8081", + proxies={"http": "http://proxy:3128", "https": "http://proxy:3128"}, + ) as tool: + print(tool.correct("I has a problem.")) + # → I have a problem. + +.. note:: + + ``proxies`` can only be used together with ``remote_server``. Passing ``proxies`` + without ``remote_server`` raises ``ValueError``. + +Inspecting a Match object +-------------------------- + +Each :class:`~language_tool_python.match.Match` object exposes the following attributes: + +.. list-table:: + :header-rows: 1 + :widths: 25 75 + + * - Attribute + - Description + * - ``rule_id`` + - Identifier of the triggered rule (e.g. ``"MORFOLOGIK_RULE_EN_US"``). + * - ``message`` + - Human-readable description of the issue. + * - ``replacements`` + - Ordered list of suggested corrections (may be empty). + * - ``offset`` + - Start character position in the original text. + * - ``error_length`` + - Number of characters covered by the error. + * - ``context`` + - Short excerpt of text surrounding the error. + * - ``offset_in_context`` + - Position of the error within ``context``. + * - ``category`` + - Rule category (e.g. ``"TYPOS"``, ``"GRAMMAR"``). + * - ``rule_issue_type`` + - Issue type string (e.g. ``"misspelling"``, ``"grammar"``). + * - ``sentence`` + - Full sentence containing the error. + +.. code-block:: python + + import language_tool_python + + with language_tool_python.LanguageTool("en-US") as tool: + matches = tool.check("This are wrong.") + + m = matches[0] + print(m.rule_id) + # → THIS_NNS + print(m.message) + # → The singular demonstrative pronoun ‘this’ does not agree with the plural verb ‘are’. Did you mean “these”? + print(m.replacements) + # → ['These'] + print(m.offset) + # → 0 + print(m.error_length) + # → 4 + print(m.context) + # → 'This are wrong.' 
+ print(m.offset_in_context) + # → 0 + print(m.category) + # → GRAMMAR + print(m.rule_issue_type) + # → grammar + print(m.sentence) + # → 'This are wrong.' diff --git a/docs/source/references/installation.rst b/docs/source/references/installation.rst new file mode 100644 index 0000000..36c8d39 --- /dev/null +++ b/docs/source/references/installation.rst @@ -0,0 +1,40 @@ +Installation and quick start +============================ + +Installation +------------ + +.. code-block:: bash + + pip install --upgrade language_tool_python + +**Requirements** + +- Python ``>=3.10`` (tested up to 3.15). +- Java ``>=9`` for LanguageTool ``4.0`` to ``6.5``, ``>=17`` for LanguageTool ``>=6.6`` (default). + +.. note:: + + LanguageTool is downloaded automatically on first use. The default downloaded version is :data:`~language_tool_python.download_lt.LTP_DOWNLOAD_VERSION`. To use a different version, see :ref:`pinning-lt-version`. + +Quick start +----------- + +.. code-block:: python + + import language_tool_python + + with language_tool_python.LanguageTool("en-US") as tool: + text = "A sentence with a error in the Hitchhiker's Guide tot he Galaxy" + matches = tool.check(text) + + print(matches[0].message) + # → Use “an” instead of ‘a’ if the following word starts with a vowel sound, e.g. ‘an article’, ‘an hour’. 
+ print(matches[0].replacements) + # → ['an'] + + with language_tool_python.LanguageTool("en-US") as tool: + corrected = tool.correct(text) + + print(corrected) + # → A sentence with an error in the Hitchhiker's Guide to the Galaxy diff --git a/docs/source/references/language_tool_python.rst b/docs/source/references/language_tool_python.rst index 61ac8f8..a480333 100644 --- a/docs/source/references/language_tool_python.rst +++ b/docs/source/references/language_tool_python.rst @@ -1,69 +1,123 @@ -language\_tool\_python package -============================== +language\_tool\_python package (full API reference) +=================================================== -Submodules ----------- +Available modules +----------------- -language\_tool\_python.config\_file module ------------------------------------------- +The following modules make up the public interface of ``language_tool_python``. -.. automodule:: language_tool_python.config_file - :members: - :show-inheritance: - :undoc-members: +Core - :mod:`language_tool_python.server` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -language\_tool\_python.download\_lt module ------------------------------------------- +Contains :class:`~language_tool_python.server.LanguageTool`, the main class for interacting +with a local LanguageTool server, and +:class:`~language_tool_python.server.LanguageToolPublicAPI`, a subclass that targets the +hosted public API instead. -.. automodule:: language_tool_python.download_lt +.. automodule:: language_tool_python.server :members: :show-inheritance: :undoc-members: -language\_tool\_python.exceptions module ----------------------------------------- +Match - :mod:`language_tool_python.match` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: language_tool_python.exceptions +Contains the :class:`~language_tool_python.match.Match` class that wraps a single language +issue returned by LanguageTool. + +.. 
automodule:: language_tool_python.match :members: :show-inheritance: :undoc-members: -language\_tool\_python.language\_tag module -------------------------------------------- +Utilities - :mod:`language_tool_python.utils` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: language_tool_python.language_tag +Standalone helper functions: +:func:`~language_tool_python.utils.correct` applies match suggestions to a text, and +:func:`~language_tool_python.utils.classify_matches` categorises a list of matches as +:attr:`~language_tool_python.utils.TextStatus.CORRECT`, +:attr:`~language_tool_python.utils.TextStatus.FAULTY`, or +:attr:`~language_tool_python.utils.TextStatus.GARBAGE`. + +.. automodule:: language_tool_python.utils :members: :show-inheritance: :undoc-members: -language\_tool\_python.match module ------------------------------------ +Exceptions - :mod:`language_tool_python.exceptions` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: language_tool_python.match +All custom exceptions raised by the library. Every exception inherits from +:class:`~language_tool_python.exceptions.LanguageToolError`, so a single +``except LanguageToolError`` clause is sufficient to catch all library errors. + +.. automodule:: language_tool_python.exceptions :members: :show-inheritance: :undoc-members: -language\_tool\_python.server module ------------------------------------- +Language tags - :mod:`language_tool_python.language_tag` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: language_tool_python.server +:class:`~language_tool_python.language_tag.LanguageTag` normalises BCP 47 language +tags (e.g. ``"en-US"``, ``"de-DE"``) to the format expected by LanguageTool, and +handles POSIX locale fallbacks. + +.. 
automodule:: language_tool_python.language_tag :members: :show-inheritance: :undoc-members: -language\_tool\_python.utils module ------------------------------------ +Server configuration - :mod:`language_tool_python.config_file` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: language_tool_python.utils +:class:`~language_tool_python.config_file.LanguageToolConfig` accepts a +:data:`~language_tool_python.config_file.ConfigValue` dictionary and writes it to a +temporary file that is passed to the LanguageTool Java process via ``--config``. + +.. automodule:: language_tool_python.config_file :members: :show-inheritance: :undoc-members: -Module contents ---------------- +Advanced +-------- + +Download management - :mod:`language_tool_python.download_lt` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Handles downloading and caching the LanguageTool JAR. Exposed for advanced use cases +such as pinning a specific LanguageTool version or working with snapshot builds. +The default download version is given by :data:`~language_tool_python.download_lt.LTP_DOWNLOAD_VERSION`. -.. automodule:: language_tool_python +.. automodule:: language_tool_python.download_lt :members: :show-inheritance: :undoc-members: + +Internal utilities - :mod:`language_tool_python._internals` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Only the types from this private module that surface in the public API are documented +here. :class:`~language_tool_python._internals.api_types.CheckMatch` in particular +appears in the :class:`~language_tool_python.match.Match` constructor signature. +Note that if you have to perform type checking against :class:`~language_tool_python._internals.api_types.CheckMatch` (if you need to construct a :class:`~language_tool_python.match.Match` manually, for example), you can use the function :func:`~language_tool_python.match.is_check_match` as a type guard (this function is in the public API). 
:class:`~language_tool_python._internals.utils.SupportsBool` is also used in the public API, as it is part of the type alias :data:`~language_tool_python.config_file.ConfigValue`. The contents of this module are not intended for public use and may change or be removed without notice. + +.. autoclass:: language_tool_python._internals.api_types.CheckMatch + +.. autoclass:: language_tool_python._internals.api_types.Replacement + +.. autoclass:: language_tool_python._internals.api_types.ReplacementOptional + +.. autoclass:: language_tool_python._internals.api_types.Context + +.. autoclass:: language_tool_python._internals.api_types.MatchType + +.. autoclass:: language_tool_python._internals.api_types.Rule + +.. autoclass:: language_tool_python._internals.api_types.RuleOptional + +.. autoclass:: language_tool_python._internals.api_types.Category + +.. autoclass:: language_tool_python._internals.utils.SupportsBool diff --git a/docs/source/references/modules.rst b/docs/source/references/modules.rst index e0db238..93aac0b 100644 --- a/docs/source/references/modules.rst +++ b/docs/source/references/modules.rst @@ -1,7 +1,13 @@ -language_tool_python -==================== +API Reference +============= .. 
toctree:: :maxdepth: 4 + installation + examples + advanced + config + cli + env_vars language_tool_python diff --git a/make.bat b/make.bat index 74daffa..6f0e194 100644 --- a/make.bat +++ b/make.bat @@ -51,9 +51,6 @@ exit /b %errorlevel% :test uv run --group tests --locked pytest -if errorlevel 1 exit /b %errorlevel% - -uvx --with defusedxml genbadge coverage --input-file coverage.xml --silent exit /b %errorlevel% :doc diff --git a/src/language_tool_python/_internals/api_types.py b/src/language_tool_python/_internals/api_types.py index e314ca2..99e0c4b 100644 --- a/src/language_tool_python/_internals/api_types.py +++ b/src/language_tool_python/_internals/api_types.py @@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, TypedDict if TYPE_CHECKING: - from typing import TypeGuard + import typing __all__ = [ "Category", @@ -34,13 +34,13 @@ class LanguageInfo(TypedDict): name: str -def is_language_info(value: object) -> TypeGuard[LanguageInfo]: +def is_language_info(value: object) -> typing.TypeGuard[LanguageInfo]: """Verify that a value is a LanguageInfo. :param value: The value to check. :type value: object :return: TypeGuard indicating whether the value is a LanguageInfo. - :rtype: TypeGuard[LanguageInfo] + :rtype: typing.TypeGuard[LanguageInfo] """ if not isinstance(value, dict): return False @@ -144,13 +144,13 @@ class CheckResponse(TypedDict): warnings: WarningInfo -def is_check_response(value: object) -> TypeGuard[CheckResponse]: +def is_check_response(value: object) -> typing.TypeGuard[CheckResponse]: """Verify that a value is a CheckResponse. :param value: The value to check. :type value: object :return: TypeGuard indicating whether the value is a CheckResponse. 
- :rtype: TypeGuard[CheckResponse] + :rtype: typing.TypeGuard[CheckResponse] """ if not isinstance(value, dict): return False diff --git a/src/language_tool_python/_ressources/integrity.toml b/src/language_tool_python/_ressources/integrity.toml index b9544a9..2a797df 100644 --- a/src/language_tool_python/_ressources/integrity.toml +++ b/src/language_tool_python/_ressources/integrity.toml @@ -1,5 +1,5 @@ # Hashes of languagetool zip files that are available at -# https://languagetool.org/download/ and https://languagetool.org/download/archive/ +# https://languagetool.org/download/, https://languagetool.org/download/archive/ and https://github.com/jxmorris12/language_tool_python/releases/ (for LT releases after and including 6.7). # The hashes are calculated with sha256 and are used to check the integrity of the downloaded zip files. # This manifest only contains hashes for archives and releases and not for snapshots because # they are not stable. diff --git a/src/language_tool_python/config_file.py b/src/language_tool_python/config_file.py index 9b47401..8b73dbe 100644 --- a/src/language_tool_python/config_file.py +++ b/src/language_tool_python/config_file.py @@ -20,6 +20,12 @@ ] ConfigValue = PathLike[str] | SupportsBool | str | int | float | Iterable[str] +"""Union of types accepted as values in the :class:`LanguageToolConfig` dictionary. + +:class:`os.PathLike`\\ [:class:`str`], :class:`.SupportsBool`, :class:`str`, +:class:`int`, :class:`float`, :class:`collections.abc.Iterable`\\ [:class:`str`] +""" + _ConfigValueT = TypeVar("_ConfigValueT", bound=ConfigValue) logger = logging.getLogger(__name__) @@ -115,7 +121,7 @@ def _comma_list_encoder(v: str | Iterable[str]) -> str: it's an iterable, its elements are converted to strings and joined with commas. :param v: The value to encode. Can be a string or an iterable of values. 
- :type v: str | Iterable[str] + :type v: str | collections.abc.Iterable[str] :return: A comma-separated string representation of the input value. :rtype: str """ @@ -219,7 +225,7 @@ def _encode_config(config: Mapping[str, ConfigValue]) -> dict[str, str]: specification. :param config: A dictionary containing configuration keys and values to be encoded. - :type config: Mapping[str, ConfigValue] + :type config: collections.abc.Mapping[str, ConfigValue] :return: A dictionary with the same keys but with all values encoded as strings. :rtype: dict[str, str] :raises ValueError: If a key in the config is not found in the CONFIG_SCHEMA and is @@ -267,7 +273,7 @@ class LanguageToolConfig: """Configuration class for LanguageTool. :param config: Dictionary containing configuration keys and values. - :type config: Mapping[str, ConfigValue] + :type config: collections.abc.Mapping[str, ConfigValue] """ config: dict[str, str] diff --git a/src/language_tool_python/download_lt.py b/src/language_tool_python/download_lt.py index 181369b..65defa0 100644 --- a/src/language_tool_python/download_lt.py +++ b/src/language_tool_python/download_lt.py @@ -74,7 +74,8 @@ ) _FILENAME_RELEASE = "LanguageTool-{version}.zip" -LTP_DOWNLOAD_VERSION = "6.8" +LTP_DOWNLOAD_VERSION: str = "6.8" +"""Default LanguageTool version downloaded and used by the library.""" _LT_SNAPSHOT_LATEST_VERSION = "latest" _LTP_DOWNLOAD_SHA256_ENV_VAR = "LTP_DOWNLOAD_SHA256" _LTP_BYPASS_VERIFIED_DOWNLOADS_ENV_VAR = "LTP_BYPASS_VERIFIED_DOWNLOADS" @@ -362,7 +363,7 @@ def from_path(cls, path: Path) -> LocalLanguageTool: and creates the appropriate instance. :param path: The path to a LanguageTool installation directory. - :type path: Path + :type path: pathlib.Path :return: An instance of the appropriate LocalLanguageTool subclass. :rtype: LocalLanguageTool :raises ValueError: If the version cannot be determined from the path or the @@ -520,7 +521,7 @@ def get_directory_path(self) -> Path: version's name. 
:return: The path to the LanguageTool installation directory. - :rtype: Path + :rtype: pathlib.Path :raises FileNotFoundError: If the LanguageTool version directory is not found. """ download_folder = get_language_tool_download_path() @@ -549,7 +550,7 @@ def get_jar_path(self) -> Path: languagetool.jar) within the installation directory. :return: The path to the LanguageTool JAR file. - :rtype: Path + :rtype: pathlib.Path :raises FileNotFoundError: If no LanguageTool JAR file is found. """ directory_path = self.get_directory_path() @@ -629,7 +630,7 @@ def version_into(self) -> tuple[int, int] | datetime: for comparison purposes. :return: A tuple of integers for releases or datetime for snapshots. - :rtype: tuple[int, int] | datetime + :rtype: tuple[int, int] | datetime.datetime :raises NotImplementedError: Always, unless implemented by a subclass. """ raise NotImplementedError @@ -938,7 +939,7 @@ def version_into(self) -> datetime: current date. :return: A datetime object representing the snapshot date. - :rtype: datetime + :rtype: datetime.datetime :raises ValueError: If the snapshot version is not a valid ``YYYYMMDD`` date. """ return datetime.strptime(self.version_name, "%Y%m%d") # noqa: DTZ007 # Constructing a datetime without timezone because it is the format of the version string diff --git a/src/language_tool_python/exceptions.py b/src/language_tool_python/exceptions.py index 01c163c..d9f5977 100644 --- a/src/language_tool_python/exceptions.py +++ b/src/language_tool_python/exceptions.py @@ -20,16 +20,16 @@ class LanguageToolError(Exception): class ServerError(LanguageToolError): """Raised when interacting with the LanguageTool server fails. - This exception is a subclass of ``LanguageToolError`` and is used to indicate issues - such as server startup failures. + This exception is a subclass of :class:`LanguageToolError` and is used to indicate + issues such as server startup failures. 
""" class JavaError(LanguageToolError): """Exception raised for errors related to the Java backend of LanguageTool. - This exception is a subclass of ``LanguageToolError`` and is used to indicate issues - that occur when interacting with Java, such as Java not being found. + This exception is a subclass of :class:`LanguageToolError` and is used to indicate + issues that occur when interacting with Java, such as Java not being found. """ @@ -45,6 +45,7 @@ class PathError(LanguageToolError): class RateLimitError(LanguageToolError): """Exception raised for errors related to rate limiting in the LanguageTool server. - This exception is a subclass of ``LanguageToolError`` and is used to indicate issues - such as exceeding the allowed number of requests to the public API without a key. + This exception is a subclass of :class:`LanguageToolError` and is used to indicate + issues such as exceeding the allowed number of requests to the public API without + a key. """ diff --git a/src/language_tool_python/language_tag.py b/src/language_tool_python/language_tag.py index 0ee8d1f..c035ec2 100644 --- a/src/language_tool_python/language_tag.py +++ b/src/language_tool_python/language_tag.py @@ -17,7 +17,7 @@ class LanguageTag: :param tag: The language tag. :type tag: str :param languages: An iterable of supported language tags. - :type languages: Iterable[str] + :type languages: collections.abc.Iterable[str] :raises ValueError: If the tag is empty or unsupported. 
""" diff --git a/src/language_tool_python/match.py b/src/language_tool_python/match.py index 390fe2a..162a5a9 100644 --- a/src/language_tool_python/match.py +++ b/src/language_tool_python/match.py @@ -3,11 +3,12 @@ from __future__ import annotations import logging +import typing import unicodedata from collections import OrderedDict from collections import OrderedDict as OrderedDictType from functools import total_ordering -from typing import TYPE_CHECKING, TypeGuard +from typing import TYPE_CHECKING if TYPE_CHECKING: from collections.abc import Iterator @@ -87,13 +88,13 @@ def _four_byte_char_positions(text: str) -> list[int]: return positions -def is_check_match(value: object) -> TypeGuard[CheckMatch]: +def is_check_match(value: object) -> typing.TypeGuard[CheckMatch]: """Verify that a value is a CheckMatch. :param value: The value to check. :type value: object :return: TypeGuard indicating whether the value is a CheckMatch. - :rtype: TypeGuard[CheckMatch] + :rtype: typing.TypeGuard[CheckMatch] """ if not isinstance(value, dict): return False diff --git a/src/language_tool_python/server.py b/src/language_tool_python/server.py index db8419c..93b36d6 100644 --- a/src/language_tool_python/server.py +++ b/src/language_tool_python/server.py @@ -116,7 +116,7 @@ class LanguageTool: :param host: The host address for the LanguageTool server. Defaults to 'localhost'. :type host: str | None :param config: Configuration options for the local LanguageTool server. - :type config: Mapping[str, ConfigValue] | None + :type config: collections.abc.Mapping[str, ConfigValue] | None :param language_tool_download_version: The version of LanguageTool to download if needed. 
:type language_tool_download_version: str @@ -195,7 +195,8 @@ class LanguageTool: """A set of explicitly enabled categories (used in requests to the server).""" _enabled_rules_only: bool - """A flag to use only explicitly enabled rules (used in requests to the server).""" + """A flag to use only explicitly enabled rules/categories + (used in requests to the server).""" _preferred_variants: set[str] """A set of preferred language variants (used in requests to the server).""" @@ -523,7 +524,7 @@ def enabled_categories(self, value: set[str]) -> None: @property def enabled_rules_only(self) -> bool: - """Get whether only enabled rules should be used. + """Get whether only enabled rules/categories should be used. :return: True if using only enabled rules, False otherwise. :rtype: bool @@ -532,9 +533,10 @@ def enabled_rules_only(self) -> bool: @enabled_rules_only.setter def enabled_rules_only(self, value: bool) -> None: - """Set whether to use only explicitly enabled rules. + """Set whether to use only explicitly enabled rules/categories. - When set to True, only rules in enabled_rules will be applied. + When set to True, only rules in enabled_rules and categories in + enabled_categories will be applied. :param value: True to use only enabled rules, False to use default rules. :type value: bool @@ -788,6 +790,7 @@ def correct(self, text: str) -> str: """Corrects the given text by applying language tool suggestions. Applies only the first suggestion for each issue. + :param text: The text to be corrected. :type text: str :return: The corrected text. @@ -801,7 +804,7 @@ def correct(self, text: str) -> str: def enable_spellchecking(self) -> None: """Enable spellchecking by removing spellcheck category exclusions. 
- This method updates the ``disabled_categories`` attribute by removing any + This method updates the :attr:`disabled_categories` attribute by removing any categories that are related to spell checking, which are defined in the ``_SPELL_CHECKING_CATEGORIES`` class constant. """ @@ -1200,8 +1203,8 @@ def _terminate_server(self) -> None: class LanguageToolPublicAPI(LanguageTool): """A class to interact with the public LanguageTool API. - This class extends the ``LanguageTool`` class and initializes it with the remote - server set to the public LanguageTool API endpoint. + This class extends the :class:`LanguageTool` class and initializes it with the + remote server set to the public LanguageTool API endpoint. :param language: The language code to use for checking text (e.g., 'en-US'). :type language: str | None diff --git a/src/language_tool_python/utils.py b/src/language_tool_python/utils.py index 40d08ac..9a7fca9 100644 --- a/src/language_tool_python/utils.py +++ b/src/language_tool_python/utils.py @@ -23,11 +23,11 @@ def classify_matches(matches: list[Match]) -> TextStatus: """Classify matches as CORRECT, FAULTY, or GARBAGE. This function checks the status of the matches and returns a corresponding - ``TextStatus`` value. + :class:`TextStatus` value. :param matches: A list of Match objects to be classified. :type matches: list[Match] - :return: The classification of the matches as a ``TextStatus`` value. + :return: The classification of the matches as a :class:`TextStatus` value. :rtype: TextStatus """ if not len(matches):