From 44cbadeaeced5b3d50742cbace3cf5462882b6b8 Mon Sep 17 00:00:00 2001 From: bugsyb <5527773+bugsyb@users.noreply.github.com> Date: Fri, 18 Apr 2025 11:13:39 +0200 Subject: [PATCH 1/5] correct() updated to only return positive match, otherwise None is returned --- language_tool_python/utils.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/language_tool_python/utils.py b/language_tool_python/utils.py index 2f73572..703f181 100644 --- a/language_tool_python/utils.py +++ b/language_tool_python/utils.py @@ -113,18 +113,21 @@ def correct(text: str, matches: List[Match]) -> str: """ ltext = list(text) matches = [match for match in matches if match.replacements] - errors = [ltext[match.offset:match.offset + match.errorLength] - for match in matches] - correct_offset = 0 - for n, match in enumerate(matches): - frompos, topos = (correct_offset + match.offset, - correct_offset + match.offset + match.errorLength) - if ltext[frompos:topos] != errors[n]: - continue - repl = match.replacements[0] - ltext[frompos:topos] = list(repl) - correct_offset += len(repl) - len(errors[n]) - return ''.join(ltext) + if matches: + errors = [ltext[match.offset:match.offset + match.errorLength] + for match in matches] + correct_offset = 0 + for n, match in enumerate(matches): + frompos, topos = (correct_offset + match.offset, + correct_offset + match.offset + match.errorLength) + if ltext[frompos:topos] != errors[n]: + continue + repl = match.replacements[0] + ltext[frompos:topos] = list(repl) + correct_offset += len(repl) - len(errors[n]) + return ''.join(ltext) + else: + return None def get_language_tool_download_path() -> str: From 2f3cf5bbc1784a82c5f9a9b0972a151786bb6946 Mon Sep 17 00:00:00 2001 From: bugsyb <5527773+bugsyb@users.noreply.github.com> Date: Thu, 24 Apr 2025 20:35:50 +0200 Subject: [PATCH 2/5] correct() updated to return None in case of correct word submit for test and str() for lack of suggestion, i.e. 
gibberish submit --- language_tool_python/utils.py | 43 ++++++++++++++--------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/language_tool_python/utils.py b/language_tool_python/utils.py index 703f181..e757fa6 100644 --- a/language_tool_python/utils.py +++ b/language_tool_python/utils.py @@ -1,5 +1,3 @@ -"""Utility functions for the LanguageTool library.""" - from typing import List, Tuple, Optional from shutil import which @@ -73,15 +71,6 @@ class PathError(LanguageToolError): pass -class RateLimitError(LanguageToolError): - """ - Exception raised for errors related to rate limiting in the LanguageTool server. - This exception is a subclass of `LanguageToolError` and is used to indicate - issues such as exceeding the allowed number of requests to the public API without a key. - """ - pass - - def parse_url(url_str: str) -> str: """ Parse the given URL string and ensure it has a scheme. @@ -112,24 +101,26 @@ def correct(text: str, matches: List[Match]) -> str: :rtype: str """ ltext = list(text) - matches = [match for match in matches if match.replacements] - if matches: - errors = [ltext[match.offset:match.offset + match.errorLength] - for match in matches] - correct_offset = 0 - for n, match in enumerate(matches): - frompos, topos = (correct_offset + match.offset, - correct_offset + match.offset + match.errorLength) - if ltext[frompos:topos] != errors[n]: - continue - repl = match.replacements[0] - ltext[frompos:topos] = list(repl) - correct_offset += len(repl) - len(errors[n]) - return ''.join(ltext) + if len(matches): + matches = [match for match in matches if match.replacements] + if matches: + errors = [ltext[match.offset:match.offset + match.errorLength] + for match in matches] + correct_offset = 0 + for n, match in enumerate(matches): + frompos, topos = (correct_offset + match.offset, + correct_offset + match.offset + match.errorLength) + if ltext[frompos:topos] != errors[n]: + continue + repl = match.replacements[0] + 
ltext[frompos:topos] = list(repl) + correct_offset += len(repl) - len(errors[n]) + return ''.join(ltext) + else: + return str() else: return None - def get_language_tool_download_path() -> str: """ Get the download path for LanguageTool. From 908a11ce1242875df29b5ee4a5747cf5164449c9 Mon Sep 17 00:00:00 2001 From: bugsyb <5527773+bugsyb@users.noreply.github.com> Date: Thu, 24 Apr 2025 20:40:51 +0200 Subject: [PATCH 3/5] re-applied based on latest code - correct() updated to return None in case of correct word submit for test and str() for lack of suggestion, i.e. gibberish submit --- language_tool_python/utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/language_tool_python/utils.py b/language_tool_python/utils.py index e757fa6..d8114b7 100644 --- a/language_tool_python/utils.py +++ b/language_tool_python/utils.py @@ -1,3 +1,5 @@ +"""Utility functions for the LanguageTool library.""" + from typing import List, Tuple, Optional from shutil import which @@ -71,6 +73,15 @@ class PathError(LanguageToolError): pass +class RateLimitError(LanguageToolError): + """ + Exception raised for errors related to rate limiting in the LanguageTool server. + This exception is a subclass of `LanguageToolError` and is used to indicate + issues such as exceeding the allowed number of requests to the public API without a key. + """ + pass + + def parse_url(url_str: str) -> str: """ Parse the given URL string and ensure it has a scheme. @@ -121,6 +132,7 @@ def correct(text: str, matches: List[Match]) -> str: else: return None + def get_language_tool_download_path() -> str: """ Get the download path for LanguageTool. 
From dd31d9b5906686deea8e2938d5cb78b832a417ff Mon Sep 17 00:00:00 2001 From: bugsyb <5527773+bugsyb@users.noreply.github.com> Date: Thu, 24 Apr 2025 20:46:38 +0200 Subject: [PATCH 4/5] correct() - in code comments added to help understand the logic --- language_tool_python/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/language_tool_python/utils.py b/language_tool_python/utils.py index d8114b7..ef5af43 100644 --- a/language_tool_python/utils.py +++ b/language_tool_python/utils.py @@ -112,7 +112,7 @@ def correct(text: str, matches: List[Match]) -> str: :rtype: str """ ltext = list(text) - if len(matches): + if len(matches): # some suggestions available, we'll use first/best matches = [match for match in matches if match.replacements] if matches: errors = [ltext[match.offset:match.offset + match.errorLength] @@ -127,9 +127,9 @@ def correct(text: str, matches: List[Match]) -> str: ltext[frompos:topos] = list(repl) correct_offset += len(repl) - len(errors[n]) return ''.join(ltext) - else: + else: # no suggestions for given language, i.e. 
gibberish submit return str() - else: + else: # Correct string submit return None From aad0aed90caf5f734f6e51abf59ee21886085952 Mon Sep 17 00:00:00 2001 From: mdevolde Date: Sun, 27 Apr 2025 15:54:45 +0200 Subject: [PATCH 5/5] fix: reset correct function, adding a specific func to determine the status of a text (list of match) --- language_tool_python/utils.py | 60 +++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 20 deletions(-) diff --git a/language_tool_python/utils.py b/language_tool_python/utils.py index ef5af43..03f055b 100644 --- a/language_tool_python/utils.py +++ b/language_tool_python/utils.py @@ -8,7 +8,7 @@ import os import subprocess import urllib.parse -import urllib.request +from enum import Enum import psutil from .config_file import LanguageToolConfig @@ -99,6 +99,32 @@ def parse_url(url_str: str) -> str: return urllib.parse.urlparse(url_str).geturl() +class TextStatus(Enum): + CORRECT = "correct" + FAULTY = "faulty" + GARBAGE = "garbage" + + +def classify_matches(matches: List[Match]) -> TextStatus: + """ + Classify the matches (result of a check on a text) into one of three categories: + CORRECT, FAULTY, or GARBAGE. + This function checks the status of the matches and returns a corresponding + `TextStatus` value. + + :param matches: A list of Match objects to be classified. + :type matches: List[Match] + :return: The classification of the matches as a `TextStatus` value. + :rtype: TextStatus + """ + if not len(matches): + return TextStatus.CORRECT + matches = [match for match in matches if match.replacements] + if not len(matches): + return TextStatus.GARBAGE + return TextStatus.FAULTY + + def correct(text: str, matches: List[Match]) -> str: """ Corrects the given text based on the provided matches. 
@@ -112,25 +138,19 @@ def correct(text: str, matches: List[Match]) -> str: :rtype: str """ ltext = list(text) - if len(matches): # some suggestions available, we'll use first/best - matches = [match for match in matches if match.replacements] - if matches: - errors = [ltext[match.offset:match.offset + match.errorLength] - for match in matches] - correct_offset = 0 - for n, match in enumerate(matches): - frompos, topos = (correct_offset + match.offset, - correct_offset + match.offset + match.errorLength) - if ltext[frompos:topos] != errors[n]: - continue - repl = match.replacements[0] - ltext[frompos:topos] = list(repl) - correct_offset += len(repl) - len(errors[n]) - return ''.join(ltext) - else: # no suggestions for given language, i.e. gibberish submit - return str() - else: # Correct string submit - return None + matches = [match for match in matches if match.replacements] + errors = [ltext[match.offset:match.offset + match.errorLength] + for match in matches] + correct_offset = 0 + for n, match in enumerate(matches): + frompos, topos = (correct_offset + match.offset, + correct_offset + match.offset + match.errorLength) + if ltext[frompos:topos] != errors[n]: + continue + repl = match.replacements[0] + ltext[frompos:topos] = list(repl) + correct_offset += len(repl) - len(errors[n]) + return ''.join(ltext) def get_language_tool_download_path() -> str: