From 9b8f27b544d46848472dfc7d7198001eff5fc192 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Wed, 19 Nov 2025 16:59:49 +0100 Subject: [PATCH 1/5] Added initial stub documenting new search module. --- tripper/search.py | 54 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 tripper/search.py diff --git a/tripper/search.py b/tripper/search.py new file mode 100644 index 00000000..98bd20cd --- /dev/null +++ b/tripper/search.py @@ -0,0 +1,54 @@ +"""Module providing a simple interface to SPARQL queries. + +This module is not imported by default, since it depends on the +excellent `SPARQL-builder` package develop by 7P9 in the PINK project. +""" +from sparqlbuilder import select + + + +def make_query( + criteria: "Optional[Union[Sequence[Tuple[str, str]], Mapping]]" = None, + prefixes: "Optional[dict]" = None, + type: = None, + regex: "Optional[dict]" = None, + flags: "Optional[str]" = None, + keywords: "Optional[Keywords]" = None, + query_type: "Optional[str]" = "SELECT DISTINCT", + limit: "Optional[int]" = None, + offset: "Optional[int]" = None, +) -> "str": +""" + + +Examples: + + Alternative ways to search for all datasets: + + >>> make_query(criteria=[("rdf:type", "dcat:Dataset")]) + >>> make_query(criteria=[("rdf:type", "Dataset")]) + >>> make_query(type="dcat:Dataset")) + >>> make_query(type="Dataset")) + + All datasets created by a given agent: + >>> make_query( + ... type="Dataset", + ... criteria=[("creator", "kb:JohnDow")], + ... ) + + alternatively: + + >>> make_query( + ... criteria=[("rdf:type": "Dataset"), ("creator", "kb:JohnDow")], + ... ) + + All datasets that has a creator, regardless who: + >>> make_query( + ... type="Dataset", + ... criteria=[("creator", None)], + ... 
) + +""" + + +make_query(prefixes=ts.namespaces) From f43da2fe7e215f86cddb89c2646a99eb7928a4a5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 19 Nov 2025 16:01:41 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit hooks For more information, see https://pre-commit.ci --- tripper/search.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tripper/search.py b/tripper/search.py index 98bd20cd..825f9c14 100644 --- a/tripper/search.py +++ b/tripper/search.py @@ -6,7 +6,6 @@ from sparqlbuilder import select - def make_query( criteria: "Optional[Union[Sequence[Tuple[str, str]], Mapping]]" = None, prefixes: "Optional[dict]" = None, From 9682941f63935d747f2feae3d34624c0a8cb125f Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Wed, 19 Nov 2025 18:35:59 +0100 Subject: [PATCH 3/5] Added docstring to make_query() --- tripper/search.py | 93 +++++++++++++++++++++++++++++++---------------- 1 file changed, 62 insertions(+), 31 deletions(-) diff --git a/tripper/search.py b/tripper/search.py index 825f9c14..dd7156ca 100644 --- a/tripper/search.py +++ b/tripper/search.py @@ -3,51 +3,82 @@ This module is not imported by default, since it depends on the excellent `SPARQL-builder` package develop by 7P9 in the PINK project. 
""" + from sparqlbuilder import select def make_query( - criteria: "Optional[Union[Sequence[Tuple[str, str]], Mapping]]" = None, - prefixes: "Optional[dict]" = None, - type: = None, - regex: "Optional[dict]" = None, - flags: "Optional[str]" = None, - keywords: "Optional[Keywords]" = None, - query_type: "Optional[str]" = "SELECT DISTINCT", + criteria: "Sequence[Tuple]" = (), + type: "Optional[str]" = None, + skipblanks: "bool" = True, + distinct: "bool" = True, + reduced: "bool" = False, limit: "Optional[int]" = None, - offset: "Optional[int]" = None, + offset: "int" = 0, + keywords: "Optional[KeywordsType]" = None, + context: "Optional[ContextType]" = None, + prefixes: "Optional[dict]" = None, ) -> "str": -""" + """Creates a SPARQL query to find resources in a knowledge base. + The returned query will return the IRIs of all resources that match the + criteria specified in the arguments. -Examples: + Arguments: + criteria: Exact match criteria. A dict of IRI, value pairs, where the + IRIs refer to data properties on the resource match. The IRIs + may use any prefix defined in `ts`. E.g. if the prefix `dcterms` + is in `ts`, it is expanded and the match criteria `dcterms:title` + is correctly parsed. + type: Either a [resource type] (ex: "Dataset", "Distribution", ...) + or the IRI of a class to limit the search to. + skipblanks: Whether the query will skip matching blank nodes. + distinct: Whether the query will filter out duplicated matches. + reduced: A weaker version of `distinct` that may eliminate + some duplicates but is not required to eliminate all + duplicates. This can be more efficient than `distinct` in + some query engines. `distinct` and `reduced` are mutually + exclusive. + limit: Limit the number of returned IRIs to this number. + offset: The index of the first returned IRI. `offset` often used in + combination with limit for pagination. + keywords: Keywords instance defining prefixes and keywords for + use in `criteria`. 
+ context: Context instance defining prefixes and keywords for + use in `criteria`. Extends what has been provided by `keywords`. + prefixes: Additional prefixes to use in criteria. - Alternative ways to search for all datasets: + Returns: + A string with a SPARQL query that can be passed to the + `Triplestore.query()` method. - >>> make_query(criteria=[("rdf:type", "dcat:Dataset")]) - >>> make_query(criteria=[("rdf:type", "Dataset")]) - >>> make_query(type="dcat:Dataset")) - >>> make_query(type="Dataset")) + Examples: - All datasets created by a given agent: - >>> make_query( - ... type="Dataset", - ... criteria=[("creator", "kb:JohnDow")], - ... ) + Alternative ways to search for all datasets: - alternatively: + >>> make_query(criteria=[("rdf:type", "dcat:Dataset")]) + >>> make_query(type="dcat:Dataset")) # use shorthand `type` argument + >>> make_query(type="Dataset")) # refer to a pre-defined keyword - >>> make_query( - ... criteria=[("rdf:type": "Dataset"), ("creator", "kb:JohnDow")], - ... ) + Search for all datasets created by a given agent: + >>> make_query( + ... type="Dataset", + ... criteria=[("creator", "kb:JohnDow")], + ... ) - All datasets that has a creator, regardless who: - >>> make_query( - ... type="Dataset", - ... criteria=[("creator", None)], - ... ) + alternatively: -""" + >>> make_query( + ... criteria=[("rdf:type": "Dataset"), ("creator", "kb:JohnDow")], + ... ) + + All datasets that has a creator, regardless who: + >>> make_query( + ... type="Dataset", + ... criteria=[("creator", None)], + ... ) + + """ -make_query(prefixes=ts.namespaces) +# make_query(prefixes=ts.namespaces) From 92463d1d544a528c4cb2b76b32c7f135b3ca91df Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Wed, 19 Nov 2025 21:22:31 +0100 Subject: [PATCH 4/5] Updated the documentation of search. 
--- tripper/search.py | 53 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/tripper/search.py b/tripper/search.py index dd7156ca..b4317b47 100644 --- a/tripper/search.py +++ b/tripper/search.py @@ -4,8 +4,16 @@ excellent `SPARQL-builder` package develop by 7P9 in the PINK project. """ +from typing import TYPE_CHECKING + from sparqlbuilder import select +if TYPE_CHECKING: # pragma: no cover + from typing import IO, Optional, Sequence, Tuple + + from tripper.datadoc.context import ContextType + from tripper.datadoc.keywords import KeywordsType + def make_query( criteria: "Sequence[Tuple]" = (), @@ -25,11 +33,12 @@ def make_query( criteria specified in the arguments. Arguments: - criteria: Exact match criteria. A dict of IRI, value pairs, where the - IRIs refer to data properties on the resource match. The IRIs - may use any prefix defined in `ts`. E.g. if the prefix `dcterms` - is in `ts`, it is expanded and the match criteria `dcterms:title` - is correctly parsed. + criteria: A sequence of tuples describing a set of matching criteria + that all must be met. + + (pred, obj, [spec]) + + type: Either a [resource type] (ex: "Dataset", "Distribution", ...) or the IRI of a class to limit the search to. skipblanks: Whether the query will skip matching blank nodes. @@ -56,27 +65,47 @@ def make_query( Alternative ways to search for all datasets: - >>> make_query(criteria=[("rdf:type", "dcat:Dataset")]) - >>> make_query(type="dcat:Dataset")) # use shorthand `type` argument - >>> make_query(type="Dataset")) # refer to a pre-defined keyword + ``` + >>> q = make_query(criteria=[("rdf:type", "dcat:Dataset")]) + >>> q = make_query(type="dcat:Dataset") # use `type` argument + >>> q = make_query(type="Dataset") # refer to a pre-defined keyword + ``` Search for all datasets created by a given agent: - >>> make_query( + + ``` + >>> q = make_query( ... type="Dataset", ... criteria=[("creator", "kb:JohnDow")], ... 
) + ``` alternatively: - >>> make_query( + ``` + >>> q = make_query( ... criteria=[("rdf:type": "Dataset"), ("creator", "kb:JohnDow")], ... ) + ``` + + Match all datasets that have a creator, regardless of who: - All datasets that has a creator, regardless who: - >>> make_query( + ``` + >>> q = make_query( ... type="Dataset", ... criteria=[("creator", None)], ... ) + ``` + + Use regular expressions to match all resources that have a label that + ends with "Atom". + + ``` + >>> q = make_query( + ... criteria=[("rdfs:label", ".+Atom", "regex")], + ... ) + ``` + """ From db0180d0ff039cbce3c62f95d8ff53f1fee74002 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Fri, 21 Nov 2025 22:00:46 +0100 Subject: [PATCH 5/5] Updated pyproject.toml --- pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9f32cab4..bf81fa18 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,8 +42,11 @@ units = [ mappings = [ "tripper[units]", ] +search = [ + "sparql-builder @ git+https://github.com/PINK-project/SPARQL-builder@master", +] datadoc = [ - "tripper[mappings]", + "tripper[mappings]", # add requirement on search "keyring>=22.0.0,<25.6.1", "PyLD>=2.0.0,<2.0.5; python_version<='3.13'", "PyYaml>=3.0.0,<6.0.3", # todo: check lower version