Skip to content

Commit dc013f4

Browse files
committed
feat(client): add stemming dictionary support
- add new `stemming` module with dictionary management functionality - add `StemmingDictionaries` class for crud operations on dictionaries - add corresponding types and tests for stemming operations - update `Client` class to expose stemming functionality
1 parent 71fc135 commit dc013f4

8 files changed

Lines changed: 418 additions & 3 deletions

File tree

setup.cfg

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,12 @@ max-complexity = 6
2020

2121
# # Excluding some directories:
2222
exclude = .git,__pycache__,venv,.eggs,*.egg
23-
ignore = Q000, WPS602, WPS432, WPS305, WPS221, WPS230, WPS234, WPS433, WPS440, W503, WPS331, WPS306, WPS237, WPS202, RST301, RST306, WPS214, WPS235, WPS226, WPS337, WPS320, F821, WPS201
23+
ignore = Q000, WPS602, WPS432, WPS305, WPS221, WPS230, WPS234, WPS433, WPS440, W503, WPS331, WPS306, WPS237, WPS202, RST301, RST306, WPS214, WPS235, WPS226, WPS337, WPS320, F821, WPS201, E704, D102
2424
per-file-ignores =
2525
tests/*.py: S101, WPS226, WPS118, WPS202, WPS204, WPS218, WPS211, WPS604, WPS431, WPS210, WPS201, WPS437
2626
src/typesense/types/*.py: B950, WPS215, WPS111, WPS462, WPS322, WPS428, WPS114, WPS110, WPS202
27-
src/typesense/documents.py: WPS320, E704, D102, WPS428, WPS220
27+
src/typesense/documents.py: WPS320, WPS428, WPS220
28+
src/typesense/stemming_dictionaries.py: WPS320, WPS428, WPS220
2829
src/typesense/api_call.py: WPS110, WPS211
2930
src/typesense/request_handler.py: WPS110, WPS211
3031

src/typesense/client.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
from typesense.keys import Keys
4646
from typesense.multi_search import MultiSearch
4747
from typesense.operations import Operations
48+
from typesense.stemming import Stemming
4849
from typesense.stopwords import Stopwords
4950

5051
TDoc = typing.TypeVar("TDoc", bound=DocumentSchema)
@@ -56,7 +57,7 @@ class Client:
5657
5758
This class serves as the entry point for all Typesense operations. It initializes
5859
and provides access to various components of the Typesense SDK, such as collections,
59-
multi-search, keys, aliases, analytics, operations, debug, stopwords,
60+
multi-search, keys, aliases, analytics, stemming, operations, debug, stopwords,
6061
and conversation models.
6162
6263
Attributes:
@@ -67,6 +68,7 @@ class Client:
6768
keys (Keys): Instance for managing API keys.
6869
aliases (Aliases): Instance for managing collection aliases.
6970
analytics (Analytics): Instance for analytics operations.
71+
stemming (Stemming): Instance for stemming dictionary operations.
7072
operations (Operations): Instance for various Typesense operations.
7173
debug (Debug): Instance for debug operations.
7274
stopwords (Stopwords): Instance for managing stopwords.
@@ -96,6 +98,7 @@ def __init__(self, config_dict: ConfigDict) -> None:
9698
self.keys = Keys(self.api_call)
9799
self.aliases = Aliases(self.api_call)
98100
self.analytics = Analytics(self.api_call)
101+
self.stemming = Stemming(self.api_call)
99102
self.operations = Operations(self.api_call)
100103
self.debug = Debug(self.api_call)
101104
self.stopwords = Stopwords(self.api_call)

src/typesense/stemming.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""
2+
Module for managing stemming dictionaries in Typesense.
3+
4+
This module provides a class for managing stemming dictionaries in Typesense,
5+
including creating, updating, and retrieving them.
6+
7+
Classes:
8+
- Stemming: Handles operations related to stemming dictionaries.
9+
10+
Attributes:
11+
- dictionaries: The StemmingDictionaries object for managing stemming dictionaries.
12+
13+
Methods:
14+
- __init__: Initializes the Stemming object.
15+
16+
The Stemming class interacts with the Typesense API to manage stemming dictionary operations.
17+
It provides access to the StemmingDictionaries object for managing stemming dictionaries.
18+
19+
For more information on stemming dictionaries, refer to the Stemming
20+
[documentation](https://typesense.org/docs/28.0/api/stemming.html)
21+
22+
This module uses type hinting and is compatible with Python 3.11+ as well as earlier
23+
versions through the use of the typing_extensions library.
24+
"""
25+
26+
from typesense.api_call import ApiCall
27+
from typesense.stemming_dictionaries import StemmingDictionaries
28+
29+
30+
class Stemming(object):
    """
    Entry point for the stemming features of the Typesense client.

    The object performs no requests itself: it stores the shared API caller
    and exposes the stemming dictionaries resource built on top of it.

    Attributes:
        api_call (ApiCall): The API call object used for making requests.
        dictionaries (StemmingDictionaries): Accessor for stemming dictionary
            operations, constructed from the same ``api_call``.
    """

    def __init__(self, api_call: ApiCall):
        """
        Wire the stemming sub-resources to the given API caller.

        Args:
            api_call (ApiCall): The API call object for making requests.
        """
        self.dictionaries = StemmingDictionaries(api_call)
        self.api_call = api_call
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
"""
2+
Module for interacting with the stemming dictionaries endpoint of the Typesense API.
3+
4+
This module provides a class for managing stemming dictionaries in Typesense, including creating
5+
and updating them.
6+
7+
Classes:
8+
- StemmingDictionaries: Handles operations related to stemming dictionaries.
9+
10+
Methods:
11+
- __init__: Initializes the StemmingDictionaries object.
12+
- __getitem__: Retrieves or creates a StemmingDictionary object for a given dictionary_id.
13+
- upsert: Creates or updates a stemming dictionary.
14+
- _upsert_list: Creates or updates a stemming dictionary from a list of word/root entries.
15+
- _dump_to_jsonl: Dumps a list of StemmingDictionaryCreateSchema objects to a JSONL string.
16+
- _parse_response: Parses the response from the upsert operation.
17+
- _upsert_raw: Performs the raw upsert operation.
18+
- _endpoint_path: Constructs the API endpoint path for the stemming dictionaries resource, optionally suffixed with an action.
19+
20+
The StemmingDictionaries class interacts with the Typesense API to manage stemming dictionary
21+
operations.
22+
It provides methods to create, update, and retrieve stemming dictionaries, as well as
23+
access individual StemmingDictionary objects.
24+
25+
For more information on stemming dictionaries,
26+
refer to the Stemming [documentation](https://typesense.org/docs/28.0/api/stemming.html)
27+
"""
28+
29+
import sys
30+
31+
if sys.version_info >= (3, 11):
32+
import typing
33+
else:
34+
import typing_extensions as typing
35+
36+
import json
37+
38+
from typesense.api_call import ApiCall
39+
from typesense.stemming_dictionary import StemmingDictionary
40+
from typesense.types.stemming import (
41+
StemmingDictionariesRetrieveSchema,
42+
StemmingDictionaryCreateSchema,
43+
)
44+
45+
46+
class StemmingDictionaries:
    """
    Class for managing stemming dictionaries in Typesense.

    This class provides methods to interact with stemming dictionaries, including
    creating, updating, and retrieving them.

    Attributes:
        resource_path (str): Base API path of the stemming dictionaries resource.
        api_call (ApiCall): The API call object for making requests.
        stemming_dictionaries (Dict[str, StemmingDictionary]): A dictionary of
            StemmingDictionary objects, keyed by dictionary ID.
    """

    resource_path: typing.Final[str] = "/stemming/dictionaries"

    def __init__(self, api_call: ApiCall):
        """
        Initialize the StemmingDictionaries object.

        Args:
            api_call (ApiCall): The API call object for making requests.
        """
        self.api_call = api_call
        self.stemming_dictionaries: typing.Dict[str, StemmingDictionary] = {}

    def __getitem__(self, dictionary_id: str) -> StemmingDictionary:
        """
        Get or create a StemmingDictionary object for a given dictionary_id.

        The created object is cached, so repeated lookups with the same ID
        return the same StemmingDictionary instance.

        Args:
            dictionary_id (str): The ID of the stemming dictionary.

        Returns:
            StemmingDictionary: The StemmingDictionary object for the given ID.
        """
        if not self.stemming_dictionaries.get(dictionary_id):
            self.stemming_dictionaries[dictionary_id] = StemmingDictionary(
                self.api_call,
                dictionary_id,
            )
        return self.stemming_dictionaries[dictionary_id]

    def retrieve(self) -> StemmingDictionariesRetrieveSchema:
        """
        Retrieve the list of stemming dictionaries.

        Returns:
            StemmingDictionariesRetrieveSchema: The list of stemming dictionaries.
        """
        response: StemmingDictionariesRetrieveSchema = self.api_call.get(
            self._endpoint_path(),
            entity_type=StemmingDictionariesRetrieveSchema,
        )
        return response

    @typing.overload
    def upsert(
        self,
        dictionary_id: str,
        word_root_combinations: typing.Union[str, bytes],
    ) -> str: ...

    @typing.overload
    def upsert(
        self,
        dictionary_id: str,
        word_root_combinations: typing.List[StemmingDictionaryCreateSchema],
    ) -> typing.List[StemmingDictionaryCreateSchema]: ...

    def upsert(
        self,
        dictionary_id: str,
        word_root_combinations: typing.Union[
            typing.List[StemmingDictionaryCreateSchema],
            str,
            bytes,
        ],
    ) -> typing.Union[str, typing.List[StemmingDictionaryCreateSchema]]:
        """
        Create or update a stemming dictionary.

        Args:
            dictionary_id (str): The ID of the stemming dictionary.
            word_root_combinations: Either an already serialized payload
                (``str`` or ``bytes``), which is sent unchanged, or a list of
                entries, which is serialized to JSONL before being sent.

        Returns:
            The raw response string when a ``str``/``bytes`` payload was given,
            otherwise the response parsed into a list with one item per
            non-blank response line.

        Raises:
            ValueError: If a list was given and a non-blank line of the
                response is not valid JSON.
        """
        if isinstance(word_root_combinations, (str, bytes)):
            return self._upsert_raw(dictionary_id, word_root_combinations)

        return self._upsert_list(dictionary_id, word_root_combinations)

    def _upsert_list(
        self,
        dictionary_id: str,
        word_root_combinations: typing.List[StemmingDictionaryCreateSchema],
    ) -> typing.List[StemmingDictionaryCreateSchema]:
        """
        Serialize the entries to JSONL, upsert them, and parse the response.

        Args:
            dictionary_id (str): The ID of the stemming dictionary.
            word_root_combinations: The entries to import.

        Returns:
            The response parsed into a list, one item per non-blank line.
        """
        word_combos_in_jsonl = self._dump_to_jsonl(word_root_combinations)
        response = self._upsert_raw(dictionary_id, word_combos_in_jsonl)
        return self._parse_response(response)

    def _dump_to_jsonl(
        self,
        word_root_combinations: typing.List[StemmingDictionaryCreateSchema],
    ) -> str:
        """
        Dump a list of entries to a JSONL string.

        Args:
            word_root_combinations: The entries to serialize.

        Returns:
            str: One JSON document per entry, joined by newlines (no trailing
                newline; an empty list gives an empty string).
        """
        word_root_strs = [json.dumps(combo) for combo in word_root_combinations]

        return "\n".join(word_root_strs)

    def _parse_response(
        self,
        response: str,
    ) -> typing.List[StemmingDictionaryCreateSchema]:
        """
        Parse a JSONL response into a list of decoded objects.

        Args:
            response (str): Newline-separated JSON documents.

        Returns:
            The decoded objects, in the order they appear in the response.

        Raises:
            ValueError: If a non-blank line is not valid JSON. The original
                ``json.JSONDecodeError`` is attached as the cause.
        """
        object_list: typing.List[StemmingDictionaryCreateSchema] = []

        for line in response.split("\n"):
            # An empty response or a trailing newline yields blank segments;
            # they carry no entry, so skip them instead of failing the parse.
            if not line.strip():
                continue
            try:
                decoded = json.loads(line)
            except json.JSONDecodeError as error:
                raise ValueError(
                    f"Failed to parse JSON from response: {line}",
                ) from error
            object_list.append(decoded)
        return object_list

    def _upsert_raw(
        self,
        dictionary_id: str,
        word_root_combinations: typing.Union[bytes, str],
    ) -> str:
        """
        Post an already serialized payload to the import endpoint.

        Args:
            dictionary_id (str): The ID of the stemming dictionary, sent as the
                ``id`` query parameter.
            word_root_combinations: The payload, sent as the request body
                without JSON encoding.

        Returns:
            str: The response as returned by the API call.
        """
        response: str = self.api_call.post(
            self._endpoint_path("import"),
            body=word_root_combinations,
            as_json=False,
            entity_type=str,
            params={"id": dictionary_id},
        )
        return response

    def _endpoint_path(self, action: typing.Union[str, None] = None) -> str:
        """
        Construct the API endpoint path for the stemming dictionaries resource.

        Args:
            action (str, optional): An action segment to append to the resource
                path. Defaults to None.

        Returns:
            str: The resource path, followed by ``/<action>`` when an action
                is given.
        """
        if action:
            return f"{StemmingDictionaries.resource_path}/{action}"
        return StemmingDictionaries.resource_path
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
"""
2+
Module for managing individual stemming dictionaries in Typesense.
3+
4+
This module provides a class for managing individual stemming dictionaries in Typesense,
5+
including retrieving them.
6+
7+
Classes:
8+
- StemmingDictionary: Handles operations related to individual stemming dictionaries.
9+
10+
Methods:
11+
- __init__: Initializes the StemmingDictionary object.
12+
- retrieve: Retrieves this specific stemming dictionary.
13+
14+
The StemmingDictionary class interacts with the Typesense API to manage operations on a
15+
specific stemming dictionary. It provides methods to retrieve the dictionary details.
16+
17+
For more information on stemming dictionaries, refer to the Stemming
18+
[documentation](https://typesense.org/docs/28.0/api/stemming.html)
19+
20+
This module uses type hinting and is compatible with Python 3.11+ as well as earlier
21+
versions through the use of the typing_extensions library.
22+
"""
23+
24+
from typesense.api_call import ApiCall
25+
from typesense.types.stemming import StemmingDictionarySchema
26+
27+
28+
class StemmingDictionary:
    """
    Handle to a single stemming dictionary in Typesense.

    An instance is bound to one dictionary ID and can fetch that dictionary
    through the API caller it was given.

    Attributes:
        api_call (ApiCall): The API call object for making requests.
        dict_id (str): The ID of the stemming dictionary.
    """

    def __init__(self, api_call: ApiCall, dict_id: str):
        """
        Bind the handle to an API caller and a dictionary ID.

        Args:
            api_call (ApiCall): The API call object for making requests.
            dict_id (str): The ID of the stemming dictionary.
        """
        self.dict_id = dict_id
        self.api_call = api_call

    def retrieve(self) -> StemmingDictionarySchema:
        """
        Fetch this stemming dictionary from the API.

        Returns:
            StemmingDictionarySchema: The schema containing the stemming dictionary details.
        """
        dictionary: StemmingDictionarySchema = self.api_call.get(
            self._endpoint_path,
            entity_type=StemmingDictionarySchema,
            as_json=True,
        )
        return dictionary

    @property
    def _endpoint_path(self) -> str:
        """
        Build the API endpoint path for this specific stemming dictionary.

        Returns:
            str: The stemming dictionaries resource path followed by this
                dictionary's ID.
        """
        # Imported here rather than at module level: stemming_dictionaries
        # imports this module at its top level, so a top-level import in the
        # other direction would be circular.
        from typesense.stemming_dictionaries import StemmingDictionaries

        path_segments = (StemmingDictionaries.resource_path, self.dict_id)
        return "/".join(path_segments)

0 commit comments

Comments
 (0)