Skip to content

Commit 9e9c65d

Browse files
committed
Update parameters for Spanish
1 parent a532052 commit 9e9c65d

1 file changed

Lines changed: 9 additions & 9 deletions

File tree

ac_dc/parameters_filtering.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -294,32 +294,32 @@
294294
"cond_remove_words_with_incorrect_substrings": True,
295295
"incorrect_word_substrings": ["http", "www", ".com", "href", "//"],
296296
"cond_remove_long_words": True,
297-
"length_word_max_cutoff": 30,
297+
"length_word_max_cutoff": 25,
298298
"cond_check_number_words": True,
299299
"tokenization": False,
300300
"strip_characters": special_characters_default,
301-
"number_words_min_cutoff": 1,
301+
"number_words_min_cutoff": 16,
302302
"number_words_max_cutoff": 100000,
303303
"cond_check_character_repetition_removal": True,
304304
"character_repetition_length": 10,
305-
"character_repetition_max_cutoff": 0.106,
305+
"character_repetition_max_cutoff": 0.14,
306306
"cond_check_word_repetition_removal": True,
307307
"word_repetition_length": 5,
308-
"word_repetition_max_cutoff": 0.19,
308+
"word_repetition_max_cutoff": 0.25,
309309
"cond_check_special_characters": True,
310310
"special_characters": special_characters_default,
311-
"special_characters_max_cutoff": 0.3,
311+
"special_characters_max_cutoff": 0.34,
312312
"cond_words_augmentation": False,
313313
"words_augmentation_group_sizes": [],
314314
"words_augmentation_join_char": "",
315315
"cond_check_stopwords": True,
316-
"stopwords_min_cutoff": 0.2,
316+
"stopwords_min_cutoff": 0.4,
317317
"cond_check_flagged_words": True,
318-
"flagged_words_max_cutoff": 0.2,
318+
"flagged_words_max_cutoff": 0.01,
319319
"cond_check_lang_id": True,
320-
"lang_id_min_cutoff": 0.75,
320+
"lang_id_min_cutoff": 0.8,
321321
"cond_check_perplexity": True,
322-
"perplexity_max_cutoff": 2500000,
322+
"perplexity_max_cutoff": 1300,
323323
}
324324

325325
parameters_filtering_eu = {

0 commit comments

Comments (0)