Skip to content

Commit b595a06

Browse files
committed
Update parameters for Basque
1 parent 4d81fa3 commit b595a06

1 file changed

Lines changed: 9 additions & 9 deletions

File tree

ac_dc/parameters_filtering.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -328,32 +328,32 @@
328328
"cond_remove_words_with_incorrect_substrings": True,
329329
"incorrect_word_substrings": ["http", "www", ".com", "href", "//"],
330330
"cond_remove_long_words": True,
331-
"length_word_max_cutoff": 35,
331+
"length_word_max_cutoff": 28,
332332
"cond_check_number_words": True,
333333
"tokenization": False,
334334
"strip_characters": special_characters_default,
335-
"number_words_min_cutoff": 1,
335+
"number_words_min_cutoff": 8,
336336
"number_words_max_cutoff": 100000,
337337
"cond_check_character_repetition_removal": True,
338338
"character_repetition_length": 10,
339-
"character_repetition_max_cutoff": 0.106,
339+
"character_repetition_max_cutoff": 0.20,
340340
"cond_check_word_repetition_removal": True,
341341
"word_repetition_length": 5,
342-
"word_repetition_max_cutoff": 0.19,
342+
"word_repetition_max_cutoff": 0.40,
343343
"cond_check_special_characters": True,
344344
"special_characters": special_characters_default,
345-
"special_characters_max_cutoff": 0.3,
345+
"special_characters_max_cutoff": 0.31,
346346
"cond_words_augmentation": False,
347347
"words_augmentation_group_sizes": [],
348348
"words_augmentation_join_char": "",
349349
"cond_check_stopwords": True,
350-
"stopwords_min_cutoff": 0,
350+
"stopwords_min_cutoff": 0.05,
351351
"cond_check_flagged_words": True,
352-
"flagged_words_max_cutoff": 0.2,
352+
"flagged_words_max_cutoff": 0.1,
353353
"cond_check_lang_id": True,
354-
"lang_id_min_cutoff": 0.75,
354+
"lang_id_min_cutoff": 0.70,
355355
"cond_check_perplexity": True,
356-
"perplexity_max_cutoff": 3000000,
356+
"perplexity_max_cutoff": 3000,
357357
}
358358

359359
parameters_filtering_fr = {

0 commit comments

Comments (0)