SYMBOL INDEX (140 symbols across 20 files) FILE: nlpretext/_utils/daskloader.py function read_text (line 8) | def read_text(files_path: Union[str, List[str]], encoding: str): # type... function read_json (line 12) | def read_json(files_path: Union[str, List[str]], encoding: str): # type... function read_csv (line 16) | def read_csv(files_path: Union[str, List[str]], encoding: str): # type:... function read_parquet (line 20) | def read_parquet(files_path: Union[str, List[str]], encoding: str): # t... FILE: nlpretext/_utils/file_loader.py function detect_encoding (line 26) | def detect_encoding(file_path_or_string: Union[str, bytes], n_lines: int... function check_text_file_format (line 51) | def check_text_file_format(filepath: Union[str, List[str]]) -> str: FILE: nlpretext/_utils/pandasloader.py function _list_handler (line 7) | def _list_handler(func): function read_text (line 18) | def read_text(file_path: str, encoding: str) -> pd.DataFrame: function read_json (line 24) | def read_json(file_path: str, encoding: str) -> pd.DataFrame: function read_csv (line 30) | def read_csv(file_path: str, encoding: str) -> pd.DataFrame: function read_parquet (line 36) | def read_parquet(file_path: str, encoding: str) -> pd.DataFrame: FILE: nlpretext/_utils/phone_number.py function find_phone_numbers (line 24) | def find_phone_numbers(string: str, region_code: Optional[str] = None) -... function extract_phone_numbers (line 55) | def extract_phone_numbers(text: str, countrylist: List[Optional[str]]) -... class PhoneParser (line 78) | class PhoneParser: method __init__ (line 84) | def __init__(self): method parsed_num (line 90) | def parsed_num(self) -> Optional[_phonenumbers.PhoneNumber]: method parsed_num (line 94) | def parsed_num(self, value: Optional[_phonenumbers.PhoneNumber]) -> None: method parse_number (line 97) | def parse_number( method format_number (line 131) | def format_number(self, num_format: str) -> str: FILE: nlpretext/_utils/stopwords.py function get_stopwords (line 24) | def get_stopwords(lang: str = "en") -> List[str]: FILE: nlpretext/augmentation/text_augmentation.py class CouldNotAugment (line 10) | class CouldNotAugment(ValueError): # noqa: D101 class UnavailableAugmenter (line 14) | class UnavailableAugmenter(ValueError): # noqa: D101 function augment_text (line 18) | def augment_text( function process_entities_and_text (line 63) | def process_entities_and_text( function are_entities_in_augmented_text (line 108) | def are_entities_in_augmented_text(entities: List[Dict[str, Any]], augme... function get_augmenter (line 142) | def get_augmenter(method: str, stopwords: Optional[List[str]] = None) ->... function get_augmented_entities (line 168) | def get_augmented_entities( function clean_sentence_entities (line 212) | def clean_sentence_entities(text: str, entities: List[Dict[str, Any]]) -... function check_interval_included (line 254) | def check_interval_included( FILE: nlpretext/basic/preprocess.py function normalize_whitespace (line 30) | def normalize_whitespace(text: str) -> str: function remove_whitespace (line 56) | def remove_whitespace(text: str) -> str: function lower_text (line 75) | def lower_text(text: str) -> str: function filter_groups (line 90) | def filter_groups(token: str, ignored_stopwords: Optional[List[str]] = N... function ungroup_ignored_stopwords (line 112) | def ungroup_ignored_stopwords( function remove_stopwords (line 132) | def remove_stopwords( function remove_eol_characters (line 189) | def remove_eol_characters(text: str) -> str: function fix_bad_unicode (line 205) | def fix_bad_unicode(text: str, normalization: str = "NFC") -> str: function unpack_english_contractions (line 238) | def unpack_english_contractions(text: str) -> str: function replace_urls (line 282) | def replace_urls(text: str, replace_with: str = "*URL*") -> str: function replace_emails (line 305) | def replace_emails(text: str, replace_with: str = "*EMAIL*") -> str: function replace_phone_numbers (line 328) | def replace_phone_numbers( function replace_numbers (line 376) | def replace_numbers(text: str, replace_with: str = "*NUMBER*") -> str: function replace_currency_symbols (line 399) | def replace_currency_symbols(text: str, replace_with: Optional[str] = No... function remove_punct (line 431) | def remove_punct(text: str, marks: Optional[str] = None) -> str: function remove_accents (line 463) | def remove_accents(text: str, method: str = "unicode") -> str: function remove_multiple_spaces_and_strip_text (line 502) | def remove_multiple_spaces_and_strip_text(text: str) -> str: function filter_non_latin_characters (line 523) | def filter_non_latin_characters(text: str) -> str: FILE: nlpretext/cli/__main__.py function version_callback (line 17) | def version_callback(value: bool) -> None: FILE: nlpretext/cli/preprocess.py function run (line 13) | def run( FILE: nlpretext/preprocessor.py class Preprocessor (line 14) | class Preprocessor: method __init__ (line 15) | def __init__(self): method pipe (line 20) | def pipe(self, operation: Callable[[Any], Any], args: Optional[Dict[st... method build_pipeline (line 33) | def build_pipeline(operation_list: List[Dict[Any, Any]]) -> Pipeline: method run (line 56) | def run(self, text: str) -> str: FILE: nlpretext/social/preprocess.py function remove_mentions (line 24) | def remove_mentions(text: str) -> str: function extract_mentions (line 40) | def extract_mentions(text: str) -> List[str]: function remove_html_tags (line 56) | def remove_html_tags(text: str) -> str: function remove_emoji (line 72) | def remove_emoji(text: str) -> str: function convert_emoji_to_text (line 92) | def convert_emoji_to_text(text: str, code_delimiters: Tuple[str, str] = ... function extract_emojis (line 112) | def extract_emojis(text: str) -> List[str]: function extract_hashtags (line 133) | def extract_hashtags(text: str) -> List[str]: function remove_hashtag (line 150) | def remove_hashtag(text: str) -> str: FILE: nlpretext/textloader.py class TextLoader (line 36) | class TextLoader: method __init__ (line 37) | def __init__(self, text_column="text", encoding="utf-8", file_format=N... method __repr__ (line 72) | def __repr__(self): method _read_text_txt (line 82) | def _read_text_txt(self, files_path): method _read_text_json (line 99) | def _read_text_json(self, files_path): method _read_text_csv (line 118) | def _read_text_csv(self, files_path): method _read_text_parquet (line 137) | def _read_text_parquet(self, files_path): method read_text (line 156) | def read_text( FILE: nlpretext/token/preprocess.py function remove_stopwords (line 24) | def remove_stopwords( function remove_tokens_with_nonletters (line 57) | def remove_tokens_with_nonletters(tokens: List[str]) -> List[str]: function remove_special_caracters_from_tokenslist (line 77) | def remove_special_caracters_from_tokenslist(tokens: List[str]) -> List[... function remove_smallwords (line 97) | def remove_smallwords(tokens: List[str], smallwords_threshold: int) -> L... FILE: nlpretext/token/tokenizer.py class LanguageNotHandled (line 33) | class LanguageNotHandled(Exception): class LanguageNotInstalledError (line 37) | class LanguageNotInstalledError(Exception): class SpacyModel (line 41) | class SpacyModel: class SingletonSpacyModel (line 42) | class SingletonSpacyModel: method __init__ (line 43) | def __init__(self, lang: str) -> None: method __init__ (line 58) | def __init__(self, lang): method get_lang_model (line 62) | def get_lang_model(self) -> Optional[str]: # noqa: D102 function _load_spacy_model (line 69) | def _load_spacy_model(model: str) -> Any: function _get_spacy_tokenizer (line 83) | def _get_spacy_tokenizer(lang: str) -> Optional[spacy.tokenizer.Tokenizer]: function tokenize (line 103) | def tokenize(text: str, lang_module: str = "en_spacy") -> List[str]: function untokenize (line 145) | def untokenize(tokens: List[str], lang: str = "fr") -> str: function convert_tokens_to_string (line 165) | def convert_tokens_to_string(tokens_or_str: Optional[Union[str, List[str... function convert_string_to_tokens (line 175) | def convert_string_to_tokens( # noqa: D103 FILE: tests/test_data_augmentation.py function test_process_entities_and_text_not_altered (line 35) | def test_process_entities_and_text_not_altered(text, text_augmented, ent... function test_process_entities_and_text_altered (line 54) | def test_process_entities_and_text_altered(text, text_augmented, entities): function test_get_augmenter (line 62) | def test_get_augmenter(): FILE: tests/test_file_loader.py function create_files (line 27) | def create_files(): function test_detect_encoding (line 38) | def test_detect_encoding(): function remove_files (line 46) | def remove_files(): function test_check_text_file_format (line 105) | def test_check_text_file_format(input_filepath, raising, expected_str): FILE: tests/test_phone_number.py function test_extract_phone_number (line 22) | def test_extract_phone_number(): function test_extract_phone_number_us (line 29) | def test_extract_phone_number_us(): function test_extract_phone_number_fr (line 36) | def test_extract_phone_number_fr(): function test_extract_phone_number_international (line 43) | def test_extract_phone_number_international(): function test_phone_parser_us (line 50) | def test_phone_parser_us(): function test_phone_parser_fr (line 59) | def test_phone_parser_fr(): FILE: tests/test_preprocessor.py function test_extract_emojis (line 65) | def test_extract_emojis(text, expected_result): function test_remove_mentions (line 77) | def test_remove_mentions(text, expected_result): function test_extract_mentions (line 89) | def test_extract_mentions(text, expected_result): function test_remove_html_tags (line 104) | def test_remove_html_tags(text, expected_result): function test_remove_smallwords (line 120) | def test_remove_smallwords(tokens_list, smallwords_threshold, expected_r... function test_extract_hashtags (line 135) | def test_extract_hashtags(text, expected_result): function test_remove_hashtag (line 153) | def test_remove_hashtag(text, expected_result): function test_filter_non_latin_characters (line 167) | def test_filter_non_latin_characters(text, expected_filtered_text): function test_remove_multiple_spaces_and_strip_text (line 182) | def test_remove_multiple_spaces_and_strip_text(input_str, expected_str): function test_remove_eol_characters (line 195) | def test_remove_eol_characters(input_str, expected_str): function test_remove_tokens_with_nonletters (line 200) | def test_remove_tokens_with_nonletters(): function test_remove_special_caracters_from_tokenslist (line 207) | def test_remove_special_caracters_from_tokenslist(): function test_get_stopwords (line 214) | def test_get_stopwords(): function test_remove_stopwords_tokens (line 225) | def test_remove_stopwords_tokens(input_tokens, lang, expected_output): function test_remove_stopwords_text (line 250) | def test_remove_stopwords_text( function test_remove_custom_stopwords_text (line 269) | def test_remove_custom_stopwords_text(input_text, lang, custom_stopwords... function test_remove_accents (line 274) | def test_remove_accents(): function test_fix_bad_unicode (line 306) | def test_fix_bad_unicode(input_str, expected_str): function test_normalize_whitespace (line 315) | def test_normalize_whitespace(input_str, expected_str): function test_unpack_english_contractions (line 331) | def test_unpack_english_contractions(input_str, expected_str): function test_replace_urls (line 352) | def test_replace_urls(input_str, expected_str): function test_replace_emails (line 365) | def test_replace_emails(input_str, expected_str): function test_replace_phone_numbers (line 388) | def test_replace_phone_numbers(input_str, expected_str): function test_replace_numbers (line 406) | def test_replace_numbers(input_str, expected_str): function test_replace_currency_symbols (line 425) | def test_replace_currency_symbols(input_str, param, expected_str): function test_remove_punct (line 451) | def test_remove_punct(input_str, param, expected_str): function test_remove_emoji (line 466) | def test_remove_emoji(input_str, expected_str): function test_convert_emoji_to_text (line 481) | def test_convert_emoji_to_text(input_str, expected_str): function test_custom_preprocess (line 486) | def test_custom_preprocess(): function test_apply_preprocessor (line 514) | def test_apply_preprocessor(input_str, expected_str): FILE: tests/test_textloader.py function test__read_text_txt_dask (line 43) | def test__read_text_txt_dask(mock_read_text): function test__read_text_txt_pandas (line 66) | def test__read_text_txt_pandas(mock_read_text): function test__read_text_json_dask (line 95) | def test__read_text_json_dask(mock_read): function test__read_text_json_pandas (line 120) | def test__read_text_json_pandas(mock_read): function test__read_text_csv_dask (line 140) | def test__read_text_csv_dask(mock_read_csv): function test__read_text_csv_pandas (line 165) | def test__read_text_csv_pandas(mock_read): function test__read_text_parquet_dask (line 185) | def test__read_text_parquet_dask(mock_read_parquet): function test__read_text_parquet_pandas (line 210) | def test__read_text_parquet_pandas(mock_read): function test_read_text (line 257) | def test_read_text( FILE: tests/test_tokenizer.py function test_load_spacy_model_validation (line 16) | def test_load_spacy_model_validation(bad_model_name):