SYMBOL INDEX (140 symbols across 20 files)

FILE: nlpretext/_utils/daskloader.py
  function read_text (line 8) | def read_text(files_path: Union[str, List[str]], encoding: str):  # type...
  function read_json (line 12) | def read_json(files_path: Union[str, List[str]], encoding: str):  # type...
  function read_csv (line 16) | def read_csv(files_path: Union[str, List[str]], encoding: str):  # type:...
  function read_parquet (line 20) | def read_parquet(files_path: Union[str, List[str]], encoding: str):  # t...

FILE: nlpretext/_utils/file_loader.py
  function detect_encoding (line 26) | def detect_encoding(file_path_or_string: Union[str, bytes], n_lines: int...
  function check_text_file_format (line 51) | def check_text_file_format(filepath: Union[str, List[str]]) -> str:

FILE: nlpretext/_utils/pandasloader.py
  function _list_handler (line 7) | def _list_handler(func):
  function read_text (line 18) | def read_text(file_path: str, encoding: str) -> pd.DataFrame:
  function read_json (line 24) | def read_json(file_path: str, encoding: str) -> pd.DataFrame:
  function read_csv (line 30) | def read_csv(file_path: str, encoding: str) -> pd.DataFrame:
  function read_parquet (line 36) | def read_parquet(file_path: str, encoding: str) -> pd.DataFrame:

FILE: nlpretext/_utils/phone_number.py
  function find_phone_numbers (line 24) | def find_phone_numbers(string: str, region_code: Optional[str] = None) -...
  function extract_phone_numbers (line 55) | def extract_phone_numbers(text: str, countrylist: List[Optional[str]]) -...
  class PhoneParser (line 78) | class PhoneParser:
    method __init__ (line 84) | def __init__(self):
    method parsed_num (line 90) | def parsed_num(self) -> Optional[_phonenumbers.PhoneNumber]:
    method parsed_num (line 94) | def parsed_num(self, value: Optional[_phonenumbers.PhoneNumber]) -> None:
    method parse_number (line 97) | def parse_number(
    method format_number (line 131) | def format_number(self, num_format: str) -> str:

FILE: nlpretext/_utils/stopwords.py
  function get_stopwords (line 24) | def get_stopwords(lang: str = "en") -> List[str]:

FILE: nlpretext/augmentation/text_augmentation.py
  class CouldNotAugment (line 10) | class CouldNotAugment(ValueError):  # noqa: D101
  class UnavailableAugmenter (line 14) | class UnavailableAugmenter(ValueError):  # noqa: D101
  function augment_text (line 18) | def augment_text(
  function process_entities_and_text (line 63) | def process_entities_and_text(
  function are_entities_in_augmented_text (line 108) | def are_entities_in_augmented_text(entities: List[Dict[str, Any]], augme...
  function get_augmenter (line 142) | def get_augmenter(method: str, stopwords: Optional[List[str]] = None) ->...
  function get_augmented_entities (line 168) | def get_augmented_entities(
  function clean_sentence_entities (line 212) | def clean_sentence_entities(text: str, entities: List[Dict[str, Any]]) -...
  function check_interval_included (line 254) | def check_interval_included(

FILE: nlpretext/basic/preprocess.py
  function normalize_whitespace (line 30) | def normalize_whitespace(text: str) -> str:
  function remove_whitespace (line 56) | def remove_whitespace(text: str) -> str:
  function lower_text (line 75) | def lower_text(text: str) -> str:
  function filter_groups (line 90) | def filter_groups(token: str, ignored_stopwords: Optional[List[str]] = N...
  function ungroup_ignored_stopwords (line 112) | def ungroup_ignored_stopwords(
  function remove_stopwords (line 132) | def remove_stopwords(
  function remove_eol_characters (line 189) | def remove_eol_characters(text: str) -> str:
  function fix_bad_unicode (line 205) | def fix_bad_unicode(text: str, normalization: str = "NFC") -> str:
  function unpack_english_contractions (line 238) | def unpack_english_contractions(text: str) -> str:
  function replace_urls (line 282) | def replace_urls(text: str, replace_with: str = "*URL*") -> str:
  function replace_emails (line 305) | def replace_emails(text: str, replace_with: str = "*EMAIL*") -> str:
  function replace_phone_numbers (line 328) | def replace_phone_numbers(
  function replace_numbers (line 376) | def replace_numbers(text: str, replace_with: str = "*NUMBER*") -> str:
  function replace_currency_symbols (line 399) | def replace_currency_symbols(text: str, replace_with: Optional[str] = No...
  function remove_punct (line 431) | def remove_punct(text: str, marks: Optional[str] = None) -> str:
  function remove_accents (line 463) | def remove_accents(text: str, method: str = "unicode") -> str:
  function remove_multiple_spaces_and_strip_text (line 502) | def remove_multiple_spaces_and_strip_text(text: str) -> str:
  function filter_non_latin_characters (line 523) | def filter_non_latin_characters(text: str) -> str:

FILE: nlpretext/cli/__main__.py
  function version_callback (line 17) | def version_callback(value: bool) -> None:

FILE: nlpretext/cli/preprocess.py
  function run (line 13) | def run(

FILE: nlpretext/preprocessor.py
  class Preprocessor (line 14) | class Preprocessor:
    method __init__ (line 15) | def __init__(self):
    method pipe (line 20) | def pipe(self, operation: Callable[[Any], Any], args: Optional[Dict[st...
    method build_pipeline (line 33) | def build_pipeline(operation_list: List[Dict[Any, Any]]) -> Pipeline:
    method run (line 56) | def run(self, text: str) -> str:

FILE: nlpretext/social/preprocess.py
  function remove_mentions (line 24) | def remove_mentions(text: str) -> str:
  function extract_mentions (line 40) | def extract_mentions(text: str) -> List[str]:
  function remove_html_tags (line 56) | def remove_html_tags(text: str) -> str:
  function remove_emoji (line 72) | def remove_emoji(text: str) -> str:
  function convert_emoji_to_text (line 92) | def convert_emoji_to_text(text: str, code_delimiters: Tuple[str, str] = ...
  function extract_emojis (line 112) | def extract_emojis(text: str) -> List[str]:
  function extract_hashtags (line 133) | def extract_hashtags(text: str) -> List[str]:
  function remove_hashtag (line 150) | def remove_hashtag(text: str) -> str:

FILE: nlpretext/textloader.py
  class TextLoader (line 36) | class TextLoader:
    method __init__ (line 37) | def __init__(self, text_column="text", encoding="utf-8", file_format=N...
    method __repr__ (line 72) | def __repr__(self):
    method _read_text_txt (line 82) | def _read_text_txt(self, files_path):
    method _read_text_json (line 99) | def _read_text_json(self, files_path):
    method _read_text_csv (line 118) | def _read_text_csv(self, files_path):
    method _read_text_parquet (line 137) | def _read_text_parquet(self, files_path):
    method read_text (line 156) | def read_text(

FILE: nlpretext/token/preprocess.py
  function remove_stopwords (line 24) | def remove_stopwords(
  function remove_tokens_with_nonletters (line 57) | def remove_tokens_with_nonletters(tokens: List[str]) -> List[str]:
  function remove_special_caracters_from_tokenslist (line 77) | def remove_special_caracters_from_tokenslist(tokens: List[str]) -> List[...
  function remove_smallwords (line 97) | def remove_smallwords(tokens: List[str], smallwords_threshold: int) -> L...

FILE: nlpretext/token/tokenizer.py
  class LanguageNotHandled (line 33) | class LanguageNotHandled(Exception):
  class LanguageNotInstalledError (line 37) | class LanguageNotInstalledError(Exception):
  class SpacyModel (line 41) | class SpacyModel:
    class SingletonSpacyModel (line 42) | class SingletonSpacyModel:
      method __init__ (line 43) | def __init__(self, lang: str) -> None:
    method __init__ (line 58) | def __init__(self, lang):
    method get_lang_model (line 62) | def get_lang_model(self) -> Optional[str]:  # noqa: D102
  function _load_spacy_model (line 69) | def _load_spacy_model(model: str) -> Any:
  function _get_spacy_tokenizer (line 83) | def _get_spacy_tokenizer(lang: str) -> Optional[spacy.tokenizer.Tokenizer]:
  function tokenize (line 103) | def tokenize(text: str, lang_module: str = "en_spacy") -> List[str]:
  function untokenize (line 145) | def untokenize(tokens: List[str], lang: str = "fr") -> str:
  function convert_tokens_to_string (line 165) | def convert_tokens_to_string(tokens_or_str: Optional[Union[str, List[str...
  function convert_string_to_tokens (line 175) | def convert_string_to_tokens(  # noqa: D103

FILE: tests/test_data_augmentation.py
  function test_process_entities_and_text_not_altered (line 35) | def test_process_entities_and_text_not_altered(text, text_augmented, ent...
  function test_process_entities_and_text_altered (line 54) | def test_process_entities_and_text_altered(text, text_augmented, entities):
  function test_get_augmenter (line 62) | def test_get_augmenter():

FILE: tests/test_file_loader.py
  function create_files (line 27) | def create_files():
  function test_detect_encoding (line 38) | def test_detect_encoding():
  function remove_files (line 46) | def remove_files():
  function test_check_text_file_format (line 105) | def test_check_text_file_format(input_filepath, raising, expected_str):

FILE: tests/test_phone_number.py
  function test_extract_phone_number (line 22) | def test_extract_phone_number():
  function test_extract_phone_number_us (line 29) | def test_extract_phone_number_us():
  function test_extract_phone_number_fr (line 36) | def test_extract_phone_number_fr():
  function test_extract_phone_number_international (line 43) | def test_extract_phone_number_international():
  function test_phone_parser_us (line 50) | def test_phone_parser_us():
  function test_phone_parser_fr (line 59) | def test_phone_parser_fr():

FILE: tests/test_preprocessor.py
  function test_extract_emojis (line 65) | def test_extract_emojis(text, expected_result):
  function test_remove_mentions (line 77) | def test_remove_mentions(text, expected_result):
  function test_extract_mentions (line 89) | def test_extract_mentions(text, expected_result):
  function test_remove_html_tags (line 104) | def test_remove_html_tags(text, expected_result):
  function test_remove_smallwords (line 120) | def test_remove_smallwords(tokens_list, smallwords_threshold, expected_r...
  function test_extract_hashtags (line 135) | def test_extract_hashtags(text, expected_result):
  function test_remove_hashtag (line 153) | def test_remove_hashtag(text, expected_result):
  function test_filter_non_latin_characters (line 167) | def test_filter_non_latin_characters(text, expected_filtered_text):
  function test_remove_multiple_spaces_and_strip_text (line 182) | def test_remove_multiple_spaces_and_strip_text(input_str, expected_str):
  function test_remove_eol_characters (line 195) | def test_remove_eol_characters(input_str, expected_str):
  function test_remove_tokens_with_nonletters (line 200) | def test_remove_tokens_with_nonletters():
  function test_remove_special_caracters_from_tokenslist (line 207) | def test_remove_special_caracters_from_tokenslist():
  function test_get_stopwords (line 214) | def test_get_stopwords():
  function test_remove_stopwords_tokens (line 225) | def test_remove_stopwords_tokens(input_tokens, lang, expected_output):
  function test_remove_stopwords_text (line 250) | def test_remove_stopwords_text(
  function test_remove_custom_stopwords_text (line 269) | def test_remove_custom_stopwords_text(input_text, lang, custom_stopwords...
  function test_remove_accents (line 274) | def test_remove_accents():
  function test_fix_bad_unicode (line 306) | def test_fix_bad_unicode(input_str, expected_str):
  function test_normalize_whitespace (line 315) | def test_normalize_whitespace(input_str, expected_str):
  function test_unpack_english_contractions (line 331) | def test_unpack_english_contractions(input_str, expected_str):
  function test_replace_urls (line 352) | def test_replace_urls(input_str, expected_str):
  function test_replace_emails (line 365) | def test_replace_emails(input_str, expected_str):
  function test_replace_phone_numbers (line 388) | def test_replace_phone_numbers(input_str, expected_str):
  function test_replace_numbers (line 406) | def test_replace_numbers(input_str, expected_str):
  function test_replace_currency_symbols (line 425) | def test_replace_currency_symbols(input_str, param, expected_str):
  function test_remove_punct (line 451) | def test_remove_punct(input_str, param, expected_str):
  function test_remove_emoji (line 466) | def test_remove_emoji(input_str, expected_str):
  function test_convert_emoji_to_text (line 481) | def test_convert_emoji_to_text(input_str, expected_str):
  function test_custom_preprocess (line 486) | def test_custom_preprocess():
  function test_apply_preprocessor (line 514) | def test_apply_preprocessor(input_str, expected_str):

FILE: tests/test_textloader.py
  function test__read_text_txt_dask (line 43) | def test__read_text_txt_dask(mock_read_text):
  function test__read_text_txt_pandas (line 66) | def test__read_text_txt_pandas(mock_read_text):
  function test__read_text_json_dask (line 95) | def test__read_text_json_dask(mock_read):
  function test__read_text_json_pandas (line 120) | def test__read_text_json_pandas(mock_read):
  function test__read_text_csv_dask (line 140) | def test__read_text_csv_dask(mock_read_csv):
  function test__read_text_csv_pandas (line 165) | def test__read_text_csv_pandas(mock_read):
  function test__read_text_parquet_dask (line 185) | def test__read_text_parquet_dask(mock_read_parquet):
  function test__read_text_parquet_pandas (line 210) | def test__read_text_parquet_pandas(mock_read):
  function test_read_text (line 257) | def test_read_text(

FILE: tests/test_tokenizer.py
  function test_load_spacy_model_validation (line 16) | def test_load_spacy_model_validation(bad_model_name):