SYMBOL INDEX (104 symbols across 13 files)

FILE: generate.py
  function closest_synonym (line 69) | def closest_synonym(word):
  function html_replace (line 86) | def html_replace(text):
  function openai_backoff (line 93) | def openai_backoff(**kwargs):
  function round_to_100 (line 97) | def round_to_100(n):
  function get_wp_prompts (line 101) | def get_wp_prompts(words, prompt):
  function get_reuter_prompts (line 112) | def get_reuter_prompts(words, headline):
  function get_essay_prompts (line 123) | def get_essay_prompts(words, prompts):
  function generate_logprobs (line 134) | def generate_logprobs(generate_dataset_fn, llama_7b_model=None, llama_13...
  function format_prompt (line 195) | def format_prompt(p):
  function perturb_char_basic (line 549) | def perturb_char_basic(doc, n=1):
  function perturb_char_space (line 570) | def perturb_char_space(doc, n=1):
  function perturb_char_cap (line 588) | def perturb_char_cap(doc, n=1):
  function perturb_word_adj (line 601) | def perturb_word_adj(doc, n=1):
  function perturb_word_syn (line 613) | def perturb_word_syn(doc, n=1):
  function paraphrase (line 707) | def paraphrase(text):
  function perturb_sent_adj (line 715) | def perturb_sent_adj(doc, n=1):
  function perturb_sent_paraph (line 729) | def perturb_sent_paraph(doc, n=1):
  function perturb_para_adj (line 743) | def perturb_para_adj(doc, n=1):
  function perturb_para_paraph (line 756) | def perturb_para_paraph(doc, n=1):

FILE: llama.py
  function get_words (line 84) | def get_words(exp):
  function backtrack_functions (line 91) | def backtrack_functions(
  function score_ngram (line 119) | def score_ngram(doc, model, tokenizer, n=3):
  function get_all_logprobs (line 134) | def get_all_logprobs(
  function get_indices (line 191) | def get_indices(filter_fn):
  function calc_features (line 240) | def calc_features(file, exp):
  function get_exp_featurize (line 278) | def get_exp_featurize(best_features, vector_map):
  function train_llama (line 298) | def train_llama(data, train, test):

FILE: roberta/run_roberta.py
  class RobertaDataset (line 50) | class RobertaDataset(TorchDataset):
    method __init__ (line 51) | def __init__(self, texts, labels):
    method __len__ (line 55) | def __len__(self):
    method __getitem__ (line 58) | def __getitem__(self, idx):
  function get_scores (line 74) | def get_scores(labels, probabilities, calibrated=False, precision=6):
  function train_roberta_model (line 96) | def train_roberta_model(train_text, train_labels, output_dir, max_epochs...
  function run_roberta_model (line 156) | def run_roberta_model(model_name, texts, labels):
  function train_roberta_gen (line 213) | def train_roberta_gen(
  function get_data (line 251) | def get_data(gen_fn, indices=None, filter_fn=lambda f: True):

FILE: roberta/train.py
  class RobertaDataset (line 46) | class RobertaDataset(TorchDataset):
    method __init__ (line 47) | def __init__(self, texts, labels):
    method __len__ (line 51) | def __len__(self):
    method __getitem__ (line 54) | def __getitem__(self, idx):
  function train_roberta_model (line 70) | def train_roberta_model(train_text, train_labels, output_dir):
  function get_indices (line 160) | def get_indices(filter_fn):
  function get_texts (line 168) | def get_texts(indices):

FILE: run.py
  class RobertaDataset (line 101) | class RobertaDataset(TorchDataset):
    method __init__ (line 102) | def __init__(self, texts, labels):
    method __len__ (line 106) | def __len__(self):
    method __getitem__ (line 109) | def __getitem__(self, idx):
  function get_scores (line 124) | def get_scores(labels, probabilities, calibrated=False, precision=6):
  function get_featurized_data (line 217) | def get_featurized_data(best_features, gpt_only=False):
  function get_indices (line 230) | def get_indices(filter_fn):
  function get_roberta_predictions (line 271) | def get_roberta_predictions(data, train, test, domain):
  function train_ghostbuster (line 302) | def train_ghostbuster(data, train, test, domain):
  function train_perplexity (line 309) | def train_perplexity(data, train, test, domain):
  function run_experiment (line 315) | def run_experiment(best_features, model_name, train_fn, gpt_only=True):
  function train_ghostbuster_no_handcrafted (line 478) | def train_ghostbuster_no_handcrafted(data, train, test, domain):
  function train_ghostbuster_no_symbolic (line 490) | def train_ghostbuster_no_symbolic(data, train, test, domain):
  function get_data (line 536) | def get_data(generate_dataset_fn, best_features):
  function evaluate_on_dataset (line 552) | def evaluate_on_dataset(
  function get_data (line 770) | def get_data(generate_dataset_fn, best_features):
  function get_data (line 1122) | def get_data(generate_dataset_fn, best_features):
  function get_perturb_data (line 1142) | def get_perturb_data(perturb_names, perturb_sizes, save_file):
  function calculate_ece (line 1212) | def calculate_ece(y_true, y_probs, n_bins=10):
  function train_ghostbuster_ece (line 1232) | def train_ghostbuster_ece(data, train, test, domain):
  function train_ghostbuster_calibrated_ece (line 1239) | def train_ghostbuster_calibrated_ece(data, train, test, domain):

FILE: train.py
  function get_featurized_data (line 62) | def get_featurized_data(generate_dataset_fn, best_features):

FILE: utils/featurize.py
  function get_logprobs (line 8) | def get_logprobs(file):
  function get_tokens (line 22) | def get_tokens(file):
  function get_token_len (line 31) | def get_token_len(tokens):
  function get_diff (line 48) | def get_diff(file1, file2):
  function convolve (line 55) | def convolve(X, window=100):
  function score_ngram (line 65) | def score_ngram(doc, model, tokenizer, n=3, strip_first=False):
  function normalize (line 78) | def normalize(data, mu=None, sigma=None, ret_mu_sigma=False):
  function convert_file_to_logprob_file (line 95) | def convert_file_to_logprob_file(file_name, model):
  function t_featurize_logprobs (line 113) | def t_featurize_logprobs(davinci_logprobs, ada_logprobs, tokens):
  function t_featurize (line 139) | def t_featurize(file, num_tokens=2048):
  function select_features (line 153) | def select_features(exp_to_data, labels, verbose=True, to_normalize=True...

FILE: utils/generate.py
  function round_up (line 14) | def round_up(x, base=50):
  function openai_backoff (line 19) | def openai_backoff(**kwargs):
  function generate_documents (line 23) | def generate_documents(output_dir, prompts, verbose=True, force_regenera...

FILE: utils/load.py
  class Dataset (line 12) | class Dataset:
  function get_generate_dataset_normal (line 17) | def get_generate_dataset_normal(path: str, verbose=False):
  function get_generate_dataset_author (line 29) | def get_generate_dataset_author(path: str, author: str, verbose=False):
  function get_generate_dataset (line 48) | def get_generate_dataset(*datasets: Dataset):

FILE: utils/n_gram.py
  class NGramModel (line 5) | class NGramModel:
    method __init__ (line 10) | def __init__(self, train_text, n=2, alpha=3e-3, vocab_size=None):
    method n_gram_probability (line 26) | def n_gram_probability(self, n_gram):
  class DiscountBackoffModel (line 33) | class DiscountBackoffModel(NGramModel):
    method __init__ (line 38) | def __init__(self, train_text, lower_order_model, n=2, delta=0.9):
    method n_gram_probability (line 43) | def n_gram_probability(self, n_gram):
  class KneserNeyBaseModel (line 59) | class KneserNeyBaseModel(NGramModel):
    method __init__ (line 64) | def __init__(self, train_text, vocab_size=None):
    method n_gram_probability (line 79) | def n_gram_probability(self, n_gram):
  class TrigramBackoff (line 89) | class TrigramBackoff:
    method __init__ (line 94) | def __init__(self, train_text, delta=0.9):
    method n_gram_probability (line 101) | def n_gram_probability(self, n_gram):

FILE: utils/score.py
  function k_fold_score (line 7) | def k_fold_score(X, labels, indices=None, k=8, precision=10):

FILE: utils/symbolic.py
  function get_words (line 55) | def get_words(exp):
  function backtrack_functions (line 62) | def backtrack_functions(
  function train_trigram (line 96) | def train_trigram(verbose=True, return_tokenizer=False):
  function get_all_logprobs (line 124) | def get_all_logprobs(
  function generate_symbolic_data (line 164) | def generate_symbolic_data(
  function get_exp_featurize (line 219) | def get_exp_featurize(best_features, vector_map):

FILE: utils/write_logprobs.py
  function write_logprobs (line 22) | def write_logprobs(text, file, model):
  function write_llama_logprobs (line 54) | def write_llama_logprobs(text, file, model):