SYMBOL INDEX (54 symbols across 11 files)

FILE: prompt_builder/augment_with_cfc.py
  function get_crossfile_context_from_chunks (line 41) | def get_crossfile_context_from_chunks(
  function read_project_files (line 148) | def read_project_files(repo_name, lang):
  function find_files_within_distance_k (line 185) | def find_files_within_distance_k(current_file_path, filelist, k):
  function get_cfc (line 202) | def get_cfc(example, args, semantic_ranker, repositories):
  function attach_data (line 263) | def attach_data(args, srcfile):

FILE: prompt_builder/rerank_utils.py
  function jaccard_similarity (line 24) | def jaccard_similarity(tokenized_query, tokenized_doc, containment=False):
  function tokenize_corpus (line 32) | def tokenize_corpus(corpus, tokenizer_fn):
  function tokenize_query_and_docs (line 38) | def tokenize_query_and_docs(query, docs):
  function lexical_ranking (line 44) | def lexical_ranking(
  class SemanticReranking (line 85) | class SemanticReranking:
    method __init__ (line 87) | def __init__(self, model_type="unixcoder", **kwargs):
    method text_to_tensor (line 98) | def text_to_tensor(
    method get_pad_id (line 121) | def get_pad_id(self):
    method get_attn_mask (line 124) | def get_attn_mask(self, tokens_tensor):
    method get_representations (line 127) | def get_representations(self, list_input_ids, gpu_id):
    method rerank (line 165) | def rerank(self, query: str, docs: List[str], doc_ids: List[str] = Non...

FILE: prompt_builder/utils.py
  function tokenize_nltk (line 20) | def tokenize_nltk(text):
  function file_distance (line 29) | def file_distance(src_file, dest_file):
  function str2bool (line 43) | def str2bool(v):

FILE: scripts/build_ts_lib.py
  function build_language_lib (line 7) | def build_language_lib():

FILE: scripts/custom_generate.py
  function generate (line 86) | def generate(
  function sample (line 732) | def sample(

FILE: scripts/eval.py
  function custom_data_collator (line 52) | def custom_data_collator(features):
  function build_datasets (line 69) | def build_datasets(args, tokenizer):
  function model_inference (line 187) | def model_inference(tokenized_datasets, index2taskid, tokenizer):

FILE: scripts/eval_metric.py
  function compute_id_match (line 19) | def compute_id_match(pred_ids, target_ids):
  function compute_edit_sim (line 36) | def compute_edit_sim(samples):
  function process_examples (line 44) | def process_examples(lang, args):
  function compute_metric_stmt (line 70) | def compute_metric_stmt(args):

FILE: scripts/eval_utils.py
  function cal_edit_sim (line 44) | def cal_edit_sim(references, hypotheses):
  function split_identifier_into_parts (line 55) | def split_identifier_into_parts(identifier: str) -> List[str]:
  function is_identifier (line 77) | def is_identifier(token, lang=None):
  function extract_identifiers (line 83) | def extract_identifiers(source_code, lang):
  function tokenize_string (line 92) | def tokenize_string(input_str):
  function get_bracket_lang_statement (line 96) | def get_bracket_lang_statement(completion):
  function get_ast (line 106) | def get_ast(parser, code):
  function remove_comments (line 117) | def remove_comments(code):
  function is_parse_valid (line 123) | def is_parse_valid(parser, code):
  function is_code_parseable (line 142) | def is_code_parseable(code):
  function get_python_one_statement (line 150) | def get_python_one_statement(prompt, completion, parser):
  function postprocess_code_lines (line 161) | def postprocess_code_lines(prompt, completion, parser, lang):
  function compute_mean_logp (line 171) | def compute_mean_logp(scores, sequences, pad_token_id):

FILE: scripts/keywords/keywordlist.py
  function get_language_keywords (line 30) | def get_language_keywords(language: str) -> FrozenSet[str]:

FILE: scripts/openai_inference.py
  function query (line 30) | def query(
  function query_with_retry (line 57) | def query_with_retry(
  function truncate (line 109) | def truncate(prompt: str, max_num_tokens: int, tokenizer, side: str) -> ...
  function prepare_prompt (line 130) | def prepare_prompt(
  function get_openai_response (line 152) | def get_openai_response(
  function get_openai_responses (line 178) | def get_openai_responses(
  function main (line 209) | def main():

FILE: scripts/vllm_inference.py
  function truncate (line 21) | def truncate(prompt: str, max_num_tokens: int, side: str, tokenizer) -> ...
  function prepare_prompt (line 40) | def prepare_prompt(
  function cceval_generate (line 62) | def cceval_generate(
  function main (line 99) | def main():