gitextract_xuh1h2o1/

├── .devcontainer/
│   └── devcontainer.json
├── .gitignore
├── .streamlit/
│   └── config.toml
├── README.md
├── app/
│   ├── __init__.py
│   ├── pages/
│   │   ├── __init__.py
│   │   └── graph_visualization.py
│   └── streamlit_app.py
├── config.yaml
├── data/
│   ├── enriched/
│   │   └── papers_data_enriched.csv
│   ├── inputs/
│   │   └── papers_data.csv
│   └── outputs/
│       ├── sbert_embeddings.pkl
│       ├── sbert_model/
│       │   ├── 1_Pooling/
│       │   │   └── config.json
│       │   ├── README.md
│       │   ├── config.json
│       │   ├── config_sentence_transformers.json
│       │   ├── model.safetensors
│       │   ├── modules.json
│       │   ├── sentence_bert_config.json
│       │   ├── special_tokens_map.json
│       │   ├── tokenizer.json
│       │   ├── tokenizer_config.json
│       │   └── vocab.txt
│       ├── tfidf_matrix.pkl
│       ├── tfidf_vectorizer.pkl
│       └── train_data_with_clean.csv
├── i18n/
│   └── en.json
├── instruction.md
├── main.py
├── packages.txt
├── pipelines.py
├── requirements.txt
└── src/
    ├── __init__.py
    ├── config_loader.py
    ├── enrich_papers_incremental.py
    ├── extract_papers_to_csv.py
    ├── inference.py
    ├── preprocessing.py
    └── training.py