Copy disabled (too large)
Download .txt
Showing preview only (16,018K chars total). Download the full file to get everything.
Repository: K-Dense-AI/claude-scientific-skills
Branch: main
Commit: 1346c01d9d72
Files: 1340
Total size: 16.4 MB
Directory structure:
gitextract_rdf422j9/
├── .claude-plugin/
│ └── marketplace.json
├── .gitattributes
├── .github/
│ └── workflows/
│ └── release.yml
├── .gitignore
├── LICENSE.md
├── README.md
├── docs/
│ ├── examples.md
│ ├── open-source-sponsors.md
│ └── scientific-skills.md
└── scientific-skills/
├── adaptyv/
│ ├── SKILL.md
│ └── reference/
│ ├── api_reference.md
│ ├── examples.md
│ ├── experiments.md
│ └── protein_optimization.md
├── aeon/
│ ├── SKILL.md
│ └── references/
│ ├── anomaly_detection.md
│ ├── classification.md
│ ├── clustering.md
│ ├── datasets_benchmarking.md
│ ├── distances.md
│ ├── forecasting.md
│ ├── networks.md
│ ├── regression.md
│ ├── segmentation.md
│ ├── similarity_search.md
│ └── transformations.md
├── alpha-vantage/
│ ├── SKILL.md
│ └── references/
│ ├── commodities.md
│ ├── economic-indicators.md
│ ├── forex-crypto.md
│ ├── fundamentals.md
│ ├── intelligence.md
│ ├── options.md
│ ├── technical-indicators.md
│ └── time-series.md
├── alphafold-database/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── anndata/
│ ├── SKILL.md
│ └── references/
│ ├── best_practices.md
│ ├── concatenation.md
│ ├── data_structure.md
│ ├── io_operations.md
│ └── manipulation.md
├── arboreto/
│ ├── SKILL.md
│ ├── references/
│ │ ├── algorithms.md
│ │ ├── basic_inference.md
│ │ └── distributed_computing.md
│ └── scripts/
│ └── basic_grn_inference.py
├── arxiv-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ └── arxiv_search.py
├── astropy/
│ ├── SKILL.md
│ └── references/
│ ├── coordinates.md
│ ├── cosmology.md
│ ├── fits.md
│ ├── tables.md
│ ├── time.md
│ ├── units.md
│ └── wcs_and_other_modules.md
├── benchling-integration/
│ ├── SKILL.md
│ └── references/
│ ├── api_endpoints.md
│ ├── authentication.md
│ └── sdk_reference.md
├── bgpt-paper-search/
│ └── SKILL.md
├── bindingdb-database/
│ ├── SKILL.md
│ └── references/
│ └── affinity_queries.md
├── biopython/
│ ├── SKILL.md
│ └── references/
│ ├── advanced.md
│ ├── alignment.md
│ ├── blast.md
│ ├── databases.md
│ ├── phylogenetics.md
│ ├── sequence_io.md
│ └── structure.md
├── biorxiv-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ └── biorxiv_search.py
├── bioservices/
│ ├── SKILL.md
│ ├── references/
│ │ ├── identifier_mapping.md
│ │ ├── services_reference.md
│ │ └── workflow_patterns.md
│ └── scripts/
│ ├── batch_id_converter.py
│ ├── compound_cross_reference.py
│ ├── pathway_analysis.py
│ └── protein_analysis_workflow.py
├── brenda-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ ├── brenda_queries.py
│ ├── brenda_visualization.py
│ └── enzyme_pathway_builder.py
├── cbioportal-database/
│ ├── SKILL.md
│ └── references/
│ └── study_exploration.md
├── cellxgene-census/
│ ├── SKILL.md
│ └── references/
│ ├── census_schema.md
│ └── common_patterns.md
├── chembl-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ └── example_queries.py
├── cirq/
│ ├── SKILL.md
│ └── references/
│ ├── building.md
│ ├── experiments.md
│ ├── hardware.md
│ ├── noise.md
│ ├── simulation.md
│ └── transformation.md
├── citation-management/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── bibtex_template.bib
│ │ └── citation_checklist.md
│ ├── references/
│ │ ├── bibtex_formatting.md
│ │ ├── citation_validation.md
│ │ ├── google_scholar_search.md
│ │ ├── metadata_extraction.md
│ │ └── pubmed_search.md
│ └── scripts/
│ ├── doi_to_bibtex.py
│ ├── extract_metadata.py
│ ├── format_bibtex.py
│ ├── search_google_scholar.py
│ ├── search_pubmed.py
│ └── validate_citations.py
├── clinical-decision-support/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── biomarker_report_template.tex
│ │ ├── clinical_pathway_template.tex
│ │ ├── cohort_analysis_template.tex
│ │ ├── color_schemes.tex
│ │ ├── example_gbm_cohort.md
│ │ ├── recommendation_strength_guide.md
│ │ └── treatment_recommendation_template.tex
│ ├── references/
│ │ ├── README.md
│ │ ├── biomarker_classification.md
│ │ ├── clinical_decision_algorithms.md
│ │ ├── evidence_synthesis.md
│ │ ├── outcome_analysis.md
│ │ ├── patient_cohort_analysis.md
│ │ └── treatment_recommendations.md
│ └── scripts/
│ ├── biomarker_classifier.py
│ ├── build_decision_tree.py
│ ├── create_cohort_tables.py
│ ├── generate_survival_analysis.py
│ └── validate_cds_document.py
├── clinical-reports/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── case_report_template.md
│ │ ├── clinical_trial_csr_template.md
│ │ ├── clinical_trial_sae_template.md
│ │ ├── consult_note_template.md
│ │ ├── discharge_summary_template.md
│ │ ├── hipaa_compliance_checklist.md
│ │ ├── history_physical_template.md
│ │ ├── lab_report_template.md
│ │ ├── pathology_report_template.md
│ │ ├── quality_checklist.md
│ │ ├── radiology_report_template.md
│ │ └── soap_note_template.md
│ ├── references/
│ │ ├── README.md
│ │ ├── case_report_guidelines.md
│ │ ├── clinical_trial_reporting.md
│ │ ├── data_presentation.md
│ │ ├── diagnostic_reports_standards.md
│ │ ├── medical_terminology.md
│ │ ├── patient_documentation.md
│ │ ├── peer_review_standards.md
│ │ └── regulatory_compliance.md
│ └── scripts/
│ ├── check_deidentification.py
│ ├── compliance_checker.py
│ ├── extract_clinical_data.py
│ ├── format_adverse_events.py
│ ├── generate_report_template.py
│ ├── terminology_validator.py
│ ├── validate_case_report.py
│ └── validate_trial_report.py
├── clinicaltrials-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ └── query_clinicaltrials.py
├── clinpgx-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ └── query_clinpgx.py
├── clinvar-database/
│ ├── SKILL.md
│ └── references/
│ ├── api_reference.md
│ ├── clinical_significance.md
│ └── data_formats.md
├── cobrapy/
│ ├── SKILL.md
│ └── references/
│ ├── api_quick_reference.md
│ └── workflows.md
├── consciousness-council/
│ ├── SKILL.md
│ └── references/
│ └── advanced-configurations.md
├── cosmic-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── cosmic_data_reference.md
│ └── scripts/
│ └── download_cosmic.py
├── dask/
│ ├── SKILL.md
│ └── references/
│ ├── arrays.md
│ ├── bags.md
│ ├── best-practices.md
│ ├── dataframes.md
│ ├── futures.md
│ └── schedulers.md
├── datacommons-client/
│ ├── SKILL.md
│ └── references/
│ ├── getting_started.md
│ ├── node.md
│ ├── observation.md
│ └── resolve.md
├── datamol/
│ ├── SKILL.md
│ └── references/
│ ├── conformers_module.md
│ ├── core_api.md
│ ├── descriptors_viz.md
│ ├── fragments_scaffolds.md
│ ├── io_module.md
│ └── reactions_data.md
├── deepchem/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ └── workflows.md
│ └── scripts/
│ ├── graph_neural_network.py
│ ├── predict_solubility.py
│ └── transfer_learning.py
├── deeptools/
│ ├── SKILL.md
│ ├── assets/
│ │ └── quick_reference.md
│ ├── references/
│ │ ├── effective_genome_sizes.md
│ │ ├── normalization_methods.md
│ │ ├── tools_reference.md
│ │ └── workflows.md
│ └── scripts/
│ ├── validate_files.py
│ └── workflow_generator.py
├── denario/
│ ├── SKILL.md
│ └── references/
│ ├── examples.md
│ ├── installation.md
│ ├── llm_configuration.md
│ └── research_pipeline.md
├── depmap/
│ ├── SKILL.md
│ └── references/
│ └── dependency_analysis.md
├── dhdna-profiler/
│ ├── SKILL.md
│ └── references/
│ └── advanced-profiling.md
├── diffdock/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── batch_template.csv
│ │ └── custom_inference_config.yaml
│ ├── references/
│ │ ├── confidence_and_limitations.md
│ │ ├── parameters_reference.md
│ │ └── workflows_examples.md
│ └── scripts/
│ ├── analyze_results.py
│ ├── prepare_batch_csv.py
│ └── setup_check.py
├── dnanexus-integration/
│ ├── SKILL.md
│ └── references/
│ ├── app-development.md
│ ├── configuration.md
│ ├── data-operations.md
│ ├── job-execution.md
│ └── python-sdk.md
├── docx/
│ ├── LICENSE.txt
│ ├── SKILL.md
│ └── scripts/
│ ├── __init__.py
│ ├── accept_changes.py
│ ├── comment.py
│ ├── office/
│ │ ├── helpers/
│ │ │ ├── __init__.py
│ │ │ ├── merge_runs.py
│ │ │ └── simplify_redlines.py
│ │ ├── pack.py
│ │ ├── schemas/
│ │ │ ├── ISO-IEC29500-4_2016/
│ │ │ │ ├── dml-chart.xsd
│ │ │ │ ├── dml-chartDrawing.xsd
│ │ │ │ ├── dml-diagram.xsd
│ │ │ │ ├── dml-lockedCanvas.xsd
│ │ │ │ ├── dml-main.xsd
│ │ │ │ ├── dml-picture.xsd
│ │ │ │ ├── dml-spreadsheetDrawing.xsd
│ │ │ │ ├── dml-wordprocessingDrawing.xsd
│ │ │ │ ├── pml.xsd
│ │ │ │ ├── shared-additionalCharacteristics.xsd
│ │ │ │ ├── shared-bibliography.xsd
│ │ │ │ ├── shared-commonSimpleTypes.xsd
│ │ │ │ ├── shared-customXmlDataProperties.xsd
│ │ │ │ ├── shared-customXmlSchemaProperties.xsd
│ │ │ │ ├── shared-documentPropertiesCustom.xsd
│ │ │ │ ├── shared-documentPropertiesExtended.xsd
│ │ │ │ ├── shared-documentPropertiesVariantTypes.xsd
│ │ │ │ ├── shared-math.xsd
│ │ │ │ ├── shared-relationshipReference.xsd
│ │ │ │ ├── sml.xsd
│ │ │ │ ├── vml-main.xsd
│ │ │ │ ├── vml-officeDrawing.xsd
│ │ │ │ ├── vml-presentationDrawing.xsd
│ │ │ │ ├── vml-spreadsheetDrawing.xsd
│ │ │ │ ├── vml-wordprocessingDrawing.xsd
│ │ │ │ ├── wml.xsd
│ │ │ │ └── xml.xsd
│ │ │ ├── ecma/
│ │ │ │ └── fouth-edition/
│ │ │ │ ├── opc-contentTypes.xsd
│ │ │ │ ├── opc-coreProperties.xsd
│ │ │ │ ├── opc-digSig.xsd
│ │ │ │ └── opc-relationships.xsd
│ │ │ ├── mce/
│ │ │ │ └── mc.xsd
│ │ │ └── microsoft/
│ │ │ ├── wml-2010.xsd
│ │ │ ├── wml-2012.xsd
│ │ │ ├── wml-2018.xsd
│ │ │ ├── wml-cex-2018.xsd
│ │ │ ├── wml-cid-2016.xsd
│ │ │ ├── wml-sdtdatahash-2020.xsd
│ │ │ └── wml-symex-2015.xsd
│ │ ├── soffice.py
│ │ ├── unpack.py
│ │ ├── validate.py
│ │ └── validators/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── docx.py
│ │ ├── pptx.py
│ │ └── redlining.py
│ └── templates/
│ ├── comments.xml
│ ├── commentsExtended.xml
│ ├── commentsExtensible.xml
│ ├── commentsIds.xml
│ └── people.xml
├── drugbank-database/
│ ├── SKILL.md
│ ├── references/
│ │ ├── chemical-analysis.md
│ │ ├── data-access.md
│ │ ├── drug-queries.md
│ │ ├── interactions.md
│ │ └── targets-pathways.md
│ └── scripts/
│ └── drugbank_helper.py
├── edgartools/
│ ├── SKILL.md
│ └── references/
│ ├── ai-integration.md
│ ├── companies.md
│ ├── data-objects.md
│ ├── entity-facts.md
│ ├── filings.md
│ ├── financial-data.md
│ └── xbrl.md
├── ena-database/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── ensembl-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_endpoints.md
│ └── scripts/
│ └── ensembl_query.py
├── esm/
│ ├── SKILL.md
│ └── references/
│ ├── esm-c-api.md
│ ├── esm3-api.md
│ ├── forge-api.md
│ └── workflows.md
├── etetoolkit/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── visualization.md
│ │ └── workflows.md
│ └── scripts/
│ ├── quick_visualize.py
│ └── tree_operations.py
├── exploratory-data-analysis/
│ ├── SKILL.md
│ ├── assets/
│ │ └── report_template.md
│ ├── references/
│ │ ├── bioinformatics_genomics_formats.md
│ │ ├── chemistry_molecular_formats.md
│ │ ├── general_scientific_formats.md
│ │ ├── microscopy_imaging_formats.md
│ │ ├── proteomics_metabolomics_formats.md
│ │ └── spectroscopy_analytical_formats.md
│ └── scripts/
│ └── eda_analyzer.py
├── fda-database/
│ ├── SKILL.md
│ ├── references/
│ │ ├── animal_veterinary.md
│ │ ├── api_basics.md
│ │ ├── devices.md
│ │ ├── drugs.md
│ │ ├── foods.md
│ │ └── other.md
│ └── scripts/
│ ├── fda_examples.py
│ └── fda_query.py
├── flowio/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── fluidsim/
│ ├── SKILL.md
│ └── references/
│ ├── advanced_features.md
│ ├── installation.md
│ ├── output_analysis.md
│ ├── parameters.md
│ ├── simulation_workflow.md
│ └── solvers.md
├── fred-economic-data/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_basics.md
│ │ ├── categories.md
│ │ ├── geofred.md
│ │ ├── releases.md
│ │ ├── series.md
│ │ ├── sources.md
│ │ └── tags.md
│ └── scripts/
│ ├── fred_examples.py
│ └── fred_query.py
├── gene-database/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ └── common_workflows.md
│ └── scripts/
│ ├── batch_gene_lookup.py
│ ├── fetch_gene_data.py
│ └── query_gene.py
├── generate-image/
│ ├── SKILL.md
│ └── scripts/
│ └── generate_image.py
├── geniml/
│ ├── SKILL.md
│ └── references/
│ ├── bedspace.md
│ ├── consensus_peaks.md
│ ├── region2vec.md
│ ├── scembed.md
│ └── utilities.md
├── geo-database/
│ ├── SKILL.md
│ └── references/
│ └── geo_reference.md
├── geomaster/
│ ├── README.md
│ ├── SKILL.md
│ └── references/
│ ├── advanced-gis.md
│ ├── big-data.md
│ ├── code-examples.md
│ ├── coordinate-systems.md
│ ├── core-libraries.md
│ ├── data-sources.md
│ ├── gis-software.md
│ ├── industry-applications.md
│ ├── machine-learning.md
│ ├── programming-languages.md
│ ├── remote-sensing.md
│ ├── scientific-domains.md
│ ├── specialized-topics.md
│ └── troubleshooting.md
├── geopandas/
│ ├── SKILL.md
│ └── references/
│ ├── crs-management.md
│ ├── data-io.md
│ ├── data-structures.md
│ ├── geometric-operations.md
│ ├── spatial-analysis.md
│ └── visualization.md
├── get-available-resources/
│ ├── SKILL.md
│ └── scripts/
│ └── detect_resources.py
├── gget/
│ ├── SKILL.md
│ ├── references/
│ │ ├── database_info.md
│ │ ├── module_reference.md
│ │ └── workflows.md
│ └── scripts/
│ ├── batch_sequence_analysis.py
│ ├── enrichment_pipeline.py
│ └── gene_analysis.py
├── ginkgo-cloud-lab/
│ ├── SKILL.md
│ └── references/
│ ├── cell-free-protein-expression-optimization.md
│ ├── cell-free-protein-expression-validation.md
│ └── fluorescent-pixel-art-generation.md
├── glycoengineering/
│ ├── SKILL.md
│ └── references/
│ └── glycan_databases.md
├── gnomad-database/
│ ├── SKILL.md
│ └── references/
│ ├── graphql_queries.md
│ └── variant_interpretation.md
├── gtars/
│ ├── SKILL.md
│ └── references/
│ ├── cli.md
│ ├── coverage.md
│ ├── overlap.md
│ ├── python-api.md
│ ├── refget.md
│ └── tokenizers.md
├── gtex-database/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── gwas-database/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── hedgefundmonitor/
│ ├── SKILL.md
│ └── references/
│ ├── api-overview.md
│ ├── datasets.md
│ ├── endpoints-combined.md
│ ├── endpoints-metadata.md
│ ├── endpoints-series-data.md
│ ├── examples.md
│ └── parameters.md
├── histolab/
│ ├── SKILL.md
│ └── references/
│ ├── filters_preprocessing.md
│ ├── slide_management.md
│ ├── tile_extraction.md
│ ├── tissue_masks.md
│ └── visualization.md
├── hmdb-database/
│ ├── SKILL.md
│ └── references/
│ └── hmdb_data_fields.md
├── hypogenic/
│ ├── SKILL.md
│ └── references/
│ └── config_template.yaml
├── hypothesis-generation/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── FORMATTING_GUIDE.md
│ │ ├── hypothesis_generation.sty
│ │ └── hypothesis_report_template.tex
│ └── references/
│ ├── experimental_design_patterns.md
│ ├── hypothesis_quality_criteria.md
│ └── literature_search_strategies.md
├── imaging-data-commons/
│ ├── SKILL.md
│ └── references/
│ ├── bigquery_guide.md
│ ├── cli_guide.md
│ ├── clinical_data_guide.md
│ ├── cloud_storage_guide.md
│ ├── dicomweb_guide.md
│ ├── digital_pathology_guide.md
│ ├── index_tables_guide.md
│ ├── sql_patterns.md
│ └── use_cases.md
├── infographics/
│ ├── SKILL.md
│ ├── references/
│ │ ├── color_palettes.md
│ │ ├── design_principles.md
│ │ └── infographic_types.md
│ └── scripts/
│ ├── generate_infographic.py
│ └── generate_infographic_ai.py
├── interpro-database/
│ ├── SKILL.md
│ └── references/
│ └── domain_analysis.md
├── iso-13485-certification/
│ ├── SKILL.md
│ ├── assets/
│ │ └── templates/
│ │ ├── procedures/
│ │ │ ├── CAPA-procedure-template.md
│ │ │ └── document-control-procedure-template.md
│ │ └── quality-manual-template.md
│ ├── references/
│ │ ├── gap-analysis-checklist.md
│ │ ├── iso-13485-requirements.md
│ │ ├── mandatory-documents.md
│ │ └── quality-manual-guide.md
│ └── scripts/
│ └── gap_analyzer.py
├── jaspar-database/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── kegg-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── kegg_reference.md
│ └── scripts/
│ └── kegg_api.py
├── labarchive-integration/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── authentication_guide.md
│ │ └── integrations.md
│ └── scripts/
│ ├── entry_operations.py
│ ├── notebook_operations.py
│ └── setup_config.py
├── lamindb/
│ ├── SKILL.md
│ └── references/
│ ├── annotation-validation.md
│ ├── core-concepts.md
│ ├── data-management.md
│ ├── integrations.md
│ ├── ontologies.md
│ └── setup-deployment.md
├── latchbio-integration/
│ ├── SKILL.md
│ └── references/
│ ├── data-management.md
│ ├── resource-configuration.md
│ ├── verified-workflows.md
│ └── workflow-creation.md
├── latex-posters/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── baposter_template.tex
│ │ ├── beamerposter_template.tex
│ │ ├── poster_quality_checklist.md
│ │ └── tikzposter_template.tex
│ ├── references/
│ │ ├── README.md
│ │ ├── latex_poster_packages.md
│ │ ├── poster_content_guide.md
│ │ ├── poster_design_principles.md
│ │ └── poster_layout_design.md
│ └── scripts/
│ └── review_poster.sh
├── literature-review/
│ ├── SKILL.md
│ ├── assets/
│ │ └── review_template.md
│ ├── references/
│ │ ├── citation_styles.md
│ │ └── database_strategies.md
│ └── scripts/
│ ├── generate_pdf.py
│ ├── search_databases.py
│ └── verify_citations.py
├── markdown-mermaid-writing/
│ ├── SKILL.md
│ ├── assets/
│ │ └── examples/
│ │ └── example-research-report.md
│ ├── references/
│ │ ├── diagrams/
│ │ │ ├── architecture.md
│ │ │ ├── block.md
│ │ │ ├── c4.md
│ │ │ ├── class.md
│ │ │ ├── complex_examples.md
│ │ │ ├── er.md
│ │ │ ├── flowchart.md
│ │ │ ├── gantt.md
│ │ │ ├── git_graph.md
│ │ │ ├── kanban.md
│ │ │ ├── mindmap.md
│ │ │ ├── packet.md
│ │ │ ├── pie.md
│ │ │ ├── quadrant.md
│ │ │ ├── radar.md
│ │ │ ├── requirement.md
│ │ │ ├── sankey.md
│ │ │ ├── sequence.md
│ │ │ ├── state.md
│ │ │ ├── timeline.md
│ │ │ ├── treemap.md
│ │ │ ├── user_journey.md
│ │ │ ├── xy_chart.md
│ │ │ └── zenuml.md
│ │ ├── markdown_style_guide.md
│ │ └── mermaid_style_guide.md
│ └── templates/
│ ├── decision_record.md
│ ├── how_to_guide.md
│ ├── issue.md
│ ├── kanban.md
│ ├── presentation.md
│ ├── project_documentation.md
│ ├── pull_request.md
│ ├── research_paper.md
│ └── status_report.md
├── market-research-reports/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── FORMATTING_GUIDE.md
│ │ ├── market_report_template.tex
│ │ └── market_research.sty
│ ├── references/
│ │ ├── data_analysis_patterns.md
│ │ ├── report_structure_guide.md
│ │ └── visual_generation_guide.md
│ └── scripts/
│ └── generate_market_visuals.py
├── markitdown/
│ ├── SKILL.md
│ ├── assets/
│ │ └── example_usage.md
│ ├── references/
│ │ ├── api_reference.md
│ │ └── file_formats.md
│ └── scripts/
│ ├── batch_convert.py
│ ├── convert_literature.py
│ └── convert_with_ai.py
├── matchms/
│ ├── SKILL.md
│ └── references/
│ ├── filtering.md
│ ├── importing_exporting.md
│ ├── similarity.md
│ └── workflows.md
├── matlab/
│ ├── SKILL.md
│ └── references/
│ ├── data-import-export.md
│ ├── executing-scripts.md
│ ├── graphics-visualization.md
│ ├── mathematics.md
│ ├── matrices-arrays.md
│ ├── octave-compatibility.md
│ ├── programming.md
│ └── python-integration.md
├── matplotlib/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── common_issues.md
│ │ ├── plot_types.md
│ │ └── styling_guide.md
│ └── scripts/
│ ├── plot_template.py
│ └── style_configurator.py
├── medchem/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_guide.md
│ │ └── rules_catalog.md
│ └── scripts/
│ └── filter_molecules.py
├── metabolomics-workbench-database/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── modal/
│ ├── SKILL.md
│ └── references/
│ ├── api_reference.md
│ ├── examples.md
│ ├── functions.md
│ ├── getting-started.md
│ ├── gpu.md
│ ├── images.md
│ ├── resources.md
│ ├── scaling.md
│ ├── scheduled-jobs.md
│ ├── secrets.md
│ ├── volumes.md
│ └── web-endpoints.md
├── molecular-dynamics/
│ ├── SKILL.md
│ └── references/
│ └── mdanalysis_analysis.md
├── molfeat/
│ ├── SKILL.md
│ └── references/
│ ├── api_reference.md
│ ├── available_featurizers.md
│ └── examples.md
├── monarch-database/
│ ├── SKILL.md
│ └── references/
│ └── phenotype_ontology.md
├── networkx/
│ ├── SKILL.md
│ └── references/
│ ├── algorithms.md
│ ├── generators.md
│ ├── graph-basics.md
│ ├── io.md
│ └── visualization.md
├── neurokit2/
│ ├── SKILL.md
│ └── references/
│ ├── bio_module.md
│ ├── complexity.md
│ ├── ecg_cardiac.md
│ ├── eda.md
│ ├── eeg.md
│ ├── emg.md
│ ├── eog.md
│ ├── epochs_events.md
│ ├── hrv.md
│ ├── ppg.md
│ ├── rsp.md
│ └── signal_processing.md
├── neuropixels-analysis/
│ ├── SKILL.md
│ ├── assets/
│ │ └── analysis_template.py
│ ├── references/
│ │ ├── AI_CURATION.md
│ │ ├── ANALYSIS.md
│ │ ├── AUTOMATED_CURATION.md
│ │ ├── MOTION_CORRECTION.md
│ │ ├── PREPROCESSING.md
│ │ ├── QUALITY_METRICS.md
│ │ ├── SPIKE_SORTING.md
│ │ ├── api_reference.md
│ │ ├── plotting_guide.md
│ │ └── standard_workflow.md
│ └── scripts/
│ ├── compute_metrics.py
│ ├── explore_recording.py
│ ├── export_to_phy.py
│ ├── neuropixels_pipeline.py
│ ├── preprocess_recording.py
│ └── run_sorting.py
├── offer-k-dense-web/
│ └── SKILL.md
├── omero-integration/
│ ├── SKILL.md
│ └── references/
│ ├── advanced.md
│ ├── connection.md
│ ├── data_access.md
│ ├── image_processing.md
│ ├── metadata.md
│ ├── rois.md
│ ├── scripts.md
│ └── tables.md
├── open-notebook/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── architecture.md
│ │ ├── configuration.md
│ │ └── examples.md
│ └── scripts/
│ ├── chat_interaction.py
│ ├── notebook_management.py
│ ├── source_ingestion.py
│ └── test_open_notebook_skill.py
├── openalex-database/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_guide.md
│ │ └── common_queries.md
│ └── scripts/
│ ├── openalex_client.py
│ └── query_helpers.py
├── opentargets-database/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── evidence_types.md
│ │ └── target_annotations.md
│ └── scripts/
│ └── query_opentargets.py
├── opentrons-integration/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ ├── basic_protocol_template.py
│ ├── pcr_setup_template.py
│ └── serial_dilution_template.py
├── paper-2-web/
│ ├── SKILL.md
│ └── references/
│ ├── installation.md
│ ├── paper2poster.md
│ ├── paper2video.md
│ ├── paper2web.md
│ └── usage_examples.md
├── parallel-web/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── deep_research_guide.md
│ │ ├── extraction_patterns.md
│ │ ├── search_best_practices.md
│ │ └── workflow_recipes.md
│ └── scripts/
│ └── parallel_web.py
├── pathml/
│ ├── SKILL.md
│ └── references/
│ ├── data_management.md
│ ├── graphs.md
│ ├── image_loading.md
│ ├── machine_learning.md
│ ├── multiparametric.md
│ └── preprocessing.md
├── pdb-database/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── pdf/
│ ├── LICENSE.txt
│ ├── SKILL.md
│ ├── forms.md
│ ├── reference.md
│ └── scripts/
│ ├── check_bounding_boxes.py
│ ├── check_fillable_fields.py
│ ├── convert_pdf_to_images.py
│ ├── create_validation_image.py
│ ├── extract_form_field_info.py
│ ├── extract_form_structure.py
│ ├── fill_fillable_fields.py
│ └── fill_pdf_form_with_annotations.py
├── peer-review/
│ ├── SKILL.md
│ └── references/
│ ├── common_issues.md
│ └── reporting_standards.md
├── pennylane/
│ ├── SKILL.md
│ └── references/
│ ├── advanced_features.md
│ ├── devices_backends.md
│ ├── getting_started.md
│ ├── optimization.md
│ ├── quantum_chemistry.md
│ ├── quantum_circuits.md
│ └── quantum_ml.md
├── perplexity-search/
│ ├── SKILL.md
│ ├── references/
│ │ ├── model_comparison.md
│ │ ├── openrouter_setup.md
│ │ └── search_strategies.md
│ └── scripts/
│ ├── perplexity_search.py
│ └── setup_env.py
├── phylogenetics/
│ ├── SKILL.md
│ ├── references/
│ │ └── iqtree_inference.md
│ └── scripts/
│ └── phylogenetic_analysis.py
├── plotly/
│ ├── SKILL.md
│ └── references/
│ ├── chart-types.md
│ ├── export-interactivity.md
│ ├── graph-objects.md
│ ├── layouts-styling.md
│ └── plotly-express.md
├── polars/
│ ├── SKILL.md
│ └── references/
│ ├── best_practices.md
│ ├── core_concepts.md
│ ├── io_guide.md
│ ├── operations.md
│ ├── pandas_migration.md
│ └── transformations.md
├── polars-bio/
│ ├── SKILL.md
│ └── references/
│ ├── bioframe_migration.md
│ ├── configuration.md
│ ├── file_io.md
│ ├── interval_operations.md
│ ├── pileup_operations.md
│ └── sql_processing.md
├── pptx/
│ ├── LICENSE.txt
│ ├── SKILL.md
│ ├── editing.md
│ ├── pptxgenjs.md
│ └── scripts/
│ ├── __init__.py
│ ├── add_slide.py
│ ├── clean.py
│ ├── office/
│ │ ├── helpers/
│ │ │ ├── __init__.py
│ │ │ ├── merge_runs.py
│ │ │ └── simplify_redlines.py
│ │ ├── pack.py
│ │ ├── schemas/
│ │ │ ├── ISO-IEC29500-4_2016/
│ │ │ │ ├── dml-chart.xsd
│ │ │ │ ├── dml-chartDrawing.xsd
│ │ │ │ ├── dml-diagram.xsd
│ │ │ │ ├── dml-lockedCanvas.xsd
│ │ │ │ ├── dml-main.xsd
│ │ │ │ ├── dml-picture.xsd
│ │ │ │ ├── dml-spreadsheetDrawing.xsd
│ │ │ │ ├── dml-wordprocessingDrawing.xsd
│ │ │ │ ├── pml.xsd
│ │ │ │ ├── shared-additionalCharacteristics.xsd
│ │ │ │ ├── shared-bibliography.xsd
│ │ │ │ ├── shared-commonSimpleTypes.xsd
│ │ │ │ ├── shared-customXmlDataProperties.xsd
│ │ │ │ ├── shared-customXmlSchemaProperties.xsd
│ │ │ │ ├── shared-documentPropertiesCustom.xsd
│ │ │ │ ├── shared-documentPropertiesExtended.xsd
│ │ │ │ ├── shared-documentPropertiesVariantTypes.xsd
│ │ │ │ ├── shared-math.xsd
│ │ │ │ ├── shared-relationshipReference.xsd
│ │ │ │ ├── sml.xsd
│ │ │ │ ├── vml-main.xsd
│ │ │ │ ├── vml-officeDrawing.xsd
│ │ │ │ ├── vml-presentationDrawing.xsd
│ │ │ │ ├── vml-spreadsheetDrawing.xsd
│ │ │ │ ├── vml-wordprocessingDrawing.xsd
│ │ │ │ ├── wml.xsd
│ │ │ │ └── xml.xsd
│ │ │ ├── ecma/
│ │ │ │ └── fouth-edition/
│ │ │ │ ├── opc-contentTypes.xsd
│ │ │ │ ├── opc-coreProperties.xsd
│ │ │ │ ├── opc-digSig.xsd
│ │ │ │ └── opc-relationships.xsd
│ │ │ ├── mce/
│ │ │ │ └── mc.xsd
│ │ │ └── microsoft/
│ │ │ ├── wml-2010.xsd
│ │ │ ├── wml-2012.xsd
│ │ │ ├── wml-2018.xsd
│ │ │ ├── wml-cex-2018.xsd
│ │ │ ├── wml-cid-2016.xsd
│ │ │ ├── wml-sdtdatahash-2020.xsd
│ │ │ └── wml-symex-2015.xsd
│ │ ├── soffice.py
│ │ ├── unpack.py
│ │ ├── validate.py
│ │ └── validators/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── docx.py
│ │ ├── pptx.py
│ │ └── redlining.py
│ └── thumbnail.py
├── pptx-posters/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── poster_html_template.html
│ │ └── poster_quality_checklist.md
│ └── references/
│ ├── poster_content_guide.md
│ ├── poster_design_principles.md
│ └── poster_layout_design.md
├── primekg/
│ ├── SKILL.md
│ └── scripts/
│ └── query_primekg.py
├── protocolsio-integration/
│ ├── SKILL.md
│ └── references/
│ ├── additional_features.md
│ ├── authentication.md
│ ├── discussions.md
│ ├── file_manager.md
│ ├── protocols_api.md
│ └── workspaces.md
├── pubchem-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ ├── bioactivity_query.py
│ └── compound_search.py
├── pubmed-database/
│ ├── SKILL.md
│ └── references/
│ ├── api_reference.md
│ ├── common_queries.md
│ └── search_syntax.md
├── pufferlib/
│ ├── SKILL.md
│ ├── references/
│ │ ├── environments.md
│ │ ├── integration.md
│ │ ├── policies.md
│ │ ├── training.md
│ │ └── vectorization.md
│ └── scripts/
│ ├── env_template.py
│ └── train_template.py
├── pydeseq2/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ └── workflow_guide.md
│ └── scripts/
│ └── run_deseq2_analysis.py
├── pydicom/
│ ├── SKILL.md
│ ├── references/
│ │ ├── common_tags.md
│ │ └── transfer_syntaxes.md
│ └── scripts/
│ ├── anonymize_dicom.py
│ ├── dicom_to_image.py
│ └── extract_metadata.py
├── pyhealth/
│ ├── SKILL.md
│ └── references/
│ ├── datasets.md
│ ├── medical_coding.md
│ ├── models.md
│ ├── preprocessing.md
│ ├── tasks.md
│ └── training_evaluation.md
├── pylabrobot/
│ ├── SKILL.md
│ └── references/
│ ├── analytical-equipment.md
│ ├── hardware-backends.md
│ ├── liquid-handling.md
│ ├── material-handling.md
│ ├── resources.md
│ └── visualization.md
├── pymatgen/
│ ├── SKILL.md
│ ├── references/
│ │ ├── analysis_modules.md
│ │ ├── core_classes.md
│ │ ├── io_formats.md
│ │ ├── materials_project_api.md
│ │ └── transformations_workflows.md
│ └── scripts/
│ ├── phase_diagram_generator.py
│ ├── structure_analyzer.py
│ └── structure_converter.py
├── pymc/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── hierarchical_model_template.py
│ │ └── linear_regression_template.py
│ ├── references/
│ │ ├── distributions.md
│ │ ├── sampling_inference.md
│ │ └── workflows.md
│ └── scripts/
│ ├── model_comparison.py
│ └── model_diagnostics.py
├── pymoo/
│ ├── SKILL.md
│ ├── references/
│ │ ├── algorithms.md
│ │ ├── constraints_mcdm.md
│ │ ├── operators.md
│ │ ├── problems.md
│ │ └── visualization.md
│ └── scripts/
│ ├── custom_problem_example.py
│ ├── decision_making_example.py
│ ├── many_objective_example.py
│ ├── multi_objective_example.py
│ └── single_objective_example.py
├── pyopenms/
│ ├── SKILL.md
│ └── references/
│ ├── data_structures.md
│ ├── feature_detection.md
│ ├── file_io.md
│ ├── identification.md
│ ├── metabolomics.md
│ └── signal_processing.md
├── pysam/
│ ├── SKILL.md
│ └── references/
│ ├── alignment_files.md
│ ├── common_workflows.md
│ ├── sequence_files.md
│ └── variant_files.md
├── pytdc/
│ ├── SKILL.md
│ ├── references/
│ │ ├── datasets.md
│ │ ├── oracles.md
│ │ └── utilities.md
│ └── scripts/
│ ├── benchmark_evaluation.py
│ ├── load_and_split_data.py
│ └── molecular_generation.py
├── pytorch-lightning/
│ ├── SKILL.md
│ ├── references/
│ │ ├── best_practices.md
│ │ ├── callbacks.md
│ │ ├── data_module.md
│ │ ├── distributed_training.md
│ │ ├── lightning_module.md
│ │ ├── logging.md
│ │ └── trainer.md
│ └── scripts/
│ ├── quick_trainer_setup.py
│ ├── template_datamodule.py
│ └── template_lightning_module.py
├── pyzotero/
│ ├── SKILL.md
│ └── references/
│ ├── authentication.md
│ ├── cli.md
│ ├── collections.md
│ ├── error-handling.md
│ ├── exports.md
│ ├── files-attachments.md
│ ├── full-text.md
│ ├── pagination.md
│ ├── read-api.md
│ ├── saved-searches.md
│ ├── search-params.md
│ ├── tags.md
│ └── write-api.md
├── qiskit/
│ ├── SKILL.md
│ └── references/
│ ├── algorithms.md
│ ├── backends.md
│ ├── circuits.md
│ ├── patterns.md
│ ├── primitives.md
│ ├── setup.md
│ ├── transpilation.md
│ └── visualization.md
├── qutip/
│ ├── SKILL.md
│ └── references/
│ ├── advanced.md
│ ├── analysis.md
│ ├── core_concepts.md
│ ├── time_evolution.md
│ └── visualization.md
├── rdkit/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── descriptors_reference.md
│ │ └── smarts_patterns.md
│ └── scripts/
│ ├── molecular_properties.py
│ ├── similarity_search.py
│ └── substructure_filter.py
├── reactome-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ └── reactome_query.py
├── research-grants/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── budget_justification_template.md
│ │ ├── nih_specific_aims_template.md
│ │ └── nsf_project_summary_template.md
│ └── references/
│ ├── README.md
│ ├── broader_impacts.md
│ ├── darpa_guidelines.md
│ ├── doe_guidelines.md
│ ├── nih_guidelines.md
│ ├── nsf_guidelines.md
│ ├── nstc_guidelines.md
│ └── specific_aims_guide.md
├── research-lookup/
│ ├── README.md
│ ├── SKILL.md
│ ├── examples.py
│ ├── lookup.py
│ ├── research_lookup.py
│ └── scripts/
│ └── research_lookup.py
├── rowan/
│ ├── SKILL.md
│ └── references/
│ ├── api_reference.md
│ ├── molecule_handling.md
│ ├── proteins_and_organization.md
│ ├── rdkit_native.md
│ ├── results_interpretation.md
│ └── workflow_types.md
├── scanpy/
│ ├── SKILL.md
│ ├── assets/
│ │ └── analysis_template.py
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── plotting_guide.md
│ │ └── standard_workflow.md
│ └── scripts/
│ └── qc_analysis.py
├── scholar-evaluation/
│ ├── SKILL.md
│ ├── references/
│ │ └── evaluation_framework.md
│ └── scripts/
│ └── calculate_scores.py
├── scientific-brainstorming/
│ ├── SKILL.md
│ └── references/
│ └── brainstorming_methods.md
├── scientific-critical-thinking/
│ ├── SKILL.md
│ └── references/
│ ├── common_biases.md
│ ├── evidence_hierarchy.md
│ ├── experimental_design.md
│ ├── logical_fallacies.md
│ ├── scientific_method.md
│ └── statistical_pitfalls.md
├── scientific-schematics/
│ ├── SKILL.md
│ ├── references/
│ │ ├── QUICK_REFERENCE.md
│ │ ├── README.md
│ │ └── best_practices.md
│ └── scripts/
│ ├── example_usage.sh
│ ├── generate_schematic.py
│ └── generate_schematic_ai.py
├── scientific-slides/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── beamer_template_conference.tex
│ │ ├── beamer_template_defense.tex
│ │ ├── beamer_template_seminar.tex
│ │ ├── powerpoint_design_guide.md
│ │ └── timing_guidelines.md
│ ├── references/
│ │ ├── beamer_guide.md
│ │ ├── data_visualization_slides.md
│ │ ├── presentation_structure.md
│ │ ├── slide_design_principles.md
│ │ ├── talk_types_guide.md
│ │ └── visual_review_workflow.md
│ └── scripts/
│ ├── generate_slide_image.py
│ ├── generate_slide_image_ai.py
│ ├── pdf_to_images.py
│ ├── slides_to_pdf.py
│ └── validate_presentation.py
├── scientific-visualization/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── color_palettes.py
│ │ ├── nature.mplstyle
│ │ ├── presentation.mplstyle
│ │ └── publication.mplstyle
│ ├── references/
│ │ ├── color_palettes.md
│ │ ├── journal_requirements.md
│ │ ├── matplotlib_examples.md
│ │ └── publication_guidelines.md
│ └── scripts/
│ ├── figure_export.py
│ └── style_presets.py
├── scientific-writing/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── REPORT_FORMATTING_GUIDE.md
│ │ ├── scientific_report.sty
│ │ └── scientific_report_template.tex
│ └── references/
│ ├── citation_styles.md
│ ├── figures_tables.md
│ ├── imrad_structure.md
│ ├── professional_report_formatting.md
│ ├── reporting_guidelines.md
│ └── writing_principles.md
├── scikit-bio/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── scikit-learn/
│ ├── SKILL.md
│ ├── references/
│ │ ├── model_evaluation.md
│ │ ├── pipelines_and_composition.md
│ │ ├── preprocessing.md
│ │ ├── quick_reference.md
│ │ ├── supervised_learning.md
│ │ └── unsupervised_learning.md
│ └── scripts/
│ ├── classification_pipeline.py
│ └── clustering_analysis.py
├── scikit-survival/
│ ├── SKILL.md
│ └── references/
│ ├── competing-risks.md
│ ├── cox-models.md
│ ├── data-handling.md
│ ├── ensemble-models.md
│ ├── evaluation-metrics.md
│ └── svm-models.md
├── scvelo/
│ ├── SKILL.md
│ ├── references/
│ │ └── velocity_models.md
│ └── scripts/
│ └── rna_velocity_workflow.py
├── scvi-tools/
│ ├── SKILL.md
│ └── references/
│ ├── differential-expression.md
│ ├── models-atac-seq.md
│ ├── models-multimodal.md
│ ├── models-scrna-seq.md
│ ├── models-spatial.md
│ ├── models-specialized.md
│ ├── theoretical-foundations.md
│ └── workflows.md
├── seaborn/
│ ├── SKILL.md
│ └── references/
│ ├── examples.md
│ ├── function_reference.md
│ └── objects_interface.md
├── shap/
│ ├── SKILL.md
│ └── references/
│ ├── explainers.md
│ ├── plots.md
│ ├── theory.md
│ └── workflows.md
├── simpy/
│ ├── SKILL.md
│ ├── references/
│ │ ├── events.md
│ │ ├── monitoring.md
│ │ ├── process-interaction.md
│ │ ├── real-time.md
│ │ └── resources.md
│ └── scripts/
│ ├── basic_simulation_template.py
│ └── resource_monitor.py
├── stable-baselines3/
│ ├── SKILL.md
│ ├── references/
│ │ ├── algorithms.md
│ │ ├── callbacks.md
│ │ ├── custom_environments.md
│ │ └── vectorized_envs.md
│ └── scripts/
│ ├── custom_env_template.py
│ ├── evaluate_agent.py
│ └── train_rl_agent.py
├── statistical-analysis/
│ ├── SKILL.md
│ ├── references/
│ │ ├── assumptions_and_diagnostics.md
│ │ ├── bayesian_statistics.md
│ │ ├── effect_sizes_and_power.md
│ │ ├── reporting_standards.md
│ │ └── test_selection_guide.md
│ └── scripts/
│ └── assumption_checks.py
├── statsmodels/
│ ├── SKILL.md
│ └── references/
│ ├── discrete_choice.md
│ ├── glm.md
│ ├── linear_models.md
│ ├── stats_diagnostics.md
│ └── time_series.md
├── string-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── string_reference.md
│ └── scripts/
│ └── string_api.py
├── sympy/
│ ├── SKILL.md
│ └── references/
│ ├── advanced-topics.md
│ ├── code-generation-printing.md
│ ├── core-capabilities.md
│ ├── matrices-linear-algebra.md
│ └── physics-mechanics.md
├── tiledbvcf/
│ └── SKILL.md
├── timesfm-forecasting/
│ ├── SKILL.md
│ ├── examples/
│ │ ├── anomaly-detection/
│ │ │ ├── detect_anomalies.py
│ │ │ └── output/
│ │ │ └── anomaly_detection.json
│ │ ├── covariates-forecasting/
│ │ │ ├── demo_covariates.py
│ │ │ └── output/
│ │ │ ├── covariates_metadata.json
│ │ │ └── sales_with_covariates.csv
│ │ └── global-temperature/
│ │ ├── README.md
│ │ ├── generate_animation_data.py
│ │ ├── generate_gif.py
│ │ ├── generate_html.py
│ │ ├── output/
│ │ │ ├── animation_data.json
│ │ │ ├── forecast_output.csv
│ │ │ ├── forecast_output.json
│ │ │ └── interactive_forecast.html
│ │ ├── run_example.sh
│ │ ├── run_forecast.py
│ │ ├── temperature_anomaly.csv
│ │ └── visualize_forecast.py
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── data_preparation.md
│ │ └── system_requirements.md
│ └── scripts/
│ ├── check_system.py
│ └── forecast_csv.py
├── torch-geometric/
│ ├── SKILL.md
│ ├── references/
│ │ ├── datasets_reference.md
│ │ ├── layers_reference.md
│ │ └── transforms_reference.md
│ └── scripts/
│ ├── benchmark_model.py
│ ├── create_gnn_template.py
│ └── visualize_graph.py
├── torchdrug/
│ ├── SKILL.md
│ └── references/
│ ├── core_concepts.md
│ ├── datasets.md
│ ├── knowledge_graphs.md
│ ├── models_architectures.md
│ ├── molecular_generation.md
│ ├── molecular_property_prediction.md
│ ├── protein_modeling.md
│ └── retrosynthesis.md
├── transformers/
│ ├── SKILL.md
│ └── references/
│ ├── generation.md
│ ├── models.md
│ ├── pipelines.md
│ ├── tokenizers.md
│ └── training.md
├── treatment-plans/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── STYLING_QUICK_REFERENCE.md
│ │ ├── chronic_disease_management_plan.tex
│ │ ├── general_medical_treatment_plan.tex
│ │ ├── medical_treatment_plan.sty
│ │ ├── mental_health_treatment_plan.tex
│ │ ├── one_page_treatment_plan.tex
│ │ ├── pain_management_plan.tex
│ │ ├── perioperative_care_plan.tex
│ │ ├── quality_checklist.md
│ │ └── rehabilitation_treatment_plan.tex
│ ├── references/
│ │ ├── README.md
│ │ ├── goal_setting_frameworks.md
│ │ ├── intervention_guidelines.md
│ │ ├── regulatory_compliance.md
│ │ ├── specialty_specific_guidelines.md
│ │ └── treatment_plan_standards.md
│ └── scripts/
│ ├── check_completeness.py
│ ├── generate_template.py
│ ├── timeline_generator.py
│ └── validate_treatment_plan.py
├── umap-learn/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── uniprot-database/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_examples.md
│ │ ├── api_fields.md
│ │ ├── id_mapping_databases.md
│ │ └── query_syntax.md
│ └── scripts/
│ └── uniprot_client.py
├── usfiscaldata/
│ ├── SKILL.md
│ └── references/
│ ├── api-basics.md
│ ├── datasets-debt.md
│ ├── datasets-fiscal.md
│ ├── datasets-interest-rates.md
│ ├── datasets-securities.md
│ ├── examples.md
│ ├── parameters.md
│ └── response-format.md
├── uspto-database/
│ ├── SKILL.md
│ ├── references/
│ │ ├── additional_apis.md
│ │ ├── patentsearch_api.md
│ │ ├── peds_api.md
│ │ └── trademark_api.md
│ └── scripts/
│ ├── patent_search.py
│ ├── peds_client.py
│ └── trademark_client.py
├── vaex/
│ ├── SKILL.md
│ └── references/
│ ├── core_dataframes.md
│ ├── data_processing.md
│ ├── io_operations.md
│ ├── machine_learning.md
│ ├── performance.md
│ └── visualization.md
├── venue-templates/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── examples/
│ │ │ ├── cell_summary_example.md
│ │ │ ├── medical_structured_abstract.md
│ │ │ ├── nature_abstract_examples.md
│ │ │ └── neurips_introduction_example.md
│ │ ├── grants/
│ │ │ ├── nih_specific_aims.tex
│ │ │ └── nsf_proposal_template.tex
│ │ ├── journals/
│ │ │ ├── nature_article.tex
│ │ │ ├── neurips_article.tex
│ │ │ └── plos_one.tex
│ │ └── posters/
│ │ └── beamerposter_academic.tex
│ ├── references/
│ │ ├── cell_press_style.md
│ │ ├── conferences_formatting.md
│ │ ├── cs_conference_style.md
│ │ ├── grants_requirements.md
│ │ ├── journals_formatting.md
│ │ ├── medical_journal_styles.md
│ │ ├── ml_conference_style.md
│ │ ├── nature_science_style.md
│ │ ├── posters_guidelines.md
│ │ ├── reviewer_expectations.md
│ │ └── venue_writing_styles.md
│ └── scripts/
│ ├── customize_template.py
│ ├── query_template.py
│ └── validate_format.py
├── what-if-oracle/
│ ├── SKILL.md
│ └── references/
│ └── scenario-templates.md
├── xlsx/
│ ├── LICENSE.txt
│ ├── SKILL.md
│ └── scripts/
│ ├── office/
│ │ ├── helpers/
│ │ │ ├── __init__.py
│ │ │ ├── merge_runs.py
│ │ │ └── simplify_redlines.py
│ │ ├── pack.py
│ │ ├── schemas/
│ │ │ ├── ISO-IEC29500-4_2016/
│ │ │ │ ├── dml-chart.xsd
│ │ │ │ ├── dml-chartDrawing.xsd
│ │ │ │ ├── dml-diagram.xsd
│ │ │ │ ├── dml-lockedCanvas.xsd
│ │ │ │ ├── dml-main.xsd
│ │ │ │ ├── dml-picture.xsd
│ │ │ │ ├── dml-spreadsheetDrawing.xsd
│ │ │ │ ├── dml-wordprocessingDrawing.xsd
│ │ │ │ ├── pml.xsd
│ │ │ │ ├── shared-additionalCharacteristics.xsd
│ │ │ │ ├── shared-bibliography.xsd
│ │ │ │ ├── shared-commonSimpleTypes.xsd
│ │ │ │ ├── shared-customXmlDataProperties.xsd
│ │ │ │ ├── shared-customXmlSchemaProperties.xsd
│ │ │ │ ├── shared-documentPropertiesCustom.xsd
│ │ │ │ ├── shared-documentPropertiesExtended.xsd
│ │ │ │ ├── shared-documentPropertiesVariantTypes.xsd
│ │ │ │ ├── shared-math.xsd
│ │ │ │ ├── shared-relationshipReference.xsd
│ │ │ │ ├── sml.xsd
│ │ │ │ ├── vml-main.xsd
│ │ │ │ ├── vml-officeDrawing.xsd
│ │ │ │ ├── vml-presentationDrawing.xsd
│ │ │ │ ├── vml-spreadsheetDrawing.xsd
│ │ │ │ ├── vml-wordprocessingDrawing.xsd
│ │ │ │ ├── wml.xsd
│ │ │ │ └── xml.xsd
│ │ │ ├── ecma/
│ │ │ │ └── fouth-edition/
│ │ │ │ ├── opc-contentTypes.xsd
│ │ │ │ ├── opc-coreProperties.xsd
│ │ │ │ ├── opc-digSig.xsd
│ │ │ │ └── opc-relationships.xsd
│ │ │ ├── mce/
│ │ │ │ └── mc.xsd
│ │ │ └── microsoft/
│ │ │ ├── wml-2010.xsd
│ │ │ ├── wml-2012.xsd
│ │ │ ├── wml-2018.xsd
│ │ │ ├── wml-cex-2018.xsd
│ │ │ ├── wml-cid-2016.xsd
│ │ │ ├── wml-sdtdatahash-2020.xsd
│ │ │ └── wml-symex-2015.xsd
│ │ ├── soffice.py
│ │ ├── unpack.py
│ │ ├── validate.py
│ │ └── validators/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── docx.py
│ │ ├── pptx.py
│ │ └── redlining.py
│ └── recalc.py
├── zarr-python/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
└── zinc-database/
├── SKILL.md
└── references/
└── api_reference.md
================================================
FILE CONTENTS
================================================
================================================
FILE: .claude-plugin/marketplace.json
================================================
{
"name": "claude-scientific-skills",
"owner": {
"name": "K-Dense Inc.",
"email": "contact@k-dense.ai"
},
"metadata": {
"description": "Claude scientific skills from K-Dense Inc",
"version": "2.28.0"
},
"plugins": [
{
"name": "scientific-skills",
"description": "Collection of scientific skills",
"source": "./",
"strict": false,
"skills": [
"./scientific-skills/adaptyv",
"./scientific-skills/aeon",
"./scientific-skills/anndata",
"./scientific-skills/arboreto",
"./scientific-skills/astropy",
"./scientific-skills/biopython",
"./scientific-skills/bioservices",
"./scientific-skills/cellxgene-census",
"./scientific-skills/cirq",
"./scientific-skills/cobrapy",
"./scientific-skills/dask",
"./scientific-skills/datacommons-client",
"./scientific-skills/datamol",
"./scientific-skills/deepchem",
"./scientific-skills/deeptools",
"./scientific-skills/denario",
"./scientific-skills/depmap",
"./scientific-skills/diffdock",
"./scientific-skills/esm",
"./scientific-skills/etetoolkit",
"./scientific-skills/flowio",
"./scientific-skills/fluidsim",
"./scientific-skills/geniml",
"./scientific-skills/geopandas",
"./scientific-skills/geomaster",
"./scientific-skills/gget",
"./scientific-skills/ginkgo-cloud-lab",
"./scientific-skills/glycoengineering",
"./scientific-skills/gtars",
"./scientific-skills/histolab",
"./scientific-skills/imaging-data-commons",
"./scientific-skills/hypogenic",
"./scientific-skills/lamindb",
"./scientific-skills/markitdown",
"./scientific-skills/matlab",
"./scientific-skills/matchms",
"./scientific-skills/matplotlib",
"./scientific-skills/medchem",
"./scientific-skills/modal",
"./scientific-skills/molecular-dynamics",
"./scientific-skills/molfeat",
"./scientific-skills/neurokit2",
"./scientific-skills/neuropixels-analysis",
"./scientific-skills/networkx",
"./scientific-skills/paper-2-web",
"./scientific-skills/pathml",
"./scientific-skills/pennylane",
"./scientific-skills/perplexity-search",
"./scientific-skills/parallel-web",
"./scientific-skills/phylogenetics",
"./scientific-skills/plotly",
"./scientific-skills/polars",
"./scientific-skills/pydeseq2",
"./scientific-skills/pydicom",
"./scientific-skills/pyhealth",
"./scientific-skills/pylabrobot",
"./scientific-skills/pymatgen",
"./scientific-skills/pymc",
"./scientific-skills/pymoo",
"./scientific-skills/pyopenms",
"./scientific-skills/pufferlib",
"./scientific-skills/pysam",
"./scientific-skills/pytdc",
"./scientific-skills/pytorch-lightning",
"./scientific-skills/pyzotero",
"./scientific-skills/qiskit",
"./scientific-skills/qutip",
"./scientific-skills/rdkit",
"./scientific-skills/rowan",
"./scientific-skills/scanpy",
"./scientific-skills/scikit-bio",
"./scientific-skills/scikit-learn",
"./scientific-skills/scikit-survival",
"./scientific-skills/scvelo",
"./scientific-skills/scvi-tools",
"./scientific-skills/seaborn",
"./scientific-skills/shap",
"./scientific-skills/simpy",
"./scientific-skills/stable-baselines3",
"./scientific-skills/statsmodels",
"./scientific-skills/sympy",
"./scientific-skills/tiledbvcf",
"./scientific-skills/timesfm-forecasting",
"./scientific-skills/torch-geometric",
"./scientific-skills/torchdrug",
"./scientific-skills/transformers",
"./scientific-skills/umap-learn",
"./scientific-skills/vaex",
"./scientific-skills/zarr-python",
"./scientific-skills/alphafold-database",
"./scientific-skills/bindingdb-database",
"./scientific-skills/biorxiv-database",
"./scientific-skills/brenda-database",
"./scientific-skills/cbioportal-database",
"./scientific-skills/chembl-database",
"./scientific-skills/clinicaltrials-database",
"./scientific-skills/clinpgx-database",
"./scientific-skills/clinvar-database",
"./scientific-skills/cosmic-database",
"./scientific-skills/drugbank-database",
"./scientific-skills/ena-database",
"./scientific-skills/ensembl-database",
"./scientific-skills/fda-database",
"./scientific-skills/fred-economic-data",
"./scientific-skills/gene-database",
"./scientific-skills/geo-database",
"./scientific-skills/gnomad-database",
"./scientific-skills/gtex-database",
"./scientific-skills/gwas-database",
"./scientific-skills/hmdb-database",
"./scientific-skills/interpro-database",
"./scientific-skills/jaspar-database",
"./scientific-skills/kegg-database",
"./scientific-skills/metabolomics-workbench-database",
"./scientific-skills/monarch-database",
"./scientific-skills/openalex-database",
"./scientific-skills/opentargets-database",
"./scientific-skills/pdb-database",
"./scientific-skills/pubchem-database",
"./scientific-skills/pubmed-database",
"./scientific-skills/reactome-database",
"./scientific-skills/string-database",
"./scientific-skills/uniprot-database",
"./scientific-skills/uspto-database",
"./scientific-skills/zinc-database",
"./scientific-skills/exploratory-data-analysis",
"./scientific-skills/hypothesis-generation",
"./scientific-skills/literature-review",
"./scientific-skills/peer-review",
"./scientific-skills/scholar-evaluation",
"./scientific-skills/scientific-brainstorming",
"./scientific-skills/consciousness-council",
"./scientific-skills/dhdna-profiler",
"./scientific-skills/what-if-oracle",
"./scientific-skills/scientific-critical-thinking",
"./scientific-skills/scientific-writing",
"./scientific-skills/statistical-analysis",
"./scientific-skills/scientific-visualization",
"./scientific-skills/citation-management",
"./scientific-skills/clinical-decision-support",
"./scientific-skills/clinical-reports",
"./scientific-skills/generate-image",
"./scientific-skills/bgpt-paper-search",
"./scientific-skills/infographics",
"./scientific-skills/latex-posters",
"./scientific-skills/market-research-reports",
"./scientific-skills/markdown-mermaid-writing",
"./scientific-skills/pptx-posters",
"./scientific-skills/research-grants",
"./scientific-skills/research-lookup",
"./scientific-skills/scientific-schematics",
"./scientific-skills/scientific-slides",
"./scientific-skills/treatment-plans",
"./scientific-skills/venue-templates",
"./scientific-skills/docx",
"./scientific-skills/pdf",
"./scientific-skills/pptx",
"./scientific-skills/xlsx",
"./scientific-skills/benchling-integration",
"./scientific-skills/dnanexus-integration",
"./scientific-skills/labarchive-integration",
"./scientific-skills/latchbio-integration",
"./scientific-skills/omero-integration",
"./scientific-skills/open-notebook",
"./scientific-skills/opentrons-integration",
"./scientific-skills/offer-k-dense-web",
"./scientific-skills/protocolsio-integration",
"./scientific-skills/get-available-resources",
"./scientific-skills/iso-13485-certification",
"./scientific-skills/edgartools",
"./scientific-skills/usfiscaldata",
"./scientific-skills/hedgefundmonitor",
"./scientific-skills/alpha-vantage"
]
}
]
}
================================================
FILE: .gitattributes
================================================
# Git LFS tracking for binary files
# Images
*.png filter=lfs diff=lfs merge=lfs -text
*.jpg filter=lfs diff=lfs merge=lfs -text
*.jpeg filter=lfs diff=lfs merge=lfs -text
*.gif filter=lfs diff=lfs merge=lfs -text
*.svg filter=lfs diff=lfs merge=lfs -text
*.webp filter=lfs diff=lfs merge=lfs -text
# Model weights and checkpoints
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
# Data files
*.parquet filter=lfs diff=lfs merge=lfs -text
*.feather filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
# Archives
*.zip filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tar.gz filter=lfs diff=lfs merge=lfs -text
================================================
FILE: .github/workflows/release.yml
================================================
# Publishes a GitHub release (and its tag) for the version recorded in
# .claude-plugin/marketplace.json. Runs when that file changes on main, or
# when triggered manually. If the tag for the current version already exists
# the workflow does nothing.
name: Create Release

on:
  push:
    branches:
      - main
    paths:
      - '.claude-plugin/marketplace.json'
  workflow_dispatch:

permissions:
  contents: write

jobs:
  release:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          fetch-depth: 0 # Fetch all history for release notes

      - name: Extract version from marketplace.json
        id: get_version
        run: |
          VERSION=$(jq -r '.metadata.version' .claude-plugin/marketplace.json)
          # jq prints the literal string "null" when the field is missing;
          # stop here rather than publishing a "vnull" release.
          if [ -z "$VERSION" ] || [ "$VERSION" = "null" ]; then
            echo "::error::.metadata.version is missing from .claude-plugin/marketplace.json"
            exit 1
          fi
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
          echo "tag=v$VERSION" >> "$GITHUB_OUTPUT"
          echo "Extracted version: $VERSION"

      - name: Check if tag already exists
        id: check_tag
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the value is never parsed as shell source.
          TAG: ${{ steps.get_version.outputs.tag }}
        run: |
          # Look in refs/tags/ explicitly: a bare "v1.2.3" would also resolve
          # to a branch with that name and wrongly report the tag as present.
          if git rev-parse -q --verify "refs/tags/$TAG" >/dev/null 2>&1; then
            echo "exists=true" >> "$GITHUB_OUTPUT"
            echo "Tag $TAG already exists"
          else
            echo "exists=false" >> "$GITHUB_OUTPUT"
            echo "Tag $TAG does not exist"
          fi

      - name: Get previous tag
        id: previous_tag
        if: steps.check_tag.outputs.exists == 'false'
        run: |
          # Most recent tag reachable from HEAD; empty when the repository has
          # no tags yet (first release).
          PREVIOUS_TAG=$(git describe --tags --abbrev=0 2>/dev/null || echo "")
          if [ -z "$PREVIOUS_TAG" ]; then
            echo "previous_tag=" >> "$GITHUB_OUTPUT"
            echo "No previous tag found"
          else
            echo "previous_tag=$PREVIOUS_TAG" >> "$GITHUB_OUTPUT"
            echo "Previous tag: $PREVIOUS_TAG"
          fi

      - name: Generate release notes
        id: release_notes
        if: steps.check_tag.outputs.exists == 'false'
        env:
          PREVIOUS_TAG: ${{ steps.previous_tag.outputs.previous_tag }}
        run: |
          # Start release notes
          cat > release_notes.md << 'EOF'
          ## What's Changed
          EOF
          # Generate changelog from commits
          if [ -n "$PREVIOUS_TAG" ]; then
            echo "Changes since $PREVIOUS_TAG:" >> release_notes.md
            echo "" >> release_notes.md
            # Get commits with nice formatting
            git log "${PREVIOUS_TAG}..HEAD" --pretty=format:"* %s (%h)" --no-merges >> release_notes.md
          else
            echo "Initial release of Claude Scientific Skills" >> release_notes.md
            echo "" >> release_notes.md
            echo "This release includes:" >> release_notes.md
            git log --pretty=format:"* %s (%h)" --no-merges --max-count=20 >> release_notes.md
          fi
          cat release_notes.md

      - name: Create Release
        if: steps.check_tag.outputs.exists == 'false'
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.get_version.outputs.tag }}
          name: v${{ steps.get_version.outputs.version }}
          body_path: release_notes.md
          draft: false
          prerelease: false
          generate_release_notes: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Skip release creation
        if: steps.check_tag.outputs.exists == 'true'
        env:
          TAG: ${{ steps.get_version.outputs.tag }}
        run: |
          echo "Release $TAG already exists. Skipping release creation."
================================================
FILE: .gitignore
================================================
.claude
.DS_Store
temp/
pyproject.toml
uv.lock
.venv/
.python-version
main.py
__pycache__/
.env
scan_skills.py
================================================
FILE: LICENSE.md
================================================
MIT License
Copyright (c) 2025 K-Dense Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# Claude Scientific Skills
[License: MIT](LICENSE.md)
[170 Skills](#-whats-included)
[250+ Databases](#-whats-included)
[Agent Skills](https://agentskills.io/)
[Getting Started](#-getting-started)
[X](https://x.com/k_dense_ai)
[LinkedIn](https://www.linkedin.com/company/k-dense-inc)
[YouTube](https://www.youtube.com/@K-Dense-Inc)
A comprehensive collection of **170+ ready-to-use scientific and research skills** (now including cancer genomics, drug-target binding, molecular dynamics, RNA velocity, geospatial science, time series forecasting, FRED economic data, and more) for any AI agent that supports the open [Agent Skills](https://agentskills.io/) standard, created by [K-Dense](https://k-dense.ai). Works with **Cursor, Claude Code, Codex, and more**. Transform your AI agent into a research assistant capable of executing complex multi-step scientific workflows across biology, chemistry, medicine, and beyond.
<p align="center">
<a href="https://k-dense.ai">
<img src="docs/k-dense-web.gif" alt="K-Dense Web Demo" width="800"/>
</a>
<br/>
<em>The demo above shows <a href="https://k-dense.ai">K-Dense Web</a> — the hosted platform built on top of these skills. Claude Scientific Skills is the open-source skill collection; K-Dense Web is the full AI co-scientist platform with more power and zero setup.</em>
</p>
---
These skills enable your AI agent to seamlessly work with specialized scientific libraries, databases, and tools across multiple scientific domains. While the agent can use any Python package or API on its own, these explicitly defined skills provide curated documentation and examples that make it significantly stronger and more reliable for the workflows below:
- 🧬 Bioinformatics & Genomics - Sequence analysis, single-cell RNA-seq, gene regulatory networks, variant annotation, phylogenetic analysis
- 🧪 Cheminformatics & Drug Discovery - Molecular property prediction, virtual screening, ADMET analysis, molecular docking, lead optimization
- 🔬 Proteomics & Mass Spectrometry - LC-MS/MS processing, peptide identification, spectral matching, protein quantification
- 🏥 Clinical Research & Precision Medicine - Clinical trials, pharmacogenomics, variant interpretation, drug safety, clinical decision support, treatment planning
- 🧠 Healthcare AI & Clinical ML - EHR analysis, physiological signal processing, medical imaging, clinical prediction models
- 🖼️ Medical Imaging & Digital Pathology - DICOM processing, whole slide image analysis, computational pathology, radiology workflows
- 🤖 Machine Learning & AI - Deep learning, reinforcement learning, time series analysis, model interpretability, Bayesian methods
- 🔮 Materials Science & Chemistry - Crystal structure analysis, phase diagrams, metabolic modeling, computational chemistry
- 🌌 Physics & Astronomy - Astronomical data analysis, coordinate transformations, cosmological calculations, symbolic mathematics, physics computations
- ⚙️ Engineering & Simulation - Discrete-event simulation, multi-objective optimization, metabolic engineering, systems modeling, process optimization
- 📊 Data Analysis & Visualization - Statistical analysis, network analysis, time series, publication-quality figures, large-scale data processing, EDA
- 🌍 Geospatial Science & Remote Sensing - Satellite imagery processing, GIS analysis, spatial statistics, terrain analysis, machine learning for Earth observation
- 🧪 Laboratory Automation - Liquid handling protocols, lab equipment control, workflow automation, LIMS integration
- 📚 Scientific Communication - Literature review, peer review, scientific writing, document processing, posters, slides, schematics, citation management
- 🔬 Multi-omics & Systems Biology - Multi-modal data integration, pathway analysis, network biology, systems-level insights
- 🧬 Protein Engineering & Design - Protein language models, structure prediction, sequence design, function annotation
- 🎓 Research Methodology - Hypothesis generation, scientific brainstorming, critical thinking, grant writing, scholar evaluation
**Transform your AI coding agent into an 'AI Scientist' on your desktop!**
> ⭐ **If you find this repository useful**, please consider giving it a star! It helps others discover these tools and encourages us to continue maintaining and expanding this collection.
> 🎬 **New to Claude Scientific Skills?** Watch our [Getting Started with Claude Scientific Skills](https://youtu.be/ZxbnDaD_FVg) video for a quick walkthrough.
---
## 📦 What's Included
This repository provides **170 scientific and research skills** organized into the following categories:
- **250+ Scientific & Financial Databases** - Collectively, these skills provide access to over 250 databases and data sources. Dedicated skills cover PubMed, ChEMBL, UniProt, COSMIC, ClinicalTrials.gov, SEC EDGAR, Alpha Vantage, and more; multi-database packages like BioServices (~40 bioinformatics services + 30+ PSICQUIC interaction databases), BioPython (38 NCBI sub-databases via Entrez), and gget (20+ genomics databases) account for the rest
- **60+ Optimized Python Package Skills** - Explicitly defined skills for RDKit, Scanpy, PyTorch Lightning, scikit-learn, BioPython, pyzotero, BioServices, PennyLane, Qiskit, OpenMM, MDAnalysis, scVelo, TimesFM, and others — with curated documentation, examples, and best practices. Note: the agent can write code using *any* Python package, not just these; these skills simply provide stronger, more reliable performance for the packages listed
- **15+ Scientific Integration Skills** - Explicitly defined skills for Benchling, DNAnexus, LatchBio, OMERO, Protocols.io, and more. Again, the agent is not limited to these — any API or platform reachable from Python is fair game; these skills are the optimized, pre-documented paths
- **35+ Analysis & Communication Tools** - Literature review, scientific writing, peer review, document processing, posters, slides, schematics, infographics, Mermaid diagrams, and more
- **10+ Research & Clinical Tools** - Hypothesis generation, grant writing, clinical decision support, treatment plans, regulatory compliance
Each skill includes:
- ✅ Comprehensive documentation (`SKILL.md`)
- ✅ Practical code examples
- ✅ Use cases and best practices
- ✅ Integration guides
- ✅ Reference materials
---
## 📋 Table of Contents
- [What's Included](#-whats-included)
- [Why Use This?](#-why-use-this)
- [Getting Started](#-getting-started)
- [Support Open Source](#-support-the-open-source-community)
- [Prerequisites](#prerequisites)
- [Quick Examples](#quick-examples)
- [Use Cases](#use-cases)
- [Available Skills](#available-skills)
- [Contributing](#contributing)
- [Troubleshooting](#troubleshooting)
- [FAQ](#faq)
- [Support](#support)
- [Join Our Community](#join-our-community)
- [Citation](#citation)
- [License](#license)
---
## 🚀 Why Use This?
### ⚡ **Accelerate Your Research**
- **Save Days of Work** - Skip API documentation research and integration setup
- **Production-Ready Code** - Tested, validated examples following scientific best practices
- **Multi-Step Workflows** - Execute complex pipelines with a single prompt
### 🎯 **Comprehensive Coverage**
- **170 Skills** - Extensive coverage across all major scientific domains
- **250+ Databases** - Collective access to 250+ databases and data sources spanning genomics, chemistry, clinical, financial, and more — through dedicated database skills and multi-database packages like BioServices, BioPython, and gget
- **60+ Optimized Python Package Skills** - RDKit, Scanpy, PyTorch Lightning, scikit-learn, BioServices, PennyLane, Qiskit, OpenMM, scVelo, TimesFM, and others (the agent can use any Python package; these are the pre-documented, higher-performing paths)
### 🔧 **Easy Integration**
- **Simple Setup** - Copy skills to your skills directory and start working
- **Automatic Discovery** - Your agent automatically finds and uses relevant skills
- **Well Documented** - Each skill includes examples, use cases, and best practices
### 🌟 **Maintained & Supported**
- **Regular Updates** - Continuously maintained and expanded by the K-Dense team
- **Community Driven** - Open source with active community contributions
- **Enterprise Ready** - Commercial support available for advanced needs
---
## 🎯 Getting Started
Claude Scientific Skills follows the open [Agent Skills](https://agentskills.io/) standard. Simply copy the skill folders into your skills directory and your AI agent will automatically discover and use them.
### Step 1: Clone the Repository
```bash
git clone https://github.com/K-Dense-AI/claude-scientific-skills.git
```
### Step 2: Copy Skills to Your Skills Directory
Copy the individual skill folders from `scientific-skills/` to one of the supported skill directories below. You can install skills **globally** (available across all projects) or **per-project** (available only in that project).
**Global installation** (recommended — skills available everywhere):
| Tool | Directory |
|------|-----------|
| Cursor | `~/.cursor/skills/` |
| Claude Code | `~/.claude/skills/` |
| Codex | `~/.codex/skills/` |
| Gemini CLI | `~/.gemini/skills/` |
**Project-level installation** (skills scoped to a single project):
| Tool | Directory |
|------|-----------|
| Cursor | `.cursor/skills/` (in your project root) |
| Claude Code | `.claude/skills/` (in your project root) |
| Codex | `.codex/skills/` (in your project root) |
| Gemini CLI | `.gemini/skills/` (in your project root) |
> **Note:** Cursor also reads from `.claude/skills/`, `.codex/skills/`, and `.gemini/skills/` directories, and vice versa, so skills are cross-compatible between tools.
**Example — global install for Cursor:**
```bash
cp -r claude-scientific-skills/scientific-skills/* ~/.cursor/skills/
```
**Example — global install for Claude Code:**
```bash
cp -r claude-scientific-skills/scientific-skills/* ~/.claude/skills/
```
**Example — global install for Gemini CLI:**
```bash
cp -r claude-scientific-skills/scientific-skills/* ~/.gemini/skills/
```
**Example — project-level install:**
```bash
mkdir -p .cursor/skills
cp -r /path/to/claude-scientific-skills/scientific-skills/* .cursor/skills/
```
**That's it!** Your AI agent will automatically discover the skills and use them when relevant to your scientific tasks. You can also invoke any skill manually by mentioning the skill name in your prompt.
---
## ❤️ Support the Open Source Community
Claude Scientific Skills is powered by **50+ incredible open source projects** maintained by dedicated developers and research communities worldwide. Projects like Biopython, Scanpy, RDKit, scikit-learn, PyTorch Lightning, and many others form the foundation of these skills.
**If you find value in this repository, please consider supporting the projects that make it possible:**
- ⭐ **Star their repositories** on GitHub
- 💰 **Sponsor maintainers** via GitHub Sponsors or NumFOCUS
- 📝 **Cite projects** in your publications
- 💻 **Contribute** code, docs, or bug reports
👉 **[View the full list of projects to support](docs/open-source-sponsors.md)**
---
## ⚙️ Prerequisites
- **Python**: 3.9+ (3.12+ recommended for best compatibility)
- **uv**: Python package manager (required for installing skill dependencies)
- **Client**: Any agent that supports the [Agent Skills](https://agentskills.io/) standard (Cursor, Claude Code, Gemini CLI, Codex, etc.)
- **System**: macOS, Linux, or Windows with WSL2
- **Dependencies**: Automatically handled by individual skills (check `SKILL.md` files for specific requirements)
### Installing uv
The skills use `uv` as the package manager for installing Python dependencies. Install it using the instructions for your operating system:
**macOS and Linux:**
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
```
**Windows:**
```powershell
powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
```
**Alternative (via pip):**
```bash
pip install uv
```
After installation, verify it works by running:
```bash
uv --version
```
For more installation options and details, visit the [official uv documentation](https://docs.astral.sh/uv/).
---
## 💡 Quick Examples
Once you've installed the skills, you can ask your AI agent to execute complex multi-step scientific workflows. Here are some example prompts:
### 🧪 Drug Discovery Pipeline
**Goal**: Find novel EGFR inhibitors for lung cancer treatment
**Prompt**:
```
Use available skills you have access to whenever possible. Query ChEMBL for EGFR inhibitors (IC50 < 50nM), analyze structure-activity relationships
with RDKit, generate improved analogs with datamol, perform virtual screening with DiffDock
against AlphaFold EGFR structure, search PubMed for resistance mechanisms, check COSMIC for
mutations, and create visualizations and a comprehensive report.
```
**Skills Used**: ChEMBL, RDKit, datamol, DiffDock, AlphaFold DB, PubMed, COSMIC, scientific visualization
*Need cloud GPUs and a publication-ready report at the end? [Run this on K-Dense Web free.](https://k-dense.ai)*
---
### 🔬 Single-Cell RNA-seq Analysis
**Goal**: Comprehensive analysis of 10X Genomics data with public data integration
**Prompt**:
```
Use available skills you have access to whenever possible. Load 10X dataset with Scanpy, perform QC and doublet removal, integrate with Cellxgene
Census data, identify cell types using NCBI Gene markers, run differential expression with
PyDESeq2, infer gene regulatory networks with Arboreto, enrich pathways via Reactome/KEGG,
and identify therapeutic targets with Open Targets.
```
**Skills Used**: Scanpy, Cellxgene Census, NCBI Gene, PyDESeq2, Arboreto, Reactome, KEGG, Open Targets
*Want zero-setup cloud execution and shareable outputs? [Try K-Dense Web free.](https://k-dense.ai)*
---
### 🧬 Multi-Omics Biomarker Discovery
**Goal**: Integrate RNA-seq, proteomics, and metabolomics to predict patient outcomes
**Prompt**:
```
Use available skills you have access to whenever possible. Analyze RNA-seq with PyDESeq2, process mass spec with pyOpenMS, integrate metabolites from
HMDB/Metabolomics Workbench, map proteins to pathways (UniProt/KEGG), find interactions via
STRING, correlate omics layers with statsmodels, build predictive model with scikit-learn,
and search ClinicalTrials.gov for relevant trials.
```
**Skills Used**: PyDESeq2, pyOpenMS, HMDB, Metabolomics Workbench, UniProt, KEGG, STRING, statsmodels, scikit-learn, ClinicalTrials.gov
*This pipeline is heavy on compute. [Run it on K-Dense Web with cloud GPUs, free to start.](https://k-dense.ai)*
---
### 🎯 Virtual Screening Campaign
**Goal**: Discover allosteric modulators for protein-protein interactions
**Prompt**:
```
Use available skills you have access to whenever possible. Retrieve AlphaFold structures, identify interaction interface with BioPython, search ZINC
for allosteric candidates (MW 300-500, logP 2-4), filter with RDKit, dock with DiffDock,
rank with DeepChem, check PubChem suppliers, search USPTO patents, and optimize leads with
MedChem/molfeat.
```
**Skills Used**: AlphaFold DB, BioPython, ZINC, RDKit, DiffDock, DeepChem, PubChem, USPTO, MedChem, molfeat
*Skip the local GPU bottleneck. [Run virtual screening on K-Dense Web free.](https://k-dense.ai)*
---
### 🏥 Clinical Variant Interpretation
**Goal**: Analyze VCF file for hereditary cancer risk assessment
**Prompt**:
```
Use available skills you have access to whenever possible. Parse VCF with pysam, annotate variants with Ensembl VEP, query ClinVar for pathogenicity,
check COSMIC for cancer mutations, retrieve gene info from NCBI Gene, analyze protein impact
with UniProt, search PubMed for case reports, check ClinPGx for pharmacogenomics, generate
clinical report with document processing tools, and find matching trials on ClinicalTrials.gov.
```
**Skills Used**: pysam, Ensembl, ClinVar, COSMIC, NCBI Gene, UniProt, PubMed, ClinPGx, Document Skills, ClinicalTrials.gov
*Need a polished clinical report at the end, not just code? [K-Dense Web delivers publication-ready outputs. Try it free.](https://k-dense.ai)*
---
### 🌐 Systems Biology Network Analysis
**Goal**: Analyze gene regulatory networks from RNA-seq data
**Prompt**:
```
Use available skills you have access to whenever possible. Query NCBI Gene for annotations, retrieve sequences from UniProt, identify interactions via
STRING, map to Reactome/KEGG pathways, analyze topology with Torch Geometric, reconstruct
GRNs with Arboreto, assess druggability with Open Targets, model with PyMC, visualize
networks, and search GEO for similar patterns.
```
**Skills Used**: NCBI Gene, UniProt, STRING, Reactome, KEGG, Torch Geometric, Arboreto, Open Targets, PyMC, GEO
*Want end-to-end pipelines with shareable outputs and no setup? [Try K-Dense Web free.](https://k-dense.ai)*
> 📖 **Want more examples?** Check out [docs/examples.md](docs/examples.md) for comprehensive workflow examples and detailed use cases across all scientific domains.
---
## 🚀 Want to Skip the Setup and Just Do the Science?
**Recognize any of these?**
- You spent more time configuring environments than running analyses
- Your workflow needs a GPU your local machine does not have
- You need a shareable, publication-ready figure or report, not just a script
- You want to run a complex multi-step pipeline right now, without reading package docs first
If so, **[K-Dense Web](https://k-dense.ai)** was built for you. It is the full AI co-scientist platform: everything in this repo plus cloud GPUs, 200+ skills, and outputs you can drop directly into a paper or presentation. Zero setup required.
| Feature | This Repo | K-Dense Web |
|---------|-----------|-------------|
| Scientific Skills | 170 skills | **200+ skills** (exclusive access) |
| Setup | Manual installation | **Zero setup, works instantly** |
| Compute | Your machine | **Cloud GPUs and HPC included** |
| Workflows | Prompt and code | **End-to-end research pipelines** |
| Outputs | Code and analysis | **Publication-ready figures, reports, and papers** |
| Integrations | Local tools | **Lab systems, ELNs, and cloud storage** |
> *"K-Dense Web took me from raw sequencing data to a draft figure in one afternoon. What used to take three days of environment setup and scripting now just works."*
> **Computational biologist, drug discovery**
> ### 💰 $50 in free credits, no credit card required
> Start running real scientific workflows in minutes.
>
> **[Try K-Dense Web free](https://k-dense.ai)**
*[k-dense.ai](https://k-dense.ai) | [Read the full comparison](https://k-dense.ai/blog/k-dense-web-vs-claude-scientific-skills)*
---
## 🔬 Use Cases
### 🧪 Drug Discovery & Medicinal Chemistry
- **Virtual Screening**: Screen millions of compounds from PubChem/ZINC against protein targets
- **Lead Optimization**: Analyze structure-activity relationships with RDKit, generate analogs with datamol
- **ADMET Prediction**: Predict absorption, distribution, metabolism, excretion, and toxicity with DeepChem
- **Molecular Docking**: Predict binding poses and affinities with DiffDock
- **Bioactivity Mining**: Query ChEMBL for known inhibitors and analyze SAR patterns
### 🧬 Bioinformatics & Genomics
- **Sequence Analysis**: Process DNA/RNA/protein sequences with BioPython and pysam
- **Single-Cell Analysis**: Analyze 10X Genomics data with Scanpy, identify cell types, infer GRNs with Arboreto
- **Variant Annotation**: Annotate VCF files with Ensembl VEP, query ClinVar for pathogenicity
- **Variant Database Management**: Build scalable VCF databases with TileDB-VCF for incremental sample addition, efficient population-scale queries, and compressed storage of genomic variant data
- **Gene Discovery**: Query NCBI Gene, UniProt, and Ensembl for comprehensive gene information
- **Network Analysis**: Identify protein-protein interactions via STRING, map to pathways (KEGG, Reactome)
### 🏥 Clinical Research & Precision Medicine
- **Clinical Trials**: Search ClinicalTrials.gov for relevant studies, analyze eligibility criteria
- **Variant Interpretation**: Annotate variants with ClinVar, COSMIC, and ClinPGx for pharmacogenomics
- **Drug Safety**: Query FDA databases for adverse events, drug interactions, and recalls
- **Precision Therapeutics**: Match patient variants to targeted therapies and clinical trials
### 🔬 Multi-Omics & Systems Biology
- **Multi-Omics Integration**: Combine RNA-seq, proteomics, and metabolomics data
- **Pathway Analysis**: Enrich differentially expressed genes in KEGG/Reactome pathways
- **Network Biology**: Reconstruct gene regulatory networks, identify hub genes
- **Biomarker Discovery**: Integrate multi-omics layers to predict patient outcomes
### 📊 Data Analysis & Visualization
- **Statistical Analysis**: Perform hypothesis testing, power analysis, and experimental design
- **Publication Figures**: Create publication-quality visualizations with matplotlib and seaborn
- **Network Visualization**: Visualize biological networks with NetworkX
- **Report Generation**: Generate comprehensive PDF reports with Document Skills
### 🧪 Laboratory Automation
- **Protocol Design**: Create Opentrons protocols for automated liquid handling
- **LIMS Integration**: Integrate with Benchling and LabArchives for data management
- **Workflow Automation**: Automate multi-step laboratory workflows
---
## 📚 Available Skills
This repository contains **170 scientific and research skills** organized across multiple domains. Each skill provides comprehensive documentation, code examples, and best practices for working with scientific libraries, databases, and tools.
### Skill Categories
> **Note:** The Python package and integration skills listed below are *explicitly defined* skills — curated with documentation, examples, and best practices for stronger, more reliable performance. They are not a ceiling: the agent can install and use *any* Python package or call *any* API, even without a dedicated skill. The skills listed simply make common workflows faster and more dependable.
#### 🧬 **Bioinformatics & Genomics** (20+ skills)
- Sequence analysis: BioPython, pysam, scikit-bio, BioServices
- Single-cell analysis: Scanpy, AnnData, scvi-tools, scVelo (RNA velocity), Arboreto, Cellxgene Census
- Genomic tools: gget, geniml, gtars, deepTools, FlowIO, Zarr, TileDB-VCF
- Phylogenetics: ETE Toolkit, Phylogenetics (MAFFT, IQ-TREE 2, FastTree)
#### 🧪 **Cheminformatics & Drug Discovery** (13+ skills)
- Molecular manipulation: RDKit, Datamol, Molfeat
- Deep learning: DeepChem, TorchDrug
- Docking & screening: DiffDock
- Molecular dynamics: OpenMM + MDAnalysis (MD simulation & trajectory analysis)
- Cloud quantum chemistry: Rowan (pKa, docking, cofolding)
- Drug-likeness: MedChem
- Binding affinities: BindingDB (Ki, Kd, IC50, EC50 for drug-target pairs)
- Benchmarks: PyTDC
#### 🔬 **Proteomics & Mass Spectrometry** (2 skills)
- Spectral processing: matchms, pyOpenMS
#### 🏥 **Clinical Research & Precision Medicine** (16+ skills)
- Clinical databases: ClinicalTrials.gov, ClinVar, ClinPGx, COSMIC, FDA Databases
- Cancer genomics: cBioPortal (somatic mutations, CNAs, expression, survival across 400+ studies), DepMap (cancer dependency scores, drug sensitivity)
- Disease-gene associations: Monarch Initiative (OMIM, ORPHANET, HPO, ClinVar, model organism data)
- Cancer imaging: NCI Imaging Data Commons (radiology & pathology datasets via idc-index)
- Healthcare AI: PyHealth, NeuroKit2, Clinical Decision Support
- Clinical documentation: Clinical Reports, Treatment Plans
- Variant analysis: Ensembl, NCBI Gene
#### 🖼️ **Medical Imaging & Digital Pathology** (3 skills)
- DICOM processing: pydicom
- Whole slide imaging: histolab, PathML
#### 🧠 **Neuroscience & Electrophysiology** (1 skill)
- Neural recordings: Neuropixels-Analysis (extracellular spikes, silicon probes, spike sorting)
#### 🤖 **Machine Learning & AI** (16+ skills)
- Deep learning: PyTorch Lightning, Transformers, Stable Baselines3, PufferLib
- Classical ML: scikit-learn, scikit-survival, SHAP
- Time series: aeon, TimesFM (Google's zero-shot foundation model for univariate forecasting)
- Bayesian methods: PyMC
- Optimization: PyMOO
- Graph ML: Torch Geometric
- Dimensionality reduction: UMAP-learn
- Statistical modeling: statsmodels
#### 🔮 **Materials Science, Chemistry & Physics** (7 skills)
- Materials: Pymatgen
- Metabolic modeling: COBRApy
- Astronomy: Astropy
- Quantum computing: Cirq, PennyLane, Qiskit, QuTiP
#### ⚙️ **Engineering & Simulation** (6 skills)
- Numerical computing: MATLAB/Octave
- Computational fluid dynamics: FluidSim
- Discrete-event simulation: SimPy
- Data processing: Dask, Polars, Vaex
#### 📊 **Data Analysis & Visualization** (17+ skills)
- Visualization: Matplotlib, Seaborn, Plotly, Scientific Visualization
- Geospatial analysis: GeoPandas, GeoMaster (remote sensing, GIS, satellite imagery, spatial ML, 500+ examples)
- Network analysis: NetworkX
- Symbolic math: SymPy
- Document processing: Document Skills (PDF, DOCX, PPTX, XLSX)
- Infographics: Infographics (AI-powered professional infographic creation)
- Diagrams: Markdown & Mermaid Writing (text-based diagrams as default documentation standard)
- Data access: Data Commons
- Exploratory data analysis: EDA workflows
- Statistical analysis: Statistical Analysis workflows
#### 🧪 **Laboratory Automation** (5 skills)
- Liquid handling: PyLabRobot
- Cloud lab: Ginkgo Cloud Lab (cell-free protein expression, fluorescent pixel art via autonomous RAC infrastructure)
- Protocol management: Protocols.io
- LIMS integration: Benchling, LabArchives
#### 🔬 **Multi-omics & Systems Biology** (5+ skills)
- Pathway analysis: KEGG, Reactome, STRING
- Multi-omics: Denario, HypoGeniC
- Data management: LaminDB
#### 🧬 **Protein Engineering & Design** (3 skills)
- Protein language models: ESM
- Glycoengineering: Glycoengineering (N/O-glycosylation prediction, therapeutic antibody optimization)
- Cloud laboratory platform: Adaptyv (automated protein testing and validation)
#### 📚 **Scientific Communication** (24+ skills)
- Literature: OpenAlex, PubMed, bioRxiv, Literature Review
- Advanced paper search: BGPT Paper Search (25+ structured fields per paper — methods, results, sample sizes, quality scores — from full text, not just abstracts)
- Web search: Perplexity Search (AI-powered search with real-time information), Parallel Web (synthesized summaries with citations)
- Research notebooks: Open Notebook (self-hosted NotebookLM alternative — PDFs, videos, audio, web pages; 16+ AI providers; multi-speaker podcast generation)
- Writing: Scientific Writing, Peer Review
- Document processing: XLSX, MarkItDown, Document Skills
- Publishing: Paper-2-Web, Venue Templates
- Presentations: Scientific Slides, LaTeX Posters, PPTX Posters
- Diagrams: Scientific Schematics, Markdown & Mermaid Writing
- Infographics: Infographics (10 types, 8 styles, colorblind-safe palettes)
- Citations: Citation Management
- Illustration: Generate Image (AI image generation with FLUX.2 Pro and Gemini 3 Pro (Nano Banana Pro))
#### 🔬 **Scientific Databases** (37+ dedicated skills → 250+ databases total)
> These 37+ skills each provide direct, optimized access to a named database. Collectively, however, these skills unlock **250+ databases and data sources** — multi-database packages like BioServices (~40 bioinformatics services + 30+ PSICQUIC interaction databases), BioPython (38 NCBI sub-databases via Entrez), and gget (20+ genomics databases) add far more coverage beyond what's listed here.
- Protein: UniProt, PDB, AlphaFold DB, InterPro (protein families, domains, Pfam, PANTHER, SMART + 11 others)
- Chemical: PubChem, ChEMBL, DrugBank, ZINC, HMDB, BindingDB (drug-target binding affinities)
- Genomic: Ensembl, NCBI Gene, GEO, ENA, GWAS Catalog, gnomAD (population allele frequencies, pLI/LOEUF), GTEx (tissue-specific expression, eQTLs), JASPAR (transcription factor binding site profiles)
- Literature: bioRxiv (preprints)
- Clinical: ClinVar, COSMIC, ClinicalTrials.gov, ClinPGx, FDA Databases, cBioPortal (cancer genomics), DepMap (cancer cell line dependencies), Monarch Initiative (rare disease, HPO, cross-species)
- Imaging: NCI Imaging Data Commons (radiology & pathology datasets)
- Pathways: KEGG, Reactome, STRING
- Targets: Open Targets
- Metabolomics: Metabolomics Workbench
- Enzymes: BRENDA
- Patents: USPTO
#### 🔧 **Infrastructure & Platforms** (6+ skills)
- Cloud compute: Modal
- Genomics platforms: DNAnexus, LatchBio
- Microscopy: OMERO
- Automation: Opentrons
- Resource detection: Get Available Resources
#### 🎓 **Research Methodology & Planning** (11+ skills)
- Ideation: Scientific Brainstorming, Hypothesis Generation
- Critical analysis: Scientific Critical Thinking, Scholar Evaluation
- Scenario analysis: What-If Oracle (multi-branch possibility exploration, risk analysis, strategic options)
- Multi-perspective deliberation: Consciousness Council (diverse expert viewpoints, devil's advocate analysis)
- Cognitive profiling: DHDNA Profiler (extract thinking patterns and cognitive signatures from any text)
- Funding: Research Grants
- Discovery: Research Lookup
- Market analysis: Market Research Reports
#### ⚖️ **Regulatory & Standards** (1 skill)
- Medical device standards: ISO 13485 Certification
#### 💹 **Financial & SEC Research** (5 skills)
- SEC filings & financial data: edgartools (10-K, 10-Q, 8-K, 13F, Form 4, XBRL, insider trading, institutional holdings)
- U.S. federal fiscal data: usfiscaldata (national debt, Daily/Monthly Treasury Statements, Treasury auctions, interest rates, exchange rates, savings bonds)
- Macroeconomic data: FRED (800,000+ economic time series from 100+ sources — GDP, unemployment, inflation, housing, regional data via Federal Reserve Economic Data API)
- Hedge fund systemic risk: hedgefundmonitor (OFR Hedge Fund Monitor API — Form PF aggregated stats, CFTC futures positioning, FICC sponsored repo, SCOOS dealer financing)
- Global market data: alpha-vantage (real-time & historical stocks, options, forex, crypto, commodities, economic indicators, 50+ technical indicators via Alpha Vantage API)
> 📖 **For complete details on all skills**, see [docs/scientific-skills.md](docs/scientific-skills.md)
> 💡 **Looking for practical examples?** Check out [docs/examples.md](docs/examples.md) for comprehensive workflow examples across all scientific domains.
---
## 🤝 Contributing
We welcome contributions to expand and improve this scientific skills repository!
### Ways to Contribute
✨ **Add New Skills**
- Create skills for additional scientific packages or databases
- Add integrations for scientific platforms and tools
📚 **Improve Existing Skills**
- Enhance documentation with more examples and use cases
- Add new workflows and reference materials
- Improve code examples and scripts
- Fix bugs or update outdated information
🐛 **Report Issues**
- Submit bug reports with detailed reproduction steps
- Suggest improvements or new features
### How to Contribute
1. **Fork** the repository
2. **Create** a feature branch (`git checkout -b feature/amazing-skill`)
3. **Follow** the existing directory structure and documentation patterns
4. **Ensure** all new skills include comprehensive `SKILL.md` files
5. **Test** your examples and workflows thoroughly
6. **Commit** your changes (`git commit -m 'Add amazing skill'`)
7. **Push** to your branch (`git push origin feature/amazing-skill`)
8. **Submit** a pull request with a clear description of your changes
### Contribution Guidelines
✅ **Adhere to the [Agent Skills Specification](https://agentskills.io/specification)** — Every skill must follow the official spec (valid `SKILL.md` frontmatter, naming conventions, directory structure)
✅ Maintain consistency with existing skill documentation format
✅ Ensure all code examples are tested and functional
✅ Follow scientific best practices in examples and workflows
✅ Update relevant documentation when adding new capabilities
✅ Provide clear comments and docstrings in code
✅ Include references to official documentation
### Security Scanning
All skills in this repository are security-scanned using [Cisco AI Defense Skill Scanner](https://github.com/cisco-ai-defense/skill-scanner), an open-source tool that detects prompt injection, data exfiltration, and malicious code patterns in Agent Skills.
If you are contributing a new skill, we recommend running the scanner locally before submitting a pull request:
```bash
uv pip install cisco-ai-skill-scanner
skill-scanner scan /path/to/your/skill --use-behavioral
```
> **Note:** A clean scan result reduces noise in review, but does not guarantee a skill is free of all risk. Contributed skills are also reviewed manually before merging.
### Recognition
Contributors are recognized in our community and may be featured in:
- Repository contributors list
- Special mentions in release notes
- K-Dense community highlights
Your contributions help make scientific computing more accessible and enable researchers to leverage AI tools more effectively!
### Support Open Source
This project builds on 50+ amazing open source projects. If you find value in these skills, please consider [supporting the projects we depend on](docs/open-source-sponsors.md).
---
## 🔧 Troubleshooting
### Common Issues
**Problem: Skills not loading**
- Verify skill folders are in the correct directory (see [Getting Started](#-getting-started))
- Each skill folder must contain a `SKILL.md` file
- Restart your agent/IDE after copying skills
- In Cursor, check Settings → Rules to confirm skills are discovered
**Problem: Missing Python dependencies**
- Solution: Check the specific `SKILL.md` file for required packages
- Install dependencies: `uv pip install package-name`
**Problem: API rate limits**
- Solution: Many databases have rate limits. Review the specific database documentation
- Consider implementing caching or batch requests
**Problem: Authentication errors**
- Solution: Some services require API keys. Check the `SKILL.md` for authentication setup
- Verify your credentials and permissions
**Problem: Outdated examples**
- Solution: Report the issue via GitHub Issues
- Check the official package documentation for updated syntax
---
## ❓ FAQ
### General Questions
**Q: Is this free to use?**
A: Yes! This repository is MIT licensed. However, each individual skill has its own license specified in the `license` metadata field within its `SKILL.md` file—be sure to review and comply with those terms.
**Q: Why are all skills grouped together instead of separate packages?**
A: We believe good science in the age of AI is inherently interdisciplinary. Bundling all skills together makes it trivial for you (and your agent) to bridge across fields—e.g., combining genomics, cheminformatics, clinical data, and machine learning in one workflow—without worrying about which individual skills to install or wire together.
**Q: Can I use this for commercial projects?**
A: The repository itself is MIT licensed, which allows commercial use. However, individual skills may have different licenses—check the `license` field in each skill's `SKILL.md` file to ensure compliance with your intended use.
**Q: Do all skills have the same license?**
A: No. Each skill has its own license specified in the `license` metadata field within its `SKILL.md` file. These licenses may differ from the repository's MIT License. Users are responsible for reviewing and adhering to the license terms of each individual skill they use.
**Q: How often is this updated?**
A: We regularly update skills to reflect the latest versions of packages and APIs. Major updates are announced in release notes.
**Q: Can I use this with other AI models?**
A: The skills follow the open [Agent Skills](https://agentskills.io/) standard and work with any compatible agent, including Cursor, Claude Code, Codex, and Gemini CLI.
### Installation & Setup
**Q: Do I need all the Python packages installed?**
A: No! Only install the packages you need. Each skill specifies its requirements in its `SKILL.md` file.
**Q: What if a skill doesn't work?**
A: First check the [Troubleshooting](#-troubleshooting) section. If the issue persists, file an issue on GitHub with detailed reproduction steps.
**Q: Do the skills work offline?**
A: Database skills require internet access to query APIs. Package skills work offline once Python dependencies are installed.
### Contributing
**Q: Can I contribute my own skills?**
A: Absolutely! We welcome contributions. See the [Contributing](#-contributing) section for guidelines and best practices.
**Q: How do I report bugs or suggest features?**
A: Open an issue on GitHub with a clear description. For bugs, include reproduction steps and expected vs actual behavior.
---
## 💬 Support
Need help? Here's how to get support:
- 📖 **Documentation**: Check the relevant `SKILL.md` and `references/` folders
- 🐛 **Bug Reports**: [Open an issue](https://github.com/K-Dense-AI/claude-scientific-skills/issues)
- 💡 **Feature Requests**: [Submit a feature request](https://github.com/K-Dense-AI/claude-scientific-skills/issues/new)
- 💼 **Enterprise Support**: Contact [K-Dense](https://k-dense.ai/) for commercial support
- 🌐 **Community**: [Join our Slack](https://join.slack.com/t/k-densecommunity/shared_invite/zt-3iajtyls1-EwmkwIZk0g_o74311Tkf5g)
---
## 🎉 Join Our Community!
**We'd love to have you join us!** 🚀
Connect with other scientists, researchers, and AI enthusiasts using AI agents for scientific computing. Share your discoveries, ask questions, get help with your projects, and collaborate with the community!
🌟 **[Join our Slack Community](https://join.slack.com/t/k-densecommunity/shared_invite/zt-3iajtyls1-EwmkwIZk0g_o74311Tkf5g)** 🌟
Whether you're just getting started or you're a power user, our community is here to support you. We share tips, troubleshoot issues together, showcase cool projects, and discuss the latest developments in AI-powered scientific research.
**See you there!** 💬
---
## 📖 Citation
If you use Claude Scientific Skills in your research or project, please cite it as:
### BibTeX
```bibtex
@software{claude_scientific_skills_2026,
author = {{K-Dense Inc.}},
title = {Claude Scientific Skills: A Comprehensive Collection of Scientific Tools for Claude AI},
year = {2026},
url = {https://github.com/K-Dense-AI/claude-scientific-skills},
note = {skills covering databases, packages, integrations, and analysis tools}
}
```
### APA
```
K-Dense Inc. (2026). Claude Scientific Skills: A comprehensive collection of scientific tools for Claude AI [Computer software]. https://github.com/K-Dense-AI/claude-scientific-skills
```
### MLA
```
K-Dense Inc. Claude Scientific Skills: A Comprehensive Collection of Scientific Tools for Claude AI. 2026, github.com/K-Dense-AI/claude-scientific-skills.
```
### Plain Text
```
Claude Scientific Skills by K-Dense Inc. (2026)
Available at: https://github.com/K-Dense-AI/claude-scientific-skills
```
We appreciate acknowledgment in publications, presentations, or projects that benefit from these skills!
---
## 📄 License
This project is licensed under the **MIT License**.
**Copyright © 2026 K-Dense Inc.** ([k-dense.ai](https://k-dense.ai/))
### Key Points:
- ✅ **Free for any use** (commercial and noncommercial)
- ✅ **Open source** - modify, distribute, and use freely
- ✅ **Permissive** - minimal restrictions on reuse
- ⚠️ **No warranty** - provided "as is" without warranty of any kind
See [LICENSE.md](LICENSE.md) for full terms.
### Individual Skill Licenses
> ⚠️ **Important**: Each skill has its own license specified in the `license` metadata field within its `SKILL.md` file. These licenses may differ from the repository's MIT License and may include additional terms or restrictions. **Users are responsible for reviewing and adhering to the license terms of each individual skill they use.**
## Star History
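[![Star History Chart](https://api.star-history.com/svg?repos=K-Dense-AI/claude-scientific-skills&type=date&legend=top-left)](https://www.star-history.com/#K-Dense-AI/claude-scientific-skills&type=date&legend=top-left)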
================================================
FILE: docs/examples.md
================================================
# Real-World Scientific Examples
This document provides comprehensive, practical examples demonstrating how to combine Claude Scientific Skills to solve real scientific problems across multiple domains.
---
## 📋 Table of Contents
1. [Drug Discovery & Medicinal Chemistry](#drug-discovery--medicinal-chemistry)
2. [Cancer Genomics & Precision Medicine](#cancer-genomics--precision-medicine)
3. [Single-Cell Transcriptomics](#single-cell-transcriptomics)
4. [Protein Structure & Function](#protein-structure--function)
5. [Chemical Safety & Toxicology](#chemical-safety--toxicology)
6. [Clinical Trial Analysis](#clinical-trial-analysis)
7. [Metabolomics & Systems Biology](#metabolomics--systems-biology)
8. [Materials Science & Chemistry](#materials-science--chemistry)
9. [Digital Pathology](#digital-pathology)
10. [Lab Automation & Protocol Design](#lab-automation--protocol-design)
11. [Agricultural Genomics](#agricultural-genomics)
12. [Neuroscience & Brain Imaging](#neuroscience--brain-imaging)
13. [Environmental Microbiology](#environmental-microbiology)
14. [Infectious Disease Research](#infectious-disease-research)
15. [Multi-Omics Integration](#multi-omics-integration)
16. [Computational Chemistry & Synthesis](#computational-chemistry--synthesis)
17. [Clinical Research & Real-World Evidence](#clinical-research--real-world-evidence)
18. [Experimental Physics & Data Analysis](#experimental-physics--data-analysis)
19. [Chemical Engineering & Process Optimization](#chemical-engineering--process-optimization)
20. [Scientific Illustration & Visual Communication](#scientific-illustration--visual-communication)
21. [Quantum Computing for Chemistry](#quantum-computing-for-chemistry)
22. [Research Grant Writing](#research-grant-writing)
23. [Flow Cytometry & Immunophenotyping](#flow-cytometry--immunophenotyping)
---
## Drug Discovery & Medicinal Chemistry
### Example 1: Discovery of Novel EGFR Inhibitors for Lung Cancer
**Objective**: Identify novel small molecule inhibitors of EGFR with improved properties compared to existing drugs.
**Skills Used**:
- `chembl-database` - Query bioactivity data
- `pubchem-database` - Search compound libraries
- `rdkit` - Analyze molecular properties
- `datamol` - Generate analogs
- `medchem` - Medicinal chemistry filters
- `molfeat` - Molecular featurization
- `diffdock` - Molecular docking
- `alphafold-database` - Retrieve protein structure
- `pubmed-database` - Literature review
- `cosmic-database` - Query mutations
- `deepchem` - Property prediction
- `torchdrug` - Graph neural networks for molecules
- `scientific-visualization` - Create figures
- `clinical-reports` - Generate PDF reports
**Workflow**:
```bash
# Always use available 'skills' when possible. Keep the output organized.
Step 1: Query ChEMBL for known EGFR inhibitors with high potency
- Search for compounds targeting EGFR (CHEMBL203)
- Filter: IC50 < 50 nM, pChEMBL value > 7
- Extract SMILES strings and activity data
- Export to DataFrame for analysis
Step 2: Analyze structure-activity relationships
- Load compounds into RDKit
- Calculate molecular descriptors (MW, LogP, TPSA, HBD, HBA)
- Generate Morgan fingerprints (radius=2, 2048 bits)
- Perform hierarchical clustering to identify scaffolds
- Visualize top scaffolds with activity annotations
Step 3: Identify resistance mutations from COSMIC
- Query COSMIC for EGFR mutations in lung cancer
- Focus on key resistance mutations (gatekeeper T790M, covalent-binding-site C797S)
- Extract mutation frequencies and clinical significance
- Cross-reference with literature in PubMed
Step 4: Retrieve EGFR structure from AlphaFold
- Download AlphaFold prediction for EGFR kinase domain
- Alternatively, use experimental structure from PDB (if available)
- Prepare structure for docking (add hydrogens, optimize)
Step 5: Generate novel analogs using datamol
- Select top 5 scaffolds from ChEMBL analysis
- Use scaffold decoration to generate 100 analogs per scaffold
- Apply Lipinski's Rule of Five filtering
- Ensure synthetic accessibility (SA score < 4)
- Check for PAINS and unwanted substructures
Step 6: Predict properties with DeepChem
- Train graph convolutional model on ChEMBL EGFR data
- Predict pIC50 for generated analogs
- Predict ADMET properties (solubility, permeability, hERG)
- Rank candidates by predicted potency and drug-likeness
Step 7: Virtual screening with DiffDock
- Perform molecular docking on top 50 candidates
- Dock into wild-type EGFR and T790M mutant
- Calculate binding energies and interaction patterns
- Identify compounds with favorable binding to both forms
Step 8: Search PubChem for commercial availability
- Query PubChem for top 10 candidates by InChI key
- Check supplier information and purchasing options
- Identify close analogs if exact matches unavailable
Step 9: Literature validation with PubMed
- Search for any prior art on top scaffolds
- Query: "[scaffold_name] AND EGFR AND inhibitor"
- Summarize relevant findings and potential liabilities
Step 10: Create comprehensive report
- Generate 2D structure visualizations of top hits
- Create scatter plots: MW vs LogP, TPSA vs potency
- Produce binding pose figures for top 3 compounds
- Generate table comparing properties to approved drugs (gefitinib, erlotinib)
- Write scientific summary with methodology, results, and recommendations
- Export to PDF with proper citations
Expected Output:
- Ranked list of 10-20 novel EGFR inhibitor candidates
- Predicted activity and ADMET properties
- Docking poses and binding analysis
- Comprehensive scientific report with publication-quality figures
```
---
### Example 2: Drug Repurposing for Rare Diseases
**Objective**: Identify FDA-approved drugs that could be repurposed for treating a rare metabolic disorder.
**Skills Used**:
- `drugbank-database` - Query approved drugs
- `opentargets-database` - Target-disease associations
- `string-database` - Protein interactions
- `kegg-database` - Pathway analysis
- `reactome-database` - Pathway enrichment
- `clinicaltrials-database` - Check ongoing trials
- `fda-database` - Drug approvals and safety
- `networkx` - Network analysis
- `bioservices` - Biological database queries
- `literature-review` - Systematic review
- `openalex-database` - Academic literature search
- `biorxiv-database` - Preprint search
**Workflow**:
```bash
Step 1: Define disease pathway
- Query KEGG and Reactome for disease-associated pathways
- Identify key proteins and enzymes involved
- Map upstream and downstream pathway components
Step 2: Find protein-protein interactions
- Query STRING database for interaction partners
- Build protein interaction network around key disease proteins
- Identify hub proteins and bottlenecks using NetworkX
- Calculate centrality metrics (betweenness, closeness)
Step 3: Query Open Targets for druggable targets
- Search for targets associated with disease phenotype
- Filter by clinical precedence and tractability
- Prioritize targets with existing approved drugs
Step 4: Search DrugBank for drugs targeting identified proteins
- Query for approved drugs and their targets
- Filter by mechanism of action relevant to disease
- Retrieve drug properties and safety information
Step 5: Query FDA databases for safety profiles
- Check FDA adverse event database (FAERS)
- Review drug labels and black box warnings
- Assess risk-benefit for rare disease population
Step 6: Search ClinicalTrials.gov for prior repurposing attempts
- Query for disease name + drug names
- Check for failed trials (and reasons for failure)
- Identify ongoing trials that may compete
Step 7: Perform pathway enrichment analysis
- Map drug targets to disease pathways
- Calculate enrichment scores with Reactome
- Identify drugs affecting multiple pathway nodes
Step 8: Conduct systematic literature review
- Search PubMed for drug name + disease associations
- Include bioRxiv for recent unpublished findings
- Document any case reports or off-label use
- Use literature-review skill to generate comprehensive review
Step 9: Prioritize candidates
- Rank by: pathway relevance, safety profile, existing evidence
- Consider factors: oral bioavailability, blood-brain barrier penetration
- Assess commercial viability and patent status
Step 10: Generate repurposing report
- Create network visualization of drug-target-pathway relationships
- Generate comparison table of top 5 candidates
- Write detailed rationale for each candidate
- Include mechanism of action diagrams
- Provide recommendations for preclinical validation
- Format as professional PDF with citations
Expected Output:
- Ranked list of 5-10 repurposing candidates
- Network analysis of drug-target-disease relationships
- Safety and efficacy evidence summary
- Repurposing strategy report with next steps
```
---
## Cancer Genomics & Precision Medicine
### Example 3: Clinical Variant Interpretation Pipeline
**Objective**: Analyze a patient's tumor sequencing data to identify actionable mutations and therapeutic recommendations.
**Skills Used**:
- `pysam` - Parse VCF files
- `ensembl-database` - Variant annotation
- `gget` - Unified gene/protein data retrieval
- `clinvar-database` - Clinical significance
- `cosmic-database` - Somatic mutations
- `gene-database` - Gene information
- `uniprot-database` - Protein impact
- `clinpgx-database` - Pharmacogenomics data
- `drugbank-database` - Drug-gene associations
- `clinicaltrials-database` - Matching trials
- `opentargets-database` - Target validation
- `pubmed-database` - Literature evidence
- `clinical-reports` - Generate clinical report PDF
**Workflow**:
```bash
Step 1: Parse and filter VCF file
- Use pysam to read tumor VCF
- Filter for high-quality variants (QUAL > 30, DP > 20)
- Extract variant positions, alleles, and VAF (variant allele frequency)
- Separate SNVs, indels, and structural variants
Step 2: Annotate variants with Ensembl
- Query Ensembl VEP API for functional consequences
- Classify variants: missense, nonsense, frameshift, splice site
- Extract transcript information and protein changes
- Identify canonical transcripts for each gene
Step 3: Query ClinVar for known pathogenic variants
- Search ClinVar by genomic coordinates
- Extract clinical significance classifications
- Note conflicting interpretations and review status
- Prioritize variants with "Pathogenic" or "Likely Pathogenic" labels
Step 4: Query COSMIC for somatic cancer mutations
- Search COSMIC for each variant
- Extract mutation frequency across cancer types
- Identify hotspot mutations (high recurrence)
- Note drug resistance mutations
Step 5: Retrieve gene information from NCBI Gene
- Get detailed gene descriptions
- Extract associated phenotypes and diseases
- Identify oncogene vs tumor suppressor classification
- Note gene function and biological pathways
Step 6: Assess protein-level impact with UniProt
- Query UniProt for protein domain information
- Map variants to functional domains (kinase domain, binding site)
- Check if variant affects active sites or protein stability
- Retrieve post-translational modification sites
Step 7: Search DrugBank for targetable alterations
- Query for drugs targeting mutated genes
- Filter for FDA-approved and investigational drugs
- Extract mechanism of action and indications
- Prioritize variants with approved targeted therapies
Step 8: Query Open Targets for target-disease associations
- Validate therapeutic hypotheses
- Assess target tractability scores
- Review clinical precedence for each gene-disease pair
Step 9: Search ClinicalTrials.gov for matching trials
- Build query with: cancer type + gene names + variants
- Filter for: recruiting status, phase II/III trials
- Extract trial eligibility criteria
- Note geographic locations and contact information
Step 10: Literature search for clinical evidence
- PubMed query: "[gene] AND [variant] AND [cancer type]"
- Focus on: case reports, clinical outcomes, resistance mechanisms
- Extract relevant prognostic or predictive information
Step 11: Classify variants by actionability
Tier 1: FDA-approved therapy for this variant
Tier 2: Clinical trial available for this variant
Tier 3: Therapy approved for variant in different cancer
Tier 4: Biological evidence but no approved therapy
Step 12: Generate clinical genomics report
- Executive summary of key findings
- Table of actionable variants with evidence levels
- Therapeutic recommendations with supporting evidence
- Clinical trial options with eligibility information
- Prognostic implications based on mutation profile
- References to guidelines (NCCN, ESMO, AMP/ASCO/CAP)
- Generate professional PDF using clinical-reports skill
Expected Output:
- Annotated variant list with clinical significance
- Tiered list of actionable mutations
- Therapeutic recommendations with evidence levels
- Matching clinical trials
- Comprehensive clinical genomics report (PDF)
```
---
### Example 4: Cancer Subtype Classification from Gene Expression
**Objective**: Classify breast cancer subtypes using RNA-seq data and identify subtype-specific therapeutic vulnerabilities.
**Skills Used**:
- `pydeseq2` - Differential expression
- `scanpy` - Clustering and visualization
- `scikit-learn` - Machine learning classification
- `gene-database` - Gene annotation
- `gget` - Gene data retrieval
- `reactome-database` - Pathway analysis
- `opentargets-database` - Drug targets
- `pubmed-database` - Literature validation
- `matplotlib` - Visualization
- `seaborn` - Heatmaps
- `plotly` - Interactive visualization
- `scikit-survival` - Survival analysis
**Workflow**:
```bash
Step 1: Load and preprocess RNA-seq data
- Load count matrix (genes × samples)
- Filter out low-expression genes (mean counts < 10)
- Normalize with DESeq2 size factors
- Apply variance-stabilizing transformation (VST)
Step 2: Classify samples using PAM50 genes
- Query NCBI Gene for PAM50 classifier gene list
- Extract expression values for PAM50 genes
- Train Random Forest classifier on labeled training data
- Predict subtypes: Luminal A, Luminal B, HER2-enriched, Basal-like, Normal-like
- Validate with published markers (ESR1, PGR, ERBB2, MKI67)
Step 3: Perform differential expression for each subtype
- Use PyDESeq2 to compare each subtype vs all others
- Apply multiple testing correction (FDR < 0.05)
- Filter by log2 fold change (|LFC| > 1.5)
- Identify subtype-specific signature genes
Step 4: Annotate differentially expressed genes
- Query NCBI Gene for detailed annotations
- Classify as oncogene, tumor suppressor, or other
- Extract biological process and molecular function terms
Step 5: Pathway enrichment analysis
- Submit gene lists to Reactome API
- Identify enriched pathways for each subtype (p < 0.01)
- Focus on druggable pathways (kinase signaling, metabolism)
- Compare pathway profiles across subtypes
Step 6: Identify therapeutic targets with Open Targets
- Query Open Targets for each upregulated gene
- Filter by tractability score > 5
- Prioritize targets with clinical precedence
- Extract associated drugs and development phase
Step 7: Create comprehensive visualization
- Generate UMAP projection of all samples colored by subtype
- Create heatmap of PAM50 genes across subtypes
- Produce volcano plots for each subtype comparison
- Generate pathway enrichment dot plots
- Create drug target-pathway network diagrams
Step 8: Literature validation
- Search PubMed for each predicted therapeutic target
- Query: "[gene] AND [subtype] AND breast cancer AND therapy"
- Summarize clinical evidence and ongoing trials
- Note any resistance mechanisms reported
Step 9: Generate subtype-specific recommendations
For each subtype:
- List top 5 differentially expressed genes
- Identify enriched biological pathways
- Recommend therapeutic strategies based on vulnerabilities
- Cite supporting evidence from literature
Step 10: Create comprehensive report
- Classification results with confidence scores
- Differential expression tables for each subtype
- Pathway enrichment summaries
- Therapeutic target recommendations
- Publication-quality figures
- Export to PDF with citations
Expected Output:
- Sample classification into molecular subtypes
- Subtype-specific gene signatures
- Pathway enrichment profiles
- Prioritized therapeutic targets for each subtype
- Scientific report with visualizations and recommendations
```
---
## Single-Cell Transcriptomics
### Example 5: Single-Cell Atlas of Tumor Microenvironment
**Objective**: Characterize immune cell populations in tumor microenvironment and identify immunotherapy biomarkers.
**Skills Used**:
- `scanpy` - Single-cell analysis
- `scvi-tools` - Batch correction and integration
- `cellxgene-census` - Reference data
- `gene-database` - Cell type markers
- `gget` - Gene data retrieval
- `anndata` - Data structure
- `arboreto` - Gene regulatory networks
- `pytorch-lightning` - Deep learning
- `matplotlib` - Visualization
- `plotly` - Interactive visualization
- `statistical-analysis` - Hypothesis testing
- `geniml` - Genomic ML embeddings
**Workflow**:
```bash
Step 1: Load and QC 10X Genomics data
- Use Scanpy to read 10X h5 files
- Calculate QC metrics: n_genes, n_counts, pct_mt (percent mitochondrial counts)
- Identify mitochondrial genes (MT- prefix)
- Filter cells: 200 < n_genes < 5000, pct_mt < 20%
- Filter genes: expressed in at least 10 cells
- Document filtering criteria and cell retention rate
Step 2: Normalize and identify highly variable genes
- Normalize to 10,000 counts per cell
- Log-transform data (log1p)
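- Store the full log-normalized matrix in adata.raw (keep the unnormalized counts in a separate layer for count-based models such as scVI)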
- Identify 3,000 highly variable genes
- Regress out technical variation (n_counts, pct_mt)
- Scale to unit variance, clip at 10 standard deviations
Step 3: Integrate with reference atlas using scVI
- Download reference tumor microenvironment data from Cellxgene Census
- Train scVI model on combined dataset for batch correction
- Use scVI latent representation for downstream analysis
- Generate batch-corrected expression matrix
Step 4: Dimensionality reduction and clustering
- Compute neighborhood graph (n_neighbors=15, n_pcs=50)
- Calculate UMAP embedding for visualization
- Perform Leiden clustering at multiple resolutions (0.3, 0.5, 0.8)
- Select optimal resolution based on silhouette score
Step 5: Identify cell type markers
- Run differential expression for each cluster (Wilcoxon test)
- Calculate marker scores (log fold change, p-value, pct expressed)
- Query NCBI Gene for canonical immune cell markers:
* T cells: CD3D, CD3E, CD4, CD8A
* B cells: CD19, MS4A1 (CD20), CD79A
* Myeloid: CD14, CD68, CD163
* NK cells: NKG7, GNLY, NCAM1
* Dendritic: CD1C, CLEC9A, LILRA4
Step 6: Annotate cell types
- Assign cell type labels based on marker expression
- Refine annotations with CellTypist or manual curation
- Identify T cell subtypes: CD4+, CD8+, Tregs, exhausted T cells
- Characterize myeloid cells: M1/M2 macrophages, dendritic cells
- Create cell type proportion tables by sample/condition
Step 7: Identify tumor-specific features
- Compare tumor samples vs normal tissue (if available)
- Identify expanded T cell clones (high proliferation markers)
- Detect exhausted T cells (PDCD1, CTLA4, LAG3, HAVCR2)
- Characterize immunosuppressive populations (Tregs, M2 macrophages)
Step 8: Gene regulatory network inference
- Use Arboreto/GRNBoost2 on each major cell type
- Identify transcription factors driving cell states
- Focus on exhaustion TFs: TOX, TCF7, EOMES
- Build regulatory networks for visualization
Step 9: Statistical analysis of cell proportions
- Calculate cell type frequencies per sample
- Test for significant differences between groups (responders vs non-responders)
- Use statistical-analysis skill for appropriate tests (t-test, Mann-Whitney)
- Calculate effect sizes and confidence intervals
Step 10: Biomarker discovery for immunotherapy response
- Correlate cell type abundances with clinical response
- Identify gene signatures associated with response
- Test signatures: T cell exhaustion, antigen presentation, inflammation
- Validate with published immunotherapy response signatures
Step 11: Create comprehensive visualizations
- UMAP plots colored by: cell type, sample, treatment, key genes
- Dot plots of canonical markers across cell types
- Cell type proportion bar plots by condition
- Heatmap of top differentially expressed genes per cell type
- Gene regulatory network diagrams
- Volcano plots for differentially abundant cell types
Step 12: Generate scientific report
- Methods: QC, normalization, batch correction, clustering
- Results: Cell type composition, differential abundance, markers
- Biomarker analysis: Predictive signatures and validation
- High-quality figures suitable for publication
- Export processed h5ad file and PDF report
Expected Output:
- Annotated single-cell atlas with cell type labels
- Cell type composition analysis
- Biomarker signatures for immunotherapy response
- Gene regulatory networks for key cell states
- Comprehensive report with publication-quality figures
```
---
## Protein Structure & Function
### Example 6: Structure-Based Design of Protein-Protein Interaction Inhibitors
**Objective**: Design small molecules to disrupt a therapeutically relevant protein-protein interaction.
**Skills Used**:
- `alphafold-database` - Protein structures
- `pdb-database` - Experimental structures
- `uniprot-database` - Protein information
- `biopython` - Structure analysis
- `esm` - Protein language models and embeddings
- `rdkit` - Chemical library generation
- `datamol` - Molecule manipulation
- `diffdock` - Molecular docking
- `zinc-database` - Screening library
- `deepchem` - Property prediction
- `scientific-visualization` - Structure visualization
- `medchem` - Medicinal chemistry filters
**Workflow**:
```bash
Step 1: Retrieve protein structures
- Query AlphaFold Database for both proteins in the interaction
- Download PDB files and confidence scores
- If available, get experimental structures from PDB database
- Compare AlphaFold predictions with experimental structures (if any)
Step 2: Analyze protein interaction interface
- Load structures with BioPython
- Identify interface residues (distance < 5Å between proteins)
- Calculate interface area and binding energy contribution
- Identify hot spot residues (key for binding)
- Map to UniProt to get functional annotations
Step 3: Characterize binding pocket
- Identify cavities at the protein-protein interface
- Calculate pocket volume and surface area
- Assess druggability: depth, hydrophobicity, shape
- Identify hydrogen bond donors/acceptors
- Note any known allosteric sites
Step 4: Query UniProt for known modulators
- Search UniProt for both proteins
- Extract information on known inhibitors or modulators
- Review PTMs that affect interaction
- Check disease-associated mutations in interface
Step 5: Search ZINC15 for fragment library
- Query ZINC for fragments matching pocket criteria:
* Molecular weight: 150-300 Da
* LogP: 0-3 (appropriate for PPI inhibitors)
* Exclude PAINS and aggregators
- Download 1,000-5,000 fragment SMILES
Step 6: Virtual screening with fragment library
- Use DiffDock to dock fragments into interface pocket
- Rank by predicted binding affinity
- Identify fragments binding to hot spot residues
- Select top 50 fragments for elaboration
Step 7: Fragment elaboration with RDKit
- For each fragment hit, generate elaborated molecules:
* Add substituents to core scaffold
* Merge fragments binding to adjacent pockets
* Apply medicinal chemistry filters
- Generate 20-50 analogs per fragment
- Filter by Lipinski's Ro5 and PPI-specific rules (MW 400-700)
Step 8: Second round of virtual screening
- Dock elaborated molecules with DiffDock
- Calculate binding energies and interaction patterns
- Prioritize molecules with:
* Strong binding to hot spot residues
* Multiple H-bonds and hydrophobic contacts
* Favorable predicted ΔG
Step 9: Predict ADMET properties with DeepChem
- Train models on ChEMBL data
- Predict: solubility, permeability, hERG liability
- Filter for drug-like properties
- Rank by overall score (affinity + ADMET)
Step 10: Literature and patent search
- PubMed: "[protein A] AND [protein B] AND inhibitor"
- USPTO: Check for prior art on top scaffolds
- Assess freedom to operate
- Identify any reported PPI inhibitors for this target
Step 11: Prepare molecules for synthesis
- Assess synthetic accessibility (SA score < 4)
- Identify commercial building blocks
- Propose synthetic routes for top 10 candidates
- Calculate estimated synthesis cost
Step 12: Generate comprehensive design report
- Interface analysis with hot spot identification
- Fragment screening results
- Top 10 designed molecules with predicted properties
- Docking poses and interaction diagrams
- Synthetic accessibility assessment
- Comparison to known PPI inhibitors
- Recommendations for experimental validation
- Publication-quality figures and PDF report
Expected Output:
- Interface characterization and hot spot analysis
- Ranked library of designed PPI inhibitors
- Predicted binding modes and affinities
- ADMET property predictions
- Synthetic accessibility assessment
- Comprehensive drug design report
```
---
## Chemical Safety & Toxicology
### Example 7: Predictive Toxicology Assessment
**Objective**: Assess potential toxicity and safety liabilities of drug candidates before synthesis.
**Skills Used**:
- `rdkit` - Molecular descriptors
- `medchem` - Toxicophore detection
- `deepchem` - Toxicity prediction
- `pytdc` - Therapeutics data commons
- `chembl-database` - Toxicity data
- `pubchem-database` - Bioassay data
- `drugbank-database` - Known drug toxicities
- `fda-database` - Adverse events
- `hmdb-database` - Metabolite prediction
- `scikit-learn` - Classification models
- `shap` - Model interpretability
- `clinical-reports` - Safety assessment reports
**Workflow**:
```bash
Step 1: Calculate molecular descriptors
- Load candidate molecules with RDKit
- Calculate physicochemical properties:
* MW, LogP, TPSA, rotatable bonds, H-bond donors/acceptors
* Aromatic rings, sp3 fraction, formal charge
- Calculate structural alerts:
* PAINS patterns
* Toxic functional groups (nitroaromatics, epoxides, etc.)
* Genotoxic alerts (Ames mutagenicity)
Step 2: Screen for known toxicophores
- Search for structural alerts using SMARTS patterns:
* Michael acceptors
* Aldehyde/ketone reactivity
* Quinones and quinone-like structures
* Thioureas and isocyanates
- Flag molecules with high-risk substructures
Step 3: Query ChEMBL for similar compounds with toxicity data
- Perform similarity search (Tanimoto > 0.7)
- Extract toxicity assay results:
* Cytotoxicity (IC50 values)
* Hepatotoxicity markers
* Cardiotoxicity (hERG inhibition)
* Genotoxicity (Ames test results)
- Analyze structure-toxicity relationships
Step 4: Search PubChem BioAssays for toxicity screening
- Query relevant assays:
* Tox21 panel (cell viability, stress response, genotoxicity)
* Liver toxicity assays
* hERG channel inhibition
- Extract activity data for similar compounds
- Calculate hit rates for concerning assays
Step 5: Train toxicity prediction models with DeepChem
- Load Tox21 dataset from DeepChem
- Train graph convolutional models for:
* Nuclear receptor signaling
* Stress response pathways
* Genotoxicity endpoints
- Validate models with cross-validation
- Predict toxicity for candidate molecules
Step 6: Predict hERG cardiotoxicity liability
- Train DeepChem model on hERG inhibition data from ChEMBL
- Predict IC50 for hERG channel
- Flag compounds with predicted IC50 < 10 μM
- Identify structural features associated with hERG liability
Step 7: Predict hepatotoxicity risk
- Train models on DILI (drug-induced liver injury) datasets
- Extract features: reactive metabolites, mitochondrial toxicity
- Predict hepatotoxicity risk class (low/medium/high)
- Use SHAP values to explain predictions
Step 8: Predict metabolic stability and metabolites
- Identify sites of metabolism using RDKit SMARTS patterns
- Predict CYP450 interactions
- Query HMDB for potential metabolite structures
- Assess if metabolites contain toxic substructures
- Predict metabolic stability (half-life)
Step 9: Check FDA adverse event database
- Query FAERS for approved drugs similar to candidates
- Extract common adverse events
- Identify target organ toxicities
- Calculate reporting odds ratios for serious events
Step 10: Literature review of toxicity mechanisms
- PubMed search: "[scaffold] AND (toxicity OR hepatotoxicity OR cardiotoxicity)"
- Identify mechanistic studies on similar compounds
- Note any case reports of adverse events
- Review preclinical and clinical safety data
Step 11: Assess ADME liabilities
- Predict solubility, permeability, plasma protein binding
- Identify potential drug-drug interaction risks
- Assess blood-brain barrier penetration (desired for CNS drugs, a liability for non-CNS drugs)
- Evaluate metabolic stability
Step 12: Generate safety assessment report
- Executive summary of safety profile for each candidate
- Red flags: structural alerts, predicted toxicities
- Yellow flags: moderate concerns requiring testing
- Green light: acceptable predicted safety profile
- Comparison table of all candidates
- Recommendations for risk mitigation:
* Structural modifications to reduce toxicity
* Priority in vitro assays to run
* Preclinical study design recommendations
- Comprehensive PDF report with:
* Toxicophore analysis
* Prediction model results with confidence
* SHAP interpretation plots
* Literature evidence
* Risk assessment matrix
Expected Output:
- Toxicity predictions for all candidates
- Structural alert analysis
- hERG, hepatotoxicity, and genotoxicity risk scores
- Metabolite predictions
- Prioritized list with safety rankings
- Comprehensive toxicology assessment report
```
---
## Clinical Trial Analysis
### Example 8: Competitive Landscape Analysis for New Indication
**Objective**: Analyze the clinical trial landscape for a specific indication to inform development strategy.
**Skills Used**:
- `clinicaltrials-database` - Trial registry
- `fda-database` - Drug approvals
- `pubmed-database` - Published results
- `openalex-database` - Academic literature
- `drugbank-database` - Approved drugs
- `opentargets-database` - Target validation
- `polars` - Data manipulation
- `matplotlib` - Visualization
- `seaborn` - Statistical plots
- `plotly` - Interactive plots
- `clinical-reports` - Report generation
- `market-research-reports` - Competitive intelligence
**Workflow**:
```bash
Step 1: Search ClinicalTrials.gov for all trials in indication
- Query: "[disease/indication]"
- Filter: All phases, all statuses
- Extract fields:
* NCT ID, title, phase, status
* Start date, completion date, enrollment
* Intervention/drug names
* Primary/secondary outcomes
* Sponsor and collaborators
- Export to structured JSON/CSV
Step 2: Categorize trials by mechanism of action
- Extract drug names and intervention types
- Query DrugBank for mechanism of action
- Query Open Targets for target information
- Classify into categories:
* Small molecules vs biologics
* Target class (kinase inhibitor, antibody, etc.)
* Novel vs repurposing
Step 3: Analyze trial phase progression
- Calculate success rates by phase (I → II, II → III)
- Identify terminated trials and reasons for termination
- Track time from phase I start to NDA submission
- Calculate median development timelines
Step 4: Search FDA database for recent approvals
- Query FDA drug approvals in the indication (last 10 years)
- Extract approval dates, indications, priority review status
- Note any accelerated approvals or breakthrough therapy designations
- Review FDA drug labels for safety information
Step 5: Extract outcome measures
- Compile all primary endpoints used
- Identify most common endpoints:
* Survival (OS, PFS, DFS)
* Response rates (ORR, CR, PR)
* Biomarker endpoints
* Patient-reported outcomes
- Note emerging or novel endpoints
Step 6: Analyze competitive dynamics
- Identify leading companies and their pipelines
- Map trials by phase for each major competitor
- Note partnership and licensing deals
- Assess crowded vs underserved patient segments
Step 7: Search PubMed for published trial results
- Query: "[NCT ID]" for each completed trial
- Extract published outcomes and conclusions
- Identify trends in efficacy and safety
- Note any unmet needs highlighted in discussions
Step 8: Analyze target validation evidence
- Query Open Targets for target-disease associations
- Extract genetic evidence scores
- Review tractability assessments
- Compare targets being pursued across trials
Step 9: Identify unmet needs and opportunities
- Analyze trial failures for common patterns
- Identify patient populations excluded from trials
- Note resistance mechanisms or limitations mentioned
- Assess gaps in current therapeutic approaches
Step 10: Perform temporal trend analysis
- Plot trial starts over time (by phase, mechanism)
- Identify increasing or decreasing interest in targets
- Correlate with publication trends and scientific advances
- Predict future trends in the space
Step 11: Create comprehensive visualizations
- Timeline of all trials (Gantt chart style)
- Phase distribution pie chart
- Mechanism of action breakdown
- Geographic distribution of trials
- Enrollment trends over time
- Success rate funnels (Phase I → II → III → Approval)
- Sponsor/company market share
Step 12: Generate competitive intelligence report
- Executive summary of competitive landscape
- Total number of active programs by phase
- Key players and their development stage
- Standard of care and approved therapies
- Emerging approaches and novel targets
- Identified opportunities and white space
- Risk analysis (crowded targets, high failure rates)
- Strategic recommendations:
* Patient population to target
* Differentiation strategies
* Partnership opportunities
* Regulatory pathway considerations
- Export as professional PDF with citations and data tables using clinical-reports skill
Expected Output:
- Comprehensive trial database for indication
- Success rate and timeline statistics
- Competitive landscape mapping
- Unmet need analysis
- Strategic recommendations
- Publication-ready report with visualizations
```
---
## Metabolomics & Systems Biology
### Example 9: Multi-Omics Integration for Metabolic Disease
**Objective**: Integrate transcriptomics, proteomics, and metabolomics to identify dysregulated pathways in metabolic disease.
**Skills Used**:
- `pydeseq2` - RNA-seq analysis
- `pyopenms` - Mass spectrometry
- `matchms` - Mass spectra matching
- `hmdb-database` - Metabolite identification
- `metabolomics-workbench-database` - Public datasets
- `kegg-database` - Pathway mapping
- `reactome-database` - Pathway analysis
- `string-database` - Protein interactions
- `cobrapy` - Constraint-based metabolic modeling
- `statsmodels` - Multi-omics correlation
- `networkx` - Network analysis
- `pymc` - Bayesian modeling
- `plotly` - Interactive network visualization
**Workflow**:
```bash
Step 1: Process RNA-seq data
- Load gene count matrix
- Run differential expression with PyDESeq2
- Compare disease vs control (adjusted p < 0.05, |LFC| > 1)
- Extract gene symbols and fold changes
- Map to KEGG gene IDs
Step 2: Process proteomics data
- Load LC-MS/MS results with PyOpenMS
- Perform peptide identification and quantification
- Normalize protein abundances
- Run statistical testing (t-test or limma)
- Extract significant proteins (p < 0.05, |FC| > 1.5)
Step 3: Process metabolomics data
- Load untargeted metabolomics data (mzML format) with PyOpenMS
- Perform peak detection and alignment
- Match features to HMDB database by accurate mass
- Annotate metabolites with MS/MS fragmentation
- Extract putative identifications (Level 2/3)
- Perform statistical analysis (FDR < 0.05, |FC| > 2)
Step 4: Search Metabolomics Workbench for public data
- Query for same disease or tissue type
- Download relevant studies
- Reprocess for consistency with own data
- Use as validation cohort
Step 5: Map all features to KEGG pathways
- Map genes to KEGG orthology (KO) terms
- Map proteins to KEGG identifiers
- Map metabolites to KEGG compound IDs
- Identify pathways with multi-omics coverage
Step 6: Perform pathway enrichment analysis
- Test for enrichment in KEGG pathways
- Test for enrichment in Reactome pathways
- Apply Fisher's exact test with multiple testing correction
- Focus on pathways with hits in ≥2 omics layers
Step 7: Build protein-metabolite networks
- Query STRING for protein-protein interactions
- Map proteins to KEGG reactions
- Connect enzymes to their substrates/products
- Build integrated network with genes → proteins → metabolites
Step 8: Network topology analysis with NetworkX
- Calculate node centrality (degree, betweenness)
- Identify hub metabolites and key enzymes
- Find bottleneck reactions
- Detect network modules with community detection
- Identify dysregulated subnetworks
Step 9: Correlation analysis across omics layers
- Calculate Spearman correlations between:
* Gene expression and protein abundance
* Protein abundance and metabolite levels
* Gene expression and metabolites (for enzyme-product pairs)
- Use statsmodels for significance testing
- Focus on enzyme-metabolite pairs with expected relationships
Step 10: Bayesian network modeling with PyMC
- Build probabilistic graphical model of pathway
- Model causal relationships: gene → protein → metabolite
- Incorporate prior knowledge from KEGG/Reactome
- Perform inference to identify key regulatory nodes
- Estimate effect sizes and uncertainties
Step 11: Identify therapeutic targets
- Prioritize enzymes with:
* Significant changes in all three omics layers
* High network centrality
* Druggable target class (kinases, transporters, etc.)
- Query DrugBank for existing inhibitors
- Search PubMed for validation in disease models
Step 12: Create comprehensive multi-omics report
- Summary statistics for each omics layer
- Venn diagram of overlapping pathway hits
- Pathway enrichment dot plots
- Integrated network visualization (color by fold change)
- Correlation heatmaps (enzyme-metabolite pairs)
- Bayesian network structure
- Table of prioritized therapeutic targets
- Biological interpretation and mechanistic insights
- Generate publication-quality figures
- Export PDF report with all results
Expected Output:
- Integrated multi-omics dataset
- Dysregulated pathway identification
- Multi-omics network model
- Prioritized list of therapeutic targets
- Comprehensive systems biology report
```
---
## Materials Science & Chemistry
### Example 10: High-Throughput Materials Discovery for Battery Applications
**Objective**: Discover novel solid electrolyte materials for lithium-ion batteries using computational screening.
**Skills Used**:
- `pymatgen` - Materials analysis and feature engineering
- `scikit-learn` - Machine learning
- `pymoo` - Multi-objective optimization
- `sympy` - Symbolic math
- `vaex` - Large dataset handling
- `dask` - Parallel computing
- `matplotlib` - Visualization
- `plotly` - Interactive visualization
- `scientific-writing` - Report generation
- `scientific-visualization` - Publication figures
**Workflow**:
```bash
Step 1: Generate candidate materials library
- Use Pymatgen to enumerate compositions:
* Li-containing compounds (Li₁₋ₓM₁₊ₓX₂)
* M = transition metals (Zr, Ti, Ta, Nb)
* X = O, S, Se
- Generate ~10,000 candidate compositions
- Apply charge neutrality constraints
Step 2: Filter by thermodynamic stability
- Query Materials Project database via Pymatgen
- Calculate formation energy from elements
- Calculate energy above convex hull (E_hull)
- Filter: E_hull < 50 meV/atom (likely stable)
- Retain ~2,000 thermodynamically plausible compounds
Step 3: Predict crystal structures
- Use Pymatgen structure predictor
- Generate most likely crystal structures for each composition
- Consider common structure types: LISICON, NASICON, garnet, perovskite
- Calculate structural descriptors
Step 4: Calculate material properties with Pymatgen
- Lattice parameters and volume
- Density
- Packing fraction
- Ionic radii and bond lengths
- Coordination environments
Step 5: Feature engineering with Pymatgen
- Calculate compositional features using Pymatgen's featurizers:
* Elemental property statistics (electronegativity, ionic radius)
* Valence electron concentrations
* Stoichiometric attributes
- Calculate structural features:
* Pore size distribution
* Site disorder parameters
* Partial radial distribution functions
Step 6: Build ML models for Li⁺ conductivity prediction
- Collect training data from literature (experimental conductivities)
- Train ensemble models with scikit-learn:
* Random Forest
* Gradient Boosting
* Neural Network
- Use 5-fold cross-validation
- Predict ionic conductivity for all candidates
Step 7: Predict additional properties
- Electrochemical stability window (ML model)
- Mechanical properties (bulk modulus, shear modulus)
- Interfacial resistance (estimate from structure)
- Synthesis temperature (ML prediction from similar compounds)
Step 8: Multi-objective optimization with PyMOO
- Define optimization objectives:
* Maximize: ionic conductivity (>10⁻³ S/cm target)
* Maximize: electrochemical window (>4.5V target)
* Minimize: synthesis temperature (<800°C preferred)
* Minimize: cost (based on elemental abundance)
- Run NSGA-II to find Pareto optimal solutions
- Extract top 50 candidates from Pareto front
Step 9: Analyze Pareto optimal materials
- Identify composition trends (which elements appear frequently)
- Analyze structure-property relationships
- Calculate trade-offs between objectives
- Identify "sweet spot" compositions
Step 10: Validate predictions with DFT calculations
- Select top 10 candidates for detailed study
- Set up DFT calculations using Pymatgen's interface
- Calculate:
* Accurate formation energies
* Li⁺ migration barriers (NEB calculations)
* Electronic band gap
* Elastic constants
- Compare DFT results with ML predictions
Step 11: Literature and patent search
- Search for prior art on top candidates
- PubMed and Google Scholar: "[composition] AND electrolyte"
- USPTO: Check for existing patents on similar compositions
- Identify any experimental reports on related materials
Step 12: Generate materials discovery report
- Summary of screening workflow and statistics
- Pareto front visualization (conductivity vs stability vs cost)
- Structure visualization of top candidates
- Property comparison table
- Composition-property trend analysis
- DFT validation results
- Predicted performance vs state-of-art materials
- Synthesis recommendations
- IP landscape summary
- Prioritized list of 5-10 materials for experimental validation
- Export as publication-ready PDF
Expected Output:
- Screened library of 10,000+ materials
- ML models for property prediction
- Pareto-optimal set of 50 candidates
- Detailed analysis of top 10 materials
- DFT validation results
- Comprehensive materials discovery report
```
---
## Digital Pathology
### Example 11: Automated Tumor Detection in Whole Slide Images
**Objective**: Develop and validate a deep learning model for automated tumor detection in histopathology images.
**Skills Used**:
- `histolab` - Whole slide image processing
- `pathml` - Computational pathology
- `pytorch-lightning` - Deep learning and image models
- `scikit-learn` - Model evaluation
- `pydicom` - DICOM handling
- `omero-integration` - Image management
- `matplotlib` - Visualization
- `plotly` - Interactive visualization
- `shap` - Model interpretability
- `clinical-reports` - Clinical validation reports
**Workflow**:
```bash
Step 1: Load whole slide images with HistoLab
- Load WSI files (SVS, TIFF formats)
- Extract slide metadata and magnification levels
- Visualize slide thumbnails
- Inspect tissue area vs background
Step 2: Tile extraction and preprocessing
- Use HistoLab to extract tiles (256×256 pixels at 20× magnification)
- Filter tiles:
* Remove background tiles (keep only tiles with tissue percentage > 80%)
* Apply color normalization (Macenko or Reinhard method)
* Filter out artifacts and bubbles
- Extract ~100,000 tiles in total across all slides
Step 3: Create annotations (if training from scratch)
- Load pathologist annotations (if available via OMERO)
- Convert annotations to tile-level labels
- Categories: tumor, stroma, necrosis, normal
- Balance classes through stratified sampling
Step 4: Set up PathML pipeline
- Create PathML SlideData objects
- Define preprocessing pipeline:
* Stain normalization
* Color augmentation (HSV jitter)
* Rotation and flipping
- Split data: 70% train, 15% validation, 15% test
Step 5: Build deep learning model with PyTorch Lightning
- Architecture: ResNet50 or EfficientNet backbone
- Add custom classification head for tissue types
- Define training pipeline:
* Loss function: Cross-entropy or Focal loss
* Optimizer: Adam with learning rate scheduling
* Augmentations: rotation, flip, color jitter, elastic deformation
* Batch size: 32
* Mixed precision training
Step 6: Train model
- Train on tile-level labels
- Monitor metrics: accuracy, F1 score, AUC
- Use early stopping on validation loss
- Save best model checkpoint
- Training time: ~6-12 hours on GPU
Step 7: Evaluate model performance
- Test on held-out test set
- Calculate metrics with scikit-learn:
* Accuracy, precision, recall, F1 per class
* Confusion matrix
* ROC curves and AUC
- Compute confidence intervals with bootstrapping
Step 8: Slide-level aggregation
- Apply model to all tiles in each test slide
- Aggregate predictions:
* Majority voting
* Weighted average by confidence
* Spatial smoothing with convolution
- Generate probability heatmaps overlaid on WSI
Step 9: Model interpretability with SHAP
- Apply GradCAM or SHAP to explain predictions
- Visualize which regions contribute to tumor classification
- Generate attention maps showing model focus
- Validate that model attends to relevant histological features
Step 10: Clinical validation
- Compare model predictions with pathologist diagnosis
- Calculate inter-rater agreement (kappa score)
- Identify discordant cases for review
- Analyze error types: false positives, false negatives
Step 11: Integration with OMERO
- Upload processed slides and heatmaps to OMERO server
- Attach model predictions as slide metadata
- Enable pathologist review interface
- Store annotations and corrections for model retraining
Step 12: Generate clinical validation report
- Model architecture and training details
- Performance metrics with confidence intervals
- Slide-level accuracy vs pathologist ground truth
- Heatmap visualizations for representative cases
- Analysis of failure modes
- Comparison with published methods
- Discussion of clinical applicability
- Recommendations for deployment and monitoring
- Export PDF report for regulatory submission (if needed)
Expected Output:
- Trained deep learning model for tumor detection
- Tile-level and slide-level predictions
- Probability heatmaps for visualization
- Performance metrics and validation results
- Model interpretation visualizations
- Clinical validation report
```
---
## Lab Automation & Protocol Design
### Example 12: Automated High-Throughput Screening Protocol
**Objective**: Design and execute an automated compound screening workflow using liquid handling robots.
**Skills Used**:
- `pylabrobot` - Lab automation
- `opentrons-integration` - Opentrons protocol
- `benchling-integration` - Sample tracking
- `labarchive-integration` - Electronic lab notebook
- `protocolsio-integration` - Protocol documentation
- `simpy` - Process simulation
- `polars` - Data processing
- `matplotlib` - Plate visualization
- `plotly` - Interactive plate heatmaps
- `rdkit` - PAINS filtering for hits
- `clinical-reports` - Screening report generation
**Workflow**:
```bash
Step 1: Define screening campaign in Benchling
- Create compound library in Benchling registry
- Register all compounds with structure, concentration, location
- Define plate layouts (384-well format)
- Track compound source plates in inventory
- Set up ELN entry for campaign documentation
Step 2: Design assay protocol
- Define assay steps:
* Dispense cells (5000 cells/well)
* Add compounds (dose-response curve, 10 concentrations)
* Incubate 48 hours at 37°C
* Add detection reagent (cell viability assay)
* Read luminescence signal
- Calculate required reagent volumes
- Document protocol in Protocols.io
- Share with team for review
Step 3: Simulate workflow with SimPy
- Model liquid handler, incubator, plate reader as resources
- Simulate timing for 20 plates (7,680 wells)
- Identify bottlenecks (plate reader reads take 5 min/plate)
- Optimize scheduling: stagger plate processing
- Validate that throughput goal is achievable (20 plates/day)
Step 4: Design plate layout
- Use PyLabRobot to generate plate maps:
* Columns 1-2: negative controls (DMSO vehicle, 100% viability reference)
* Columns 3-22: compound titrations (10 concentrations in duplicate)
* Columns 23-24: positive controls (cytotoxic reference compound)
- Randomize compound positions across plates
- Account for edge effects (avoid outer wells for samples)
- Export plate maps to CSV
Step 5: Create Opentrons protocol for cell seeding
- Write Python protocol using the Opentrons Python Protocol API v2
- Steps:
* Aspirate cells from reservoir
* Dispense 40 μL cell suspension per well
* Tips: use P300 multi-channel for speed
* Include mixing steps to prevent settling
- Simulate protocol in Opentrons app
- Test on one plate before full run
Step 6: Create Opentrons protocol for compound addition
- Acoustic liquid handler (Echo) or pin tool for nanoliter transfers
- If using Opentrons:
* Source: 384-well compound plates
* Transfer the equivalent of 100 nL compound (in DMSO) to assay plates via an intermediate dilution (the P20's minimum volume is 1 μL)
* Use P20 for precision
* Prepare serial dilutions on deck if needed
- Account for DMSO normalization (1% final)
Step 7: Integrate with Benchling for sample tracking
- Use Benchling API to:
* Retrieve compound information (structure, batch, concentration)
* Log plate creation in inventory
* Create transfer records for audit trail
* Link assay plates to ELN entry
Step 8: Execute automated workflow
- Day 1: Seed cells with Opentrons
- Day 1 (4h later): Add compounds with Opentrons
- Day 3: Add detection reagent (manual or automated)
- Day 3 (2h later): Read plates on plate reader
- Keep assay plates in the 37°C incubator between steps (cells must remain viable through the 48-hour incubation)
Step 9: Collect and process data
- Export raw luminescence data from plate reader
- Load data with Polars for fast processing
- Normalize data:
* Subtract background (media-only wells)
* Calculate % viability relative to DMSO control
* Apply plate-wise normalization to correct systematic effects
- Quality control:
* Z' factor calculation (> 0.5 for acceptable assay)
* Coefficient of variation for controls (< 10%)
* Flag plates with poor QC metrics
Step 10: Dose-response curve fitting
- Fit 4-parameter logistic curves for each compound
- Calculate IC50, Hill slope, max/min response
- Use scipy (e.g., `scipy.optimize.curve_fit`) for nonlinear curve fitting
- Compute 95% confidence intervals
- Flag compounds with poor curve fits (R² < 0.8)
Step 11: Hit identification and triage
- Define hit criteria:
* IC50 < 10 μM
* Max inhibition > 50%
* Curve quality: R² > 0.8
- Prioritize hits by potency
- Check for PAINS patterns with RDKit
- Cross-reference with known aggregators/frequent hitters
Step 12: Visualize results and generate report
- Create plate heatmaps showing % viability
- Dose-response curve plots for hits
- Scatter plot: potency vs max effect
- QC metric summary across plates
- Structure visualization of top 20 hits
- Generate campaign summary report:
* Screening statistics (compounds tested, hit rate)
* QC metrics and data quality assessment
* Hit list with structures and IC50 values
* Protocol documentation from Protocols.io
* Raw data files and analysis code
* Recommendations for confirmation assays
- Update Benchling ELN with results
- Export PDF report for stakeholders
Expected Output:
- Automated screening protocols (Opentrons Python files)
- Executed screen of 384-well plates
- Quality-controlled dose-response data
- Hit list with IC50 values
- Comprehensive screening report
```
---
## Agricultural Genomics
### Example 13: GWAS for Crop Yield Improvement
**Objective**: Identify genetic markers associated with drought tolerance and yield in a crop species.
**Skills Used**:
- `biopython` - Sequence analysis
- `pysam` - VCF processing
- `gwas-database` - Public GWAS data
- `ensembl-database` - Plant genomics
- `gene-database` - Gene annotation
- `gget` - Gene data retrieval
- `scanpy` - Population structure analysis
- `scikit-learn` - PCA and clustering
- `statsmodels` - Association testing
- `statistical-analysis` - Hypothesis testing
- `matplotlib` - Manhattan plots
- `seaborn` - Visualization
- `plotly` - Interactive visualizations
**Workflow**:
```bash
Step 1: Load and QC genotype data
- Load VCF file with pysam
- Filter variants:
* Call rate > 95%
* Minor allele frequency (MAF) > 5%
* Hardy-Weinberg equilibrium p > 1e-6
- Convert to numeric genotype matrix (0, 1, 2)
- Retain ~500,000 SNPs after QC
Step 2: Assess population structure
- Calculate genetic relationship matrix
- Perform PCA with scikit-learn (use top 10 PCs)
- Visualize population structure (PC1 vs PC2)
- Identify distinct subpopulations or admixture
- Note: will use PCs as covariates in GWAS
Step 3: Load and process phenotype data
- Drought tolerance score (1-10 scale, measured under stress)
- Grain yield (kg/hectare)
- Days to flowering
- Plant height
- Quality control:
* Remove outliers (> 3 SD from mean)
* Transform if needed (log or rank-based for skewed traits)
* Adjust for environmental covariates (field, year)
Step 4: Calculate kinship matrix
- Compute genetic relatedness matrix
- Account for population structure and relatedness
- Will use in mixed linear model to control for confounding
Step 5: Run genome-wide association study
- For each phenotype, test association with each SNP
- Use mixed linear model (MLM) in statsmodels:
* Fixed effects: SNP genotype, PCs (top 10)
* Random effects: kinship matrix
* Significance threshold: Bonferroni-corrected p < 1e-7 (0.05 / ~500,000 SNPs; 5e-8 is the conventional human genome-wide threshold)
- Multiple testing correction: Bonferroni or FDR
- Calculate genomic inflation factor (λ) to check for inflation
Step 6: Identify significant associations
- Extract SNPs passing significance threshold
- Determine lead SNPs (most significant in each locus)
- Define loci: extend ±500 kb around lead SNP
- Identify independent associations via conditional analysis
Step 7: Annotate significant loci
- Map SNPs to genes using Ensembl Plants API
- Identify genic vs intergenic SNPs
- For genic SNPs:
* Determine consequence (missense, synonymous, intronic, UTR)
* Extract gene names and descriptions
- Query NCBI Gene for gene function
- Prioritize genes with known roles in stress response or development
Step 8: Search GWAS Catalog for prior reports
- Query GWAS Catalog for similar traits in same or related species
- Check for replication of known loci
- Identify novel vs known associations
Step 9: Functional enrichment analysis
- Extract all genes within significant loci
- Perform GO enrichment analysis
- Test for enrichment in KEGG pathways
- Focus on pathways related to:
* Drought stress response (ABA signaling, osmotic adjustment)
* Photosynthesis and carbon fixation
* Root development
Step 10: Estimate SNP heritability and genetic architecture
- Calculate variance explained by significant SNPs
- Estimate SNP-based heritability (proportion of variance explained)
- Assess genetic architecture: few large-effect vs many small-effect loci
Step 11: Build genomic prediction model
- Train genomic selection model with scikit-learn:
* Ridge regression (GBLUP equivalent)
* Elastic net
* Random Forest
- Use all SNPs (not just significant ones)
- Cross-validate to predict breeding values
- Assess prediction accuracy
Step 12: Generate GWAS report
- Manhattan plots for each trait
- QQ plots to assess test calibration
- Regional association plots for significant loci
- Gene models overlaid on loci
- Table of significant SNPs with annotations
- Functional enrichment results
- Genomic prediction accuracy
- Biological interpretation:
* Candidate genes for drought tolerance
* Potential molecular mechanisms
* Implications for breeding programs
- Recommendations:
* SNPs to use for marker-assisted selection
* Genes for functional validation
* Crosses to generate mapping populations
- Export publication-quality PDF with all results
Expected Output:
- Significant SNP-trait associations
- Annotated candidate genes
- Functional enrichment analysis
- Genomic prediction models
- Comprehensive GWAS report
- Recommendations for breeding programs
```
---
## Neuroscience & Brain Imaging
### Example 14: Brain Connectivity Analysis from fMRI Data
**Objective**: Analyze resting-state fMRI data to identify altered brain connectivity patterns in disease.
**Skills Used**:
- `neurokit2` - Neurophysiological signal processing
- `neuropixels-analysis` - Neural data analysis
- `scikit-learn` - Classification and clustering
- `networkx` - Graph theory analysis
- `statsmodels` - Statistical testing
- `statistical-analysis` - Hypothesis testing
- `torch_geometric` - Graph neural networks
- `pymc` - Bayesian modeling
- `matplotlib` - Brain visualization
- `seaborn` - Connectivity matrices
- `plotly` - Interactive brain networks
**Workflow**:
```bash
Step 1: Load and preprocess fMRI data
# Note: Use nilearn or similar for fMRI-specific preprocessing
- Load 4D fMRI images (BOLD signal)
- Preprocessing:
* Motion correction (realignment)
* Slice timing correction
* Spatial normalization to MNI space
* Smoothing (6mm FWHM Gaussian kernel)
* Temporal filtering (0.01-0.1 Hz bandpass)
* Nuisance regression (motion, CSF, white matter)
Step 2: Define brain regions (parcellation)
- Apply brain atlas (e.g., AAL, Schaefer 200-region atlas)
- Extract average time series for each region
- Result: 200 time series per subject (one per brain region)
Step 3: Signal cleaning with NeuroKit2
- Denoise time series
- Remove physiological artifacts
- Apply additional bandpass filtering if needed
- Identify and handle outlier time points
Step 4: Calculate functional connectivity
- Compute pairwise Pearson correlations between all regions
- Result: 200×200 connectivity matrix per subject
- Fisher z-transform correlations for group statistics
- Threshold out weak connections (set edges with |r| < 0.2 to zero)
Step 5: Graph theory analysis with NetworkX
- Convert connectivity matrices to graphs
- Calculate global network metrics:
* Clustering coefficient (local connectivity)
* Path length (integration)
* Small-worldness (balance of segregation and integration)
* Modularity (community structure)
- Calculate node-level metrics:
* Degree centrality
* Betweenness centrality
* Eigenvector centrality
* Participation coefficient (inter-module connectivity)
Step 6: Statistical comparison between groups
- Compare patients vs healthy controls
- Use statsmodels for group comparisons:
* Paired or unpaired t-tests for connectivity edges
* FDR correction for multiple comparisons across all edges
* Identify edges with significantly different connectivity
- Compare global and node-level network metrics
- Calculate effect sizes (Cohen's d)
Step 7: Identify altered subnetworks
- Threshold statistical maps (FDR < 0.05)
- Identify clusters of altered connectivity
- Map to functional brain networks:
* Default mode network (DMN)
* Salience network (SN)
* Central executive network (CEN)
* Sensorimotor network
- Visualize altered connections on brain surfaces
Step 8: Machine learning classification
- Train classifier to distinguish patients from controls
- Use scikit-learn Random Forest or SVM
- Features: connectivity values or network metrics
- Cross-validation (10-fold)
- Calculate accuracy, sensitivity, specificity, AUC
- Identify most discriminative features (connectivity edges)
Step 9: Graph neural network analysis with Torch Geometric
- Build graph neural network (GCN or GAT)
- Input: connectivity matrices as adjacency matrices
- Train to predict diagnosis
- Extract learned representations
- Visualize latent space (UMAP)
- Interpret which brain regions are most important
Step 10: Bayesian network modeling with PyMC
- Build directed graphical model of brain networks
- Estimate effective connectivity (directional influence)
- Incorporate prior knowledge about anatomical connections
- Perform posterior inference
- Identify key driver regions in disease
Step 11: Clinical correlation analysis
- Correlate network metrics with clinical scores:
* Symptom severity
* Cognitive performance
* Treatment response
- Use Spearman or Pearson correlation
- Identify brain-behavior relationships
Step 12: Generate comprehensive neuroimaging report
- Brain connectivity matrices (patients vs controls)
- Statistical comparison maps on brain surface
- Network metric comparison bar plots
- Graph visualizations (circular or force-directed layout)
- Machine learning ROC curves
- Brain-behavior correlation plots
- Clinical interpretation:
* Which networks are disrupted?
* Relationship to symptoms
* Potential biomarker utility
- Recommendations:
* Brain regions for therapeutic targeting (TMS, DBS)
* Network metrics as treatment response predictors
- Export publication-ready PDF with brain visualizations
Expected Output:
- Functional connectivity matrices for all subjects
- Statistical maps of altered connectivity
- Graph theory metrics
- Machine learning classification model
- Brain-behavior correlations
- Comprehensive neuroimaging report
```
---
## Environmental Microbiology
### Example 15: Metagenomic Analysis of Environmental Samples
**Objective**: Characterize microbial community composition and functional potential from environmental DNA samples.
**Skills Used**:
- `biopython` - Sequence processing
- `pysam` - BAM file handling
- `ena-database` - Sequence data
- `geo-database` - Public datasets
- `uniprot-database` - Protein annotation
- `kegg-database` - Pathway analysis
- `etetoolkit` - Phylogenetic trees
- `scikit-bio` - Microbial ecology
- `networkx` - Co-occurrence networks
- `statsmodels` - Diversity statistics
- `statistical-analysis` - Hypothesis testing
- `matplotlib` - Visualization
- `plotly` - Interactive plots
**Workflow**:
```bash
Step 1: Load and QC metagenomic reads
- Load FASTQ files with BioPython
- Quality control with FastQC-equivalent:
* Remove adapters and low-quality bases (Q < 20)
* Discard short reads (< 50 bp)
* Remove host contamination (if applicable)
- Subsample to even depth if comparing samples
Step 2: Taxonomic classification
- Use Kraken2-like approach or query ENA database
- Classify reads to taxonomic lineages
- Generate abundance table:
* Rows: taxa (species or OTUs)
* Columns: samples
* Values: read counts or relative abundance
- Summarize at different levels: phylum, class, order, family, genus, species
Step 3: Calculate diversity metrics with scikit-bio
- Alpha diversity (within-sample):
* Richness (number of species)
* Shannon entropy
* Simpson diversity
* Chao1 estimated richness
- Beta diversity (between-sample):
* Bray-Curtis dissimilarity
* Weighted/unweighted UniFrac distance
* Jaccard distance
- Rarefaction curves to assess sampling completeness
Step 4: Statistical comparison of communities
- Compare diversity between groups (e.g., polluted vs pristine)
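- Use scipy.stats, scikit-bio, and statsmodels for:
* Mann-Whitney or Kruskal-Wallis tests (alpha diversity; scipy.stats)
* PERMANOVA for beta diversity (scikit-bio permanova; cf. the adonis test in R)
* LEfSe-style differential abundance testing (run with the external LEfSe tool or an equivalent Kruskal-Wallis + LDA workflow)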
- Identify taxa enriched or depleted in each condition
Step 5: Build phylogenetic tree with ETE Toolkit
- Extract 16S rRNA sequences (or marker genes)
- Align sequences (MUSCLE/MAFFT equivalent)
- Build phylogenetic tree (neighbor-joining or maximum likelihood)
- Visualize tree colored by sample or environment
- Root tree with outgroup
Step 6: Co-occurrence network analysis
- Calculate pairwise correlations between taxa
- Use Spearman correlation to identify co-occurrence patterns
- Filter significant correlations (p < 0.01, |r| > 0.6)
- Build co-occurrence network with NetworkX
- Identify modules (communities of co-occurring taxa)
- Calculate network topology metrics
- Visualize network (nodes = taxa, edges = correlations)
Step 7: Functional annotation
- Assemble contigs from reads (if performing assembly)
- Predict genes with Prodigal-like tools
- Annotate genes using UniProt and KEGG
- Map proteins to KEGG pathways
- Generate functional profile:
* Abundance of metabolic pathways
* Key enzymes (nitrification, denitrification, methanogenesis)
* Antibiotic resistance genes
* Virulence factors
Step 8: Functional diversity analysis
- Compare functional profiles between samples
- Calculate pathway richness and evenness
- Identify enriched pathways with statistical testing
- Link taxonomy to function:
* Which taxa contribute to which functions?
* Use shotgun data to assign functions to taxa
Step 9: Search ENA for related environmental samples
- Query ENA for metagenomic studies from similar environments
- Download and compare to own samples
- Place samples in context of global microbiome diversity
- Identify unique vs ubiquitous taxa
Step 10: Environmental parameter correlation
- Correlate community composition with metadata:
* Temperature, pH, salinity
* Nutrient concentrations (N, P)
* Pollutant levels (heavy metals, hydrocarbons)
- Use Mantel test to correlate distance matrices
- Identify environmental drivers of community structure
Step 11: Biomarker discovery
- Identify taxa or pathways that correlate with environmental condition
- Use Random Forest to find predictive features
- Validate biomarkers:
* Sensitivity and specificity
* Cross-validation across samples
- Propose taxa as bioindicators of environmental health
Step 12: Generate environmental microbiome report
- Taxonomic composition bar charts (stacked by phylum/class)
- Alpha and beta diversity plots (boxplots, PCoA)
- Phylogenetic tree with environmental context
- Co-occurrence network visualization
- Functional pathway heatmaps
- Environmental correlation plots
- Statistical comparison tables
- Biological interpretation:
* Dominant taxa and their ecological roles
* Functional potential of the community
* Environmental factors shaping the microbiome
* Biomarker taxa for monitoring
- Recommendations:
* Biomarkers for environmental monitoring
* Functional guilds for restoration
* Further sampling or sequencing strategies
- Export comprehensive PDF report
Expected Output:
- Taxonomic profiles for all samples
- Diversity metrics and statistical comparisons
- Phylogenetic tree
- Co-occurrence network
- Functional annotation and pathway analysis
- Comprehensive microbiome report
```
---
## Infectious Disease Research
### Example 16: Antimicrobial Resistance Surveillance and Prediction
**Objective**: Track antimicrobial resistance trends and predict resistance phenotypes from genomic data.
**Skills Used**:
- `biopython` - Sequence analysis
- `pysam` - Genome assembly analysis
- `ena-database` - Public genomic data
- `uniprot-database` - Resistance protein annotation
- `gene-database` - Resistance gene catalogs
- `etetoolkit` - Phylogenetic analysis
- `scikit-learn` - Resistance prediction
- `networkx` - Transmission networks
- `statsmodels` - Trend analysis
- `statistical-analysis` - Hypothesis testing
- `matplotlib` - Epidemiological plots
- `plotly` - Interactive dashboards
- `clinical-reports` - Surveillance reports
**Workflow**:
```bash
Step 1: Collect bacterial genome sequences
- Isolates from hospital surveillance program
- Load FASTA assemblies with BioPython
- Basic QC:
* Assess assembly quality (N50, completeness)
* Estimate genome size and coverage
* Remove contaminated assemblies
Step 2: Species identification and MLST typing
- Perform in silico MLST (multi-locus sequence typing)
- Extract housekeeping gene sequences
- Assign sequence types (ST)
- Classify isolates into clonal complexes
- Identify high-risk clones (e.g., ST131 E. coli, ST258 K. pneumoniae)
Step 3: Antimicrobial resistance (AMR) gene detection
- Query NCBI Gene and UniProt for reference AMR gene and protein records
- Screen assemblies for resistance genes:
* Beta-lactamases (blaTEM, blaCTX-M, blaKPC, blaNDM)
* Aminoglycoside resistance (aac, aph, ant)
* Fluoroquinolone resistance (gyrA, parC mutations)
* Colistin resistance (mcr-1 to mcr-10)
* Efflux pumps
- Calculate gene presence/absence matrix
Step 4: Resistance mechanism annotation
- Map detected genes to resistance classes:
* Enzymatic modification (e.g., beta-lactamases)
* Target modification (e.g., ribosomal methylation)
* Target mutation (e.g., fluoroquinolone resistance)
* Efflux pumps
- Query UniProt for detailed mechanism descriptions
- Link genes to antibiotic classes affected
Step 5: Build phylogenetic tree with ETE Toolkit
- Extract core genome SNPs
- Concatenate SNP alignments
- Build maximum likelihood tree
- Root with outgroup or midpoint rooting
- Annotate tree with:
* Resistance profiles
* Sequence types
* Collection date and location
Step 6: Genotype-phenotype correlation
- Match genomic data with phenotypic susceptibility testing
- For each antibiotic, correlate:
* Presence of resistance genes with MIC values
* Target mutations with resistance phenotype
- Calculate sensitivity/specificity of genetic markers
- Identify discordant cases (false positives/negatives)
Step 7: Machine learning resistance prediction
- Train classification models with scikit-learn:
* Features: presence/absence of resistance genes + mutations
* Target: resistance phenotype (susceptible/intermediate/resistant)
* Models: Logistic Regression, Random Forest, Gradient Boosting
- Train separate models for each antibiotic
- Cross-validate (stratified 5-fold)
- Calculate accuracy, precision, recall, F1 score
- Feature importance: which genes are most predictive?
Step 8: Temporal trend analysis
- Track resistance rates over time
- Use statsmodels (with scipy.stats where needed) for:
* Mann-Kendall trend test (e.g., Kendall's tau of resistance rate against time)
* Joinpoint (segmented) regression to identify change points
* Forecast future resistance rates (ARIMA)
- Analyze trends for each antibiotic class
- Identify emerging resistance mechanisms
Step 9: Transmission network inference
- Identify closely related isolates (< 10 SNPs difference)
- Build transmission network with NetworkX:
* Nodes: isolates
* Edges: putative transmission links
- Incorporate temporal and spatial data
- Identify outbreak clusters
- Detect super-spreaders (high degree nodes)
- Analyze network topology
Step 10: Search ENA for global context
- Query ENA for same species from other regions/countries
- Download representative genomes
- Integrate into phylogenetic analysis
- Assess whether local isolates are globally distributed clones
- Identify region-specific vs international resistance genes
Step 11: Plasmid and mobile element analysis
- Identify plasmid contigs
- Detect insertion sequences and transposons
- Track mobile genetic elements carrying resistance genes
- Identify conjugative plasmids facilitating horizontal gene transfer
- Build plasmid similarity networks
Step 12: Generate AMR surveillance report
- Summary statistics:
* Number of isolates by species, ST, location
* Resistance rates for each antibiotic
- Phylogenetic tree annotated with resistance profiles
- Temporal trend plots (resistance % over time)
- Transmission network visualizations
- Prediction model performance metrics
- Heatmap: resistance genes by isolate
- Geographic distribution map (if spatial data available)
- Interpretation:
* Predominant resistance mechanisms
* High-risk clones circulating
* Temporal trends and emerging threats
* Transmission clusters and outbreaks
- Recommendations:
* Infection control measures for clusters
* Antibiotic stewardship priorities
* Resistance genes to monitor
* Laboratories to perform confirmatory testing
- Export comprehensive PDF for public health reporting
Expected Output:
- AMR gene profiles for all isolates
- Phylogenetic tree with resistance annotations
- Temporal trends in resistance rates
- ML models for resistance prediction from genomes
- Transmission networks
- Comprehensive AMR surveillance report for public health
```
---
## Multi-Omics Integration
### Example 17: Integrative Analysis of Cancer Multi-Omics Data
**Objective**: Integrate genomics, transcriptomics, proteomics, and clinical data to identify cancer subtypes and therapeutic strategies.
**Skills Used**:
- `pydeseq2` - RNA-seq DE analysis
- `pysam` - Variant (VCF/BAM) file parsing
- `ensembl-database` - Gene annotation
- `gget` - Gene data retrieval
- `cosmic-database` - Cancer mutations
- `string-database` - Protein interactions
- `reactome-database` - Pathway analysis
- `opentargets-database` - Drug targets
- `scikit-learn` - Clustering and classification
- `torch_geometric` - Graph neural networks
- `umap-learn` - Dimensionality reduction
- `scikit-survival` - Survival analysis
- `statsmodels` - Statistical modeling
- `pymoo` - Multi-objective optimization
- `pyhealth` - Healthcare ML models
- `clinical-reports` - Integrative genomics report
**Workflow**:
```bash
Step 1: Load and preprocess genomic data (WES/WGS)
- Parse VCF files with pysam
- Retain high-quality variants (QUAL > 30, DP > 20)
- Annotate with Ensembl VEP (missense, nonsense, frameshift)
- Query COSMIC for known cancer mutations
- Create mutation matrix: samples × genes (binary: mutated or not)
- Focus on cancer genes from COSMIC Cancer Gene Census
Step 2: Process transcriptomic data (RNA-seq)
- Load gene count matrix
- Run differential expression with PyDESeq2 on the raw (unnormalized) counts
- Compare tumor vs normal (if paired samples available)
- Separately normalize counts (TPM or FPKM) for visualization and clustering
- Identify highly variable genes
- Create expression matrix: samples × genes (log2 TPM)
Step 3: Load proteomic data (Mass spec)
- Protein abundance matrix from LC-MS/MS
- Normalize protein abundances (median normalization)
- Log2-transform
- Filter out proteins detected in < 50% of samples
- Create protein matrix: samples × proteins
Step 4: Load clinical data
- Demographics: age, sex, race
- Tumor characteristics: stage, grade, histology
- Treatment: surgery, chemo, radiation, targeted therapy
- Outcome: overall survival (OS), progression-free survival (PFS)
- Response: complete/partial response, stable/progressive disease
Step 5: Data integration and harmonization
- Match sample IDs across omics layers
- Ensure consistent gene/protein identifiers
- Handle missing data:
* Impute with KNN or median (for moderate missingness)
* Remove features with > 50% missing
- Create multi-omics data structure (dictionary of matrices)
Step 6: Multi-omics dimensionality reduction
- Concatenate all omics features (genes + proteins + mutations)
- Apply UMAP with umap-learn for visualization
- Alternative: PCA or t-SNE
- Visualize samples in 2D space colored by:
* Histological subtype
* Stage
* Survival (high vs low)
- Identify patterns or clusters
Step 7: Unsupervised clustering to identify subtypes
- Perform consensus clustering with scikit-learn
- Test k = 2 to 10 clusters
- Evaluate cluster stability and optimal k
- Assign samples to clusters (subtypes)
- Visualize clustering in UMAP space
Step 8: Characterize molecular subtypes
For each subtype:
- Differential expression analysis:
* Compare subtype vs all others with PyDESeq2
* Extract top differentially expressed genes and proteins
- Mutation enrichment:
* Fisher's exact test for each gene
* Identify subtype-specific mutations
- Pathway enrichment:
* Query Reactome for enriched pathways
* Query KEGG for metabolic pathway differences
* Identify hallmark biological processes
Step 9: Build protein-protein interaction networks
- Query STRING database for interactions among:
* Differentially expressed proteins
* Products of mutated genes
- Construct PPI network with NetworkX
- Identify network modules (community detection)
- Calculate centrality metrics to find hub proteins
- Overlay fold changes on network for visualization
Step 10: Survival analysis by subtype
- Use scikit-survival (or statsmodels/lifelines) for survival analysis
- Kaplan-Meier curves for each subtype
- Log-rank test for significance
- Cox proportional hazards model:
* Covariates: subtype, stage, age, treatment
* Estimate hazard ratios
- Identify prognostic subtypes
Step 11: Predict therapeutic response
- Train machine learning models with scikit-learn:
* Features: multi-omics data
* Target: response to specific therapy (responder/non-responder)
* Models: Random Forest, Gradient Boosting (scikit-learn, or XGBoost if installed), SVM
- Cross-validation to assess performance
- Identify features predictive of response
- Calculate AUC and feature importance
Step 12: Graph neural network for integrated prediction
- Build heterogeneous graph with Torch Geometric:
* Nodes: samples, genes, proteins, pathways
* Edges: gene-protein, protein-protein, gene-pathway
* Node features: expression, mutation status
- Train GNN to predict:
* Subtype classification
* Survival risk
* Treatment response
- Extract learned embeddings for interpretation
Step 13: Identify therapeutic targets with Open Targets
- For each subtype, query Open Targets:
* Input: upregulated genes/proteins
* Extract target-disease associations
* Prioritize by tractability score
- Search for FDA-approved drugs targeting identified proteins
- Identify clinical trials for relevant targets
- Propose subtype-specific therapeutic strategies
Step 14: Multi-objective optimization of treatment strategies
- Use PyMOO to optimize treatment selection:
* Objectives:
1. Maximize predicted response probability
2. Minimize predicted toxicity
3. Minimize cost
* Constraints: patient eligibility, drug availability
- Generate Pareto-optimal treatment strategies
- Personalized treatment recommendations per patient
Step 15: Generate comprehensive multi-omics report
- Sample clustering and subtype assignments
- UMAP visualization colored by subtype, survival, mutations
- Subtype characterization:
* Molecular signatures (genes, proteins, mutations)
* Enriched pathways
* PPI networks
- Kaplan-Meier survival curves by subtype
- ML model performance (AUC, confusion matrices)
- Feature importance plots
- Therapeutic target tables with supporting evidence
- Personalized treatment recommendations
- Clinical implications:
* Prognostic biomarkers
* Predictive biomarkers for therapy selection
* Novel drug targets
- Export publication-quality PDF with all figures and tables
Expected Output:
- Integrated multi-omics dataset
- Cancer subtype classification
- Molecular characterization of subtypes
- Survival analysis and prognostic markers
- Predictive models for treatment response
- Therapeutic target identification
- Personalized treatment strategies
- Comprehensive integrative genomics report
```
---
## Experimental Physics & Data Analysis
### Example 18: Analysis of Particle Physics Detector Data
**Objective**: Analyze experimental data from a particle detector to identify signal events and measure physical constants.
**Skills Used**:
- `astropy` - Units and constants
- `sympy` - Symbolic mathematics
- `statistical-analysis` - Statistical analysis
- `scikit-learn` - Classification
- `stable-baselines3` - Reinforcement learning for optimization
- `matplotlib` - Visualization
- `seaborn` - Statistical plots
- `statsmodels` - Hypothesis testing
- `dask` - Large-scale data processing
- `vaex` - Out-of-core dataframes
- `plotly` - Interactive visualization
**Workflow**:
```bash
Step 1: Load and inspect detector data
- Load ROOT files or HDF5 with raw detector signals
- Use Vaex for out-of-core processing (TBs of data)
- Inspect data structure: event IDs, timestamps, detector channels
- Extract key observables:
* Energy deposits in calorimeters
* Particle trajectories from tracking detectors
* Time-of-flight measurements
* Trigger information
Step 2: Apply detector calibration and corrections
- Load calibration constants
- Apply energy calibrations to convert ADC to physical units
- Correct for detector efficiency variations
- Apply geometric corrections (alignment)
- Use Astropy units for unit conversions (eV, GeV, MeV)
- Account for dead time and detector acceptance
Step 3: Event reconstruction
- Cluster energy deposits to form particle candidates
- Reconstruct particle trajectories (tracks)
- Match tracks to calorimeter clusters
- Calculate invariant masses for particle identification
- Compute momentum and energy for each particle
- Use Dask for parallel processing across events
Step 4: Event selection and filtering
- Define signal region based on physics hypothesis
- Apply quality cuts:
* Track quality (chi-squared, number of hits)
* Fiducial volume cuts
* Timing cuts (beam window)
* Particle identification cuts
- Estimate trigger efficiency
- Calculate event weights for corrections
Step 5: Background estimation
- Identify background sources:
* Cosmic rays
* Beam-related backgrounds
* Detector noise
* Physics backgrounds (non-signal processes)
- Simulate backgrounds using Monte Carlo (if available)
- Estimate background from data in control regions
- Use sideband subtraction method
Step 6: Signal extraction
- Fit invariant mass distributions to extract signal
- Use scipy for likelihood fitting:
* Signal model: Gaussian or Breit-Wigner
* Background model: polynomial or exponential
* Combined fit with maximum likelihood
- Calculate signal significance (S/√B or Z-score)
- Estimate systematic uncertainties
Step 7: Machine learning event classification
- Train classifier with scikit-learn to separate signal from background
- Features: kinematic variables, topology, detector response
- Models: Boosted Decision Trees (scikit-learn Gradient Boosting, or XGBoost if installed), Neural Networks
- Cross-validate with k-fold CV
- Optimize selection criteria using ROC curves
- Calculate signal efficiency and background rejection
Step 8: Reinforcement learning for trigger optimization
- Use Stable-Baselines3 to optimize trigger thresholds
- Environment: detector simulator
- Action: adjust trigger thresholds
- Reward: maximize signal efficiency while controlling rate
- Train PPO or SAC agent
- Validate on real data
Step 9: Calculate physical observables
- Measure cross-sections:
* σ = N_signal / (ε × L × BR)
* N_signal: number of signal events
* ε: detection efficiency
* L: integrated luminosity
* BR: branching ratio
- Use Sympy for symbolic error propagation
- Calculate with Astropy for proper unit handling
Step 10: Statistical analysis and hypothesis testing
- Perform hypothesis tests with statsmodels:
* Likelihood ratio test for signal vs background-only
* Calculate p-values and significance levels
* Set confidence limits (CLs method)
- Bayesian analysis for parameter estimation
- Calculate confidence intervals and error bands
Step 11: Systematic uncertainty evaluation
- Identify sources of systematic uncertainty:
* Detector calibration uncertainties
* Background estimation uncertainties
* Theoretical uncertainties (cross-sections, PDFs)
* Monte Carlo modeling uncertainties
- Propagate uncertainties through analysis chain
- Combine statistical and systematic uncertainties
- Present as error budget
Step 12: Create comprehensive physics report
- Event displays showing candidate signal events
- Kinematic distributions (momentum, energy, angles)
- Invariant mass plots with fitted signal
- ROC curves for ML classifiers
- Cross-section measurements with error bars
- Comparison with theoretical predictions
- Systematic uncertainty breakdown
- Statistical significance calculations
- Interpretation:
* Consistency with Standard Model
* Constraints on new physics parameters
* Discovery potential or exclusion limits
- Recommendations:
* Detector improvements
* Additional data needed
* Future analysis strategies
- Export publication-ready PDF formatted for physics journal
Expected Output:
- Reconstructed physics events
- Signal vs background classification
- Measured cross-sections and branching ratios
- Statistical significance of observations
- Systematic uncertainty analysis
- Comprehensive experimental physics paper
```
---
## Chemical Engineering & Process Optimization
### Example 19: Optimization of Chemical Reactor Design and Operation
**Objective**: Design and optimize a continuous chemical reactor for maximum yield and efficiency while meeting safety and economic constraints.
**Skills Used**:
- `sympy` - Symbolic equations and reaction kinetics
- `statistical-analysis` - Numerical analysis
- `pymoo` - Multi-objective optimization
- `simpy` - Process simulation
- `pymc` - Bayesian parameter estimation
- `scikit-learn` - Process modeling
- `stable-baselines3` - Real-time control optimization
- `matplotlib` - Process diagrams
- `plotly` - Interactive process visualization
- `fluidsim` - Fluid dynamics simulation
- `scientific-writing` - Engineering reports
- `document-skills` - Technical documentation
**Workflow**:
```bash
Step 1: Define reaction system and kinetics
- Chemical reaction: A + B → C + D
- Use Sympy to define symbolic rate equations:
* Arrhenius equation: k = A × exp(-Ea/RT)
* Rate law: r = k × [A]^α × [B]^β
- Define material and energy balances symbolically
- Include equilibrium constants and thermodynamics
- Account for side reactions and byproducts
Step 2: Develop reactor model
- Select reactor type: CSTR, PFR, batch, or semi-batch
- Write conservation equations:
* Mass balance: dC/dt = (F_in × C_in - F_out × C)/V + r
* Energy balance: ρ × Cp × V × dT/dt = Q - ΔH_rxn × r × V
* Momentum balance (pressure drop)
- Include heat transfer correlations
- Model mixing and mass transfer limitations
Step 3: Parameter estimation with PyMC
- Load experimental data from pilot reactor
- Bayesian inference to estimate kinetic parameters:
* Pre-exponential factor (A)
* Activation energy (Ea)
* Reaction orders (α, β)
- Use MCMC sampling with PyMC
- Incorporate prior knowledge from literature
- Calculate posterior distributions and credible intervals
- Assess parameter uncertainty and correlation
Step 4: Model validation
- Simulate reactor with estimated parameters using scipy.integrate
- Compare predictions with experimental data
- Calculate goodness of fit (R², RMSE)
- Perform sensitivity analysis:
* Which parameters most affect yield?
* Identify critical operating conditions
- Refine model if needed
Step 5: Machine learning surrogate model
- Train fast surrogate model with scikit-learn
- Generate training data from detailed model (1000+ runs)
- Features: T, P, residence time, feed composition, catalyst loading
- Target: yield, selectivity, conversion
- Models: Gaussian Process Regression, Random Forest
- Validate surrogate accuracy (R² > 0.95)
- Use for rapid optimization
Step 6: Single-objective optimization
- Maximize yield with scipy.optimize:
* Decision variables: T, P, feed ratio, residence time
* Objective: maximize Y = (moles C produced) / (moles A fed)
* Constraints:
- Temperature: 300 K ≤ T ≤ 500 K (safety)
- Pressure: 1 bar ≤ P ≤ 50 bar (equipment limits)
- Residence time: 1 min ≤ τ ≤ 60 min
- Conversion: X_A ≥ 90%
- Use Sequential Least Squares Programming (SLSQP)
- Identify optimal operating point
Step 7: Multi-objective optimization with PyMOO
- Competing objectives:
1. Maximize product yield
2. Minimize energy consumption (heating/cooling)
3. Minimize operating cost (raw materials, utilities)
4. Maximize reactor productivity (throughput)
- Constraints:
- Safety: temperature and pressure limits
- Environmental: waste production limits
- Economic: minimum profitability
- Run NSGA-II or NSGA-III
- Generate Pareto front of optimal solutions
- Select operating point based on preferences
Step 8: Dynamic process simulation with SimPy
- Model complete plant:
* Reactors, separators, heat exchangers
* Pumps, compressors, valves
* Storage tanks and buffers
- Simulate startup, steady-state, and shutdown
- Include disturbances:
* Feed composition variations
* Equipment failures
* Demand fluctuations
- Evaluate dynamic stability
- Calculate time to steady state
Step 9: Control system design
- Design feedback control loops:
* Temperature control (PID controller)
* Pressure control
* Flow control
* Level control
- Tune PID parameters using Ziegler-Nichols or optimization
- Implement cascade control for improved performance
- Add feedforward control for disturbance rejection
Step 10: Reinforcement learning for advanced control
- Use Stable-Baselines3 to train RL agent:
* Environment: reactor simulation (SimPy-based)
* State: T, P, concentrations, flow rates
* Actions: adjust setpoints, flow rates, heating/cooling
* Reward: +yield -energy cost -deviation from setpoint
- Train PPO or TD3 agent
- Compare with conventional PID control
- Evaluate performance under disturbances
- Implement model-free adaptive control
Step 11: Economic analysis
- Calculate capital costs (CAPEX):
* Reactor vessel cost (function of size, pressure rating)
* Heat exchanger costs
* Pumps and instrumentation
* Installation costs
- Calculate operating costs (OPEX):
* Raw materials (A, B, catalyst)
* Utilities (steam, cooling water, electricity)
* Labor and maintenance
- Revenue from product sales
- Calculate economic metrics:
* Net present value (NPV)
* Internal rate of return (IRR)
* Payback period
* Levelized cost of production
Step 12: Safety analysis
- Identify hazards:
* Exothermic runaway reactions
* Pressure buildup
* Toxic or flammable materials
- Perform HAZOP-style analysis
- Calculate safe operating limits:
* Maximum temperature of synthesis reaction (MTSR)
* Adiabatic temperature rise
* Relief valve sizing
- Design emergency shutdown systems
- Implement safety interlocks
Step 13: Uncertainty quantification
- Propagate parameter uncertainties from PyMC:
* How does kinetic parameter uncertainty affect yield?
* Monte Carlo simulation with parameter distributions
- Evaluate robustness of optimal design
- Calculate confidence intervals on economic metrics
- Identify critical uncertainties for further study
Step 14: Generate comprehensive engineering report
- Executive summary of project objectives and results
- Process flow diagram (PFD) with material and energy streams
- Reaction kinetics and model equations
- Parameter estimation results with uncertainties
- Optimization results:
* Pareto front for multi-objective optimization
* Recommended operating conditions
* Trade-off analysis
- Dynamic simulation results (startup curves, response to disturbances)
- Control system design and tuning
- Economic analysis with sensitivity to key assumptions
- Safety analysis and hazard mitigation
- Scale-up considerations:
* Pilot to commercial scale
* Heat and mass transfer limitations
* Equipment sizing
- Recommendations:
* Optimal reactor design (size, type, materials of construction)
* Operating conditions for maximum profitability
* Control strategy
* Further experimental studies needed
- Technical drawings and P&ID (piping and instrumentation diagram)
- Export as professional engineering report (PDF)
Expected Output:
- Validated reactor model with parameter uncertainties
- Optimal reactor design and operating conditions
- Pareto-optimal solutions for multi-objective optimization
- Dynamic process simulation results
- Advanced control strategies (RL-based)
- Economic feasibility analysis
- Safety assessment
- Comprehensive chemical engineering design report
```
---
## Scientific Illustration & Visual Communication
### Example 20: Creating Publication-Ready Scientific Figures
**Objective**: Generate and refine scientific illustrations, diagrams, and graphical abstracts for publications and presentations.
**Skills Used**:
- `generate-image` - AI image generation and editing
- `matplotlib` - Data visualization
- `plotly` - Interactive visualization
- `scientific-visualization` - Best practices
- `scientific-schematics` - Scientific diagrams
- `scientific-writing` - Figure caption creation
- `scientific-slides` - Presentation materials
- `latex-posters` - Conference posters
- `pptx-posters` - PowerPoint posters
- `document-skills` - PDF report generation
**Workflow**:
```bash
Step 1: Plan visual communication strategy
- Identify key concepts that need visual representation:
* Experimental workflow diagrams
* Molecular structures and interactions
* Data visualization (handled by matplotlib)
* Conceptual illustrations for mechanisms
* Graphical abstract for paper summary
- Determine appropriate style for target journal/audience
- Sketch rough layouts for each figure
Step 2: Generate experimental workflow diagram
- Use generate-image skill with detailed prompt:
"Scientific illustration showing a step-by-step experimental
workflow for CRISPR gene editing: (1) guide RNA design at computer,
(2) cell culture in petri dish, (3) electroporation device,
(4) selection with antibiotics, (5) sequencing validation.
Clean, professional style with numbered steps, white background,
suitable for scientific publication."
- Save as workflow_diagram.png
- Review and iterate on prompt if needed
Step 3: Create molecular interaction schematic
- Generate detailed molecular visualization:
"Scientific diagram of protein-ligand binding mechanism:
show receptor protein (blue ribbon structure) with binding pocket,
small molecule ligand (ball-and-stick, orange) approaching,
key hydrogen bonds indicated with dashed lines, water molecules
in binding site. Professional biochemistry illustration style,
clean white background, publication quality."
- Generate multiple versions with different angles/styles
- Select best representation
Step 4: Edit existing figures for consistency
- Load existing figure that needs modification:
python scripts/generate_image.py "Change the background to white
and make the protein blue instead of green" --input figure1.png
- Standardize color schemes across all figures
- Edit to match journal style guidelines:
python scripts/generate_image.py "Remove the title text and
increase contrast for print publication" --input diagram.png
Step 5: Generate graphical abstract
- Create comprehensive visual summary:
"Graphical abstract for cancer immunotherapy paper: left side
shows tumor cells (irregular shapes, red) being attacked by
T cells (round, blue). Center shows the drug molecule structure.
Right side shows healthy tissue (green). Arrow flow from left
to right indicating treatment progression. Modern, clean style
with minimal text, high contrast, suitable for journal TOC."
- Ensure dimensions meet journal requirements
- Iterate to highlight key findings
Step 6: Create conceptual mechanism illustrations
- Generate mechanism diagrams:
"Scientific illustration of enzyme catalysis mechanism:
Show substrate entering active site (step 1), transition state
formation with electron movement arrows (step 2), product
release (step 3). Use standard biochemistry notation,
curved arrows for electron movement, clear labeling."
- Generate alternative representations for supplementary materials
Step 7: Produce presentation-ready figures
- Create high-impact visuals for talks:
"Eye-catching scientific illustration of DNA double helix
unwinding during replication, with DNA polymerase (large
green structure) adding nucleotides. Dynamic composition,
vibrant but professional colors, dark background for
presentation slides."
- Adjust style for poster vs slide format
- Create versions at different resolutions
Step 8: Generate figure panels for multi-part figures
- Create consistent series of related images:
"Panel A: Normal cell with intact membrane (green outline)
Panel B: Cell under oxidative stress with damaged membrane
Panel C: Cell treated with antioxidant, membrane recovering
Consistent style across all panels, same scale, white background,
scientific illustration style suitable for publication."
- Ensure visual consistency across panels
- Annotate with panel labels
Step 9: Edit for accessibility
- Modify figures for colorblind accessibility:
python scripts/generate_image.py "Change the red and green
elements to blue and orange for colorblind accessibility,
maintain all other aspects" --input figure_v1.png
- Add patterns or textures for additional differentiation
- Verify contrast meets accessibility standards
Step 10: Create supplementary visual materials
- Generate additional context figures:
"Anatomical diagram showing location of pancreatic islets
within the pancreas, cross-section view with labeled structures:
alpha cells, beta cells, blood vessels. Medical illustration
style, educational, suitable for supplementary materials."
- Create protocol flowcharts and decision trees
- Generate equipment setup diagrams
Step 11: Compile figure legends and captions
- Use scientific-writing skill to create descriptions:
* Figure number and title
* Detailed description of what is shown
* Explanation of symbols, colors, and abbreviations
* Scale bars and measurement units
* Statistical information if applicable
- Format according to journal guidelines
Step 12: Assemble final publication package
- Organize all figures in publication order
- Create high-resolution exports (300+ DPI for print)
- Generate both RGB (web) and CMYK (print) versions
- Compile into PDF using document-skills:
* Title page with graphical abstract
* All figures with captions
* Supplementary figures section
- Create separate folder with individual figure files
- Document all generation prompts for reproducibility
Expected Output:
- Complete set of publication-ready scientific illustrations
- Graphical abstract for table of contents
- Mechanism diagrams and workflow figures
- Edited versions meeting journal style guidelines
- Accessibility-compliant figure versions
- Figure package with captions and metadata
- Documentation of prompts used for reproducibility
```
---
## Quantum Computing for Chemistry
### Example 21: Variational Quantum Eigensolver for Molecular Ground States
**Objective**: Use quantum computing to calculate molecular electronic structure and ground state energies for drug design applications.
**Skills Used**:
- `qiskit` - IBM quantum computing framework
- `pennylane` - Quantum machine learning
- `cirq` - Google quantum circuits
- `qutip` - Quantum dynamics simulation
- `rdkit` - Molecular structure input
- `sympy` - Symbolic Hamiltonian construction
- `matplotlib` - Energy landscape visualization
- `scientific-visualization` - Publication figures
- `scientific-writing` - Quantum chemistry reports
**Workflow**:
```bash
Step 1: Define molecular system
- Load molecular structure with RDKit (small drug molecule)
- Extract atomic coordinates and nuclear charges
- Define basis set (STO-3G, 6-31G for small molecules)
- Calculate number of qubits needed (one qubit per spin orbital, i.e., 2 qubits per spatial orbital)
Step 2: Construct molecular Hamiltonian
- Use Qiskit Nature to generate fermionic Hamiltonian
- Apply Jordan-Wigner transformation to map the fermionic Hamiltonian to a qubit Hamiltonian
- Use SymPy to symbolically verify Hamiltonian terms
- Calculate number of Pauli terms
Step 3: Design variational ansatz with Qiskit
- Choose ansatz type: UCCSD, hardware-efficient, or custom
- Define circuit depth and entanglement structure
- Calculate circuit parameters (variational angles)
- Estimate circuit resources (gates, depth)
Step 4: Implement VQE algorithm
- Initialize variational parameters randomly
- Define cost function: <ψ(θ)|H|ψ(θ)>
- Choose classical optimizer (COBYLA, SPSA, L-BFGS-B)
- Set convergence criteria
Step 5: Run quantum simulation with PennyLane
- Configure quantum device (simulator or real hardware)
- Execute variational circuits
- Measure expectation values of Hamiltonian terms
- Update parameters iteratively
Step 6: Error mitigation
- Implement readout error mitigation
- Apply zero-noise extrapolation
- Use measurement error correction
- Estimate uncertainty in energy values
Step 7: Quantum dynamics with QuTiP
- Simulate quantum dynamics of the molecular system (numerical simulation on classical hardware)
- Calculate time evolution of molecular system
- Study non-adiabatic transitions
- Visualize wavefunction dynamics
Step 8: Compare with classical methods
- Run classical HF and DFT calculations for reference
- Compare VQE results with CCSD(T) (gold standard)
- Analyze quantum advantage for this system
- Quantify accuracy vs computational cost
Step 9: Scale to larger molecules
- Design circuits for larger drug candidates
- Estimate resources for pharmaceutical applications
- Identify molecules where quantum advantage is expected
- Plan for near-term quantum hardware capabilities
Step 10: Generate quantum chemistry report
- Energy convergence plots
- Circuit diagrams and ansatz visualizations
- Comparison with classical methods
- Resource estimates for target molecules
- Discussion of quantum advantage timeline
- Publication-quality figures
- Export comprehensive report
Expected Output:
- Molecular ground state energies from VQE
- Optimized variational circuits
- Comparison with classical chemistry methods
- Resource estimates for drug molecules
- Quantum chemistry analysis report
```
---
## Research Grant Writing
### Example 22: NIH R01 Grant Proposal Development
**Objective**: Develop a comprehensive research grant proposal with literature review, specific aims, and budget justification.
**Skills Used**:
- `research-grants` - Grant writing templates and guidelines
- `literature-review` - Systematic literature analysis
- `pubmed-database` - Literature search
- `openalex-database` - Citation analysis
- `clinicaltrials-database` - Preliminary data context
- `hypothesis-generation` - Scientific hypothesis development
- `scientific-writing` - Technical writing
- `scientific-critical-thinking` - Research design
- `citation-management` - Reference formatting
- `document-skills` - PDF generation
**Workflow**:
```bash
Step 1: Define research question and significance
- Use hypothesis-generation skill to refine research questions
- Identify knowledge gaps in the field
- Articulate significance and innovation
- Define measurable outcomes
Step 2: Comprehensive literature review
- Search PubMed for relevant publications (last 10 years)
- Query OpenAlex for citation networks
- Identify key papers and review articles
- Use literature-review skill to synthesize findings
- Identify gaps that proposal will address
Step 3: Develop specific aims
- Aim 1: Mechanistic studies (hypothesis-driven)
- Aim 2: Translational applications
- Aim 3: Validation and clinical relevance
- Ensure aims are related but independent (no aim contingent on the success of another)
- Define success criteria for each aim
Step 4: Design research approach
- Use scientific-critical-thinking for experimental design
- Define methods for each specific aim
- Include positive and negative controls
- Plan statistical analysis approach
- Identify potential pitfalls and alternatives
Step 5: Preliminary data compilation
- Gather existing data supporting hypothesis
- Search ClinicalTrials.gov for relevant prior work
- Create figures showing preliminary results
- Quantify feasibility evidence
Step 6: Innovation and significance sections
- Articulate what is novel about approach
- Compare to existing methods/knowledge
- Explain expected impact on field
- Address NIH mission alignment
Step 7: Timeline and milestones
- Create Gantt chart for 5-year project
- Define quarterly milestones
- Identify go/no-go decision points
- Plan for personnel and resource allocation
Step 8: Budget development
- Calculate personnel costs (PI, postdocs, students)
- Equipment and supplies estimates
- Core facility usage costs
- Travel and publication costs
- Indirect cost calculation
Step 9: Rigor and reproducibility
- Address biological variables (sex, age, strain)
- Statistical power calculations
- Data management and sharing plan
- Authentication of key resources
Step 10: Format and compile
- Use research-grants templates for NIH format
- Apply citation-management for references
- Create biosketch and facilities sections
- Generate PDF with proper formatting
- Check page limits and formatting requirements
Step 11: Review and revision
- Use peer-review skill principles for self-assessment
- Check for logical flow and clarity
- Verify alignment with the funding opportunity announcement (FOA/NOFO) requirements
- Ensure the proposal is responsive to the review criteria
Step 12: Final deliverables
- Specific Aims page (1 page)
- Research Strategy (12 pages)
- Bibliography
- Budget and justification
- Biosketches
- Letters of support
- Data management plan
- Human subjects/vertebrate animals sections (if applicable)
Expected Output:
- Complete NIH R01 grant proposal
- Literature review summary
- Budget spreadsheet with justification
- Timeline and milestone chart
- All required supplementary documents
- Properly formatted PDF ready for submission
```
---
## Flow Cytometry & Immunophenotyping
### Example 23: Multi-Parameter Flow Cytometry Analysis Pipeline
**Objective**: Analyze high-dimensional flow cytometry data to characterize immune cell populations in clinical samples.
**Skills Used**:
- `flowio` - FCS file parsing
- `scanpy` - High-dimensional analysis
- `scikit-learn` - Clustering and classification
- `umap-learn` - Dimensionality reduction
- `statistical-analysis` - Population statistics
- `matplotlib` - Flow cytometry plots
- `plotly` - Interactive gating
- `clinical-reports` - Clinical flow reports
- `exploratory-data-analysis` - Data exploration
**Workflow**:
```bash
Step 1: Load and parse FCS files
- Use flowio to read FCS 3.0/3.1 files
- Extract channel names and metadata
- Load compensation matrix from file
- Parse keywords (patient ID, tube, date)
Step 2: Quality control
- Check for acquisition anomalies (time vs events)
- Identify clogging or fluidics issues
- Remove doublets (FSC-A vs FSC-H)
- Gate viable cells (exclude debris)
- Document QC metrics per sample
Step 3: Compensation and transformation
- Apply compensation matrix
- Transform data (biexponential/logicle)
- Verify compensation with single-stain controls
- Visualize spillover reduction
Step 4: Traditional gating strategy
- Sequential manual gating approach:
* Lymphocytes (FSC vs SSC)
* Single cells (FSC-A vs FSC-H)
* Live cells (viability dye negative)
* CD3+ T cells, CD19+ B cells, etc.
- Calculate population frequencies
- Export gated populations
Step 5: High-dimensional analysis with Scanpy
- Convert flow data to AnnData format
- Apply variance-stabilizing transformation
- Calculate highly variable markers
- Build neighbor graph
Step 6: Dimensionality reduction
- Run UMAP with umap-learn for visualization
- Optimize UMAP parameters (n_neighbors, min_dist)
- Create 2D embeddings colored by:
* Marker expression
* Sample/patient
* Clinical group
Step 7: Automated clustering
- Apply Leiden or FlowSOM clustering
- Determine optimal cluster resolution
- Assign cell type labels based on marker profiles
- Validate clusters against manual gating
Step 8: Differential abundance analysis
- Compare population frequencies between groups
- Use statistical-analysis for hypothesis testing
- Calculate fold changes and p-values
- Apply multiple testing correction
- Identify significantly altered populations
Step 9: Biomarker discovery
- Train classifiers to predict clinical outcome
- Use scikit-learn Random Forest or SVM
- Calculate feature importance (which populations matter)
- Cross-validate prediction accuracy
- Identify candidate biomarkers
Step 10: Quality metrics and batch effects
- Calculate CV for control samples
- Detect batch effects across acquisition dates
- Apply batch correction if needed
- Generate Levey-Jennings plots for QC
Step 11: Visualization suite
- Traditional flow plots:
* Bivariate dot plots with quadrant gates
* Histogram overlays
* Contour plots
- High-dimensional plots:
* UMAP colored by population
* Heatmaps of marker expression
* Violin plots for marker distributions
- Interactive plots with Plotly
Step 12: Generate clinical flow cytometry report
- Sample information and QC summary
- Gating strategy diagrams
- Population frequency tables
- Reference range comparisons
- Statistical comparisons between groups
- Interpretation and clinical significance
- Export as PDF for clinical review
Expected Output:
- Parsed and compensated flow cytometry data
- Traditional and automated gating results
- High-dimensional clustering and UMAP
- Differential abundance statistics
- Biomarker candidates for clinical outcome
- Publication-quality flow plots
- Clinical flow cytometry report
```
---
## Summary
These examples demonstrate:
1. **Cross-domain applicability**: Skills are useful across many scientific fields
2. **Skill integration**: Complex workflows combine multiple databases, packages, and analysis methods
3. **Real-world relevance**: Examples address actual research questions and clinical needs
4. **End-to-end workflows**: From data acquisition to publication-ready reports
5. **Best practices**: QC, statistical rigor, visualization, interpretation, and documentation
### Skills Coverage Summary
The examples in this document cover the following skill categories:
**Databases & Data Sources:**
- Biological: `chembl-database`, `pubchem-database`, `drugbank-database`, `uniprot-database`, `gene-database`, `ensembl-database`, `clinvar-database`, `cosmic-database`, `string-database`, `kegg-database`, `reactome-database`, `hmdb-database`, `pdb-database`, `alphafold-database`, `zinc-database`, `gwas-database`, `geo-database`, `ena-database`, `cellxgene-census`, `metabolomics-workbench-database`, `brenda-database`, `clinpgx-database`
- Clinical: `clinicaltrials-database`, `fda-database`
- Literature: `pubmed-database`, `openalex-database`, `biorxiv-database`
**Analysis Packages:**
- Chemistry: `rdkit`, `datamol`, `medchem`, `molfeat`, `deepchem`, `torchdrug`, `pytdc`, `diffdock`, `pyopenms`, `matchms`, `cobrapy`
- Genomics: `biopython`, `pysam`, `pydeseq2`, `scanpy`, `scvi-tools`, `anndata`, `gget`, `geniml`, `deeptools`, `etetoolkit`, `scikit-bio`
- Proteins: `esm`, `bioservices`
- Machine Learning: `scikit-learn`, `pytorch-lightning`, `torch_geometric`, `transformers`, `stable-baselines3`, `shap`
- Statistics: `statsmodels`, `statistical-analysis`, `pymc`, `scikit-survival`
- Visualization: `matplotlib`, `seaborn`, `plotly`, `scientific-visualization`
- Data Processing: `polars`, `dask`, `vaex`, `networkx`
- Materials: `pymatgen`
- Physics: `astropy`, `sympy`, `fluidsim`
- Quantum: `qiskit`, `pennylane`, `cirq`, `qutip`
- Neuroscience: `neurokit2`, `neuropixels-analysis`
- Pathology: `histolab`, `pathml`, `pydicom`
- Flow Cytometry: `flowio`
- Dimensionality Reduction & Network Inference: `umap-learn`, `arboreto`
- Lab Automation: `pylabrobot`, `opentrons-integration`, `benchling-integration`, `labarchive-integration`, `protocolsio-integration`
- Simulation: `simpy`, `pymoo`
**Writing & Reporting:**
- `scientific-writing`, `scientific-visualization`, `scientific-schematics`, `scientific-slides`
- `clinical-reports`, `clinical-decision-support`
- `literature-review`, `hypothesis-generation`, `scientific-critical-thinking`
- `research-grants`, `peer-review`
- `document-skills`, `latex-posters`, `pptx-posters`
- `citation-management`, `market-research-reports`
**Image & Media:**
- `generate-image`, `omero-integration`
### How to Use These Examples
1. **Adapt to your needs**: Modify parameters, datasets, and objectives for your specific research question
2. **Combine skills creatively**: Mix and match skills from different categories
3. **Follow the structure**: Each example provides a clear step-by-step workflow
4. **Generate comprehensive output**: Aim for publication-quality figures and professional reports
5. **Cite your sources**: Always verify data and provide proper citations
### Additional Notes
- Always start your prompt with: "Always use available 'skills' when possible. Keep the output organized."
- For complex projects, break into manageable steps and validate intermediate results
- Save checkpoints and intermediate data files
- Document parameters and decisions for reproducibility
- Generate README files explaining methodology
- Create PDFs for stakeholder communication
These examples showcase the power of combining the skills in this repository to tackle complex, real-world scientific challenges across multiple domains.
================================================
FILE: docs/open-source-sponsors.md
================================================
# Support the Open Source Projects We Depend On
Claude Scientific Skills is built on the shoulders of giants. The 139 skills in this repository leverage dozens of incredible open source projects created and maintained by dedicated developers and research communities around the world.
**If you find value in these skills, please consider supporting the underlying open source projects that make them possible.**
---
## How to Support Open Source
1. **Star repositories** on GitHub - It's free and helps projects gain visibility
2. **Sponsor maintainers** directly through GitHub Sponsors, Open Collective, or project-specific donation pages
3. **Contribute** code, documentation, or bug reports
4. **Cite** projects in your publications
5. **Share** projects with colleagues
---
## Featured Projects by Domain
### Bioinformatics & Genomics
| Project | Description | Links |
|---------|-------------|-------|
| **Biopython** | Computational molecular biology toolkit | [GitHub](https://github.com/biopython/biopython) - [Donate](https://numfocus.org/donate-to-biopython) |
| **Scanpy** | Single-cell analysis in Python | [GitHub](https://github.com/scverse/scanpy) - [scverse](https://scverse.org/) |
| **AnnData** | Annotated data matrices for single-cell | [GitHub](https://github.com/scverse/anndata) |
| **scvi-tools** | Deep learning for single-cell omics | [GitHub](https://github.com/scverse/scvi-tools) |
| **Arboreto** | Gene regulatory network inference | [GitHub](https://github.com/aertslab/arboreto) |
| **pysam** | SAM/BAM/VCF file interface | [GitHub](https://github.com/pysam-developers/pysam) |
| **scikit-bio** | Bioinformatics library | [GitHub](https://github.com/scikit-bio/scikit-bio) |
| **gget** | Gene and transcript info retrieval | [GitHub](https://github.com/pachterlab/gget) |
| **deepTools** | Tools for deep-sequencing data | [GitHub](https://github.com/deeptools/deepTools) |
| **ETE Toolkit** | Phylogenetic tree analysis | [GitHub](https://github.com/etetoolkit/ete) |
### Cheminformatics & Drug Discovery
| Project | Description | Links |
|---------|-------------|-------|
| **RDKit** | Cheminformatics toolkit | [GitHub](https://github.com/rdkit/rdkit) - [Donate](https://github.com/sponsors/rdkit) |
| **Datamol** | Molecular manipulation made easy | [GitHub](https://github.com/datamol-io/datamol) |
| **DeepChem** | Deep learning for chemistry | [GitHub](https://github.com/deepchem/deepchem) |
| **TorchDrug** | Drug discovery with PyTorch | [GitHub](https://github.com/DeepGraphLearning/torchdrug) |
| **molfeat** | Molecular featurization | [GitHub](https://github.com/datamol-io/molfeat) |
| **MedChem** | Medicinal chemistry filters | [GitHub](https://github.com/datamol-io/medchem) |
| **PyTDC** | Therapeutics Data Commons | [GitHub](https://github.com/mims-harvard/TDC) |
### Proteomics & Mass Spectrometry
| Project | Description | Links |
|---------|-------------|-------|
| **matchms** | Mass spectrometry data processing | [GitHub](https://github.com/matchms/matchms) |
| **pyOpenMS** | Mass spectrometry toolkit | [GitHub](https://github.com/OpenMS/OpenMS) |
### Machine Learning & AI
| Project | Description | Links |
|---------|-------------|-------|
| **PyTorch Lightning** | Deep learning framework | [GitHub](https://github.com/Lightning-AI/pytorch-lightning) - [Sponsor](https://github.com/sponsors/Lightning-AI) |
| **Transformers** | State-of-the-art NLP | [GitHub](https://github.com/huggingface/transformers) |
| **scikit-learn** | Machine learning in Python | [GitHub](https://github.com/scikit-learn/scikit-learn) - [Donate](https://numfocus.org/donate-to-scikit-learn) |
| **PyTorch Geometric** | Geometric deep learning | [GitHub](https://github.com/pyg-team/pytorch_geometric) |
| **PyMC** | Probabilistic programming | [GitHub](https://github.com/pymc-devs/pymc) - [Donate](https://numfocus.org/donate-to-pymc) |
| **SHAP** | Model interpretability | [GitHub](https://github.com/shap/shap) |
| **Stable Baselines3** | Reinforcement learning | [GitHub](https://github.com/DLR-RM/stable-baselines3) |
| **scikit-survival** | Survival analysis | [GitHub](https://github.com/sebp/scikit-survival) |
| **aeon** | Time series ML toolkit | [GitHub](https://github.com/aeon-toolkit/aeon) |
| **PyMOO** | Multi-objective optimization | [GitHub](https://github.com/anyoptimization/pymoo) |
| **UMAP** | Dimensionality reduction | [GitHub](https://github.com/lmcinnes/umap) |
### Data Science & Visualization
| Project | Description | Links |
|---------|-------------|-------|
| **Matplotlib** | Plotting library | [GitHub](https://github.com/matplotlib/matplotlib) - [Donate](https://numfocus.org/donate-to-matplotlib) |
| **Seaborn** | Statistical visualization | [GitHub](https://github.com/mwaskom/seaborn) |
| **Plotly** | Interactive visualizations | [GitHub](https://github.com/plotly/plotly.py) |
| **NetworkX** | Network analysis | [GitHub](https://github.com/networkx/networkx) - [Donate](https://numfocus.org/donate-to-networkx) |
| **SymPy** | Symbolic mathematics | [GitHub](https://github.com/sympy/sympy) - [Donate](https://numfocus.org/donate-to-sympy) |
| **statsmodels** | Statistical modeling | [GitHub](https://github.com/statsmodels/statsmodels) |
| **GeoPandas** | Geospatial data in Python | [GitHub](https://github.com/geopandas/geopandas) |
| **Polars** | Fast DataFrame library | [GitHub](https://github.com/pola-rs/polars) |
| **Dask** | Parallel computing | [GitHub](https://github.com/dask/dask) - [Donate](https://numfocus.org/donate-to-dask) |
| **Vaex** | Out-of-core DataFrames | [GitHub](https://github.com/vaexio/vaex) |
### Medical Imaging & Digital Pathology
| Project | Description | Links |
|---------|-------------|-------|
| **pydicom** | DICOM file handling | [GitHub](https://github.com/pydicom/pydicom) |
| **histolab** | Digital pathology preprocessing | [GitHub](https://github.com/histolab/histolab) |
| **PathML** | Pathology ML toolkit | [GitHub](https://github.com/Dana-Farber-AIOS/pathml) |
### Healthcare & Clinical
| Project | Description | Links |
|---------|-------------|-------|
| **PyHealth** | Healthcare AI toolkit | [GitHub](https://github.com/sunlabuiuc/PyHealth) |
| **NeuroKit2** | Neurophysiological signal processing | [GitHub](https://github.com/neuropsychology/NeuroKit) |
### Materials Science & Physics
| Project | Description | Links |
|---------|-------------|-------|
| **Pymatgen** | Materials analysis | [GitHub](https://github.com/materialsproject/pymatgen) |
| **COBRApy** | Metabolic modeling | [GitHub](https://github.com/opencobra/cobrapy) |
| **Astropy** | Astronomy library | [GitHub](https://github.com/astropy/astropy) - [Donate](https://numfocus.org/donate-to-astropy) |
### Quantum Computing
| Project | Description | Links |
|---------|-------------|-------|
| **Qiskit** | IBM quantum computing SDK | [GitHub](https://github.com/Qiskit/qiskit) |
| **Cirq** | Google quantum computing | [GitHub](https://github.com/quantumlib/Cirq) |
| **PennyLane** | Quantum ML library | [GitHub](https://github.com/PennyLaneAI/pennylane) |
| **QuTiP** | Quantum toolbox in Python | [GitHub](https://github.com/qutip/qutip) |
### Simulation & Engineering
| Project | Description | Links |
|---------|-------------|-------|
| **SimPy** | Discrete-event simulation | [GitHub](https://github.com/TeamSim/SimPy) |
| **FluidSim** | CFD framework | [GitHub](https://github.com/fluiddyn/fluidsim) |
### Laboratory & Automation
| Project | Description | Links |
|---------|-------------|-------|
| **PyLabRobot** | Lab automation control | [GitHub](https://github.com/PyLabRobot/pylabrobot) |
### Protein Engineering
| Project | Description | Links |
|---------|-------------|-------|
| **ESM** | Evolutionary scale modeling | [GitHub](https://github.com/facebookresearch/esm) |
### Data Formats & I/O
| Project | Description | Links |
|---------|-------------|-------|
| **Zarr** | Chunked array storage | [GitHub](https://github.com/zarr-developers/zarr-python) |
| **FlowIO** | Flow cytometry I/O | [GitHub](https://github.com/whitews/FlowIO) |
---
## NumFOCUS-Sponsored Projects
Many of the projects above are sponsored by [NumFOCUS](https://numfocus.org/), a nonprofit supporting open source scientific computing. Consider [donating to NumFOCUS](https://numfocus.org/donate) to support the broader ecosystem.
**NumFOCUS-sponsored projects in this collection:**
- Biopython
- scikit-learn
- Matplotlib
- NetworkX
- SymPy
- Dask
- Astropy
- PyMC
---
## scverse Ecosystem
The [scverse](https://scverse.org/) consortium maintains foundational tools for single-cell omics:
- Scanpy
- AnnData
- scvi-tools
- And more
Consider supporting their mission to advance single-cell research.
---
## A Note from K-Dense
At K-Dense, we believe in giving back to the communities that make our work possible. We encourage all users of Claude Scientific Skills to:
1. **Acknowledge** these projects when you use them in research
2. **Contribute** back improvements when you can
3. **Support** maintainers financially if you derive commercial value
The open source scientific Python ecosystem is a shared resource. Let's keep it thriving together.
---
*This list is not exhaustive. Many other excellent open source projects power the skills in this repository. If you notice a project that should be listed here, please open a PR!*
================================================
FILE: docs/scientific-skills.md
================================================
# Scientific Skills
## Scientific Databases
- **AlphaFold DB** - Comprehensive AI-predicted protein structure database from DeepMind providing 200M+ high-confidence protein structure predictions covering UniProt reference proteomes and beyond. Includes confidence metrics (pLDDT for per-residue confidence, PAE for pairwise accuracy estimates), structure quality assessment, predicted aligned error matrices, and multiple structure formats (PDB, mmCIF, AlphaFold DB format). Supports programmatic access via REST API, bulk downloads through Google Cloud Storage, and integration with structural analysis tools. Enables structure-based drug discovery, protein function prediction, structural genomics, comparative modeling, and structural bioinformatics research without experimental structure determination
- **BRENDA** - World's most comprehensive enzyme information system containing detailed enzyme data from scientific literature. Query kinetic parameters (Km, kcat, Vmax), reaction equations, substrate specificities, organism information, and optimal conditions for 45,000+ enzymes with millions of kinetic data points via SOAP API. Supports enzyme discovery by substrate/product, cross-organism comparisons, environmental parameter analysis (pH, temperature optima), cofactor requirements, inhibition/activation data, and thermophilic homolog identification. Includes helper scripts for parsing BRENDA response formats, visualization of kinetic parameters, and enzymatic pathway construction. Use cases: metabolic engineering, enzyme engineering and optimization, kinetic modeling, retrosynthesis planning, industrial enzyme selection, and biochemical research requiring comprehensive enzyme kinetic data
- **ChEMBL** - Comprehensive manually curated database of bioactive molecules with drug-like properties maintained by EMBL-EBI. Contains 2M+ unique compounds, 19M+ bioactivity measurements, 13K+ protein targets, and 1.1M+ assays from 90K+ publications. Provides detailed compound information including chemical structures (SMILES, InChI), bioactivity data (IC50, EC50, Ki, Kd values), target information (protein families, pathways), ADMET properties, drug indications, clinical trial data, and patent information. Features REST API access, web interface, downloadable data files, and integration with other databases (UniProt, PubChem, DrugBank). Use cases: drug discovery, target identification, lead optimization, bioactivity prediction, chemical biology research, and drug repurposing
- **ClinPGx** - Clinical pharmacogenomics database (successor to PharmGKB) providing gene-drug interactions, CPIC clinical guidelines, allele functions, drug labels, and pharmacogenomic annotations for precision medicine and personalized pharmacotherapy (consolidates PharmGKB, CPIC, and PharmCAT resources)
- **ClinVar** - NCBI's public archive of genomic variants and their clinical significance with standardized classifications (pathogenic, benign, VUS), E-utilities API access, and bulk FTP downloads for variant interpretation and precision medicine research
- **ClinicalTrials.gov** - Comprehensive registry of clinical studies conducted worldwide (maintained by U.S. National Library of Medicine) with API v2 access for searching trials by condition, intervention, location, sponsor, study status, and phase; retrieve detailed trial information including eligibility criteria, outcomes, contacts, and locations; export to CSV/JSON formats for analysis (public API, no authentication required, ~50 req/min rate limit)
- **COSMIC** - Catalogue of Somatic Mutations in Cancer, the world's largest database of somatic cancer mutations (millions of mutations across thousands of cancer types, Cancer Gene Census, mutational signatures, structural variants, and drug resistance data)
- **DrugBank** - Comprehensive bioinformatics and cheminformatics database containing detailed drug and drug target information (9,591+ drug entries including 2,037 FDA-approved small molecules, 241 biotech drugs, 96 nutraceuticals, 6,000+ experimental compounds) with 200+ data fields per entry covering chemical structures (SMILES, InChI), pharmacology (mechanism of action, pharmacodynamics, ADME), drug-drug interactions, protein targets (enzymes, transporters, carriers), biological pathways, external identifiers (PubChem, ChEMBL, UniProt), and physicochemical properties for drug discovery, pharmacology research, interaction analysis, target identification, chemical similarity searches, and ADMET predictions
- **ENA (European Nucleotide Archive)** - Comprehensive public repository for nucleotide sequence data and metadata with REST APIs for accessing sequences, assemblies, samples, studies, and reads; supports advanced search, taxonomy lookups, and bulk downloads via FTP/Aspera (rate limit: 50 req/sec)
- **Ensembl** - Genome browser and bioinformatics database providing genomic annotations, sequences, variants, and comparative genomics data for 250+ vertebrate species (Release 115, 2025) with comprehensive REST API for gene lookups, sequence retrieval, variant effect prediction (VEP), ortholog finding, assembly mapping (GRCh37/GRCh38), and region analysis
- **FDA Databases** - Comprehensive access to all FDA (Food and Drug Administration) regulatory databases through openFDA API covering drugs (adverse events, labeling, NDC, recalls, approvals, shortages), medical devices (adverse events, 510k clearances, PMA, UDI, classifications), foods (recalls, adverse events, allergen tracking), animal/veterinary medicines (species-specific adverse events), and substances (UNII/CAS lookup, chemical structures, molecular data) for drug safety research, pharmacovigilance, regulatory compliance, and scientific analysis
- **FRED Economic Data** - Query FRED (Federal Reserve Economic Data) API for 800,000+ economic time series from 100+ sources including GDP, unemployment, inflation, interest rates, exchange rates, housing, and regional data. Supports macroeconomic analysis, financial research, policy studies, economic forecasting, and academic research. Features data transformations (percent change, log), frequency aggregation, vintage/ALFRED historical data access, release calendars, GeoFRED regional mapping, and comprehensive search/discovery by tags and categories
- **U.S. Treasury Fiscal Data (usfiscaldata)** - Free, open REST API from the U.S. Department of the Treasury providing 54 datasets and 182 data tables covering federal fiscal data. No API key required. Access national debt (Debt to the Penny back to 1993, Historical Debt back to 1790), Daily Treasury Statements (TGA balances, deposits/withdrawals), Monthly Treasury Statements (federal budget receipts and outlays), Treasury securities auctions data (bills, notes, bonds, TIPS, FRNs since 1979), average interest rates on Treasury securities, Treasury reporting exchange rates (quarterly for 170+ currencies), I Bond and savings bond rates, TIPS/CPI data, and more. Supports filtering, sorting, pagination, and CSV/XML/JSON output formats
- **OFR Hedge Fund Monitor (hedgefundmonitor)** - Free, open REST API from the U.S. Office of Financial Research providing aggregated hedge fund time series data with no API key or registration required. Access 300+ series across four datasets: SEC Form PF (quarterly aggregated stats from Qualifying Hedge Funds covering leverage, size, counterparties, liquidity, complexity, and risk management stress tests from 2013), CFTC Traders in Financial Futures (monthly futures positioning data), FRB SCOOS (quarterly dealer financing survey), and FICC Sponsored Repo Service Volumes (monthly). Supports date filtering, periodicity resampling (daily, weekly, monthly, quarterly, annual), aggregation methods, spread calculations between series, category CSV downloads, full-text metadata search, and mnemonic discovery
- **GEO (Gene Expression Omnibus)** - NCBI's comprehensive public repository for high-throughput gene expression and functional genomics data. Contains 264K+ studies, 8M+ samples, and petabytes of data from microarray, RNA-seq, ChIP-seq, ATAC-seq, and other high-throughput experiments. Provides standardized data submission formats (MINIML, SOFT), programmatic access via Entrez Programming Utilities (E-utilities) and GEOquery R package, bulk FTP downloads, and web-based search and retrieval. Supports data mining, meta-analysis, differential expression analysis, and cross-study comparisons. Includes curated datasets, series records with experimental design, platform annotations, and sample metadata. Use cases: gene expression analysis, biomarker discovery, disease mechanism research, drug response studies, and functional genomics research
- **GWAS Catalog** - NHGRI-EBI catalog of published genome-wide association studies with curated SNP-trait associations (thousands of studies, genome-wide significant associations p≤5×10⁻⁸), full summary statistics, REST API access for variant/trait/gene queries, and FTP downloads for genetic epidemiology and precision medicine research
- **HMDB (Human Metabolome Database)** - Comprehensive metabolomics resource with 220K+ metabolite entries, detailed chemical/biological data, concentration ranges, disease associations, pathways, and spectral data for metabolite identification and biomarker discovery
- **KEGG** - Kyoto Encyclopedia of Genes and Genomes, comprehensive database resource integrating genomic, chemical, and systemic functional information. Provides pathway databases (KEGG PATHWAY with 500+ reference pathways, metabolic pathways, signaling pathways, disease pathways), genome databases (KEGG GENES with gene catalogs from 5,000+ organisms), chemical databases (KEGG COMPOUND, KEGG GLYCAN), and disease/drug databases (KEGG DISEASE, KEGG DRUG). Features pathway enrichment analysis, gene-to-pathway mapping, compound searches, molecular interaction networks, ortholog identification (KO - KEGG Orthology), ID conversion across databases, and visualization tools. Supports REST API access, KEGG Mapper for pathway mapping, and integration with bioinformatics tools. Use cases: pathway enrichment analysis, metabolic pathway reconstruction, drug target identification, comparative genomics, systems biology, and functional annotation of genes
- **Metabolomics Workbench** - NIH Common Fund metabolomics data repository with 4,200+ processed studies, standardized nomenclature (RefMet), mass spectrometry searches, and comprehensive REST API for accessing metabolite structures, study metadata, experimental results, and gene/protein-metabolite associations
- **OpenAlex** - Comprehensive open catalog of 240M+ scholarly works, authors, institutions, topics, sources, publishers, and funders. Provides complete bibliometric database for academic literature search, citation analysis, research trend tracking, author publication discovery, institution research output analysis, and open access paper identification. Features REST API with no authentication required (100k requests/day, 10 req/sec with email), advanced filtering (publication year, citations, open access status, topics, authors, institutions), aggregation/grouping capabilities, random sampling for research studies, batch ID lookups (DOI, ORCID, ROR, ISSN), and comprehensive metadata (titles, abstracts, citations, authorships, topics, funding). Supports literature reviews, bibliometric analysis, research output evaluation, citation network analysis, and academic database queries across all scientific domains
- **Open Targets** - Comprehensive therapeutic target identification and validation platform integrating genetics, omics, and chemical data (200M+ evidence strings, target-disease associations with scoring, tractability assessments, safety liabilities, known drugs from ChEMBL, GraphQL API) for drug target discovery, prioritization, evidence evaluation, drug repurposing, competitive intelligence, and mechanism research
- **NCBI Gene** - Comprehensive gene-specific database from NCBI providing curated information about genes from 500+ organisms. Contains gene nomenclature (official symbols, aliases, full names), genomic locations (chromosomal positions, exons, introns), sequences (genomic, mRNA, protein), gene function and phenotypes, pathways and interactions, orthologs and paralogs, variation data (SNPs, mutations), expression data, and cross-references to 200+ external databases (UniProt, Ensembl, HGNC, OMIM, Reactome). Supports programmatic access via E-utilities API (Entrez Programming Utilities) and NCBI Datasets API, bulk downloads, and web interface. Enables gene annotation, comparative genomics, variant interpretation, pathway analysis, and integration with other NCBI resources (PubMed, dbSNP, ClinVar). Use cases: gene information retrieval, variant annotation, functional genomics, disease gene discovery, and bioinformatics workflows
- **Protein Data Bank (PDB)** - Worldwide repository for 3D structural data of proteins, nucleic acids, and biological macromolecules. Contains 200K+ experimentally determined structures from X-ray crystallography, NMR spectroscopy, and cryo-electron microscopy. Provides comprehensive structure information including atomic coordinates, experimental data, structure quality metrics, ligand binding sites, protein-protein interfaces, and metadata (authors, methods, citations). Features advanced search capabilities (by sequence, structure similarity, ligand, organism, resolution), REST API and FTP access, structure visualization tools, and integration with analysis software. Supports structure comparison, homology modeling, drug design, structural biology research, and educational use. Maintained by wwPDB consortium (RCSB PDB, PDBe, PDBj, BMRB). Use cases: structural biology research, drug discovery, protein engineering, molecular modeling, and structural bioinformatics
- **PubChem** - World's largest free chemical information database maintained by NCBI. Contains 110M+ unique chemical compounds, 270M+ bioactivity test results, 300M+ chemical structures, and 1M+ patents. Provides comprehensive compound information including chemical structures (2D/3D structures, SMILES, InChI), physicochemical properties (molecular weight, logP, H-bond donors/acceptors), bioactivity data (assays, targets, pathways), safety and toxicity data, literature references, and vendor information. Features REST API (PUG REST, PUG SOAP, PUG View), web interface with advanced search, bulk downloads, and integration with other NCBI resources. Supports chemical similarity searches, substructure searches, property-based filtering, and cheminformatics analysis. Use cases: drug discovery, chemical biology, lead identification, ADMET prediction, chemical database mining, and molecular property analysis
- **PubMed** - NCBI's comprehensive biomedical literature database containing 35M+ citations from MEDLINE, life science journals, and online books. Provides access to abstracts, full-text articles (when available), MeSH (Medical Subject Headings) terms, author information, publication dates, and citation networks. Features advanced search capabilities with Boolean operators, field tags (author, title, journal, MeSH terms, publication date), filters (article type, species, language, publication date range), and saved searches with email alerts. Supports programmatic access via E-utilities API (Entrez Programming Utilities), bulk downloads, citation export in multiple formats (RIS, BibTeX, MEDLINE), and integration with reference management software. Includes PubMed Central (PMC) for open-access full-text articles. Use cases: literature searches, systematic reviews, citation analysis, research discovery, and staying current with scientific publications
- **Reactome** - Curated pathway database for biological processes and molecular interactions (2,825+ human pathways, 16K+ reactions, 11K+ proteins) with pathway enrichment analysis, expression data analysis, and species comparison using Content Service and Analysis Service APIs
- **STRING** - Protein-protein interaction network database (5,000+ genomes, 59.3M proteins, 20B+ interactions) with functional enrichment analysis, interaction partner discovery, and network visualization from experimental data, computational prediction, and text-mining
- **UniProt** - Universal Protein Resource for protein sequences, annotations, and functional information (UniProtKB/Swiss-Prot reviewed entries, TrEMBL unreviewed entries) with REST API access for search, retrieval, ID mapping, and batch operations across 200+ databases
- **USPTO** - United States Patent and Trademark Office data access including patent searches, trademark lookups, patent examination history (PEDS), office actions, assignments, citations, and litigation records; supports PatentSearch API (ElasticSearch-based patent search), TSDR (Trademark Status & Document Retrieval), Patent/Trademark Assignment APIs, and additional specialized APIs for comprehensive IP analysis
- **ZINC** - Free database of commercially-available compounds for virtual screening and drug discovery maintained by UCSF. Contains 230M+ purchasable compounds from 100+ vendors in ready-to-dock 3D formats (SDF, MOL2) with pre-computed conformers. Provides compound information including chemical structures, vendor information and pricing, physicochemical properties (molecular weight, logP, H-bond donors/acceptors, rotatable bonds), drug-likeness filters (Lipinski's Rule of Five, Veber rules), and substructure search capabilities. Features multiple compound subsets (drug-like, lead-like, fragment-like, natural products), downloadable subsets for specific screening campaigns, and integration with molecular docking software (AutoDock, DOCK, Glide). Supports structure-based and ligand-based virtual screening workflows. Use cases: virtual screening campaigns, lead identification, compound library design, high-throughput docking, and drug discovery research
- **bioRxiv** - Preprint server for the life sciences, accessed through Python-based tools for searching and retrieving preprints. Supports comprehensive searches by keywords, authors, date ranges, and subject categories, returning structured JSON metadata including titles, abstracts, DOIs, and citation information. Features PDF downloads for full-text analysis, filtering by bioRxiv subject categories (neuroscience, bioinformatics, genomics, etc.), and integration with literature review workflows. Use cases: tracking recent preprints, conducting systematic literature reviews, analyzing research trends, monitoring publications by specific authors, and staying current with emerging research before formal peer review
## Scientific Integrations
### Laboratory Information Management Systems (LIMS) & R&D Platforms
- **Benchling Integration** - Toolkit for integrating with Benchling's R&D platform, providing programmatic access to laboratory data management including registry entities (DNA sequences, proteins), inventory systems (samples, containers, locations), electronic lab notebooks (entries, protocols), workflows (tasks, automation), and data exports using Python SDK and REST API
### Cloud Platforms for Genomics & Biomedical Data
- **DNAnexus Integration** - Comprehensive toolkit for working with the DNAnexus cloud platform for genomics and biomedical data analysis. Covers building and deploying apps/applets (Python/Bash), managing data objects (files, records, databases), running analyses and workflows, using the dxpy Python SDK, and configuring app metadata and dependencies (dxapp.json setup, system packages, Docker, assets). Enables processing of FASTQ/BAM/VCF files, bioinformatics pipelines, job execution, workflow orchestration, and platform operations including project management and permissions
### Laboratory Automation
- **Opentrons Integration** - Toolkit for creating, editing, and debugging Opentrons Python Protocol API v2 protocols for laboratory automation using Flex and OT-2 robots. Enables automated liquid handling, pipetting workflows, hardware module control (thermocycler, temperature, magnetic, heater-shaker, absorbance plate reader), labware management, and complex protocol development for biological and chemical experiments
- **Ginkgo Cloud Lab** - Submit and manage protocols on Ginkgo Bioworks Cloud Lab (cloud.ginkgo.bio), a web-based interface for autonomous lab execution on Reconfigurable Automation Carts (RACs). Supports three protocols: Cell Free Protein Expression Validation ($39/sample, 5-10 day turnaround), Cell Free Protein Expression Optimization ($199/sample, DoE across 24 conditions, 6-11 days), and Fluorescent Pixel Art Generation ($25/plate, bacterial artwork with 11 fluorescent E. coli strains, 5-7 days). Includes EstiMate AI agent for custom protocol feasibility and pricing
### Electronic Lab Notebooks (ELN)
- **LabArchives Integration** - Toolkit for interacting with LabArchives Electronic Lab Notebook (ELN) REST API. Provides programmatic access to notebooks (backup, retrieval, management), entries (creation, comments, attachments), user authentication, site reports and analytics, and third-party integrations (Protocols.io, GraphPad Prism, SnapGene, Geneious, Jupyter, REDCap). Includes Python scripts for configuration setup, notebook operations, and entry management. Supports multi-regional API endpoints (US, UK, Australia) and OAuth authentication
### Workflow Platforms & Cloud Execution
- **LatchBio Integration** - Integration with the Latch platform for building, deploying, and executing bioinformatics workflows. Provides comprehensive support for creating serverless bioinformatics pipelines using Python decorators, deploying Nextflow/Snakemake pipelines, managing cloud data (LatchFile, LatchDir) and structured Registry (Projects, Tables, Records), configuring computational resources (CPU, GPU, memory, storage), and using pre-built Latch Verified workflows (RNA-seq, AlphaFold, DESeq2, single-cell analysis, CRISPR editing). Enables automatic containerization, UI generation, workflow versioning, and execution on scalable clou
gitextract_rdf422j9/
├── .claude-plugin/
│ └── marketplace.json
├── .gitattributes
├── .github/
│ └── workflows/
│ └── release.yml
├── .gitignore
├── LICENSE.md
├── README.md
├── docs/
│ ├── examples.md
│ ├── open-source-sponsors.md
│ └── scientific-skills.md
└── scientific-skills/
├── adaptyv/
│ ├── SKILL.md
│ └── reference/
│ ├── api_reference.md
│ ├── examples.md
│ ├── experiments.md
│ └── protein_optimization.md
├── aeon/
│ ├── SKILL.md
│ └── references/
│ ├── anomaly_detection.md
│ ├── classification.md
│ ├── clustering.md
│ ├── datasets_benchmarking.md
│ ├── distances.md
│ ├── forecasting.md
│ ├── networks.md
│ ├── regression.md
│ ├── segmentation.md
│ ├── similarity_search.md
│ └── transformations.md
├── alpha-vantage/
│ ├── SKILL.md
│ └── references/
│ ├── commodities.md
│ ├── economic-indicators.md
│ ├── forex-crypto.md
│ ├── fundamentals.md
│ ├── intelligence.md
│ ├── options.md
│ ├── technical-indicators.md
│ └── time-series.md
├── alphafold-database/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── anndata/
│ ├── SKILL.md
│ └── references/
│ ├── best_practices.md
│ ├── concatenation.md
│ ├── data_structure.md
│ ├── io_operations.md
│ └── manipulation.md
├── arboreto/
│ ├── SKILL.md
│ ├── references/
│ │ ├── algorithms.md
│ │ ├── basic_inference.md
│ │ └── distributed_computing.md
│ └── scripts/
│ └── basic_grn_inference.py
├── arxiv-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ └── arxiv_search.py
├── astropy/
│ ├── SKILL.md
│ └── references/
│ ├── coordinates.md
│ ├── cosmology.md
│ ├── fits.md
│ ├── tables.md
│ ├── time.md
│ ├── units.md
│ └── wcs_and_other_modules.md
├── benchling-integration/
│ ├── SKILL.md
│ └── references/
│ ├── api_endpoints.md
│ ├── authentication.md
│ └── sdk_reference.md
├── bgpt-paper-search/
│ └── SKILL.md
├── bindingdb-database/
│ ├── SKILL.md
│ └── references/
│ └── affinity_queries.md
├── biopython/
│ ├── SKILL.md
│ └── references/
│ ├── advanced.md
│ ├── alignment.md
│ ├── blast.md
│ ├── databases.md
│ ├── phylogenetics.md
│ ├── sequence_io.md
│ └── structure.md
├── biorxiv-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ └── biorxiv_search.py
├── bioservices/
│ ├── SKILL.md
│ ├── references/
│ │ ├── identifier_mapping.md
│ │ ├── services_reference.md
│ │ └── workflow_patterns.md
│ └── scripts/
│ ├── batch_id_converter.py
│ ├── compound_cross_reference.py
│ ├── pathway_analysis.py
│ └── protein_analysis_workflow.py
├── brenda-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ ├── brenda_queries.py
│ ├── brenda_visualization.py
│ └── enzyme_pathway_builder.py
├── cbioportal-database/
│ ├── SKILL.md
│ └── references/
│ └── study_exploration.md
├── cellxgene-census/
│ ├── SKILL.md
│ └── references/
│ ├── census_schema.md
│ └── common_patterns.md
├── chembl-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ └── example_queries.py
├── cirq/
│ ├── SKILL.md
│ └── references/
│ ├── building.md
│ ├── experiments.md
│ ├── hardware.md
│ ├── noise.md
│ ├── simulation.md
│ └── transformation.md
├── citation-management/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── bibtex_template.bib
│ │ └── citation_checklist.md
│ ├── references/
│ │ ├── bibtex_formatting.md
│ │ ├── citation_validation.md
│ │ ├── google_scholar_search.md
│ │ ├── metadata_extraction.md
│ │ └── pubmed_search.md
│ └── scripts/
│ ├── doi_to_bibtex.py
│ ├── extract_metadata.py
│ ├── format_bibtex.py
│ ├── search_google_scholar.py
│ ├── search_pubmed.py
│ └── validate_citations.py
├── clinical-decision-support/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── biomarker_report_template.tex
│ │ ├── clinical_pathway_template.tex
│ │ ├── cohort_analysis_template.tex
│ │ ├── color_schemes.tex
│ │ ├── example_gbm_cohort.md
│ │ ├── recommendation_strength_guide.md
│ │ └── treatment_recommendation_template.tex
│ ├── references/
│ │ ├── README.md
│ │ ├── biomarker_classification.md
│ │ ├── clinical_decision_algorithms.md
│ │ ├── evidence_synthesis.md
│ │ ├── outcome_analysis.md
│ │ ├── patient_cohort_analysis.md
│ │ └── treatment_recommendations.md
│ └── scripts/
│ ├── biomarker_classifier.py
│ ├── build_decision_tree.py
│ ├── create_cohort_tables.py
│ ├── generate_survival_analysis.py
│ └── validate_cds_document.py
├── clinical-reports/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── case_report_template.md
│ │ ├── clinical_trial_csr_template.md
│ │ ├── clinical_trial_sae_template.md
│ │ ├── consult_note_template.md
│ │ ├── discharge_summary_template.md
│ │ ├── hipaa_compliance_checklist.md
│ │ ├── history_physical_template.md
│ │ ├── lab_report_template.md
│ │ ├── pathology_report_template.md
│ │ ├── quality_checklist.md
│ │ ├── radiology_report_template.md
│ │ └── soap_note_template.md
│ ├── references/
│ │ ├── README.md
│ │ ├── case_report_guidelines.md
│ │ ├── clinical_trial_reporting.md
│ │ ├── data_presentation.md
│ │ ├── diagnostic_reports_standards.md
│ │ ├── medical_terminology.md
│ │ ├── patient_documentation.md
│ │ ├── peer_review_standards.md
│ │ └── regulatory_compliance.md
│ └── scripts/
│ ├── check_deidentification.py
│ ├── compliance_checker.py
│ ├── extract_clinical_data.py
│ ├── format_adverse_events.py
│ ├── generate_report_template.py
│ ├── terminology_validator.py
│ ├── validate_case_report.py
│ └── validate_trial_report.py
├── clinicaltrials-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ └── query_clinicaltrials.py
├── clinpgx-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ └── query_clinpgx.py
├── clinvar-database/
│ ├── SKILL.md
│ └── references/
│ ├── api_reference.md
│ ├── clinical_significance.md
│ └── data_formats.md
├── cobrapy/
│ ├── SKILL.md
│ └── references/
│ ├── api_quick_reference.md
│ └── workflows.md
├── consciousness-council/
│ ├── SKILL.md
│ └── references/
│ └── advanced-configurations.md
├── cosmic-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── cosmic_data_reference.md
│ └── scripts/
│ └── download_cosmic.py
├── dask/
│ ├── SKILL.md
│ └── references/
│ ├── arrays.md
│ ├── bags.md
│ ├── best-practices.md
│ ├── dataframes.md
│ ├── futures.md
│ └── schedulers.md
├── datacommons-client/
│ ├── SKILL.md
│ └── references/
│ ├── getting_started.md
│ ├── node.md
│ ├── observation.md
│ └── resolve.md
├── datamol/
│ ├── SKILL.md
│ └── references/
│ ├── conformers_module.md
│ ├── core_api.md
│ ├── descriptors_viz.md
│ ├── fragments_scaffolds.md
│ ├── io_module.md
│ └── reactions_data.md
├── deepchem/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ └── workflows.md
│ └── scripts/
│ ├── graph_neural_network.py
│ ├── predict_solubility.py
│ └── transfer_learning.py
├── deeptools/
│ ├── SKILL.md
│ ├── assets/
│ │ └── quick_reference.md
│ ├── references/
│ │ ├── effective_genome_sizes.md
│ │ ├── normalization_methods.md
│ │ ├── tools_reference.md
│ │ └── workflows.md
│ └── scripts/
│ ├── validate_files.py
│ └── workflow_generator.py
├── denario/
│ ├── SKILL.md
│ └── references/
│ ├── examples.md
│ ├── installation.md
│ ├── llm_configuration.md
│ └── research_pipeline.md
├── depmap/
│ ├── SKILL.md
│ └── references/
│ └── dependency_analysis.md
├── dhdna-profiler/
│ ├── SKILL.md
│ └── references/
│ └── advanced-profiling.md
├── diffdock/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── batch_template.csv
│ │ └── custom_inference_config.yaml
│ ├── references/
│ │ ├── confidence_and_limitations.md
│ │ ├── parameters_reference.md
│ │ └── workflows_examples.md
│ └── scripts/
│ ├── analyze_results.py
│ ├── prepare_batch_csv.py
│ └── setup_check.py
├── dnanexus-integration/
│ ├── SKILL.md
│ └── references/
│ ├── app-development.md
│ ├── configuration.md
│ ├── data-operations.md
│ ├── job-execution.md
│ └── python-sdk.md
├── docx/
│ ├── LICENSE.txt
│ ├── SKILL.md
│ └── scripts/
│ ├── __init__.py
│ ├── accept_changes.py
│ ├── comment.py
│ ├── office/
│ │ ├── helpers/
│ │ │ ├── __init__.py
│ │ │ ├── merge_runs.py
│ │ │ └── simplify_redlines.py
│ │ ├── pack.py
│ │ ├── schemas/
│ │ │ ├── ISO-IEC29500-4_2016/
│ │ │ │ ├── dml-chart.xsd
│ │ │ │ ├── dml-chartDrawing.xsd
│ │ │ │ ├── dml-diagram.xsd
│ │ │ │ ├── dml-lockedCanvas.xsd
│ │ │ │ ├── dml-main.xsd
│ │ │ │ ├── dml-picture.xsd
│ │ │ │ ├── dml-spreadsheetDrawing.xsd
│ │ │ │ ├── dml-wordprocessingDrawing.xsd
│ │ │ │ ├── pml.xsd
│ │ │ │ ├── shared-additionalCharacteristics.xsd
│ │ │ │ ├── shared-bibliography.xsd
│ │ │ │ ├── shared-commonSimpleTypes.xsd
│ │ │ │ ├── shared-customXmlDataProperties.xsd
│ │ │ │ ├── shared-customXmlSchemaProperties.xsd
│ │ │ │ ├── shared-documentPropertiesCustom.xsd
│ │ │ │ ├── shared-documentPropertiesExtended.xsd
│ │ │ │ ├── shared-documentPropertiesVariantTypes.xsd
│ │ │ │ ├── shared-math.xsd
│ │ │ │ ├── shared-relationshipReference.xsd
│ │ │ │ ├── sml.xsd
│ │ │ │ ├── vml-main.xsd
│ │ │ │ ├── vml-officeDrawing.xsd
│ │ │ │ ├── vml-presentationDrawing.xsd
│ │ │ │ ├── vml-spreadsheetDrawing.xsd
│ │ │ │ ├── vml-wordprocessingDrawing.xsd
│ │ │ │ ├── wml.xsd
│ │ │ │ └── xml.xsd
│ │ │ ├── ecma/
│ │ │ │ └── fouth-edition/
│ │ │ │ ├── opc-contentTypes.xsd
│ │ │ │ ├── opc-coreProperties.xsd
│ │ │ │ ├── opc-digSig.xsd
│ │ │ │ └── opc-relationships.xsd
│ │ │ ├── mce/
│ │ │ │ └── mc.xsd
│ │ │ └── microsoft/
│ │ │ ├── wml-2010.xsd
│ │ │ ├── wml-2012.xsd
│ │ │ ├── wml-2018.xsd
│ │ │ ├── wml-cex-2018.xsd
│ │ │ ├── wml-cid-2016.xsd
│ │ │ ├── wml-sdtdatahash-2020.xsd
│ │ │ └── wml-symex-2015.xsd
│ │ ├── soffice.py
│ │ ├── unpack.py
│ │ ├── validate.py
│ │ └── validators/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── docx.py
│ │ ├── pptx.py
│ │ └── redlining.py
│ └── templates/
│ ├── comments.xml
│ ├── commentsExtended.xml
│ ├── commentsExtensible.xml
│ ├── commentsIds.xml
│ └── people.xml
├── drugbank-database/
│ ├── SKILL.md
│ ├── references/
│ │ ├── chemical-analysis.md
│ │ ├── data-access.md
│ │ ├── drug-queries.md
│ │ ├── interactions.md
│ │ └── targets-pathways.md
│ └── scripts/
│ └── drugbank_helper.py
├── edgartools/
│ ├── SKILL.md
│ └── references/
│ ├── ai-integration.md
│ ├── companies.md
│ ├── data-objects.md
│ ├── entity-facts.md
│ ├── filings.md
│ ├── financial-data.md
│ └── xbrl.md
├── ena-database/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── ensembl-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_endpoints.md
│ └── scripts/
│ └── ensembl_query.py
├── esm/
│ ├── SKILL.md
│ └── references/
│ ├── esm-c-api.md
│ ├── esm3-api.md
│ ├── forge-api.md
│ └── workflows.md
├── etetoolkit/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── visualization.md
│ │ └── workflows.md
│ └── scripts/
│ ├── quick_visualize.py
│ └── tree_operations.py
├── exploratory-data-analysis/
│ ├── SKILL.md
│ ├── assets/
│ │ └── report_template.md
│ ├── references/
│ │ ├── bioinformatics_genomics_formats.md
│ │ ├── chemistry_molecular_formats.md
│ │ ├── general_scientific_formats.md
│ │ ├── microscopy_imaging_formats.md
│ │ ├── proteomics_metabolomics_formats.md
│ │ └── spectroscopy_analytical_formats.md
│ └── scripts/
│ └── eda_analyzer.py
├── fda-database/
│ ├── SKILL.md
│ ├── references/
│ │ ├── animal_veterinary.md
│ │ ├── api_basics.md
│ │ ├── devices.md
│ │ ├── drugs.md
│ │ ├── foods.md
│ │ └── other.md
│ └── scripts/
│ ├── fda_examples.py
│ └── fda_query.py
├── flowio/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── fluidsim/
│ ├── SKILL.md
│ └── references/
│ ├── advanced_features.md
│ ├── installation.md
│ ├── output_analysis.md
│ ├── parameters.md
│ ├── simulation_workflow.md
│ └── solvers.md
├── fred-economic-data/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_basics.md
│ │ ├── categories.md
│ │ ├── geofred.md
│ │ ├── releases.md
│ │ ├── series.md
│ │ ├── sources.md
│ │ └── tags.md
│ └── scripts/
│ ├── fred_examples.py
│ └── fred_query.py
├── gene-database/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ └── common_workflows.md
│ └── scripts/
│ ├── batch_gene_lookup.py
│ ├── fetch_gene_data.py
│ └── query_gene.py
├── generate-image/
│ ├── SKILL.md
│ └── scripts/
│ └── generate_image.py
├── geniml/
│ ├── SKILL.md
│ └── references/
│ ├── bedspace.md
│ ├── consensus_peaks.md
│ ├── region2vec.md
│ ├── scembed.md
│ └── utilities.md
├── geo-database/
│ ├── SKILL.md
│ └── references/
│ └── geo_reference.md
├── geomaster/
│ ├── README.md
│ ├── SKILL.md
│ └── references/
│ ├── advanced-gis.md
│ ├── big-data.md
│ ├── code-examples.md
│ ├── coordinate-systems.md
│ ├── core-libraries.md
│ ├── data-sources.md
│ ├── gis-software.md
│ ├── industry-applications.md
│ ├── machine-learning.md
│ ├── programming-languages.md
│ ├── remote-sensing.md
│ ├── scientific-domains.md
│ ├── specialized-topics.md
│ └── troubleshooting.md
├── geopandas/
│ ├── SKILL.md
│ └── references/
│ ├── crs-management.md
│ ├── data-io.md
│ ├── data-structures.md
│ ├── geometric-operations.md
│ ├── spatial-analysis.md
│ └── visualization.md
├── get-available-resources/
│ ├── SKILL.md
│ └── scripts/
│ └── detect_resources.py
├── gget/
│ ├── SKILL.md
│ ├── references/
│ │ ├── database_info.md
│ │ ├── module_reference.md
│ │ └── workflows.md
│ └── scripts/
│ ├── batch_sequence_analysis.py
│ ├── enrichment_pipeline.py
│ └── gene_analysis.py
├── ginkgo-cloud-lab/
│ ├── SKILL.md
│ └── references/
│ ├── cell-free-protein-expression-optimization.md
│ ├── cell-free-protein-expression-validation.md
│ └── fluorescent-pixel-art-generation.md
├── glycoengineering/
│ ├── SKILL.md
│ └── references/
│ └── glycan_databases.md
├── gnomad-database/
│ ├── SKILL.md
│ └── references/
│ ├── graphql_queries.md
│ └── variant_interpretation.md
├── gtars/
│ ├── SKILL.md
│ └── references/
│ ├── cli.md
│ ├── coverage.md
│ ├── overlap.md
│ ├── python-api.md
│ ├── refget.md
│ └── tokenizers.md
├── gtex-database/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── gwas-database/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── hedgefundmonitor/
│ ├── SKILL.md
│ └── references/
│ ├── api-overview.md
│ ├── datasets.md
│ ├── endpoints-combined.md
│ ├── endpoints-metadata.md
│ ├── endpoints-series-data.md
│ ├── examples.md
│ └── parameters.md
├── histolab/
│ ├── SKILL.md
│ └── references/
│ ├── filters_preprocessing.md
│ ├── slide_management.md
│ ├── tile_extraction.md
│ ├── tissue_masks.md
│ └── visualization.md
├── hmdb-database/
│ ├── SKILL.md
│ └── references/
│ └── hmdb_data_fields.md
├── hypogenic/
│ ├── SKILL.md
│ └── references/
│ └── config_template.yaml
├── hypothesis-generation/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── FORMATTING_GUIDE.md
│ │ ├── hypothesis_generation.sty
│ │ └── hypothesis_report_template.tex
│ └── references/
│ ├── experimental_design_patterns.md
│ ├── hypothesis_quality_criteria.md
│ └── literature_search_strategies.md
├── imaging-data-commons/
│ ├── SKILL.md
│ └── references/
│ ├── bigquery_guide.md
│ ├── cli_guide.md
│ ├── clinical_data_guide.md
│ ├── cloud_storage_guide.md
│ ├── dicomweb_guide.md
│ ├── digital_pathology_guide.md
│ ├── index_tables_guide.md
│ ├── sql_patterns.md
│ └── use_cases.md
├── infographics/
│ ├── SKILL.md
│ ├── references/
│ │ ├── color_palettes.md
│ │ ├── design_principles.md
│ │ └── infographic_types.md
│ └── scripts/
│ ├── generate_infographic.py
│ └── generate_infographic_ai.py
├── interpro-database/
│ ├── SKILL.md
│ └── references/
│ └── domain_analysis.md
├── iso-13485-certification/
│ ├── SKILL.md
│ ├── assets/
│ │ └── templates/
│ │ ├── procedures/
│ │ │ ├── CAPA-procedure-template.md
│ │ │ └── document-control-procedure-template.md
│ │ └── quality-manual-template.md
│ ├── references/
│ │ ├── gap-analysis-checklist.md
│ │ ├── iso-13485-requirements.md
│ │ ├── mandatory-documents.md
│ │ └── quality-manual-guide.md
│ └── scripts/
│ └── gap_analyzer.py
├── jaspar-database/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── kegg-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── kegg_reference.md
│ └── scripts/
│ └── kegg_api.py
├── labarchive-integration/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── authentication_guide.md
│ │ └── integrations.md
│ └── scripts/
│ ├── entry_operations.py
│ ├── notebook_operations.py
│ └── setup_config.py
├── lamindb/
│ ├── SKILL.md
│ └── references/
│ ├── annotation-validation.md
│ ├── core-concepts.md
│ ├── data-management.md
│ ├── integrations.md
│ ├── ontologies.md
│ └── setup-deployment.md
├── latchbio-integration/
│ ├── SKILL.md
│ └── references/
│ ├── data-management.md
│ ├── resource-configuration.md
│ ├── verified-workflows.md
│ └── workflow-creation.md
├── latex-posters/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── baposter_template.tex
│ │ ├── beamerposter_template.tex
│ │ ├── poster_quality_checklist.md
│ │ └── tikzposter_template.tex
│ ├── references/
│ │ ├── README.md
│ │ ├── latex_poster_packages.md
│ │ ├── poster_content_guide.md
│ │ ├── poster_design_principles.md
│ │ └── poster_layout_design.md
│ └── scripts/
│ └── review_poster.sh
├── literature-review/
│ ├── SKILL.md
│ ├── assets/
│ │ └── review_template.md
│ ├── references/
│ │ ├── citation_styles.md
│ │ └── database_strategies.md
│ └── scripts/
│ ├── generate_pdf.py
│ ├── search_databases.py
│ └── verify_citations.py
├── markdown-mermaid-writing/
│ ├── SKILL.md
│ ├── assets/
│ │ └── examples/
│ │ └── example-research-report.md
│ ├── references/
│ │ ├── diagrams/
│ │ │ ├── architecture.md
│ │ │ ├── block.md
│ │ │ ├── c4.md
│ │ │ ├── class.md
│ │ │ ├── complex_examples.md
│ │ │ ├── er.md
│ │ │ ├── flowchart.md
│ │ │ ├── gantt.md
│ │ │ ├── git_graph.md
│ │ │ ├── kanban.md
│ │ │ ├── mindmap.md
│ │ │ ├── packet.md
│ │ │ ├── pie.md
│ │ │ ├── quadrant.md
│ │ │ ├── radar.md
│ │ │ ├── requirement.md
│ │ │ ├── sankey.md
│ │ │ ├── sequence.md
│ │ │ ├── state.md
│ │ │ ├── timeline.md
│ │ │ ├── treemap.md
│ │ │ ├── user_journey.md
│ │ │ ├── xy_chart.md
│ │ │ └── zenuml.md
│ │ ├── markdown_style_guide.md
│ │ └── mermaid_style_guide.md
│ └── templates/
│ ├── decision_record.md
│ ├── how_to_guide.md
│ ├── issue.md
│ ├── kanban.md
│ ├── presentation.md
│ ├── project_documentation.md
│ ├── pull_request.md
│ ├── research_paper.md
│ └── status_report.md
├── market-research-reports/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── FORMATTING_GUIDE.md
│ │ ├── market_report_template.tex
│ │ └── market_research.sty
│ ├── references/
│ │ ├── data_analysis_patterns.md
│ │ ├── report_structure_guide.md
│ │ └── visual_generation_guide.md
│ └── scripts/
│ └── generate_market_visuals.py
├── markitdown/
│ ├── SKILL.md
│ ├── assets/
│ │ └── example_usage.md
│ ├── references/
│ │ ├── api_reference.md
│ │ └── file_formats.md
│ └── scripts/
│ ├── batch_convert.py
│ ├── convert_literature.py
│ └── convert_with_ai.py
├── matchms/
│ ├── SKILL.md
│ └── references/
│ ├── filtering.md
│ ├── importing_exporting.md
│ ├── similarity.md
│ └── workflows.md
├── matlab/
│ ├── SKILL.md
│ └── references/
│ ├── data-import-export.md
│ ├── executing-scripts.md
│ ├── graphics-visualization.md
│ ├── mathematics.md
│ ├── matrices-arrays.md
│ ├── octave-compatibility.md
│ ├── programming.md
│ └── python-integration.md
├── matplotlib/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── common_issues.md
│ │ ├── plot_types.md
│ │ └── styling_guide.md
│ └── scripts/
│ ├── plot_template.py
│ └── style_configurator.py
├── medchem/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_guide.md
│ │ └── rules_catalog.md
│ └── scripts/
│ └── filter_molecules.py
├── metabolomics-workbench-database/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── modal/
│ ├── SKILL.md
│ └── references/
│ ├── api_reference.md
│ ├── examples.md
│ ├── functions.md
│ ├── getting-started.md
│ ├── gpu.md
│ ├── images.md
│ ├── resources.md
│ ├── scaling.md
│ ├── scheduled-jobs.md
│ ├── secrets.md
│ ├── volumes.md
│ └── web-endpoints.md
├── molecular-dynamics/
│ ├── SKILL.md
│ └── references/
│ └── mdanalysis_analysis.md
├── molfeat/
│ ├── SKILL.md
│ └── references/
│ ├── api_reference.md
│ ├── available_featurizers.md
│ └── examples.md
├── monarch-database/
│ ├── SKILL.md
│ └── references/
│ └── phenotype_ontology.md
├── networkx/
│ ├── SKILL.md
│ └── references/
│ ├── algorithms.md
│ ├── generators.md
│ ├── graph-basics.md
│ ├── io.md
│ └── visualization.md
├── neurokit2/
│ ├── SKILL.md
│ └── references/
│ ├── bio_module.md
│ ├── complexity.md
│ ├── ecg_cardiac.md
│ ├── eda.md
│ ├── eeg.md
│ ├── emg.md
│ ├── eog.md
│ ├── epochs_events.md
│ ├── hrv.md
│ ├── ppg.md
│ ├── rsp.md
│ └── signal_processing.md
├── neuropixels-analysis/
│ ├── SKILL.md
│ ├── assets/
│ │ └── analysis_template.py
│ ├── references/
│ │ ├── AI_CURATION.md
│ │ ├── ANALYSIS.md
│ │ ├── AUTOMATED_CURATION.md
│ │ ├── MOTION_CORRECTION.md
│ │ ├── PREPROCESSING.md
│ │ ├── QUALITY_METRICS.md
│ │ ├── SPIKE_SORTING.md
│ │ ├── api_reference.md
│ │ ├── plotting_guide.md
│ │ └── standard_workflow.md
│ └── scripts/
│ ├── compute_metrics.py
│ ├── explore_recording.py
│ ├── export_to_phy.py
│ ├── neuropixels_pipeline.py
│ ├── preprocess_recording.py
│ └── run_sorting.py
├── offer-k-dense-web/
│ └── SKILL.md
├── omero-integration/
│ ├── SKILL.md
│ └── references/
│ ├── advanced.md
│ ├── connection.md
│ ├── data_access.md
│ ├── image_processing.md
│ ├── metadata.md
│ ├── rois.md
│ ├── scripts.md
│ └── tables.md
├── open-notebook/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── architecture.md
│ │ ├── configuration.md
│ │ └── examples.md
│ └── scripts/
│ ├── chat_interaction.py
│ ├── notebook_management.py
│ ├── source_ingestion.py
│ └── test_open_notebook_skill.py
├── openalex-database/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_guide.md
│ │ └── common_queries.md
│ └── scripts/
│ ├── openalex_client.py
│ └── query_helpers.py
├── opentargets-database/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── evidence_types.md
│ │ └── target_annotations.md
│ └── scripts/
│ └── query_opentargets.py
├── opentrons-integration/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ ├── basic_protocol_template.py
│ ├── pcr_setup_template.py
│ └── serial_dilution_template.py
├── paper-2-web/
│ ├── SKILL.md
│ └── references/
│ ├── installation.md
│ ├── paper2poster.md
│ ├── paper2video.md
│ ├── paper2web.md
│ └── usage_examples.md
├── parallel-web/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── deep_research_guide.md
│ │ ├── extraction_patterns.md
│ │ ├── search_best_practices.md
│ │ └── workflow_recipes.md
│ └── scripts/
│ └── parallel_web.py
├── pathml/
│ ├── SKILL.md
│ └── references/
│ ├── data_management.md
│ ├── graphs.md
│ ├── image_loading.md
│ ├── machine_learning.md
│ ├── multiparametric.md
│ └── preprocessing.md
├── pdb-database/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── pdf/
│ ├── LICENSE.txt
│ ├── SKILL.md
│ ├── forms.md
│ ├── reference.md
│ └── scripts/
│ ├── check_bounding_boxes.py
│ ├── check_fillable_fields.py
│ ├── convert_pdf_to_images.py
│ ├── create_validation_image.py
│ ├── extract_form_field_info.py
│ ├── extract_form_structure.py
│ ├── fill_fillable_fields.py
│ └── fill_pdf_form_with_annotations.py
├── peer-review/
│ ├── SKILL.md
│ └── references/
│ ├── common_issues.md
│ └── reporting_standards.md
├── pennylane/
│ ├── SKILL.md
│ └── references/
│ ├── advanced_features.md
│ ├── devices_backends.md
│ ├── getting_started.md
│ ├── optimization.md
│ ├── quantum_chemistry.md
│ ├── quantum_circuits.md
│ └── quantum_ml.md
├── perplexity-search/
│ ├── SKILL.md
│ ├── references/
│ │ ├── model_comparison.md
│ │ ├── openrouter_setup.md
│ │ └── search_strategies.md
│ └── scripts/
│ ├── perplexity_search.py
│ └── setup_env.py
├── phylogenetics/
│ ├── SKILL.md
│ ├── references/
│ │ └── iqtree_inference.md
│ └── scripts/
│ └── phylogenetic_analysis.py
├── plotly/
│ ├── SKILL.md
│ └── references/
│ ├── chart-types.md
│ ├── export-interactivity.md
│ ├── graph-objects.md
│ ├── layouts-styling.md
│ └── plotly-express.md
├── polars/
│ ├── SKILL.md
│ └── references/
│ ├── best_practices.md
│ ├── core_concepts.md
│ ├── io_guide.md
│ ├── operations.md
│ ├── pandas_migration.md
│ └── transformations.md
├── polars-bio/
│ ├── SKILL.md
│ └── references/
│ ├── bioframe_migration.md
│ ├── configuration.md
│ ├── file_io.md
│ ├── interval_operations.md
│ ├── pileup_operations.md
│ └── sql_processing.md
├── pptx/
│ ├── LICENSE.txt
│ ├── SKILL.md
│ ├── editing.md
│ ├── pptxgenjs.md
│ └── scripts/
│ ├── __init__.py
│ ├── add_slide.py
│ ├── clean.py
│ ├── office/
│ │ ├── helpers/
│ │ │ ├── __init__.py
│ │ │ ├── merge_runs.py
│ │ │ └── simplify_redlines.py
│ │ ├── pack.py
│ │ ├── schemas/
│ │ │ ├── ISO-IEC29500-4_2016/
│ │ │ │ ├── dml-chart.xsd
│ │ │ │ ├── dml-chartDrawing.xsd
│ │ │ │ ├── dml-diagram.xsd
│ │ │ │ ├── dml-lockedCanvas.xsd
│ │ │ │ ├── dml-main.xsd
│ │ │ │ ├── dml-picture.xsd
│ │ │ │ ├── dml-spreadsheetDrawing.xsd
│ │ │ │ ├── dml-wordprocessingDrawing.xsd
│ │ │ │ ├── pml.xsd
│ │ │ │ ├── shared-additionalCharacteristics.xsd
│ │ │ │ ├── shared-bibliography.xsd
│ │ │ │ ├── shared-commonSimpleTypes.xsd
│ │ │ │ ├── shared-customXmlDataProperties.xsd
│ │ │ │ ├── shared-customXmlSchemaProperties.xsd
│ │ │ │ ├── shared-documentPropertiesCustom.xsd
│ │ │ │ ├── shared-documentPropertiesExtended.xsd
│ │ │ │ ├── shared-documentPropertiesVariantTypes.xsd
│ │ │ │ ├── shared-math.xsd
│ │ │ │ ├── shared-relationshipReference.xsd
│ │ │ │ ├── sml.xsd
│ │ │ │ ├── vml-main.xsd
│ │ │ │ ├── vml-officeDrawing.xsd
│ │ │ │ ├── vml-presentationDrawing.xsd
│ │ │ │ ├── vml-spreadsheetDrawing.xsd
│ │ │ │ ├── vml-wordprocessingDrawing.xsd
│ │ │ │ ├── wml.xsd
│ │ │ │ └── xml.xsd
│ │ │ ├── ecma/
│ │ │ │ └── fouth-edition/
│ │ │ │ ├── opc-contentTypes.xsd
│ │ │ │ ├── opc-coreProperties.xsd
│ │ │ │ ├── opc-digSig.xsd
│ │ │ │ └── opc-relationships.xsd
│ │ │ ├── mce/
│ │ │ │ └── mc.xsd
│ │ │ └── microsoft/
│ │ │ ├── wml-2010.xsd
│ │ │ ├── wml-2012.xsd
│ │ │ ├── wml-2018.xsd
│ │ │ ├── wml-cex-2018.xsd
│ │ │ ├── wml-cid-2016.xsd
│ │ │ ├── wml-sdtdatahash-2020.xsd
│ │ │ └── wml-symex-2015.xsd
│ │ ├── soffice.py
│ │ ├── unpack.py
│ │ ├── validate.py
│ │ └── validators/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── docx.py
│ │ ├── pptx.py
│ │ └── redlining.py
│ └── thumbnail.py
├── pptx-posters/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── poster_html_template.html
│ │ └── poster_quality_checklist.md
│ └── references/
│ ├── poster_content_guide.md
│ ├── poster_design_principles.md
│ └── poster_layout_design.md
├── primekg/
│ ├── SKILL.md
│ └── scripts/
│ └── query_primekg.py
├── protocolsio-integration/
│ ├── SKILL.md
│ └── references/
│ ├── additional_features.md
│ ├── authentication.md
│ ├── discussions.md
│ ├── file_manager.md
│ ├── protocols_api.md
│ └── workspaces.md
├── pubchem-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ ├── bioactivity_query.py
│ └── compound_search.py
├── pubmed-database/
│ ├── SKILL.md
│ └── references/
│ ├── api_reference.md
│ ├── common_queries.md
│ └── search_syntax.md
├── pufferlib/
│ ├── SKILL.md
│ ├── references/
│ │ ├── environments.md
│ │ ├── integration.md
│ │ ├── policies.md
│ │ ├── training.md
│ │ └── vectorization.md
│ └── scripts/
│ ├── env_template.py
│ └── train_template.py
├── pydeseq2/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ └── workflow_guide.md
│ └── scripts/
│ └── run_deseq2_analysis.py
├── pydicom/
│ ├── SKILL.md
│ ├── references/
│ │ ├── common_tags.md
│ │ └── transfer_syntaxes.md
│ └── scripts/
│ ├── anonymize_dicom.py
│ ├── dicom_to_image.py
│ └── extract_metadata.py
├── pyhealth/
│ ├── SKILL.md
│ └── references/
│ ├── datasets.md
│ ├── medical_coding.md
│ ├── models.md
│ ├── preprocessing.md
│ ├── tasks.md
│ └── training_evaluation.md
├── pylabrobot/
│ ├── SKILL.md
│ └── references/
│ ├── analytical-equipment.md
│ ├── hardware-backends.md
│ ├── liquid-handling.md
│ ├── material-handling.md
│ ├── resources.md
│ └── visualization.md
├── pymatgen/
│ ├── SKILL.md
│ ├── references/
│ │ ├── analysis_modules.md
│ │ ├── core_classes.md
│ │ ├── io_formats.md
│ │ ├── materials_project_api.md
│ │ └── transformations_workflows.md
│ └── scripts/
│ ├── phase_diagram_generator.py
│ ├── structure_analyzer.py
│ └── structure_converter.py
├── pymc/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── hierarchical_model_template.py
│ │ └── linear_regression_template.py
│ ├── references/
│ │ ├── distributions.md
│ │ ├── sampling_inference.md
│ │ └── workflows.md
│ └── scripts/
│ ├── model_comparison.py
│ └── model_diagnostics.py
├── pymoo/
│ ├── SKILL.md
│ ├── references/
│ │ ├── algorithms.md
│ │ ├── constraints_mcdm.md
│ │ ├── operators.md
│ │ ├── problems.md
│ │ └── visualization.md
│ └── scripts/
│ ├── custom_problem_example.py
│ ├── decision_making_example.py
│ ├── many_objective_example.py
│ ├── multi_objective_example.py
│ └── single_objective_example.py
├── pyopenms/
│ ├── SKILL.md
│ └── references/
│ ├── data_structures.md
│ ├── feature_detection.md
│ ├── file_io.md
│ ├── identification.md
│ ├── metabolomics.md
│ └── signal_processing.md
├── pysam/
│ ├── SKILL.md
│ └── references/
│ ├── alignment_files.md
│ ├── common_workflows.md
│ ├── sequence_files.md
│ └── variant_files.md
├── pytdc/
│ ├── SKILL.md
│ ├── references/
│ │ ├── datasets.md
│ │ ├── oracles.md
│ │ └── utilities.md
│ └── scripts/
│ ├── benchmark_evaluation.py
│ ├── load_and_split_data.py
│ └── molecular_generation.py
├── pytorch-lightning/
│ ├── SKILL.md
│ ├── references/
│ │ ├── best_practices.md
│ │ ├── callbacks.md
│ │ ├── data_module.md
│ │ ├── distributed_training.md
│ │ ├── lightning_module.md
│ │ ├── logging.md
│ │ └── trainer.md
│ └── scripts/
│ ├── quick_trainer_setup.py
│ ├── template_datamodule.py
│ └── template_lightning_module.py
├── pyzotero/
│ ├── SKILL.md
│ └── references/
│ ├── authentication.md
│ ├── cli.md
│ ├── collections.md
│ ├── error-handling.md
│ ├── exports.md
│ ├── files-attachments.md
│ ├── full-text.md
│ ├── pagination.md
│ ├── read-api.md
│ ├── saved-searches.md
│ ├── search-params.md
│ ├── tags.md
│ └── write-api.md
├── qiskit/
│ ├── SKILL.md
│ └── references/
│ ├── algorithms.md
│ ├── backends.md
│ ├── circuits.md
│ ├── patterns.md
│ ├── primitives.md
│ ├── setup.md
│ ├── transpilation.md
│ └── visualization.md
├── qutip/
│ ├── SKILL.md
│ └── references/
│ ├── advanced.md
│ ├── analysis.md
│ ├── core_concepts.md
│ ├── time_evolution.md
│ └── visualization.md
├── rdkit/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── descriptors_reference.md
│ │ └── smarts_patterns.md
│ └── scripts/
│ ├── molecular_properties.py
│ ├── similarity_search.py
│ └── substructure_filter.py
├── reactome-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── api_reference.md
│ └── scripts/
│ └── reactome_query.py
├── research-grants/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── budget_justification_template.md
│ │ ├── nih_specific_aims_template.md
│ │ └── nsf_project_summary_template.md
│ └── references/
│ ├── README.md
│ ├── broader_impacts.md
│ ├── darpa_guidelines.md
│ ├── doe_guidelines.md
│ ├── nih_guidelines.md
│ ├── nsf_guidelines.md
│ ├── nstc_guidelines.md
│ └── specific_aims_guide.md
├── research-lookup/
│ ├── README.md
│ ├── SKILL.md
│ ├── examples.py
│ ├── lookup.py
│ ├── research_lookup.py
│ └── scripts/
│ └── research_lookup.py
├── rowan/
│ ├── SKILL.md
│ └── references/
│ ├── api_reference.md
│ ├── molecule_handling.md
│ ├── proteins_and_organization.md
│ ├── rdkit_native.md
│ ├── results_interpretation.md
│ └── workflow_types.md
├── scanpy/
│ ├── SKILL.md
│ ├── assets/
│ │ └── analysis_template.py
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── plotting_guide.md
│ │ └── standard_workflow.md
│ └── scripts/
│ └── qc_analysis.py
├── scholar-evaluation/
│ ├── SKILL.md
│ ├── references/
│ │ └── evaluation_framework.md
│ └── scripts/
│ └── calculate_scores.py
├── scientific-brainstorming/
│ ├── SKILL.md
│ └── references/
│ └── brainstorming_methods.md
├── scientific-critical-thinking/
│ ├── SKILL.md
│ └── references/
│ ├── common_biases.md
│ ├── evidence_hierarchy.md
│ ├── experimental_design.md
│ ├── logical_fallacies.md
│ ├── scientific_method.md
│ └── statistical_pitfalls.md
├── scientific-schematics/
│ ├── SKILL.md
│ ├── references/
│ │ ├── QUICK_REFERENCE.md
│ │ ├── README.md
│ │ └── best_practices.md
│ └── scripts/
│ ├── example_usage.sh
│ ├── generate_schematic.py
│ └── generate_schematic_ai.py
├── scientific-slides/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── beamer_template_conference.tex
│ │ ├── beamer_template_defense.tex
│ │ ├── beamer_template_seminar.tex
│ │ ├── powerpoint_design_guide.md
│ │ └── timing_guidelines.md
│ ├── references/
│ │ ├── beamer_guide.md
│ │ ├── data_visualization_slides.md
│ │ ├── presentation_structure.md
│ │ ├── slide_design_principles.md
│ │ ├── talk_types_guide.md
│ │ └── visual_review_workflow.md
│ └── scripts/
│ ├── generate_slide_image.py
│ ├── generate_slide_image_ai.py
│ ├── pdf_to_images.py
│ ├── slides_to_pdf.py
│ └── validate_presentation.py
├── scientific-visualization/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── color_palettes.py
│ │ ├── nature.mplstyle
│ │ ├── presentation.mplstyle
│ │ └── publication.mplstyle
│ ├── references/
│ │ ├── color_palettes.md
│ │ ├── journal_requirements.md
│ │ ├── matplotlib_examples.md
│ │ └── publication_guidelines.md
│ └── scripts/
│ ├── figure_export.py
│ └── style_presets.py
├── scientific-writing/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── REPORT_FORMATTING_GUIDE.md
│ │ ├── scientific_report.sty
│ │ └── scientific_report_template.tex
│ └── references/
│ ├── citation_styles.md
│ ├── figures_tables.md
│ ├── imrad_structure.md
│ ├── professional_report_formatting.md
│ ├── reporting_guidelines.md
│ └── writing_principles.md
├── scikit-bio/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── scikit-learn/
│ ├── SKILL.md
│ ├── references/
│ │ ├── model_evaluation.md
│ │ ├── pipelines_and_composition.md
│ │ ├── preprocessing.md
│ │ ├── quick_reference.md
│ │ ├── supervised_learning.md
│ │ └── unsupervised_learning.md
│ └── scripts/
│ ├── classification_pipeline.py
│ └── clustering_analysis.py
├── scikit-survival/
│ ├── SKILL.md
│ └── references/
│ ├── competing-risks.md
│ ├── cox-models.md
│ ├── data-handling.md
│ ├── ensemble-models.md
│ ├── evaluation-metrics.md
│ └── svm-models.md
├── scvelo/
│ ├── SKILL.md
│ ├── references/
│ │ └── velocity_models.md
│ └── scripts/
│ └── rna_velocity_workflow.py
├── scvi-tools/
│ ├── SKILL.md
│ └── references/
│ ├── differential-expression.md
│ ├── models-atac-seq.md
│ ├── models-multimodal.md
│ ├── models-scrna-seq.md
│ ├── models-spatial.md
│ ├── models-specialized.md
│ ├── theoretical-foundations.md
│ └── workflows.md
├── seaborn/
│ ├── SKILL.md
│ └── references/
│ ├── examples.md
│ ├── function_reference.md
│ └── objects_interface.md
├── shap/
│ ├── SKILL.md
│ └── references/
│ ├── explainers.md
│ ├── plots.md
│ ├── theory.md
│ └── workflows.md
├── simpy/
│ ├── SKILL.md
│ ├── references/
│ │ ├── events.md
│ │ ├── monitoring.md
│ │ ├── process-interaction.md
│ │ ├── real-time.md
│ │ └── resources.md
│ └── scripts/
│ ├── basic_simulation_template.py
│ └── resource_monitor.py
├── stable-baselines3/
│ ├── SKILL.md
│ ├── references/
│ │ ├── algorithms.md
│ │ ├── callbacks.md
│ │ ├── custom_environments.md
│ │ └── vectorized_envs.md
│ └── scripts/
│ ├── custom_env_template.py
│ ├── evaluate_agent.py
│ └── train_rl_agent.py
├── statistical-analysis/
│ ├── SKILL.md
│ ├── references/
│ │ ├── assumptions_and_diagnostics.md
│ │ ├── bayesian_statistics.md
│ │ ├── effect_sizes_and_power.md
│ │ ├── reporting_standards.md
│ │ └── test_selection_guide.md
│ └── scripts/
│ └── assumption_checks.py
├── statsmodels/
│ ├── SKILL.md
│ └── references/
│ ├── discrete_choice.md
│ ├── glm.md
│ ├── linear_models.md
│ ├── stats_diagnostics.md
│ └── time_series.md
├── string-database/
│ ├── SKILL.md
│ ├── references/
│ │ └── string_reference.md
│ └── scripts/
│ └── string_api.py
├── sympy/
│ ├── SKILL.md
│ └── references/
│ ├── advanced-topics.md
│ ├── code-generation-printing.md
│ ├── core-capabilities.md
│ ├── matrices-linear-algebra.md
│ └── physics-mechanics.md
├── tiledbvcf/
│ └── SKILL.md
├── timesfm-forecasting/
│ ├── SKILL.md
│ ├── examples/
│ │ ├── anomaly-detection/
│ │ │ ├── detect_anomalies.py
│ │ │ └── output/
│ │ │ └── anomaly_detection.json
│ │ ├── covariates-forecasting/
│ │ │ ├── demo_covariates.py
│ │ │ └── output/
│ │ │ ├── covariates_metadata.json
│ │ │ └── sales_with_covariates.csv
│ │ └── global-temperature/
│ │ ├── README.md
│ │ ├── generate_animation_data.py
│ │ ├── generate_gif.py
│ │ ├── generate_html.py
│ │ ├── output/
│ │ │ ├── animation_data.json
│ │ │ ├── forecast_output.csv
│ │ │ ├── forecast_output.json
│ │ │ └── interactive_forecast.html
│ │ ├── run_example.sh
│ │ ├── run_forecast.py
│ │ ├── temperature_anomaly.csv
│ │ └── visualize_forecast.py
│ ├── references/
│ │ ├── api_reference.md
│ │ ├── data_preparation.md
│ │ └── system_requirements.md
│ └── scripts/
│ ├── check_system.py
│ └── forecast_csv.py
├── torch-geometric/
│ ├── SKILL.md
│ ├── references/
│ │ ├── datasets_reference.md
│ │ ├── layers_reference.md
│ │ └── transforms_reference.md
│ └── scripts/
│ ├── benchmark_model.py
│ ├── create_gnn_template.py
│ └── visualize_graph.py
├── torchdrug/
│ ├── SKILL.md
│ └── references/
│ ├── core_concepts.md
│ ├── datasets.md
│ ├── knowledge_graphs.md
│ ├── models_architectures.md
│ ├── molecular_generation.md
│ ├── molecular_property_prediction.md
│ ├── protein_modeling.md
│ └── retrosynthesis.md
├── transformers/
│ ├── SKILL.md
│ └── references/
│ ├── generation.md
│ ├── models.md
│ ├── pipelines.md
│ ├── tokenizers.md
│ └── training.md
├── treatment-plans/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── STYLING_QUICK_REFERENCE.md
│ │ ├── chronic_disease_management_plan.tex
│ │ ├── general_medical_treatment_plan.tex
│ │ ├── medical_treatment_plan.sty
│ │ ├── mental_health_treatment_plan.tex
│ │ ├── one_page_treatment_plan.tex
│ │ ├── pain_management_plan.tex
│ │ ├── perioperative_care_plan.tex
│ │ ├── quality_checklist.md
│ │ └── rehabilitation_treatment_plan.tex
│ ├── references/
│ │ ├── README.md
│ │ ├── goal_setting_frameworks.md
│ │ ├── intervention_guidelines.md
│ │ ├── regulatory_compliance.md
│ │ ├── specialty_specific_guidelines.md
│ │ └── treatment_plan_standards.md
│ └── scripts/
│ ├── check_completeness.py
│ ├── generate_template.py
│ ├── timeline_generator.py
│ └── validate_treatment_plan.py
├── umap-learn/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
├── uniprot-database/
│ ├── SKILL.md
│ ├── references/
│ │ ├── api_examples.md
│ │ ├── api_fields.md
│ │ ├── id_mapping_databases.md
│ │ └── query_syntax.md
│ └── scripts/
│ └── uniprot_client.py
├── usfiscaldata/
│ ├── SKILL.md
│ └── references/
│ ├── api-basics.md
│ ├── datasets-debt.md
│ ├── datasets-fiscal.md
│ ├── datasets-interest-rates.md
│ ├── datasets-securities.md
│ ├── examples.md
│ ├── parameters.md
│ └── response-format.md
├── uspto-database/
│ ├── SKILL.md
│ ├── references/
│ │ ├── additional_apis.md
│ │ ├── patentsearch_api.md
│ │ ├── peds_api.md
│ │ └── trademark_api.md
│ └── scripts/
│ ├── patent_search.py
│ ├── peds_client.py
│ └── trademark_client.py
├── vaex/
│ ├── SKILL.md
│ └── references/
│ ├── core_dataframes.md
│ ├── data_processing.md
│ ├── io_operations.md
│ ├── machine_learning.md
│ ├── performance.md
│ └── visualization.md
├── venue-templates/
│ ├── SKILL.md
│ ├── assets/
│ │ ├── examples/
│ │ │ ├── cell_summary_example.md
│ │ │ ├── medical_structured_abstract.md
│ │ │ ├── nature_abstract_examples.md
│ │ │ └── neurips_introduction_example.md
│ │ ├── grants/
│ │ │ ├── nih_specific_aims.tex
│ │ │ └── nsf_proposal_template.tex
│ │ ├── journals/
│ │ │ ├── nature_article.tex
│ │ │ ├── neurips_article.tex
│ │ │ └── plos_one.tex
│ │ └── posters/
│ │ └── beamerposter_academic.tex
│ ├── references/
│ │ ├── cell_press_style.md
│ │ ├── conferences_formatting.md
│ │ ├── cs_conference_style.md
│ │ ├── grants_requirements.md
│ │ ├── journals_formatting.md
│ │ ├── medical_journal_styles.md
│ │ ├── ml_conference_style.md
│ │ ├── nature_science_style.md
│ │ ├── posters_guidelines.md
│ │ ├── reviewer_expectations.md
│ │ └── venue_writing_styles.md
│ └── scripts/
│ ├── customize_template.py
│ ├── query_template.py
│ └── validate_format.py
├── what-if-oracle/
│ ├── SKILL.md
│ └── references/
│ └── scenario-templates.md
├── xlsx/
│ ├── LICENSE.txt
│ ├── SKILL.md
│ └── scripts/
│ ├── office/
│ │ ├── helpers/
│ │ │ ├── __init__.py
│ │ │ ├── merge_runs.py
│ │ │ └── simplify_redlines.py
│ │ ├── pack.py
│ │ ├── schemas/
│ │ │ ├── ISO-IEC29500-4_2016/
│ │ │ │ ├── dml-chart.xsd
│ │ │ │ ├── dml-chartDrawing.xsd
│ │ │ │ ├── dml-diagram.xsd
│ │ │ │ ├── dml-lockedCanvas.xsd
│ │ │ │ ├── dml-main.xsd
│ │ │ │ ├── dml-picture.xsd
│ │ │ │ ├── dml-spreadsheetDrawing.xsd
│ │ │ │ ├── dml-wordprocessingDrawing.xsd
│ │ │ │ ├── pml.xsd
│ │ │ │ ├── shared-additionalCharacteristics.xsd
│ │ │ │ ├── shared-bibliography.xsd
│ │ │ │ ├── shared-commonSimpleTypes.xsd
│ │ │ │ ├── shared-customXmlDataProperties.xsd
│ │ │ │ ├── shared-customXmlSchemaProperties.xsd
│ │ │ │ ├── shared-documentPropertiesCustom.xsd
│ │ │ │ ├── shared-documentPropertiesExtended.xsd
│ │ │ │ ├── shared-documentPropertiesVariantTypes.xsd
│ │ │ │ ├── shared-math.xsd
│ │ │ │ ├── shared-relationshipReference.xsd
│ │ │ │ ├── sml.xsd
│ │ │ │ ├── vml-main.xsd
│ │ │ │ ├── vml-officeDrawing.xsd
│ │ │ │ ├── vml-presentationDrawing.xsd
│ │ │ │ ├── vml-spreadsheetDrawing.xsd
│ │ │ │ ├── vml-wordprocessingDrawing.xsd
│ │ │ │ ├── wml.xsd
│ │ │ │ └── xml.xsd
│ │ │ ├── ecma/
│ │ │ │ └── fouth-edition/
│ │ │ │ ├── opc-contentTypes.xsd
│ │ │ │ ├── opc-coreProperties.xsd
│ │ │ │ ├── opc-digSig.xsd
│ │ │ │ └── opc-relationships.xsd
│ │ │ ├── mce/
│ │ │ │ └── mc.xsd
│ │ │ └── microsoft/
│ │ │ ├── wml-2010.xsd
│ │ │ ├── wml-2012.xsd
│ │ │ ├── wml-2018.xsd
│ │ │ ├── wml-cex-2018.xsd
│ │ │ ├── wml-cid-2016.xsd
│ │ │ ├── wml-sdtdatahash-2020.xsd
│ │ │ └── wml-symex-2015.xsd
│ │ ├── soffice.py
│ │ ├── unpack.py
│ │ ├── validate.py
│ │ └── validators/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── docx.py
│ │ ├── pptx.py
│ │ └── redlining.py
│ └── recalc.py
├── zarr-python/
│ ├── SKILL.md
│ └── references/
│ └── api_reference.md
└── zinc-database/
├── SKILL.md
└── references/
└── api_reference.md
SYMBOL INDEX (1486 symbols across 201 files)
FILE: scientific-skills/arboreto/scripts/basic_grn_inference.py
function run_grn_inference (line 24) | def run_grn_inference(expression_file, output_file, tf_file=None, seed=7...
FILE: scientific-skills/arxiv-database/scripts/arxiv_search.py
class ArxivSearcher (line 20) | class ArxivSearcher:
method __init__ (line 31) | def __init__(self, verbose: bool = False, delay: float = 3.0):
method _log (line 40) | def _log(self, message: str):
method _rate_limit (line 44) | def _rate_limit(self):
method _parse_entry (line 53) | def _parse_entry(self, entry: ET.Element) -> Dict:
method _fetch (line 112) | def _fetch(self, params: Dict) -> List[Dict]:
method search (line 138) | def search(
method get_by_ids (line 176) | def get_by_ids(self, arxiv_ids: List[str]) -> List[Dict]:
method download_pdf (line 200) | def download_pdf(self, arxiv_id: str, output_path: str) -> bool:
method build_query (line 239) | def build_query(
function main (line 274) | def main():
FILE: scientific-skills/biorxiv-database/scripts/biorxiv_search.py
class BioRxivSearcher (line 20) | class BioRxivSearcher:
method __init__ (line 37) | def __init__(self, verbose: bool = False):
method _log (line 45) | def _log(self, message: str):
method _make_request (line 50) | def _make_request(self, endpoint: str, params: Optional[Dict] = None) ...
method search_by_date_range (line 67) | def search_by_date_range(
method search_by_interval (line 99) | def search_by_interval(
method get_paper_details (line 119) | def get_paper_details(self, doi: str) -> Dict:
method search_by_author (line 143) | def search_by_author(
method search_by_keywords (line 182) | def search_by_keywords(
method download_pdf (line 231) | def download_pdf(self, doi: str, output_path: str) -> bool:
method format_result (line 264) | def format_result(self, paper: Dict, include_abstract: bool = True) ->...
function main (line 301) | def main():
FILE: scientific-skills/bioservices/scripts/batch_id_converter.py
function normalize_database_code (line 63) | def normalize_database_code(code):
function read_ids_from_file (line 78) | def read_ids_from_file(filename):
function batch_convert (line 94) | def batch_convert(ids, from_db, to_db, chunk_size=100, delay=0.5):
function save_mapping_csv (line 164) | def save_mapping_csv(mapping, output_file, from_db, to_db):
function save_failed_ids (line 188) | def save_failed_ids(failed_ids, output_file):
function print_mapping_summary (line 202) | def print_mapping_summary(mapping, from_db, to_db):
function list_common_databases (line 241) | def list_common_databases():
function main (line 257) | def main():
FILE: scientific-skills/bioservices/scripts/compound_cross_reference.py
function search_kegg_compound (line 26) | def search_kegg_compound(compound_name):
function get_kegg_info (line 66) | def get_kegg_info(kegg, kegg_id):
function get_chembl_id (line 147) | def get_chembl_id(kegg_id):
function get_chebi_info (line 172) | def get_chebi_info(chebi_id):
function get_chembl_info (line 222) | def get_chembl_info(chembl_id):
function save_results (line 278) | def save_results(compound_name, kegg_info, chembl_id, output_file):
function main (line 315) | def main():
FILE: scientific-skills/bioservices/scripts/pathway_analysis.py
function get_all_pathways (line 34) | def get_all_pathways(kegg, organism):
function analyze_pathway (line 46) | def analyze_pathway(kegg, pathway_id):
function analyze_all_pathways (line 89) | def analyze_all_pathways(kegg, pathway_ids, limit=None):
function save_pathway_summary (line 110) | def save_pathway_summary(results, output_file):
function save_interactions_sif (line 150) | def save_interactions_sif(results, output_file):
function save_detailed_pathway_info (line 169) | def save_detailed_pathway_info(results, output_dir):
function print_statistics (line 195) | def print_statistics(results):
function main (line 237) | def main():
FILE: scientific-skills/bioservices/scripts/protein_analysis_workflow.py
function search_protein (line 29) | def search_protein(query):
function retrieve_sequence (line 86) | def retrieve_sequence(uniprot, uniprot_id):
function run_blast (line 116) | def run_blast(sequence, email, skip=False):
function discover_pathways (line 186) | def discover_pathways(uniprot, kegg, uniprot_id):
function find_interactions (line 243) | def find_interactions(protein_query):
function get_go_annotations (line 289) | def get_go_annotations(uniprot_id):
function main (line 346) | def main():
FILE: scientific-skills/brenda-database/scripts/brenda_queries.py
function validate_dependencies (line 67) | def validate_dependencies():
function parse_km_entry (line 80) | def parse_km_entry(entry: str) -> Dict[str, Any]:
function parse_reaction_entry (line 115) | def parse_reaction_entry(entry: str) -> Dict[str, Any]:
function extract_organism_data (line 147) | def extract_organism_data(entry: str) -> Dict[str, Any]:
function search_enzymes_by_substrate (line 167) | def search_enzymes_by_substrate(substrate: str, limit: int = 50) -> List...
function search_enzymes_by_product (line 204) | def search_enzymes_by_product(product: str, limit: int = 50) -> List[Dic...
function compare_across_organisms (line 236) | def compare_across_organisms(ec_number: str, organisms: List[str]) -> Li...
function get_organisms_for_enzyme (line 295) | def get_organisms_for_enzyme(ec_number: str) -> List[str]:
function get_environmental_parameters (line 316) | def get_environmental_parameters(ec_number: str) -> Dict[str, Any]:
function get_cofactor_requirements (line 369) | def get_cofactor_requirements(ec_number: str) -> List[Dict[str, Any]]:
function get_substrate_specificity (line 419) | def get_substrate_specificity(ec_number: str) -> List[Dict[str, Any]]:
function compare_substrate_affinity (line 472) | def compare_substrate_affinity(ec_number: str) -> List[Dict[str, Any]]:
function get_inhibitors (line 477) | def get_inhibitors(ec_number: str) -> List[Dict[str, Any]]:
function get_activators (line 528) | def get_activators(ec_number: str) -> List[Dict[str, Any]]:
function find_thermophilic_homologs (line 580) | def find_thermophilic_homologs(ec_number: str, min_temp: int = 50) -> Li...
function find_ph_stable_variants (line 623) | def find_ph_stable_variants(ec_number: str, min_ph: float = 8.0, max_ph:...
function get_modeling_parameters (line 668) | def get_modeling_parameters(ec_number: str, substrate: str = None) -> Di...
function export_kinetic_data (line 732) | def export_kinetic_data(ec_number: str, format: str = 'csv', filename: s...
function search_by_pattern (line 784) | def search_by_pattern(pattern: str, limit: int = 50) -> List[Dict[str, A...
FILE: scientific-skills/brenda-database/scripts/brenda_visualization.py
function validate_dependencies (line 58) | def validate_dependencies():
function plot_kinetic_parameters (line 69) | def plot_kinetic_parameters(ec_number: str, save_path: str = None, show_...
function plot_organism_comparison (line 159) | def plot_organism_comparison(ec_number: str, organisms: List[str], save_...
function plot_pH_profiles (line 235) | def plot_pH_profiles(ec_number: str, save_path: str = None, show_plot: b...
function plot_temperature_profiles (line 311) | def plot_temperature_profiles(ec_number: str, save_path: str = None, sho...
function plot_substrate_specificity (line 393) | def plot_substrate_specificity(ec_number: str, save_path: str = None, sh...
function plot_michaelis_menten (line 471) | def plot_michaelis_menten(ec_number: str, substrate: str = None, save_pa...
function create_heatmap_data (line 581) | def create_heatmap_data(ec_number: str, parameters: List[str] = None) ->...
function plot_heatmap (line 618) | def plot_heatmap(ec_number: str, save_path: str = None, show_plot: bool ...
function generate_summary_plots (line 681) | def generate_summary_plots(ec_number: str, save_dir: str = None) -> List...
FILE: scientific-skills/brenda-database/scripts/enzyme_pathway_builder.py
function validate_dependencies (line 66) | def validate_dependencies():
function identify_metabolite (line 180) | def identify_metabolite(metabolite_name: str) -> Dict[str, Any]:
function infer_transformation_type (line 232) | def infer_transformation_type(substrate: str, product: str) -> List[str]:
function find_enzymes_for_transformation (line 274) | def find_enzymes_for_transformation(substrate: str, product: str, limit:...
function find_pathway_for_product (line 348) | def find_pathway_for_product(product: str, max_steps: int = 3, starting_...
function build_retrosynthetic_tree (line 450) | def build_retrosynthetic_tree(target: str, depth: int = 2) -> Dict[str, ...
function suggest_enzyme_substitutions (line 533) | def suggest_enzyme_substitutions(ec_number: str, criteria: Dict[str, Any...
function calculate_pathway_feasibility (line 596) | def calculate_pathway_feasibility(pathway: Dict[str, Any]) -> Dict[str, ...
function optimize_pathway_conditions (line 696) | def optimize_pathway_conditions(pathway: Dict[str, Any]) -> Dict[str, Any]:
function generate_pathway_report (line 805) | def generate_pathway_report(pathway: Dict[str, Any], filename: str = Non...
function visualize_pathway (line 932) | def visualize_pathway(pathway: Dict[str, Any], save_path: str = None) ->...
FILE: scientific-skills/chembl-database/scripts/example_queries.py
function get_molecule_info (line 16) | def get_molecule_info(chembl_id):
function search_molecules_by_name (line 30) | def search_molecules_by_name(name_pattern):
function find_molecules_by_properties (line 45) | def find_molecules_by_properties(max_mw=500, min_logp=None, max_logp=None):
function get_target_info (line 72) | def get_target_info(target_chembl_id):
function search_targets_by_name (line 86) | def search_targets_by_name(target_name):
function get_bioactivity_data (line 104) | def get_bioactivity_data(target_chembl_id, activity_type='IC50', max_val...
function find_similar_compounds (line 126) | def find_similar_compounds(smiles, similarity_threshold=85):
function substructure_search (line 145) | def substructure_search(smiles):
function get_drug_info (line 160) | def get_drug_info(molecule_chembl_id):
function find_kinase_inhibitors (line 185) | def find_kinase_inhibitors(max_ic50=100):
function get_compound_bioactivities (line 218) | def get_compound_bioactivities(molecule_chembl_id):
function export_to_dataframe (line 236) | def export_to_dataframe(data):
FILE: scientific-skills/citation-management/scripts/doi_to_bibtex.py
class DOIConverter (line 14) | class DOIConverter:
method __init__ (line 17) | def __init__(self):
method doi_to_bibtex (line 23) | def doi_to_bibtex(self, doi: str) -> Optional[str]:
method convert_multiple (line 72) | def convert_multiple(self, dois: List[str], delay: float = 0.5) -> Lis...
function main (line 99) | def main():
FILE: scientific-skills/citation-management/scripts/extract_metadata.py
class MetadataExtractor (line 18) | class MetadataExtractor:
method __init__ (line 21) | def __init__(self, email: Optional[str] = None):
method identify_type (line 34) | def identify_type(self, identifier: str) -> Tuple[str, str]:
method _parse_url (line 70) | def _parse_url(self, url: str) -> Tuple[str, str]:
method extract_from_doi (line 98) | def extract_from_doi(self, doi: str) -> Optional[Dict]:
method extract_from_pmid (line 141) | def extract_from_pmid(self, pmid: str) -> Optional[Dict]:
method extract_from_arxiv (line 213) | def extract_from_arxiv(self, arxiv_id: str) -> Optional[Dict]:
method metadata_to_bibtex (line 283) | def metadata_to_bibtex(self, metadata: Dict, citation_key: Optional[st...
method _crossref_type_to_bibtex (line 348) | def _crossref_type_to_bibtex(self, crossref_type: str) -> str:
method _format_authors_crossref (line 361) | def _format_authors_crossref(self, authors: List[Dict]) -> str:
method _format_authors_pubmed (line 378) | def _format_authors_pubmed(self, authors: List) -> str:
method _extract_year_crossref (line 392) | def _extract_year_crossref(self, message: Dict) -> str:
method _extract_year_pubmed (line 403) | def _extract_year_pubmed(self, article: ET.Element) -> str:
method _generate_citation_key (line 414) | def _generate_citation_key(self, metadata: Dict) -> str:
method _protect_title (line 442) | def _protect_title(self, title: str) -> str:
method extract (line 455) | def extract(self, identifier: str) -> Optional[str]:
function main (line 487) | def main():
FILE: scientific-skills/citation-management/scripts/format_bibtex.py
class BibTeXFormatter (line 13) | class BibTeXFormatter:
method __init__ (line 16) | def __init__(self):
method parse_bibtex_file (line 27) | def parse_bibtex_file(self, filepath: str) -> List[Dict]:
method format_entry (line 78) | def format_entry(self, entry: Dict) -> str:
method fix_common_issues (line 119) | def fix_common_issues(self, entry: Dict) -> Dict:
method deduplicate_entries (line 166) | def deduplicate_entries(self, entries: List[Dict]) -> List[Dict]:
method sort_entries (line 201) | def sort_entries(self, entries: List[Dict], sort_by: str = 'key', desc...
method format_file (line 233) | def format_file(self, filepath: str, output: str = None,
function main (line 292) | def main():
FILE: scientific-skills/citation-management/scripts/search_google_scholar.py
class GoogleScholarSearcher (line 24) | class GoogleScholarSearcher:
method __init__ (line 27) | def __init__(self, use_proxy: bool = False):
method search (line 47) | def search(self, query: str, max_results: int = 50,
method metadata_to_bibtex (line 119) | def metadata_to_bibtex(self, metadata: Dict) -> str:
function main (line 179) | def main():
FILE: scientific-skills/citation-management/scripts/search_pubmed.py
class PubMedSearcher (line 17) | class PubMedSearcher:
method __init__ (line 20) | def __init__(self, api_key: Optional[str] = None, email: Optional[str]...
method search (line 36) | def search(self, query: str, max_results: int = 100,
method fetch_metadata (line 98) | def fetch_metadata(self, pmids: List[str]) -> List[Dict]:
method _extract_metadata_from_xml (line 154) | def _extract_metadata_from_xml(self, article: ET.Element) -> Optional[...
method metadata_to_bibtex (line 214) | def metadata_to_bibtex(self, metadata: Dict) -> str:
function main (line 269) | def main():
FILE: scientific-skills/citation-management/scripts/validate_citations.py
class CitationValidator (line 15) | class CitationValidator:
method __init__ (line 18) | def __init__(self):
method parse_bibtex_file (line 43) | def parse_bibtex_file(self, filepath: str) -> List[Dict]:
method validate_entry (line 95) | def validate_entry(self, entry: Dict) -> Tuple[List[Dict], List[Dict]]:
method verify_doi (line 201) | def verify_doi(self, doi: str) -> Tuple[bool, Optional[Dict]]:
method detect_duplicates (line 239) | def detect_duplicates(self, entries: List[Dict]) -> List[Dict]:
method validate_file (line 303) | def validate_file(self, filepath: str, check_dois: bool = False) -> Dict:
method _extract_year_crossref (line 377) | def _extract_year_crossref(self, message: Dict) -> str:
method _format_authors_crossref (line 387) | def _format_authors_crossref(self, authors: List[Dict]) -> str:
function main (line 405) | def main():
FILE: scientific-skills/clinical-decision-support/scripts/biomarker_classifier.py
function classify_binary_biomarker (line 21) | def classify_binary_biomarker(data, biomarker_col, threshold,
function classify_pd_l1_tps (line 45) | def classify_pd_l1_tps(data, pd_l1_col='pd_l1_tps'):
function classify_her2_status (line 77) | def classify_her2_status(data, ihc_col='her2_ihc', fish_col='her2_fish'):
function classify_breast_cancer_subtype (line 130) | def classify_breast_cancer_subtype(data, er_col='er_positive', pr_col='p...
function correlate_biomarker_outcome (line 169) | def correlate_biomarker_outcome(data, biomarker_col, outcome_col, biomar...
function stratify_cohort_report (line 229) | def stratify_cohort_report(data, stratification_var, output_dir='stratif...
function main (line 296) | def main():
FILE: scientific-skills/clinical-decision-support/scripts/build_decision_tree.py
class DecisionNode (line 16) | class DecisionNode:
method __init__ (line 19) | def __init__(self, question, yes_path=None, no_path=None, node_id=None):
method _generate_id (line 25) | def _generate_id(self, text):
class ActionNode (line 30) | class ActionNode:
method __init__ (line 33) | def __init__(self, action, urgency='routine', node_id=None):
method _generate_id (line 38) | def _generate_id(self, text):
function generate_tikz_header (line 42) | def generate_tikz_header():
function generate_tikz_footer (line 90) | def generate_tikz_footer():
function simple_algorithm_to_tikz (line 102) | def simple_algorithm_to_tikz(algorithm_text, output_file='algorithm.tex'):
function json_to_tikz (line 230) | def json_to_tikz(json_file, output_file='algorithm.tex'):
function create_example_json (line 337) | def create_example_json():
function main (line 390) | def main():
FILE: scientific-skills/clinical-decision-support/scripts/create_cohort_tables.py
function calculate_p_value (line 21) | def calculate_p_value(data, variable, group_col='group', var_type='categ...
function format_continuous_variable (line 72) | def format_continuous_variable(data, variable, group_col, distribution='...
function format_categorical_variable (line 106) | def format_categorical_variable(data, variable, group_col):
function generate_baseline_table (line 138) | def generate_baseline_table(data, group_col='group', output_file='table1...
function generate_efficacy_table (line 197) | def generate_efficacy_table(data, group_col='group', output_file='table2...
function generate_safety_table (line 282) | def generate_safety_table(data, ae_columns, group_col='group', output_fi...
function generate_latex_table (line 333) | def generate_latex_table(df, caption, label='table'):
function _binomial_ci (line 371) | def _binomial_ci(successes, trials, confidence=0.95):
function create_example_data (line 400) | def create_example_data():
function main (line 422) | def main():
FILE: scientific-skills/clinical-decision-support/scripts/generate_survival_analysis.py
function load_survival_data (line 26) | def load_survival_data(filepath):
function calculate_median_survival (line 54) | def calculate_median_survival(kmf):
function generate_kaplan_meier_plot (line 71) | def generate_kaplan_meier_plot(data, time_col='time', event_col='event',
function generate_number_at_risk_table (line 186) | def generate_number_at_risk_table(data, time_col='time', event_col='event',
function calculate_hazard_ratio (line 218) | def calculate_hazard_ratio(data, time_col='time', event_col='event', gro...
function generate_report (line 257) | def generate_report(data, output_dir, prefix='survival'):
function main (line 375) | def main():
FILE: scientific-skills/clinical-decision-support/scripts/validate_cds_document.py
class CDSValidator (line 22) | class CDSValidator:
method __init__ (line 25) | def __init__(self, filepath):
method validate_all (line 34) | def validate_all(self):
method check_required_sections (line 49) | def check_required_sections(self):
method check_evidence_citations (line 90) | def check_evidence_citations(self):
method check_recommendation_grading (line 121) | def check_recommendation_grading(self):
method check_statistical_reporting (line 145) | def check_statistical_reporting(self):
method check_hipaa_identifiers (line 178) | def check_hipaa_identifiers(self):
method check_biomarker_nomenclature (line 206) | def check_biomarker_nomenclature(self):
method generate_report (line 237) | def generate_report(self):
method save_report (line 271) | def save_report(self, output_file):
function main (line 300) | def main():
FILE: scientific-skills/clinical-reports/scripts/check_deidentification.py
function check_identifiers (line 159) | def check_identifiers(text: str) -> Dict:
function check_age_compliance (line 188) | def check_age_compliance(text: str) -> Dict:
function generate_report (line 202) | def generate_report(filename: str) -> Dict:
function get_recommendation (line 243) | def get_recommendation(status: str, identifiers: Dict, ages: Dict) -> str:
function print_report (line 263) | def print_report(report: Dict):
function main (line 310) | def main():
FILE: scientific-skills/clinical-reports/scripts/compliance_checker.py
function check_compliance (line 31) | def check_compliance(filename: str) -> dict:
function main (line 46) | def main():
FILE: scientific-skills/clinical-reports/scripts/extract_clinical_data.py
function extract_vital_signs (line 14) | def extract_vital_signs(content: str) -> dict:
function extract_demographics (line 33) | def extract_demographics(content: str) -> dict:
function extract_medications (line 49) | def extract_medications(content: str) -> list:
function main (line 67) | def main():
FILE: scientific-skills/clinical-reports/scripts/format_adverse_events.py
function format_ae_summary_table (line 17) | def format_ae_summary_table(data: list) -> str:
function main (line 70) | def main():
FILE: scientific-skills/clinical-reports/scripts/generate_report_template.py
function get_template_dir (line 44) | def get_template_dir() -> Path:
function list_templates (line 51) | def list_templates():
function generate_template (line 60) | def generate_template(template_type: str, output_file: str = None):
function interactive_mode (line 82) | def interactive_mode():
function main (line 121) | def main():
FILE: scientific-skills/clinical-reports/scripts/terminology_validator.py
function check_do_not_use_abbreviations (line 29) | def check_do_not_use_abbreviations(content: str) -> dict:
function check_ambiguous_abbreviations (line 47) | def check_ambiguous_abbreviations(content: str) -> dict:
function validate_icd10_format (line 63) | def validate_icd10_format(content: str) -> list:
function main (line 71) | def main():
FILE: scientific-skills/clinical-reports/scripts/validate_case_report.py
class CareValidator (line 20) | class CareValidator:
method __init__ (line 109) | def __init__(self, filename: str):
method _read_file (line 115) | def _read_file(self) -> str:
method validate_care_compliance (line 125) | def validate_care_compliance(self) -> Dict[str, Dict]:
method check_deidentification (line 144) | def check_deidentification(self) -> Dict[str, List[str]]:
method check_word_count (line 156) | def check_word_count(self) -> Dict[str, int]:
method check_references (line 170) | def check_references(self) -> Dict[str, any]:
method generate_report (line 191) | def generate_report(self) -> Dict:
method print_report (line 217) | def print_report(self):
function main (line 286) | def main():
FILE: scientific-skills/clinical-reports/scripts/validate_trial_report.py
function validate_ich_e3 (line 36) | def validate_ich_e3(filename: str) -> dict:
function main (line 58) | def main():
FILE: scientific-skills/clinicaltrials-database/scripts/query_clinicaltrials.py
function search_studies (line 22) | def search_studies(
function get_study_details (line 96) | def get_study_details(nct_id: str, format: str = "json") -> Dict:
function search_with_all_results (line 119) | def search_with_all_results(
function extract_study_summary (line 170) | def extract_study_summary(study: Dict) -> Dict:
FILE: scientific-skills/clinpgx-database/scripts/query_clinpgx.py
function rate_limited_request (line 24) | def rate_limited_request(url: str, params: Optional[Dict] = None, delay:...
function safe_api_call (line 41) | def safe_api_call(url: str, params: Optional[Dict] = None, max_retries: ...
function cached_query (line 81) | def cached_query(cache_file: str, query_func, *args, **kwargs) -> Any:
function get_gene_info (line 114) | def get_gene_info(gene_symbol: str) -> Optional[Dict]:
function get_drug_info (line 132) | def get_drug_info(drug_name: str) -> Optional[List[Dict]]:
function get_gene_drug_pairs (line 152) | def get_gene_drug_pairs(gene: Optional[str] = None, drug: Optional[str] ...
function get_cpic_guidelines (line 180) | def get_cpic_guidelines(gene: Optional[str] = None, drug: Optional[str] ...
function get_alleles (line 208) | def get_alleles(gene: str) -> Optional[List[Dict]]:
function get_allele_info (line 228) | def get_allele_info(allele_name: str) -> Optional[Dict]:
function get_clinical_annotations (line 246) | def get_clinical_annotations(
function get_drug_labels (line 281) | def get_drug_labels(drug: str, source: Optional[str] = None) -> Optional...
function search_variants (line 307) | def search_variants(rsid: Optional[str] = None, chromosome: Optional[str...
function get_pathway_info (line 342) | def get_pathway_info(pathway_id: Optional[str] = None, drug: Optional[st...
function export_to_dataframe (line 374) | def export_to_dataframe(data: List[Dict], output_file: Optional[str] = N...
function batch_gene_query (line 405) | def batch_gene_query(gene_list: List[str], delay: float = 0.5) -> Dict[s...
function find_actionable_gene_drug_pairs (line 437) | def find_actionable_gene_drug_pairs(cpic_level: str = "A") -> Optional[L...
FILE: scientific-skills/cosmic-database/scripts/download_cosmic.py
function download_cosmic_file (line 30) | def download_cosmic_file(
function get_common_file_path (line 131) | def get_common_file_path(
function main (line 163) | def main():
FILE: scientific-skills/deepchem/scripts/graph_neural_network.py
function create_model (line 38) | def create_model(model_type, n_tasks, mode='classification'):
function train_on_molnet (line 90) | def train_on_molnet(dataset_name, model_type, n_epochs=50):
function train_on_custom_data (line 164) | def train_on_custom_data(data_path, model_type, task_type, target_cols, ...
function main (line 244) | def main():
FILE: scientific-skills/deepchem/scripts/predict_solubility.py
function train_solubility_model (line 19) | def train_solubility_model(data_path=None, smiles_col='smiles', target_c...
function predict_new_molecules (line 119) | def predict_new_molecules(model, smiles_list, transformers=None):
function main (line 158) | def main():
FILE: scientific-skills/deepchem/scripts/transfer_learning.py
function train_chemberta (line 37) | def train_chemberta(train_dataset, valid_dataset, test_dataset, task_typ...
function train_grover (line 100) | def train_grover(train_dataset, test_dataset, task_type='classification'...
function load_molnet_dataset (line 159) | def load_molnet_dataset(dataset_name, model_type):
function load_custom_dataset (line 203) | def load_custom_dataset(data_path, target_cols, smiles_col, model_type):
function main (line 252) | def main():
FILE: scientific-skills/deeptools/scripts/validate_files.py
function check_file_exists (line 15) | def check_file_exists(filepath):
function check_bam_index (line 24) | def check_bam_index(bam_file):
function check_bigwig_file (line 37) | def check_bigwig_file(bw_file):
function check_bed_file (line 46) | def check_bed_file(bed_file):
function validate_files (line 76) | def validate_files(bam_files=None, bigwig_files=None, bed_files=None):
function main (line 145) | def main():
FILE: scientific-skills/deeptools/scripts/workflow_generator.py
function generate_chipseq_qc_workflow (line 32) | def generate_chipseq_qc_workflow(output_file, params):
function generate_chipseq_analysis_workflow (line 107) | def generate_chipseq_analysis_workflow(output_file, params):
function generate_rnaseq_coverage_workflow (line 226) | def generate_rnaseq_coverage_workflow(output_file, params):
function generate_atacseq_workflow (line 275) | def generate_atacseq_workflow(output_file, params):
function main (line 361) | def main():
FILE: scientific-skills/diffdock/scripts/analyze_results.py
function parse_confidence_scores (line 23) | def parse_confidence_scores(results_dir):
function parse_single_complex (line 53) | def parse_single_complex(complex_dir):
function extract_confidence_score (line 82) | def extract_confidence_score(sdf_file, complex_dir):
function classify_confidence (line 125) | def classify_confidence(score):
function print_summary (line 137) | def print_summary(results, top_n=None, min_confidence=None):
function export_to_csv (line 210) | def export_to_csv(results, output_path):
function get_top_predictions (line 236) | def get_top_predictions(results, n=10, sort_by='confidence'):
function print_top_predictions (line 255) | def print_top_predictions(results, n=10):
function main (line 271) | def main():
FILE: scientific-skills/diffdock/scripts/prepare_batch_csv.py
function validate_smiles (line 30) | def validate_smiles(smiles_string):
function validate_file_path (line 44) | def validate_file_path(file_path, base_dir=None):
function validate_csv (line 61) | def validate_csv(csv_path, base_dir=None):
function create_template_csv (line 159) | def create_template_csv(output_path, num_examples=3):
function main (line 183) | def main():
FILE: scientific-skills/diffdock/scripts/setup_check.py
function check_python_version (line 19) | def check_python_version():
function check_package (line 34) | def check_package(package_name, import_name=None, version_attr='__versio...
function check_pytorch (line 49) | def check_pytorch():
function check_pytorch_geometric (line 70) | def check_pytorch_geometric():
function check_core_dependencies (line 88) | def check_core_dependencies():
function check_esm (line 114) | def check_esm():
function check_diffdock_installation (line 127) | def check_diffdock_installation():
function print_installation_instructions (line 169) | def print_installation_instructions():
function print_performance_notes (line 198) | def print_performance_notes(has_cuda):
function main (line 228) | def main():
FILE: scientific-skills/docx/scripts/accept_changes.py
function accept_changes (line 36) | def accept_changes(
function _setup_libreoffice_macro (line 91) | def _setup_libreoffice_macro() -> bool:
FILE: scientific-skills/docx/scripts/comment.py
function _generate_hex_id (line 68) | def _generate_hex_id() -> str:
function _encode_smart_quotes (line 80) | def _encode_smart_quotes(text: str) -> str:
function _append_xml (line 86) | def _append_xml(xml_path: Path, root_tag: str, content: str) -> None:
function _find_para_id (line 98) | def _find_para_id(comments_path: Path, comment_id: int) -> str | None:
function _get_next_rid (line 108) | def _get_next_rid(rels_path: Path) -> int:
function _has_relationship (line 121) | def _has_relationship(rels_path: Path, target: str) -> bool:
function _has_content_type (line 129) | def _has_content_type(ct_path: Path, part_name: str) -> bool:
function _ensure_comment_relationships (line 137) | def _ensure_comment_relationships(unpacked_dir: Path) -> None:
function _ensure_comment_content_types (line 179) | def _ensure_comment_content_types(unpacked_dir: Path) -> None:
function add_comment (line 218) | def add_comment(
FILE: scientific-skills/docx/scripts/office/helpers/merge_runs.py
function merge_runs (line 16) | def merge_runs(input_dir: str) -> tuple[int, str]:
function _find_elements (line 44) | def _find_elements(root, tag: str) -> list:
function _get_child (line 59) | def _get_child(parent, tag: str):
function _get_children (line 68) | def _get_children(parent, tag: str) -> list:
function _is_adjacent (line 78) | def _is_adjacent(elem1, elem2) -> bool:
function _remove_elements (line 93) | def _remove_elements(root, tag: str):
function _strip_run_rsid_attrs (line 99) | def _strip_run_rsid_attrs(root):
function _merge_runs_in (line 108) | def _merge_runs_in(container) -> int:
function _first_child_run (line 128) | def _first_child_run(container):
function _next_element_sibling (line 135) | def _next_element_sibling(node):
function _next_sibling_run (line 144) | def _next_sibling_run(node):
function _is_run (line 154) | def _is_run(node) -> bool:
function _can_merge (line 159) | def _can_merge(run1, run2) -> bool:
function _merge_run_content (line 170) | def _merge_run_content(target, source):
function _consolidate_text (line 178) | def _consolidate_text(run):
FILE: scientific-skills/docx/scripts/office/helpers/simplify_redlines.py
function simplify_redlines (line 22) | def simplify_redlines(input_dir: str) -> tuple[int, str]:
function _merge_tracked_changes_in (line 47) | def _merge_tracked_changes_in(container, tag: str) -> int:
function _is_element (line 75) | def _is_element(node, tag: str) -> bool:
function _get_author (line 80) | def _get_author(elem) -> str:
function _can_merge_tracked (line 89) | def _can_merge_tracked(elem1, elem2) -> bool:
function _merge_tracked_content (line 104) | def _merge_tracked_content(target, source):
function _find_elements (line 111) | def _find_elements(root, tag: str) -> list:
function get_tracked_change_authors (line 126) | def get_tracked_change_authors(doc_xml_path: Path) -> dict[str, int]:
function _get_authors_from_docx (line 149) | def _get_authors_from_docx(docx_path: Path) -> dict[str, int]:
function infer_author (line 172) | def infer_author(modified_dir: Path, original_docx: Path, default: str =...
FILE: scientific-skills/docx/scripts/office/pack.py
function pack (line 24) | def pack(
function _run_validation (line 69) | def _run_validation(
function _condense_xml (line 108) | def _condense_xml(xml_file: Path) -> None:
FILE: scientific-skills/docx/scripts/office/soffice.py
function get_soffice_env (line 24) | def get_soffice_env() -> dict:
function run_soffice (line 35) | def run_soffice(args: list[str], **kwargs) -> subprocess.CompletedProcess:
function _needs_shim (line 44) | def _needs_shim() -> bool:
function _ensure_shim (line 53) | def _ensure_shim() -> Path:
FILE: scientific-skills/docx/scripts/office/unpack.py
function unpack (line 34) | def unpack(
function _pretty_print_xml (line 82) | def _pretty_print_xml(xml_file: Path) -> None:
function _escape_smart_quotes (line 91) | def _escape_smart_quotes(xml_file: Path) -> None:
FILE: scientific-skills/docx/scripts/office/validate.py
function main (line 25) | def main():
FILE: scientific-skills/docx/scripts/office/validators/base.py
class BaseSchemaValidator (line 12) | class BaseSchemaValidator:
method __init__ (line 94) | def __init__(self, unpacked_dir, original_file=None, verbose=False):
method validate (line 109) | def validate(self):
method repair (line 112) | def repair(self) -> int:
method repair_whitespace_preservation (line 115) | def repair_whitespace_preservation(self) -> int:
method validate_xml (line 143) | def validate_xml(self):
method validate_namespaces (line 170) | def validate_namespaces(self):
method validate_unique_ids (line 199) | def validate_unique_ids(self):
method validate_file_references (line 289) | def validate_file_references(self):
method validate_all_relationship_ids (line 385) | def validate_all_relationship_ids(self):
method _get_expected_relationship_type (line 469) | def _get_expected_relationship_type(self, element_name):
method validate_content_types (line 492) | def validate_content_types(self):
method validate_file_against_xsd (line 598) | def validate_file_against_xsd(self, xml_file, verbose=False):
method validate_against_xsd (line 636) | def validate_against_xsd(self):
method _get_schema_path (line 685) | def _get_schema_path(self, xml_file):
method _clean_ignorable_namespaces (line 703) | def _clean_ignorable_namespaces(self, xml_doc):
method _remove_ignorable_elements (line 723) | def _remove_ignorable_elements(self, root):
method _preprocess_for_mc_ignorable (line 742) | def _preprocess_for_mc_ignorable(self, xml_doc):
method _validate_single_file_xsd (line 750) | def _validate_single_file_xsd(self, xml_file, base_path):
method _get_original_file_errors (line 787) | def _get_original_file_errors(self, xml_file):
method _remove_template_tags_from_text_nodes (line 814) | def _remove_template_tags_from_text_nodes(self, xml_doc):
FILE: scientific-skills/docx/scripts/office/validators/docx.py
class DOCXSchemaValidator (line 16) | class DOCXSchemaValidator(BaseSchemaValidator):
method validate (line 24) | def validate(self):
method validate_whitespace_preservation (line 66) | def validate_whitespace_preservation(self):
method validate_deletions (line 112) | def validate_deletions(self):
method count_paragraphs_in_unpacked (line 163) | def count_paragraphs_in_unpacked(self):
method count_paragraphs_in_original (line 179) | def count_paragraphs_in_original(self):
method validate_insertions (line 202) | def validate_insertions(self):
method compare_paragraph_counts (line 243) | def compare_paragraph_counts(self):
method _parse_id_value (line 251) | def _parse_id_value(self, val: str, base: int = 16) -> int:
method validate_id_constraints (line 254) | def validate_id_constraints(self):
method validate_comment_markers (line 298) | def validate_comment_markers(self):
method repair (line 386) | def repair(self) -> int:
method repair_durableId (line 391) | def repair_durableId(self) -> int:
FILE: scientific-skills/docx/scripts/office/validators/pptx.py
class PPTXSchemaValidator (line 10) | class PPTXSchemaValidator(BaseSchemaValidator):
method validate (line 25) | def validate(self):
method validate_uuid_ids (line 62) | def validate_uuid_ids(self):
method _looks_like_uuid (line 100) | def _looks_like_uuid(self, value):
method validate_slide_layout_ids (line 104) | def validate_slide_layout_ids(self):
method validate_no_duplicate_slide_layouts (line 172) | def validate_no_duplicate_slide_layouts(self):
method validate_notes_slide_references (line 210) | def validate_notes_slide_references(self):
FILE: scientific-skills/docx/scripts/office/validators/redlining.py
class RedliningValidator (line 11) | class RedliningValidator:
method __init__ (line 13) | def __init__(self, unpacked_dir, original_docx, verbose=False, author=...
method repair (line 22) | def repair(self) -> int:
method validate (line 25) | def validate(self):
method _generate_detailed_diff (line 104) | def _generate_detailed_diff(self, original_text, modified_text):
method _get_git_word_diff (line 127) | def _get_git_word_diff(self, original_text, modified_text):
method _remove_author_tracked_changes (line 198) | def _remove_author_tracked_changes(self, root):
method _extract_text_content (line 229) | def _extract_text_content(self, root):
FILE: scientific-skills/drugbank-database/scripts/drugbank_helper.py
class DrugBankHelper (line 23) | class DrugBankHelper:
method __init__ (line 28) | def __init__(self, root=None):
method _get_root (line 38) | def _get_root(self):
method _get_text_safe (line 45) | def _get_text_safe(self, element) -> Optional[str]:
method find_drug (line 49) | def find_drug(self, drugbank_id: str):
method get_drug_info (line 70) | def get_drug_info(self, drugbank_id: str) -> Dict[str, Any]:
method get_interactions (line 97) | def get_interactions(self, drugbank_id: str) -> List[Dict[str, str]]:
method get_targets (line 124) | def get_targets(self, drugbank_id: str) -> List[Dict[str, Any]]:
method get_properties (line 169) | def get_properties(self, drugbank_id: str) -> Dict[str, Dict[str, Any]]:
method check_interaction (line 205) | def check_interaction(self, drug1_id: str, drug2_id: str) -> Optional[...
method check_polypharmacy (line 229) | def check_polypharmacy(self, drug_ids: List[str]) -> List[Dict[str, An...
method get_smiles (line 251) | def get_smiles(self, drugbank_id: str) -> Optional[str]:
method get_inchi (line 264) | def get_inchi(self, drugbank_id: str) -> Optional[str]:
method search_by_name (line 277) | def search_by_name(self, name: str, exact: bool = False) -> List[Dict[...
FILE: scientific-skills/ensembl-database/scripts/ensembl_query.py
class EnsemblAPIClient (line 19) | class EnsemblAPIClient:
method __init__ (line 22) | def __init__(self, server: str = "https://rest.ensembl.org", rate_limi...
method _rate_limit_check (line 35) | def _rate_limit_check(self):
method _make_request (line 50) | def _make_request(
method lookup_gene_by_symbol (line 105) | def lookup_gene_by_symbol(self, species: str, symbol: str, expand: boo...
method lookup_by_id (line 121) | def lookup_by_id(self, ensembl_id: str, expand: bool = False) -> Dict:
method get_sequence (line 136) | def get_sequence(
method get_region_sequence (line 164) | def get_region_sequence(
method get_variant (line 191) | def get_variant(self, species: str, variant_id: str, include_pops: boo...
method predict_variant_effect (line 207) | def predict_variant_effect(
method find_orthologs (line 225) | def find_orthologs(
method get_region_features (line 246) | def get_region_features(
method get_species_info (line 267) | def get_species_info(self) -> List[Dict]:
method get_assembly_info (line 278) | def get_assembly_info(self, species: str) -> Dict:
method map_coordinates (line 291) | def map_coordinates(
function main (line 314) | def main():
FILE: scientific-skills/etetoolkit/scripts/quick_visualize.py
function create_tree_style (line 20) | def create_tree_style(args):
function apply_node_styling (line 53) | def apply_node_styling(tree, args):
function visualize_tree (line 79) | def visualize_tree(tree_file, output, args):
function main (line 117) | def main():
FILE: scientific-skills/etetoolkit/scripts/tree_operations.py
function load_tree (line 24) | def load_tree(tree_file, format_num=0):
function convert_format (line 33) | def convert_format(tree_file, output, in_format=0, out_format=1):
function reroot_tree (line 40) | def reroot_tree(tree_file, output, outgroup=None, midpoint=False, format...
function prune_tree (line 64) | def prune_tree(tree_file, output, keep_taxa, preserve_length=True, forma...
function tree_stats (line 88) | def tree_stats(tree_file, format_num=0):
function show_ascii (line 120) | def show_ascii(tree_file, format_num=0, show_internal=True):
function list_leaves (line 126) | def list_leaves(tree_file, format_num=0):
function main (line 133) | def main():
FILE: scientific-skills/exploratory-data-analysis/scripts/eda_analyzer.py
function detect_file_type (line 14) | def detect_file_type(filepath):
function get_file_basic_info (line 136) | def get_file_basic_info(filepath):
function format_bytes (line 151) | def format_bytes(size):
function load_reference_info (line 160) | def load_reference_info(category, extension):
function analyze_file (line 215) | def analyze_file(filepath):
function analyze_general_scientific (line 251) | def analyze_general_scientific(filepath, extension):
function analyze_bioinformatics (line 335) | def analyze_bioinformatics(filepath, extension):
function analyze_imaging (line 381) | def analyze_imaging(filepath, extension):
function generate_markdown_report (line 421) | def generate_markdown_report(analysis, output_path=None):
function main (line 517) | def main():
FILE: scientific-skills/fda-database/scripts/fda_examples.py
function example_drug_safety_profile (line 15) | def example_drug_safety_profile(fda, drug_name):
function example_device_surveillance (line 68) | def example_device_surveillance(fda, device_name):
function example_food_recall_monitoring (line 109) | def example_food_recall_monitoring(fda, allergen):
function example_substance_lookup (line 144) | def example_substance_lookup(fda, substance_name):
function example_comparative_drug_analysis (line 202) | def example_comparative_drug_analysis(fda, drug_list):
function example_veterinary_analysis (line 252) | def example_veterinary_analysis(fda, species, drug_name):
function main (line 296) | def main():
FILE: scientific-skills/fda-database/scripts/fda_query.py
class RateLimiter (line 25) | class RateLimiter:
method __init__ (line 28) | def __init__(self, max_per_minute: int = 240):
method wait_if_needed (line 32) | def wait_if_needed(self):
class FDACache (line 51) | class FDACache:
method __init__ (line 54) | def __init__(self, cache_dir: str = "fda_cache", ttl: int = 3600):
method _get_cache_key (line 59) | def _get_cache_key(self, url: str, params: Dict) -> str:
method get (line 64) | def get(self, url: str, params: Dict) -> Optional[Dict]:
method set (line 76) | def set(self, url: str, params: Dict, data: Dict):
class FDAQuery (line 84) | class FDAQuery:
method __init__ (line 89) | def __init__(self, api_key: Optional[str] = None, use_cache: bool = True,
method _build_url (line 104) | def _build_url(self, category: str, endpoint: str) -> str:
method _make_request (line 108) | def _make_request(self, url: str, params: Dict, use_cache: bool = True...
method query (line 160) | def query(self, category: str, endpoint: str, search: Optional[str] = ...
method query_all (line 194) | def query_all(self, category: str, endpoint: str, search: str,
method query_drug_events (line 239) | def query_drug_events(self, drug_name: str, limit: int = 100) -> Dict:
method query_drug_label (line 244) | def query_drug_label(self, drug_name: str, brand: bool = True) -> Dict:
method query_drug_ndc (line 250) | def query_drug_ndc(self, ndc: Optional[str] = None,
method query_drug_recalls (line 262) | def query_drug_recalls(self, drug_name: Optional[str] = None,
method query_device_events (line 277) | def query_device_events(self, device_name: str, limit: int = 100) -> D...
method query_device_510k (line 282) | def query_device_510k(self, applicant: Optional[str] = None,
method query_device_classification (line 294) | def query_device_classification(self, product_code: str) -> Dict:
method query_food_events (line 301) | def query_food_events(self, product_name: Optional[str] = None,
method query_food_recalls (line 313) | def query_food_recalls(self, product: Optional[str] = None,
method query_animal_events (line 331) | def query_animal_events(self, species: Optional[str] = None,
method query_substance_by_unii (line 345) | def query_substance_by_unii(self, unii: str) -> Dict:
method query_substance_by_name (line 350) | def query_substance_by_name(self, name: str) -> Dict:
method count_by_field (line 357) | def count_by_field(self, category: str, endpoint: str,
method get_date_range_data (line 375) | def get_date_range_data(self, category: str, endpoint: str,
function main (line 403) | def main():
FILE: scientific-skills/fred-economic-data/scripts/fred_examples.py
function example_basic_series (line 16) | def example_basic_series():
function example_transformations (line 42) | def example_transformations():
function example_search (line 76) | def example_search():
function example_categories (line 107) | def example_categories():
function example_releases (line 135) | def example_releases():
function example_economic_indicators (line 174) | def example_economic_indicators():
function example_time_series_analysis (line 203) | def example_time_series_analysis():
function example_vintage_data (line 240) | def example_vintage_data():
function example_sources (line 265) | def example_sources():
function example_regional_data (line 288) | def example_regional_data():
function main (line 314) | def main():
FILE: scientific-skills/fred-economic-data/scripts/fred_query.py
class FREDQuery (line 14) | class FREDQuery:
method __init__ (line 27) | def __init__(
method _make_request (line 54) | def _make_request(
method get_series (line 99) | def get_series(self, series_id: str, **kwargs) -> Dict[str, Any]:
method get_observations (line 113) | def get_observations(
method search_series (line 162) | def search_series(
method get_series_categories (line 211) | def get_series_categories(self, series_id: str, **kwargs) -> Dict[str,...
method get_series_release (line 216) | def get_series_release(self, series_id: str, **kwargs) -> Dict[str, Any]:
method get_series_tags (line 221) | def get_series_tags(self, series_id: str, **kwargs) -> Dict[str, Any]:
method get_series_updates (line 226) | def get_series_updates(
method get_vintage_dates (line 242) | def get_vintage_dates(self, series_id: str, **kwargs) -> Dict[str, Any]:
method get_category (line 249) | def get_category(self, category_id: int = 0, **kwargs) -> Dict[str, Any]:
method get_category_children (line 259) | def get_category_children(self, category_id: int = 0, **kwargs) -> Dic...
method get_category_series (line 264) | def get_category_series(
method get_category_tags (line 284) | def get_category_tags(self, category_id: int, **kwargs) -> Dict[str, A...
method get_releases (line 291) | def get_releases(
method get_release_dates (line 309) | def get_release_dates(
method get_release (line 335) | def get_release(self, release_id: int, **kwargs) -> Dict[str, Any]:
method get_release_series (line 340) | def get_release_series(
method get_release_sources (line 356) | def get_release_sources(self, release_id: int, **kwargs) -> Dict[str, ...
method get_release_tables (line 361) | def get_release_tables(self, release_id: int, **kwargs) -> Dict[str, A...
method get_tags (line 368) | def get_tags(
method get_related_tags (line 392) | def get_related_tags(
method get_series_by_tags (line 408) | def get_series_by_tags(
method get_sources (line 443) | def get_sources(
method get_source (line 461) | def get_source(self, source_id: int, **kwargs) -> Dict[str, Any]:
method get_source_releases (line 466) | def get_source_releases(
method get_shapes (line 484) | def get_shapes(self, shape: str) -> Dict[str, Any]:
method get_series_group (line 494) | def get_series_group(self, series_id: str) -> Dict[str, Any]:
method get_series_data (line 499) | def get_series_data(
method get_regional_data (line 513) | def get_regional_data(
method clear_cache (line 555) | def clear_cache(self):
function query_fred (line 561) | def query_fred(series_id: str, api_key: Optional[str] = None, **kwargs) ...
FILE: scientific-skills/gene-database/scripts/batch_gene_lookup.py
function read_gene_list (line 18) | def read_gene_list(filepath: str) -> List[str]:
function batch_esearch (line 40) | def batch_esearch(queries: List[str], organism: Optional[str] = None,
function batch_esummary (line 96) | def batch_esummary(gene_ids: List[str], api_key: Optional[str] = None,
function batch_lookup_by_ids (line 147) | def batch_lookup_by_ids(gene_ids: List[str], api_key: Optional[str] = No...
function batch_lookup_by_symbols (line 182) | def batch_lookup_by_symbols(gene_symbols: List[str], organism: str,
function main (line 240) | def main():
FILE: scientific-skills/gene-database/scripts/fetch_gene_data.py
function get_taxon_id (line 20) | def get_taxon_id(taxon_name: str) -> Optional[str]:
function fetch_gene_by_id (line 56) | def fetch_gene_by_id(gene_id: str, api_key: Optional[str] = None) -> Dic...
function fetch_gene_by_symbol (line 87) | def fetch_gene_by_symbol(symbol: str, taxon: str, api_key: Optional[str]...
function fetch_multiple_genes (line 125) | def fetch_multiple_genes(gene_ids: List[str], api_key: Optional[str] = N...
function display_gene_info (line 157) | def display_gene_info(data: Dict[str, Any], verbose: bool = False) -> None:
function main (line 216) | def main():
FILE: scientific-skills/gene-database/scripts/query_gene.py
function esearch (line 23) | def esearch(query: str, retmax: int = 20, api_key: Optional[str] = None)...
function esummary (line 65) | def esummary(gene_ids: List[str], api_key: Optional[str] = None) -> Dict...
function efetch (line 99) | def efetch(gene_ids: List[str], retmode: str = 'xml', api_key: Optional[...
function search_and_summarize (line 133) | def search_and_summarize(query: str, organism: Optional[str] = None,
function fetch_by_id (line 182) | def fetch_by_id(gene_ids: List[str], output_format: str = 'json',
function main (line 205) | def main():
FILE: scientific-skills/generate-image/scripts/generate_image.py
function check_env_file (line 22) | def check_env_file() -> Optional[str]:
function load_image_as_base64 (line 38) | def load_image_as_base64(image_path: str) -> str:
function save_base64_image (line 63) | def save_base64_image(base64_data: str, output_path: str) -> None:
function generate_image (line 75) | def generate_image(
function main (line 209) | def main():
FILE: scientific-skills/get-available-resources/scripts/detect_resources.py
function get_cpu_info (line 23) | def get_cpu_info() -> Dict[str, Any]:
function get_memory_info (line 45) | def get_memory_info() -> Dict[str, Any]:
function get_disk_info (line 60) | def get_disk_info(path: str = None) -> Dict[str, Any]:
function detect_nvidia_gpus (line 81) | def detect_nvidia_gpus() -> List[Dict[str, Any]]:
function detect_amd_gpus (line 116) | def detect_amd_gpus() -> List[Dict[str, Any]]:
function detect_apple_silicon_gpu (line 149) | def detect_apple_silicon_gpu() -> Optional[Dict[str, Any]]:
function get_gpu_info (line 204) | def get_gpu_info() -> Dict[str, Any]:
function get_os_info (line 230) | def get_os_info() -> Dict[str, Any]:
function detect_all_resources (line 241) | def detect_all_resources(output_path: str = None) -> Dict[str, Any]:
function generate_recommendations (line 273) | def generate_recommendations(resources: Dict[str, Any]) -> Dict[str, Any]:
function main (line 351) | def main():
FILE: scientific-skills/gget/scripts/batch_sequence_analysis.py
function read_fasta (line 13) | def read_fasta(fasta_file):
function analyze_sequences (line 36) | def analyze_sequences(
function main (line 141) | def main():
FILE: scientific-skills/gget/scripts/enrichment_pipeline.py
function read_gene_list (line 14) | def read_gene_list(file_path):
function enrichment_pipeline (line 30) | def enrichment_pipeline(
function main (line 169) | def main():
FILE: scientific-skills/gget/scripts/gene_analysis.py
function analyze_gene (line 12) | def analyze_gene(gene_name, species="homo_sapiens", output_prefix=None):
function main (line 132) | def main():
FILE: scientific-skills/infographics/scripts/generate_infographic.py
function list_options (line 42) | def list_options():
function main (line 98) | def main():
FILE: scientific-skills/infographics/scripts/generate_infographic_ai.py
function _load_env_file (line 39) | def _load_env_file():
class InfographicGenerator (line 272) | class InfographicGenerator:
method __init__ (line 339) | def __init__(self, api_key: Optional[str] = None, verbose: bool = False):
method _log (line 364) | def _log(self, message: str):
method research_topic (line 371) | def research_topic(self, topic: str, infographic_type: Optional[str] =...
method web_search (line 481) | def web_search(self, query: str) -> Dict[str, Any]:
method _enhance_prompt_with_research (line 555) | def _enhance_prompt_with_research(self, user_prompt: str, research_dat...
method _make_request (line 581) | def _make_request(self, model: str, messages: List[Dict[str, Any]],
method _extract_image_from_response (line 625) | def _extract_image_from_response(self, response: Dict[str, Any]) -> Op...
method _image_to_base64 (line 683) | def _image_to_base64(self, image_path: str) -> str:
method _build_generation_prompt (line 700) | def _build_generation_prompt(self, user_prompt: str,
method generate_image (line 738) | def generate_image(self, prompt: str) -> Optional[bytes]:
method review_image (line 786) | def review_image(self, image_path: str, original_prompt: str,
method improve_prompt (line 936) | def improve_prompt(self, original_prompt: str, critique: str,
method generate_iterative (line 983) | def generate_iterative(self, user_prompt: str, output_path: str,
function main (line 1173) | def main():
FILE: scientific-skills/iso-13485-certification/scripts/gap_analyzer.py
class GapAnalyzer (line 164) | class GapAnalyzer:
method __init__ (line 167) | def __init__(self, docs_dir: str):
method analyze (line 173) | def analyze(self) -> Dict:
method _scan_documents (line 198) | def _scan_documents(self) -> List[Tuple[Path, str]]:
method _search_for_procedure (line 223) | def _search_for_procedure(self, documents: List[Tuple[Path, str]],
method _search_for_document (line 238) | def _search_for_document(self, documents: List[Tuple[Path, str]],
method _generate_report (line 249) | def _generate_report(self) -> Dict:
method _generate_recommendations (line 292) | def _generate_recommendations(self, missing_procedures: List[Dict],
function print_report (line 331) | def print_report(report: Dict):
function save_report (line 400) | def save_report(report: Dict, output_path: str):
function main (line 407) | def main():
FILE: scientific-skills/kegg-database/scripts/kegg_api.py
function kegg_info (line 22) | def kegg_info(database: str) -> str:
function kegg_list (line 43) | def kegg_list(database: str, org: Optional[str] = None) -> str:
function kegg_find (line 71) | def kegg_find(database: str, query: str, option: Optional[str] = None) -...
function kegg_get (line 110) | def kegg_get(entries: Union[str, List[str]], option: Optional[str] = Non...
function kegg_conv (line 157) | def kegg_conv(target_db: str, source_db: str) -> str:
function kegg_link (line 186) | def kegg_link(target_db: str, source_db: str) -> str:
function kegg_ddi (line 215) | def kegg_ddi(drug_entries: Union[str, List[str]]) -> str:
FILE: scientific-skills/labarchive-integration/scripts/entry_operations.py
function load_config (line 16) | def load_config(config_path='config.yaml'):
function init_client (line 30) | def init_client(config):
function get_user_id (line 45) | def get_user_id(client, config):
function create_entry (line 70) | def create_entry(client, uid, nbid, title, content=None, date=None):
function create_comment (line 119) | def create_comment(client, uid, nbid, entry_id, comment):
function upload_attachment (line 145) | def upload_attachment(client, config, uid, nbid, entry_id, file_path):
function batch_upload (line 187) | def batch_upload(client, config, uid, nbid, entry_id, directory):
function create_entry_with_attachments (line 218) | def create_entry_with_attachments(client, config, uid, nbid, title, cont...
function main (line 235) | def main():
FILE: scientific-skills/labarchive-integration/scripts/notebook_operations.py
function load_config (line 15) | def load_config(config_path='config.yaml'):
function init_client (line 29) | def init_client(config):
function get_user_id (line 44) | def get_user_id(client, config):
function list_notebooks (line 69) | def list_notebooks(client, uid):
function backup_notebook (line 113) | def backup_notebook(client, uid, nbid, output_dir='backups', json_format...
function backup_all_notebooks (line 165) | def backup_all_notebooks(client, uid, output_dir='backups', json_format=...
function main (line 197) | def main():
FILE: scientific-skills/labarchive-integration/scripts/setup_config.py
function get_regional_endpoint (line 14) | def get_regional_endpoint():
function get_credentials (line 39) | def get_credentials():
function create_config_file (line 70) | def create_config_file(config_data, output_path='config.yaml'):
function verify_config (line 82) | def verify_config(config_path='config.yaml'):
function test_authentication (line 105) | def test_authentication(config_path='config.yaml'):
function main (line 161) | def main():
FILE: scientific-skills/literature-review/scripts/generate_pdf.py
function generate_pdf (line 12) | def generate_pdf(
function check_dependencies (line 100) | def check_dependencies():
function main (line 128) | def main():
FILE: scientific-skills/literature-review/scripts/search_databases.py
function format_search_results (line 12) | def format_search_results(results: List[Dict], output_format: str = 'jso...
function deduplicate_results (line 83) | def deduplicate_results(results: List[Dict]) -> List[Dict]:
function rank_results (line 119) | def rank_results(results: List[Dict], criteria: str = 'citations') -> Li...
function filter_by_year (line 139) | def filter_by_year(results: List[Dict], start_year: int = None, end_year...
function generate_search_summary (line 167) | def generate_search_summary(results: List[Dict]) -> Dict:
function main (line 209) | def main():
FILE: scientific-skills/literature-review/scripts/verify_citations.py
class CitationVerifier (line 14) | class CitationVerifier:
method __init__ (line 15) | def __init__(self):
method extract_dois (line 21) | def extract_dois(self, text: str) -> List[str]:
method verify_doi (line 26) | def verify_doi(self, doi: str) -> Tuple[bool, Dict]:
method _get_crossref_metadata (line 44) | def _get_crossref_metadata(self, doi: str) -> Dict:
method _format_authors (line 69) | def _format_authors(self, authors: List[Dict]) -> str:
method _extract_year (line 86) | def _extract_year(self, message: Dict) -> str:
method verify_url (line 96) | def verify_url(self, url: str) -> Tuple[bool, int]:
method verify_citations_in_file (line 108) | def verify_citations_in_file(self, filepath: str) -> Dict:
method format_citation_apa (line 139) | def format_citation_apa(self, metadata: Dict) -> str:
method format_citation_nature (line 161) | def format_citation_nature(self, metadata: Dict) -> str:
function main (line 182) | def main():
FILE: scientific-skills/market-research-reports/scripts/generate_market_visuals.py
function get_script_path (line 312) | def get_script_path(tool: str) -> Path:
function generate_visual (line 324) | def generate_visual(
function main (line 400) | def main():
FILE: scientific-skills/markitdown/scripts/batch_convert.py
function convert_file (line 17) | def convert_file(md: MarkItDown, file_path: Path, output_dir: Path, verb...
function batch_convert (line 54) | def batch_convert(
function main (line 136) | def main():
FILE: scientific-skills/markitdown/scripts/convert_literature.py
function extract_metadata_from_filename (line 19) | def extract_metadata_from_filename(filename: str) -> Dict[str, str]:
function convert_paper (line 45) | def convert_paper(
function create_index (line 127) | def create_index(papers: List[Dict], output_dir: Path):
function main (line 180) | def main():
FILE: scientific-skills/markitdown/scripts/convert_with_ai.py
function convert_with_ai (line 70) | def convert_with_ai(
function main (line 140) | def main():
FILE: scientific-skills/matplotlib/scripts/plot_template.py
function set_publication_style (line 21) | def set_publication_style():
function generate_sample_data (line 39) | def generate_sample_data():
function create_line_plot (line 64) | def create_line_plot(data, ax=None):
function create_scatter_plot (line 88) | def create_scatter_plot(data, ax=None):
function create_bar_chart (line 115) | def create_bar_chart(data, ax=None):
function create_histogram (line 148) | def create_histogram(data, ax=None):
function create_heatmap (line 174) | def create_heatmap(data, ax=None):
function create_contour_plot (line 201) | def create_contour_plot(data, ax=None):
function create_box_plot (line 231) | def create_box_plot(data, ax=None):
function create_violin_plot (line 255) | def create_violin_plot(data, ax=None):
function create_3d_plot (line 284) | def create_3d_plot():
function create_comprehensive_figure (line 316) | def create_comprehensive_figure():
function main (line 350) | def main():
FILE: scientific-skills/matplotlib/scripts/style_configurator.py
function generate_preview_data (line 114) | def generate_preview_data():
function create_style_preview (line 132) | def create_style_preview(style_dict=None):
function save_style_file (line 196) | def save_style_file(style_dict, filename):
function print_style_info (line 235) | def print_style_info(style_dict):
function list_available_presets (line 261) | def list_available_presets():
function interactive_mode (line 277) | def interactive_mode():
function main (line 339) | def main():
FILE: scientific-skills/medchem/scripts/filter_molecules.py
function load_molecules (line 32) | def load_molecules(input_file: Path, smiles_column: str = "smiles") -> T...
function apply_rule_filters (line 98) | def apply_rule_filters(mols: List[Chem.Mol], rules: List[str], n_jobs: i...
function apply_structural_alerts (line 114) | def apply_structural_alerts(mols: List[Chem.Mol], alert_type: str, n_job...
function apply_complexity_filter (line 159) | def apply_complexity_filter(mols: List[Chem.Mol], max_complexity: float,...
function apply_constraints (line 176) | def apply_constraints(mols: List[Chem.Mol], constraints: Dict, n_jobs: i...
function apply_chemical_groups (line 191) | def apply_chemical_groups(mols: List[Chem.Mol], groups: List[str]) -> pd...
function generate_summary (line 205) | def generate_summary(df: pd.DataFrame, output_file: Path):
function main (line 289) | def main():
FILE: scientific-skills/neuropixels-analysis/assets/analysis_template.py
function main (line 59) | def main():
FILE: scientific-skills/neuropixels-analysis/scripts/compute_metrics.py
function compute_metrics (line 40) | def compute_metrics(
function main (line 157) | def main():
FILE: scientific-skills/neuropixels-analysis/scripts/explore_recording.py
function explore_recording (line 15) | def explore_recording(data_path: str, stream_id: str = 'imec0.ap'):
function plot_probe (line 69) | def plot_probe(recording, output_path=None):
function plot_traces (line 82) | def plot_traces(recording, duration=1.0, output_path=None):
function plot_power_spectrum (line 110) | def plot_power_spectrum(recording, output_path=None):
FILE: scientific-skills/neuropixels-analysis/scripts/export_to_phy.py
function export_phy (line 16) | def export_phy(
function main (line 57) | def main():
FILE: scientific-skills/neuropixels-analysis/scripts/neuropixels_pipeline.py
function load_recording (line 22) | def load_recording(data_path: str, stream_id: str = 'imec0.ap') -> si.Ba...
function preprocess (line 47) | def preprocess(
function check_drift (line 87) | def check_drift(recording: si.BaseRecording, output_folder: str) -> dict:
function correct_motion (line 152) | def correct_motion(
function run_spike_sorting (line 174) | def run_spike_sorting(
function postprocess (line 197) | def postprocess(
function curate_units (line 236) | def curate_units(qm, method: str = 'allen') -> dict:
function export_results (line 300) | def export_results(
function run_pipeline (line 348) | def run_pipeline(
FILE: scientific-skills/neuropixels-analysis/scripts/preprocess_recording.py
function preprocess_recording (line 15) | def preprocess_recording(
function main (line 92) | def main():
FILE: scientific-skills/neuropixels-analysis/scripts/run_sorting.py
function run_sorting (line 36) | def run_sorting(
function main (line 79) | def main():
FILE: scientific-skills/open-notebook/scripts/chat_interaction.py
function create_chat_session (line 21) | def create_chat_session(notebook_id, title, model_override=None):
function list_chat_sessions (line 36) | def list_chat_sessions(notebook_id):
function send_chat_message (line 51) | def send_chat_message(session_id, message, include_sources=True,
function get_session_history (line 72) | def get_session_history(session_id):
function build_context (line 86) | def build_context(notebook_id, source_ids=None, note_ids=None):
function search_knowledge_base (line 101) | def search_knowledge_base(query, search_type="vector", limit=5):
function ask_question (line 118) | def ask_question(query):
function delete_chat_session (line 130) | def delete_chat_session(session_id):
FILE: scientific-skills/open-notebook/scripts/notebook_management.py
function create_notebook (line 21) | def create_notebook(name, description=""):
function list_notebooks (line 33) | def list_notebooks(archived=False):
function get_notebook (line 48) | def get_notebook(notebook_id):
function update_notebook (line 55) | def update_notebook(notebook_id, name=None, description=None, archived=N...
function delete_notebook (line 73) | def delete_notebook(notebook_id, delete_sources=False):
function link_source_to_notebook (line 90) | def link_source_to_notebook(notebook_id, source_id):
function unlink_source_from_notebook (line 99) | def unlink_source_from_notebook(notebook_id, source_id):
FILE: scientific-skills/open-notebook/scripts/source_ingestion.py
function add_url_source (line 22) | def add_url_source(notebook_id, url, process_async=True):
function add_text_source (line 35) | def add_text_source(notebook_id, title, text):
function upload_file_source (line 48) | def upload_file_source(notebook_id, file_path, process_async=True):
function wait_for_processing (line 66) | def wait_for_processing(source_id, poll_interval=5, timeout=300):
function list_sources (line 85) | def list_sources(notebook_id=None, limit=20):
function get_source_insights (line 99) | def get_source_insights(source_id):
function retry_failed_source (line 106) | def retry_failed_source(source_id):
function delete_source (line 114) | def delete_source(source_id):
FILE: scientific-skills/open-notebook/scripts/test_open_notebook_skill.py
class TestSkillDirectoryStructure (line 26) | class TestSkillDirectoryStructure(unittest.TestCase):
method test_skill_directory_exists (line 29) | def test_skill_directory_exists(self):
method test_skill_md_exists (line 36) | def test_skill_md_exists(self):
method test_references_directory_exists (line 43) | def test_references_directory_exists(self):
method test_scripts_directory_exists (line 50) | def test_scripts_directory_exists(self):
class TestSkillMdFrontmatter (line 58) | class TestSkillMdFrontmatter(unittest.TestCase):
method setUpClass (line 62) | def setUpClass(cls):
method test_has_yaml_frontmatter (line 69) | def test_has_yaml_frontmatter(self):
method test_frontmatter_has_name (line 81) | def test_frontmatter_has_name(self):
method test_frontmatter_has_description (line 86) | def test_frontmatter_has_description(self):
method test_frontmatter_has_license (line 99) | def test_frontmatter_has_license(self):
method test_frontmatter_has_metadata_author (line 104) | def test_frontmatter_has_metadata_author(self):
class TestSkillMdContent (line 111) | class TestSkillMdContent(unittest.TestCase):
method setUpClass (line 115) | def setUpClass(cls):
method test_has_title_heading (line 119) | def test_has_title_heading(self):
method test_has_overview_section (line 126) | def test_has_overview_section(self):
method test_has_quick_start_section (line 134) | def test_has_quick_start_section(self):
method test_has_docker_setup (line 142) | def test_has_docker_setup(self):
method test_has_api_base_url (line 147) | def test_has_api_base_url(self):
method test_mentions_notebooklm_alternative (line 151) | def test_mentions_notebooklm_alternative(self):
method test_mentions_self_hosted (line 159) | def test_mentions_self_hosted(self):
method test_mentions_multiple_ai_providers (line 167) | def test_mentions_multiple_ai_providers(self):
method test_has_core_features_section (line 181) | def test_has_core_features_section(self):
method test_has_api_reference_section (line 192) | def test_has_api_reference_section(self):
method test_has_python_code_examples (line 200) | def test_has_python_code_examples(self):
method test_has_bash_code_examples (line 204) | def test_has_bash_code_examples(self):
method test_has_installation_instructions (line 211) | def test_has_installation_instructions(self):
method test_has_environment_variable_info (line 219) | def test_has_environment_variable_info(self):
method test_has_kdense_suggestion (line 227) | def test_has_kdense_suggestion(self):
method test_content_length_sufficient (line 235) | def test_content_length_sufficient(self):
class TestReferenceFiles (line 244) | class TestReferenceFiles(unittest.TestCase):
method _read_reference (line 247) | def _read_reference(self, filename):
method test_api_reference_exists_and_comprehensive (line 257) | def test_api_reference_exists_and_comprehensive(self):
method test_api_reference_has_http_methods (line 269) | def test_api_reference_has_http_methods(self):
method test_examples_reference_exists (line 279) | def test_examples_reference_exists(self):
method test_configuration_reference_exists (line 285) | def test_configuration_reference_exists(self):
method test_architecture_reference_exists (line 299) | def test_architecture_reference_exists(self):
class TestExampleScripts (line 312) | class TestExampleScripts(unittest.TestCase):
method _check_script (line 315) | def _check_script(self, filename):
method test_notebook_management_script_exists (line 330) | def test_notebook_management_script_exists(self):
method test_source_ingestion_script_exists (line 336) | def test_source_ingestion_script_exists(self):
method test_chat_interaction_script_exists (line 341) | def test_chat_interaction_script_exists(self):
class TestMarketplaceJson (line 347) | class TestMarketplaceJson(unittest.TestCase):
method setUpClass (line 351) | def setUpClass(cls):
method test_marketplace_has_open_notebook_skill (line 355) | def test_marketplace_has_open_notebook_skill(self):
method test_marketplace_valid_json (line 365) | def test_marketplace_valid_json(self):
class TestSkillMdApiEndpointCoverage (line 373) | class TestSkillMdApiEndpointCoverage(unittest.TestCase):
method setUpClass (line 377) | def setUpClass(cls):
method test_covers_notebook_endpoints (line 385) | def test_covers_notebook_endpoints(self):
method test_covers_source_endpoints (line 389) | def test_covers_source_endpoints(self):
method test_covers_note_endpoints (line 393) | def test_covers_note_endpoints(self):
method test_covers_chat_endpoints (line 397) | def test_covers_chat_endpoints(self):
method test_covers_search_endpoints (line 401) | def test_covers_search_endpoints(self):
method test_covers_podcast_endpoints (line 405) | def test_covers_podcast_endpoints(self):
method test_covers_transformation_endpoints (line 409) | def test_covers_transformation_endpoints(self):
method test_covers_model_management (line 413) | def test_covers_model_management(self):
method test_covers_credential_management (line 417) | def test_covers_credential_management(self):
FILE: scientific-skills/openalex-database/scripts/openalex_client.py
class OpenAlexClient (line 18) | class OpenAlexClient:
method __init__ (line 23) | def __init__(self, email: Optional[str] = None, requests_per_second: i...
method _rate_limit (line 36) | def _rate_limit(self):
method _make_request (line 44) | def _make_request(
method search_works (line 101) | def search_works(
method get_entity (line 144) | def get_entity(self, entity_type: str, entity_id: str) -> Dict[str, Any]:
method batch_lookup (line 158) | def batch_lookup(
method paginate_all (line 192) | def paginate_all(
method sample_works (line 238) | def sample_works(
method group_by (line 295) | def group_by(
FILE: scientific-skills/openalex-database/scripts/query_helpers.py
function find_author_works (line 12) | def find_author_works(
function find_institution_works (line 58) | def find_institution_works(
function find_highly_cited_recent_papers (line 103) | def find_highly_cited_recent_papers(
function get_open_access_papers (line 140) | def get_open_access_papers(
function get_publication_trends (line 176) | def get_publication_trends(
function analyze_research_output (line 208) | def analyze_research_output(
FILE: scientific-skills/opentargets-database/scripts/query_opentargets.py
function execute_query (line 21) | def execute_query(query: str, variables: Optional[Dict[str, Any]] = None...
function search_entities (line 53) | def search_entities(query_string: str, entity_types: Optional[List[str]]...
function get_target_info (line 85) | def get_target_info(ensembl_id: str, include_diseases: bool = False) -> ...
function get_disease_info (line 156) | def get_disease_info(efo_id: str, include_targets: bool = False) -> Dict...
function get_target_disease_evidence (line 206) | def get_target_disease_evidence(ensembl_id: str, efo_id: str,
function get_known_drugs_for_disease (line 245) | def get_known_drugs_for_disease(efo_id: str) -> Dict[str, Any]:
function get_drug_info (line 285) | def get_drug_info(chembl_id: str) -> Dict[str, Any]:
function get_target_associations (line 330) | def get_target_associations(ensembl_id: str, min_score: float = 0.0) -> ...
FILE: scientific-skills/opentrons-integration/scripts/basic_protocol_template.py
function run (line 25) | def run(protocol: protocol_api.ProtocolContext):
FILE: scientific-skills/opentrons-integration/scripts/pcr_setup_template.py
function run (line 23) | def run(protocol: protocol_api.ProtocolContext):
FILE: scientific-skills/opentrons-integration/scripts/serial_dilution_template.py
function run (line 23) | def run(protocol: protocol_api.ProtocolContext):
FILE: scientific-skills/parallel-web/scripts/parallel_web.py
function _get_api_key (line 29) | def _get_api_key():
function _get_extract_client (line 41) | def _get_extract_client():
class ParallelChat (line 53) | class ParallelChat:
method __init__ (line 66) | def __init__(self):
method query (line 80) | def query(
method _extract_basis (line 135) | def _extract_basis(self, response) -> List[Dict[str, str]]:
class ParallelSearch (line 165) | class ParallelSearch:
method __init__ (line 181) | def __init__(self):
method search (line 184) | def search(
class ParallelExtract (line 224) | class ParallelExtract:
method __init__ (line 232) | def __init__(self):
method extract (line 235) | def extract(
class ParallelDeepResearch (line 300) | class ParallelDeepResearch:
method __init__ (line 319) | def __init__(self):
method research (line 322) | def research(
function _print_search_results (line 371) | def _print_search_results(result: Dict[str, Any], output_file=None):
function _print_extract_results (line 401) | def _print_extract_results(result: Dict[str, Any], output_file=None):
function _print_research_results (line 432) | def _print_research_results(result: Dict[str, Any], output_file=None):
function main (line 467) | def main():
FILE: scientific-skills/pdf/scripts/check_bounding_boxes.py
class RectAndField (line 9) | class RectAndField:
function get_bounding_box_messages (line 15) | def get_bounding_box_messages(fields_json_stream) -> list[str]:
FILE: scientific-skills/pdf/scripts/convert_pdf_to_images.py
function convert (line 9) | def convert(pdf_path, output_dir, max_dim=1000):
FILE: scientific-skills/pdf/scripts/create_validation_image.py
function create_validation_image (line 9) | def create_validation_image(page_number, fields_json_path, input_path, o...
FILE: scientific-skills/pdf/scripts/extract_form_field_info.py
function get_full_annotation_field_id (line 9) | def get_full_annotation_field_id(annotation):
function make_field_dict (line 19) | def make_field_dict(field, field_id):
function get_field_info (line 47) | def get_field_info(reader: PdfReader):
function write_field_info (line 110) | def write_field_info(pdf_path: str, json_output_path: str):
FILE: scientific-skills/pdf/scripts/extract_form_structure.py
function extract_form_structure (line 20) | def extract_form_structure(pdf_path):
function main (line 91) | def main():
FILE: scientific-skills/pdf/scripts/fill_fillable_fields.py
function fill_pdf_fields (line 11) | def fill_pdf_fields(input_pdf_path: str, fields_json_path: str, output_p...
function validation_error_for_field_value (line 55) | def validation_error_for_field_value(field_info, field_value):
function monkeypatch_pydpf_method (line 74) | def monkeypatch_pydpf_method():
FILE: scientific-skills/pdf/scripts/fill_pdf_form_with_annotations.py
function transform_from_image_coords (line 10) | def transform_from_image_coords(bbox, image_width, image_height, pdf_wid...
function transform_from_pdf_coords (line 23) | def transform_from_pdf_coords(bbox, pdf_height):
function fill_pdf_form (line 33) | def fill_pdf_form(input_pdf_path, fields_json_path, output_pdf_path):
FILE: scientific-skills/perplexity-search/scripts/perplexity_search.py
function check_dependencies (line 26) | def check_dependencies():
function check_api_key (line 37) | def check_api_key() -> Optional[str]:
function search_with_perplexity (line 52) | def search_with_perplexity(
function main (line 135) | def main():
FILE: scientific-skills/perplexity-search/scripts/setup_env.py
function create_env_file (line 20) | def create_env_file(api_key: str, env_file: str = ".env") -> bool:
function validate_setup (line 61) | def validate_setup() -> bool:
function main (line 104) | def main():
FILE: scientific-skills/phylogenetics/scripts/phylogenetic_analysis.py
function check_dependencies (line 22) | def check_dependencies():
function count_sequences (line 42) | def count_sequences(fasta_file: str) -> int:
function run_mafft (line 48) | def run_mafft(input_fasta: str, output_fasta: str, n_threads: int = 4,
function run_iqtree (line 79) | def run_iqtree(aligned_fasta: str, prefix: str, seq_type: str = "nt",
function run_fasttree (line 118) | def run_fasttree(aligned_fasta: str, output_tree: str, seq_type: str = "...
function visualize_tree (line 137) | def visualize_tree(tree_file: str, output_png: str, outgroup: str = None...
function tree_summary (line 174) | def tree_summary(tree_file: str) -> dict:
function main (line 204) | def main():
FILE: scientific-skills/pptx/scripts/add_slide.py
function get_next_slide_number (line 27) | def get_next_slide_number(slides_dir: Path) -> int:
function create_slide_from_layout (line 33) | def create_slide_from_layout(unpacked_dir: Path, layout_file: str) -> None:
function duplicate_slide (line 90) | def duplicate_slide(unpacked_dir: Path, source: str) -> None:
function _add_to_content_types (line 130) | def _add_to_content_types(unpacked_dir: Path, dest: str) -> None:
function _add_to_presentation_rels (line 141) | def _add_to_presentation_rels(unpacked_dir: Path, dest: str) -> str:
function _get_next_slide_id (line 158) | def _get_next_slide_id(unpacked_dir: Path) -> int:
function parse_source (line 165) | def parse_source(source: str) -> tuple[str, str | None]:
FILE: scientific-skills/pptx/scripts/clean.py
function get_slides_in_sldidlst (line 27) | def get_slides_in_sldidlst(unpacked_dir: Path) -> set[str]:
function remove_orphaned_slides (line 49) | def remove_orphaned_slides(unpacked_dir: Path) -> list[str]:
function remove_trash_directory (line 91) | def remove_trash_directory(unpacked_dir: Path) -> list[str]:
function get_slide_referenced_files (line 106) | def get_slide_referenced_files(unpacked_dir: Path) -> set:
function remove_orphaned_rels_files (line 128) | def remove_orphaned_rels_files(unpacked_dir: Path) -> list[str]:
function get_referenced_files (line 153) | def get_referenced_files(unpacked_dir: Path) -> set:
function remove_orphaned_files (line 171) | def remove_orphaned_files(unpacked_dir: Path, referenced: set) -> list[s...
function update_content_types (line 221) | def update_content_types(unpacked_dir: Path, removed_files: list[str]) -...
function clean_unused_files (line 241) | def clean_unused_files(unpacked_dir: Path) -> list[str]:
FILE: scientific-skills/pptx/scripts/office/helpers/merge_runs.py
function merge_runs (line 16) | def merge_runs(input_dir: str) -> tuple[int, str]:
function _find_elements (line 44) | def _find_elements(root, tag: str) -> list:
function _get_child (line 59) | def _get_child(parent, tag: str):
function _get_children (line 68) | def _get_children(parent, tag: str) -> list:
function _is_adjacent (line 78) | def _is_adjacent(elem1, elem2) -> bool:
function _remove_elements (line 93) | def _remove_elements(root, tag: str):
function _strip_run_rsid_attrs (line 99) | def _strip_run_rsid_attrs(root):
function _merge_runs_in (line 108) | def _merge_runs_in(container) -> int:
function _first_child_run (line 128) | def _first_child_run(container):
function _next_element_sibling (line 135) | def _next_element_sibling(node):
function _next_sibling_run (line 144) | def _next_sibling_run(node):
function _is_run (line 154) | def _is_run(node) -> bool:
function _can_merge (line 159) | def _can_merge(run1, run2) -> bool:
function _merge_run_content (line 170) | def _merge_run_content(target, source):
function _consolidate_text (line 178) | def _consolidate_text(run):
FILE: scientific-skills/pptx/scripts/office/helpers/simplify_redlines.py
function simplify_redlines (line 22) | def simplify_redlines(input_dir: str) -> tuple[int, str]:
function _merge_tracked_changes_in (line 47) | def _merge_tracked_changes_in(container, tag: str) -> int:
function _is_element (line 75) | def _is_element(node, tag: str) -> bool:
function _get_author (line 80) | def _get_author(elem) -> str:
function _can_merge_tracked (line 89) | def _can_merge_tracked(elem1, elem2) -> bool:
function _merge_tracked_content (line 104) | def _merge_tracked_content(target, source):
function _find_elements (line 111) | def _find_elements(root, tag: str) -> list:
function get_tracked_change_authors (line 126) | def get_tracked_change_authors(doc_xml_path: Path) -> dict[str, int]:
function _get_authors_from_docx (line 149) | def _get_authors_from_docx(docx_path: Path) -> dict[str, int]:
function infer_author (line 172) | def infer_author(modified_dir: Path, original_docx: Path, default: str =...
FILE: scientific-skills/pptx/scripts/office/pack.py
function pack (line 24) | def pack(
function _run_validation (line 69) | def _run_validation(
function _condense_xml (line 108) | def _condense_xml(xml_file: Path) -> None:
FILE: scientific-skills/pptx/scripts/office/soffice.py
function get_soffice_env (line 24) | def get_soffice_env() -> dict:
function run_soffice (line 35) | def run_soffice(args: list[str], **kwargs) -> subprocess.CompletedProcess:
function _needs_shim (line 44) | def _needs_shim() -> bool:
function _ensure_shim (line 53) | def _ensure_shim() -> Path:
FILE: scientific-skills/pptx/scripts/office/unpack.py
function unpack (line 34) | def unpack(
function _pretty_print_xml (line 82) | def _pretty_print_xml(xml_file: Path) -> None:
function _escape_smart_quotes (line 91) | def _escape_smart_quotes(xml_file: Path) -> None:
FILE: scientific-skills/pptx/scripts/office/validate.py
function main (line 25) | def main():
FILE: scientific-skills/pptx/scripts/office/validators/base.py
class BaseSchemaValidator (line 12) | class BaseSchemaValidator:
method __init__ (line 94) | def __init__(self, unpacked_dir, original_file=None, verbose=False):
method validate (line 109) | def validate(self):
method repair (line 112) | def repair(self) -> int:
method repair_whitespace_preservation (line 115) | def repair_whitespace_preservation(self) -> int:
method validate_xml (line 143) | def validate_xml(self):
method validate_namespaces (line 170) | def validate_namespaces(self):
method validate_unique_ids (line 199) | def validate_unique_ids(self):
method validate_file_references (line 289) | def validate_file_references(self):
method validate_all_relationship_ids (line 385) | def validate_all_relationship_ids(self):
method _get_expected_relationship_type (line 469) | def _get_expected_relationship_type(self, element_name):
method validate_content_types (line 492) | def validate_content_types(self):
method validate_file_against_xsd (line 598) | def validate_file_against_xsd(self, xml_file, verbose=False):
method validate_against_xsd (line 636) | def validate_against_xsd(self):
method _get_schema_path (line 685) | def _get_schema_path(self, xml_file):
method _clean_ignorable_namespaces (line 703) | def _clean_ignorable_namespaces(self, xml_doc):
method _remove_ignorable_elements (line 723) | def _remove_ignorable_elements(self, root):
method _preprocess_for_mc_ignorable (line 742) | def _preprocess_for_mc_ignorable(self, xml_doc):
method _validate_single_file_xsd (line 750) | def _validate_single_file_xsd(self, xml_file, base_path):
method _get_original_file_errors (line 787) | def _get_original_file_errors(self, xml_file):
method _remove_template_tags_from_text_nodes (line 814) | def _remove_template_tags_from_text_nodes(self, xml_doc):
FILE: scientific-skills/pptx/scripts/office/validators/docx.py
class DOCXSchemaValidator (line 16) | class DOCXSchemaValidator(BaseSchemaValidator):
method validate (line 24) | def validate(self):
method validate_whitespace_preservation (line 66) | def validate_whitespace_preservation(self):
method validate_deletions (line 112) | def validate_deletions(self):
method count_paragraphs_in_unpacked (line 163) | def count_paragraphs_in_unpacked(self):
method count_paragraphs_in_original (line 179) | def count_paragraphs_in_original(self):
method validate_insertions (line 202) | def validate_insertions(self):
method compare_paragraph_counts (line 243) | def compare_paragraph_counts(self):
method _parse_id_value (line 251) | def _parse_id_value(self, val: str, base: int = 16) -> int:
method validate_id_constraints (line 254) | def validate_id_constraints(self):
method validate_comment_markers (line 298) | def validate_comment_markers(self):
method repair (line 386) | def repair(self) -> int:
method repair_durableId (line 391) | def repair_durableId(self) -> int:
FILE: scientific-skills/pptx/scripts/office/validators/pptx.py
class PPTXSchemaValidator (line 10) | class PPTXSchemaValidator(BaseSchemaValidator):
method validate (line 25) | def validate(self):
method validate_uuid_ids (line 62) | def validate_uuid_ids(self):
method _looks_like_uuid (line 100) | def _looks_like_uuid(self, value):
method validate_slide_layout_ids (line 104) | def validate_slide_layout_ids(self):
method validate_no_duplicate_slide_layouts (line 172) | def validate_no_duplicate_slide_layouts(self):
method validate_notes_slide_references (line 210) | def validate_notes_slide_references(self):
FILE: scientific-skills/pptx/scripts/office/validators/redlining.py
class RedliningValidator (line 11) | class RedliningValidator:
method __init__ (line 13) | def __init__(self, unpacked_dir, original_docx, verbose=False, author=...
method repair (line 22) | def repair(self) -> int:
method validate (line 25) | def validate(self):
method _generate_detailed_diff (line 104) | def _generate_detailed_diff(self, original_text, modified_text):
method _get_git_word_diff (line 127) | def _get_git_word_diff(self, original_text, modified_text):
method _remove_author_tracked_changes (line 198) | def _remove_author_tracked_changes(self, root):
method _extract_text_content (line 229) | def _extract_text_content(self, root):
FILE: scientific-skills/pptx/scripts/thumbnail.py
function main (line 40) | def main():
function get_slide_info (line 95) | def get_slide_info(pptx_path: Path) -> list[dict]:
function build_slide_list (line 121) | def build_slide_list(
function create_hidden_placeholder (line 149) | def create_hidden_placeholder(size: tuple[int, int]) -> Image.Image:
function convert_to_images (line 158) | def convert_to_images(pptx_path: Path, temp_dir: Path) -> list[Path]:
function create_grids (line 196) | def create_grids(
function create_grid (line 225) | def create_grid(
FILE: scientific-skills/primekg/scripts/query_primekg.py
function _load_kg (line 9) | def _load_kg():
function search_nodes (line 17) | def search_nodes(name_query: str, node_type: Optional[str] = None) -> Li...
function get_neighbors (line 46) | def get_neighbors(node_id: Union[str, int], relation_type: Optional[str]...
function find_paths (line 76) | def find_paths(start_node_id: str, end_node_id: str, max_depth: int = 2)...
function get_disease_context (line 105) | def get_disease_context(disease_name: str) -> Dict:
FILE: scientific-skills/pubchem-database/scripts/bioactivity_query.py
function rate_limited_request (line 28) | def rate_limited_request(url: str, method: str = 'GET', **kwargs) -> Opt...
function get_bioassay_summary (line 55) | def get_bioassay_summary(cid: int) -> Optional[Dict]:
function get_compound_bioactivities (line 73) | def get_compound_bioactivities(
function get_assay_description (line 110) | def get_assay_description(aid: int) -> Optional[Dict]:
function get_assay_targets (line 128) | def get_assay_targets(aid: int) -> List[str]:
function search_assays_by_target (line 160) | def search_assays_by_target(
function get_active_compounds_in_assay (line 185) | def get_active_compounds_in_assay(aid: int, max_results: int = 1000) -> ...
function get_compound_annotations (line 206) | def get_compound_annotations(cid: int, section: Optional[str] = None) ->...
function get_drug_information (line 229) | def get_drug_information(cid: int) -> Optional[Dict]:
function get_safety_hazards (line 242) | def get_safety_hazards(cid: int) -> Optional[Dict]:
function summarize_bioactivities (line 255) | def summarize_bioactivities(cid: int) -> Dict:
function find_compounds_by_bioactivity (line 291) | def find_compounds_by_bioactivity(
function main (line 336) | def main():
FILE: scientific-skills/pubchem-database/scripts/compound_search.py
function search_by_name (line 20) | def search_by_name(name: str, max_results: int = 10) -> List[pcp.Compound]:
function search_by_smiles (line 39) | def search_by_smiles(smiles: str) -> Optional[pcp.Compound]:
function get_compound_by_cid (line 57) | def get_compound_by_cid(cid: int) -> Optional[pcp.Compound]:
function get_compound_properties (line 74) | def get_compound_properties(
function similarity_search (line 110) | def similarity_search(
function substructure_search (line 140) | def substructure_search(
function get_synonyms (line 167) | def get_synonyms(identifier: Union[str, int], namespace: str = 'name') -...
function batch_search (line 188) | def batch_search(
function download_structure (line 213) | def download_structure(
function print_compound_info (line 242) | def print_compound_info(compound: pcp.Compound) -> None:
function main (line 265) | def main():
FILE: scientific-skills/pufferlib/scripts/env_template.py
class MyEnvironment (line 14) | class MyEnvironment(PufferEnv):
method __init__ (line 21) | def __init__(self, buf=None, grid_size=10, max_steps=1000):
method reset (line 66) | def reset(self):
method step (line 81) | def step(self, action):
method _apply_action (line 119) | def _apply_action(self, action):
method _compute_reward (line 131) | def _compute_reward(self):
method _is_done (line 145) | def _is_done(self):
method _get_observation (line 154) | def _get_observation(self):
class MultiAgentEnvironment (line 165) | class MultiAgentEnvironment(PufferEnv):
method __init__ (line 172) | def __init__(self, buf=None, num_agents=4, grid_size=10, max_steps=1000):
method reset (line 196) | def reset(self):
method step (line 212) | def step(self, actions):
method _apply_action (line 250) | def _apply_action(self, agent_idx, action):
method _compute_reward (line 269) | def _compute_reward(self, agent_idx):
method _is_done (line 276) | def _is_done(self, agent_idx):
method _get_obs (line 283) | def _get_obs(self, agent_idx):
function test_environment (line 299) | def test_environment():
FILE: scientific-skills/pufferlib/scripts/train_template.py
class Policy (line 18) | class Policy(nn.Module):
method __init__ (line 21) | def __init__(self, observation_space, action_space, hidden_size=256):
method forward (line 41) | def forward(self, observations):
function make_env (line 49) | def make_env():
function create_policy (line 62) | def create_policy(env):
function train (line 71) | def train(args):
function main (line 166) | def main():
FILE: scientific-skills/pydeseq2/scripts/run_deseq2_analysis.py
function load_and_validate_data (line 35) | def load_and_validate_data(counts_path, metadata_path, transpose_counts=...
function filter_data (line 66) | def filter_data(counts_df, metadata, min_counts=10, condition_col=None):
function run_deseq2 (line 93) | def run_deseq2(counts_df, metadata, design, n_cpus=1):
function run_statistical_tests (line 122) | def run_statistical_tests(dds, contrast, alpha=0.05, shrink_lfc=True):
function save_results (line 155) | def save_results(ds, dds, output_dir, shrink_lfc=True):
function create_plots (line 202) | def create_plots(ds, output_dir):
function main (line 268) | def main():
FILE: scientific-skills/pydicom/scripts/anonymize_dicom.py
function anonymize_dicom (line 40) | def anonymize_dicom(input_path, output_path, patient_id='ANONYMOUS', pat...
function main (line 91) | def main():
FILE: scientific-skills/pydicom/scripts/dicom_to_image.py
function apply_windowing (line 25) | def apply_windowing(pixel_array, ds):
function normalize_to_uint8 (line 34) | def normalize_to_uint8(pixel_array):
function convert_dicom_to_image (line 52) | def convert_dicom_to_image(input_path, output_path, image_format='PNG',
function main (line 113) | def main():
FILE: scientific-skills/pydicom/scripts/extract_metadata.py
function format_value (line 23) | def format_value(value):
function extract_metadata_text (line 38) | def extract_metadata_text(ds, show_sequences=False):
function extract_metadata_json (line 102) | def extract_metadata_json(ds):
function main (line 121) | def main():
FILE: scientific-skills/pymatgen/scripts/phase_diagram_generator.py
function get_api_key (line 37) | def get_api_key() -> str:
function generate_phase_diagram (line 48) | def generate_phase_diagram(chemsys: str, args):
function main (line 169) | def main():
FILE: scientific-skills/pymatgen/scripts/structure_analyzer.py
function analyze_structure (line 36) | def analyze_structure(struct: Structure, args) -> dict:
function main (line 194) | def main():
FILE: scientific-skills/pymatgen/scripts/structure_converter.py
function convert_structure (line 31) | def convert_structure(input_path: Path, output_path: Path = None, output...
function batch_convert (line 66) | def batch_convert(input_files: List[Path], output_dir: Path, output_form...
function main (line 87) | def main():
FILE: scientific-skills/pymc/scripts/model_comparison.py
function compare_models (line 27) | def compare_models(models_dict: Dict[str, az.InferenceData],
function check_loo_reliability (line 120) | def check_loo_reliability(models_dict: Dict[str, az.InferenceData],
function plot_model_comparison (line 195) | def plot_model_comparison(comparison, output_path=None, show=True):
function model_averaging (line 230) | def model_averaging(models_dict: Dict[str, az.InferenceData],
function cross_validation_comparison (line 291) | def cross_validation_comparison(models_dict: Dict[str, az.InferenceData],
FILE: scientific-skills/pymc/scripts/model_diagnostics.py
function check_diagnostics (line 23) | def check_diagnostics(idata, var_names=None, ess_threshold=400, rhat_thr...
function create_diagnostic_report (line 171) | def create_diagnostic_report(idata, var_names=None, output_dir='diagnost...
function compare_prior_posterior (line 269) | def compare_prior_posterior(idata, prior_idata, var_names=None, output_p...
FILE: scientific-skills/pymoo/scripts/custom_problem_example.py
class MyBiObjectiveProblem (line 15) | class MyBiObjectiveProblem(ElementwiseProblem):
method __init__ (line 28) | def __init__(self):
method _evaluate (line 38) | def _evaluate(self, x, out, *args, **kwargs):
class ConstrainedProblem (line 50) | class ConstrainedProblem(ElementwiseProblem):
method __init__ (line 65) | def __init__(self):
method _evaluate (line 74) | def _evaluate(self, x, out, *args, **kwargs):
function solve_custom_problem (line 92) | def solve_custom_problem():
function solve_constrained_problem (line 127) | def solve_constrained_problem():
FILE: scientific-skills/pymoo/scripts/decision_making_example.py
function run_optimization_for_decision_making (line 17) | def run_optimization_for_decision_making():
function apply_pseudo_weights (line 39) | def apply_pseudo_weights(result, weights):
function compare_different_preferences (line 61) | def compare_different_preferences(result):
function visualize_selected_solutions (line 95) | def visualize_selected_solutions(result, selections):
function find_extreme_solutions (line 115) | def find_extreme_solutions(result):
function main (line 135) | def main():
FILE: scientific-skills/pymoo/scripts/many_objective_example.py
function run_many_objective_optimization (line 16) | def run_many_objective_optimization():
FILE: scientific-skills/pymoo/scripts/multi_objective_example.py
function run_multi_objective_optimization (line 15) | def run_multi_objective_optimization():
FILE: scientific-skills/pymoo/scripts/single_objective_example.py
function run_single_objective_optimization (line 18) | def run_single_objective_optimization():
FILE: scientific-skills/pytdc/scripts/benchmark_evaluation.py
function load_benchmark_group (line 18) | def load_benchmark_group():
function single_dataset_evaluation (line 43) | def single_dataset_evaluation(group, dataset_name='Caco2_Wang'):
function multiple_datasets_evaluation (line 102) | def multiple_datasets_evaluation(group):
function custom_model_template (line 168) | def custom_model_template():
function multi_seed_statistics (line 221) | def multi_seed_statistics(predictions_dict):
function leaderboard_submission_guide (line 247) | def leaderboard_submission_guide():
function main (line 290) | def main():
FILE: scientific-skills/pytdc/scripts/load_and_split_data.py
function load_single_pred_example (line 18) | def load_single_pred_example():
function load_multi_pred_example (line 60) | def load_multi_pred_example():
function evaluation_example (line 123) | def evaluation_example(split):
function custom_split_example (line 163) | def custom_split_example():
function main (line 188) | def main():
FILE: scientific-skills/pytdc/scripts/molecular_generation.py
function load_generation_dataset (line 17) | def load_generation_dataset():
function single_oracle_example (line 43) | def single_oracle_example():
function multiple_oracles_example (line 71) | def multiple_oracles_example():
function batch_evaluation_example (line 118) | def batch_evaluation_example():
function goal_directed_generation_template (line 156) | def goal_directed_generation_template():
function distribution_learning_example (line 220) | def distribution_learning_example(train_smiles):
function available_oracles_info (line 272) | def available_oracles_info():
function constraint_satisfaction_example (line 309) | def constraint_satisfaction_example():
function main (line 360) | def main():
FILE: scientific-skills/pytorch-lightning/scripts/quick_trainer_setup.py
function basic_trainer (line 24) | def basic_trainer():
function debug_trainer (line 43) | def debug_trainer():
function production_single_gpu_trainer (line 62) | def production_single_gpu_trainer(
function multi_gpu_ddp_trainer (line 121) | def multi_gpu_ddp_trainer(
function large_model_fsdp_trainer (line 183) | def large_model_fsdp_trainer(
function deepspeed_trainer (line 244) | def deepspeed_trainer(
function hyperparameter_tuning_trainer (line 299) | def hyperparameter_tuning_trainer(max_epochs=50):
function overfit_test_trainer (line 321) | def overfit_test_trainer(num_batches=10):
function time_limited_trainer (line 341) | def time_limited_trainer(
function reproducible_trainer (line 375) | def reproducible_trainer(seed=42, max_epochs=100):
FILE: scientific-skills/pytorch-lightning/scripts/template_datamodule.py
class CustomDataset (line 13) | class CustomDataset(Dataset):
method __init__ (line 20) | def __init__(self, data_path, transform=None):
method __len__ (line 39) | def __len__(self):
method __getitem__ (line 43) | def __getitem__(self, idx):
class TemplateDataModule (line 62) | class TemplateDataModule(L.LightningDataModule):
method __init__ (line 79) | def __init__(
method prepare_data (line 98) | def prepare_data(self):
method setup (line 120) | def setup(self, stage: str = None):
method _get_train_transforms (line 167) | def _get_train_transforms(self):
method _get_test_transforms (line 185) | def _get_test_transforms(self):
method train_dataloader (line 201) | def train_dataloader(self):
method val_dataloader (line 218) | def val_dataloader(self):
method test_dataloader (line 234) | def test_dataloader(self):
method predict_dataloader (line 248) | def predict_dataloader(self):
method state_dict (line 264) | def state_dict(self):
method load_state_dict (line 273) | def load_state_dict(self, state_dict):
method teardown (line 284) | def teardown(self, stage: str = None):
FILE: scientific-skills/pytorch-lightning/scripts/template_lightning_module.py
class TemplateLightningModule (line 16) | class TemplateLightningModule(L.LightningModule):
method __init__ (line 26) | def __init__(
method forward (line 50) | def forward(self, x):
method training_step (line 62) | def training_step(self, batch, batch_idx):
method validation_step (line 90) | def validation_step(self, batch, batch_idx):
method test_step (line 112) | def test_step(self, batch, batch_idx):
method predict_step (line 134) | def predict_step(self, batch, batch_idx, dataloader_idx=0):
method configure_optimizers (line 151) | def configure_optimizers(self):
method on_train_epoch_end (line 187) | def on_train_epoch_end(self):
method on_validation_epoch_end (line 192) | def on_validation_epoch_end(self):
FILE: scientific-skills/rdkit/scripts/molecular_properties.py
function calculate_properties (line 25) | def calculate_properties(mol):
function process_single_molecule (line 92) | def process_single_molecule(smiles):
function process_file (line 103) | def process_file(input_file, output_file=None):
function write_csv (line 145) | def write_csv(results, output_file):
function print_properties (line 160) | def print_properties(props):
function main (line 205) | def main():
FILE: scientific-skills/rdkit/scripts/similarity_search.py
function generate_fingerprint (line 35) | def generate_fingerprint(mol, method='morgan', radius=2, n_bits=2048):
function load_molecules (line 60) | def load_molecules(file_path):
function similarity_search (line 97) | def similarity_search(query_mol, database, method='morgan', threshold=0.7,
function write_results (line 157) | def write_results(hits, output_file):
function print_results (line 176) | def print_results(hits, max_display=20):
function main (line 198) | def main():
FILE: scientific-skills/rdkit/scripts/substructure_filter.py
function load_molecules (line 67) | def load_molecules(file_path, keep_props=True):
function create_pattern_query (line 95) | def create_pattern_query(pattern_string):
function filter_molecules (line 111) | def filter_molecules(molecules, include_patterns=None, exclude_patterns=...
function write_molecules (line 191) | def write_molecules(molecules, output_file):
function write_report (line 213) | def write_report(match_info, output_file):
function print_summary (line 231) | def print_summary(total, filtered, match_info):
function main (line 254) | def main():
FILE: scientific-skills/reactome-database/scripts/reactome_query.py
class ReactomeClient (line 28) | class ReactomeClient:
method get_version (line 34) | def get_version(self) -> str:
method query_pathway (line 40) | def query_pathway(self, pathway_id: str) -> Dict:
method get_pathway_entities (line 46) | def get_pathway_entities(self, pathway_id: str) -> List[Dict]:
method search_pathways (line 54) | def search_pathways(self, term: str) -> List[Dict]:
method analyze_genes (line 63) | def analyze_genes(self, gene_list: List[str]) -> Dict:
method get_analysis_by_token (line 74) | def get_analysis_by_token(self, token: str) -> Dict:
function print_json (line 81) | def print_json(data):
function command_version (line 86) | def command_version():
function command_query (line 93) | def command_query(pathway_id: str):
function command_entities (line 121) | def command_entities(pathway_id: str):
function command_search (line 153) | def command_search(term: str):
function command_analyze (line 176) | def command_analyze(gene_file: str):
function print_usage (line 236) | def print_usage():
function main (line 241) | def main():
FILE: scientific-skills/research-lookup/examples.py
function example_automatic_selection (line 16) | def example_automatic_selection():
function example_manual_override (line 42) | def example_manual_override():
function example_batch_queries (line 67) | def example_batch_queries():
function example_scientific_writing_workflow (line 96) | def example_scientific_writing_workflow():
function main (line 132) | def main():
FILE: scientific-skills/research-lookup/lookup.py
function format_response (line 17) | def format_response(result: Dict) -> str:
function _detect_venue_tier (line 90) | def _detect_venue_tier(url: str) -> Optional[str]:
function main (line 149) | def main():
FILE: scientific-skills/research-lookup/research_lookup.py
class ResearchLookup (line 24) | class ResearchLookup:
method __init__ (line 61) | def __init__(self, force_backend: Optional[str] = None):
method _select_backend (line 79) | def _select_backend(self, query: str) -> str:
method _get_chat_client (line 105) | def _get_chat_client(self):
method _parallel_lookup (line 121) | def _parallel_lookup(self, query: str) -> Dict[str, Any]:
method _extract_basis_citations (line 168) | def _extract_basis_citations(self, response) -> List[Dict[str, str]]:
method _perplexity_lookup (line 201) | def _perplexity_lookup(self, query: str) -> Dict[str, Any]:
method _format_academic_prompt (line 299) | def _format_academic_prompt(self, query: str) -> str:
method _extract_api_citations (line 328) | def _extract_api_citations(self, response: Dict[str, Any], choice: Dic...
method _extract_citations_from_text (line 370) | def _extract_citations_from_text(self, text: str) -> List[Dict[str, st...
method lookup (line 408) | def lookup(self, query: str) -> Dict[str, Any]:
method batch_lookup (line 422) | def batch_lookup(self, queries: List[str], delay: float = 1.0) -> List...
function main (line 438) | def main():
FILE: scientific-skills/research-lookup/scripts/research_lookup.py
class ResearchLookup (line 24) | class ResearchLookup:
method __init__ (line 61) | def __init__(self, force_backend: Optional[str] = None):
method _select_backend (line 79) | def _select_backend(self, query: str) -> str:
method _get_chat_client (line 105) | def _get_chat_client(self):
method _parallel_lookup (line 121) | def _parallel_lookup(self, query: str) -> Dict[str, Any]:
method _extract_basis_citations (line 168) | def _extract_basis_citations(self, response) -> List[Dict[str, str]]:
method _perplexity_lookup (line 201) | def _perplexity_lookup(self, query: str) -> Dict[str, Any]:
method _format_academic_prompt (line 299) | def _format_academic_prompt(self, query: str) -> str:
method _extract_api_citations (line 328) | def _extract_api_citations(self, response: Dict[str, Any], choice: Dic...
method _extract_citations_from_text (line 370) | def _extract_citations_from_text(self, text: str) -> List[Dict[str, st...
method lookup (line 408) | def lookup(self, query: str) -> Dict[str, Any]:
method batch_lookup (line 422) | def batch_lookup(self, queries: List[str], delay: float = 1.0) -> List...
function main (line 438) | def main():
FILE: scientific-skills/scanpy/scripts/qc_analysis.py
function calculate_qc_metrics (line 17) | def calculate_qc_metrics(adata, mt_threshold=5, min_genes=200, min_cells...
function generate_qc_plots (line 54) | def generate_qc_plots(adata, output_prefix='qc'):
function filter_data (line 86) | def filter_data(adata, mt_threshold=5, min_genes=200, max_genes=None,
function main (line 138) | def main():
FILE: scientific-skills/scholar-evaluation/scripts/calculate_scores.py
function load_scores (line 47) | def load_scores(filepath: Path) -> Dict[str, float]:
function load_weights (line 70) | def load_weights(filepath: Optional[Path] = None) -> Dict[str, float]:
function calculate_weighted_average (line 96) | def calculate_weighted_average(scores: Dict[str, float], weights: Dict[s...
function get_quality_level (line 115) | def get_quality_level(score: float) -> tuple:
function generate_bar_chart (line 123) | def generate_bar_chart(scores: Dict[str, float], max_width: int = 50) ->...
function identify_strengths_weaknesses (line 137) | def identify_strengths_weaknesses(scores: Dict[str, float]) -> tuple:
function generate_report (line 147) | def generate_report(scores: Dict[str, float], weights: Dict[str, float],
function interactive_mode (line 245) | def interactive_mode():
function main (line 305) | def main():
FILE: scientific-skills/scientific-schematics/scripts/generate_schematic.py
function main (line 29) | def main():
FILE: scientific-skills/scientific-schematics/scripts/generate_schematic_ai.py
function _load_env_file (line 37) | def _load_env_file():
class ScientificSchematicGenerator (line 79) | class ScientificSchematicGenerator:
method __init__ (line 146) | def __init__(self, api_key: Optional[str] = None, verbose: bool = False):
method _log (line 180) | def _log(self, message: str):
method _make_request (line 185) | def _make_request(self, model: str, messages: List[Dict[str, Any]],
method _extract_image_from_response (line 241) | def _extract_image_from_response(self, response: Dict[str, Any]) -> Op...
method _image_to_base64 (line 322) | def _image_to_base64(self, image_path: str) -> str:
method generate_image (line 348) | def generate_image(self, prompt: str) -> Optional[bytes]:
method review_image (line 426) | def review_image(self, image_path: str, original_prompt: str,
method improve_prompt (line 580) | def improve_prompt(self, original_prompt: str, critique: str,
method generate_iterative (line 604) | def generate_iterative(self, user_prompt: str, output_path: str,
function main (line 753) | def main():
FILE: scientific-skills/scientific-slides/scripts/generate_slide_image.py
function main (line 32) | def main():
FILE: scientific-skills/scientific-slides/scripts/generate_slide_image_ai.py
function _load_env_file (line 49) | def _load_env_file():
class SlideImageGenerator (line 87) | class SlideImageGenerator:
method __init__ (line 170) | def __init__(self, api_key: Optional[str] = None, verbose: bool = False):
method _log (line 201) | def _log(self, message: str):
method _make_request (line 206) | def _make_request(self, model: str, messages: List[Dict[str, Any]],
method _extract_image_from_response (line 250) | def _extract_image_from_response(self, response: Dict[str, Any]) -> Op...
method _image_to_base64 (line 308) | def _image_to_base64(self, image_path: str) -> str:
method generate_image (line 325) | def generate_image(self, prompt: str, attachments: Optional[List[str]]...
method review_image (line 404) | def review_image(self, image_path: str, original_prompt: str,
method improve_prompt (line 518) | def improve_prompt(self, original_prompt: str, critique: str,
method generate_slide (line 532) | def generate_slide(self, user_prompt: str, output_path: str,
function main (line 682) | def main():
FILE: scientific-skills/scientific-slides/scripts/pdf_to_images.py
class PDFToImagesConverter (line 25) | class PDFToImagesConverter:
method __init__ (line 28) | def __init__(
method convert (line 48) | def convert(self) -> List[Path]:
method _convert_with_pymupdf (line 67) | def _convert_with_pymupdf(self) -> List[Path]:
function main (line 108) | def main():
FILE: scientific-skills/scientific-slides/scripts/slides_to_pdf.py
function get_image_files (line 31) | def get_image_files(paths: List[str]) -> List[Path]:
function combine_images_to_pdf (line 74) | def combine_images_to_pdf(image_paths: List[Path], output_path: Path,
function main (line 162) | def main():
FILE: scientific-skills/scientific-slides/scripts/validate_presentation.py
class PresentationValidator (line 34) | class PresentationValidator:
method __init__ (line 48) | def __init__(self, filepath: str, duration: Optional[int] = None):
method validate (line 56) | def validate(self) -> Dict:
method _check_file_size (line 82) | def _check_file_size(self):
method _validate_pdf (line 98) | def _validate_pdf(self):
method _validate_pptx (line 147) | def _validate_pptx(self):
method _check_pptx_content (line 181) | def _check_pptx_content(self, prs):
method _validate_latex (line 220) | def _validate_latex(self):
method _try_compile_latex (line 243) | def _try_compile_latex(self) -> bool:
method _check_slide_count (line 257) | def _check_slide_count(self, num_slides: int):
method _format_results (line 291) | def _format_results(self) -> Dict:
function print_results (line 303) | def print_results(results: Dict):
function main (line 340) | def main():
FILE: scientific-skills/scientific-visualization/assets/color_palettes.py
function apply_palette (line 111) | def apply_palette(palette_name='okabe_ito'):
function get_palette (line 156) | def get_palette(palette_name='okabe_ito'):
FILE: scientific-skills/scientific-visualization/scripts/figure_export.py
function save_publication_figure (line 14) | def save_publication_figure(
function save_for_journal (line 98) | def save_for_journal(
function check_figure_size (line 187) | def check_figure_size(fig: plt.Figure, journal: str = 'nature') -> dict:
function verify_font_embedding (line 281) | def verify_font_embedding(pdf_path: Union[str, Path]) -> bool:
FILE: scientific-skills/scientific-visualization/scripts/style_presets.py
function get_base_style (line 35) | def get_base_style() -> Dict[str, Any]:
function apply_publication_style (line 111) | def apply_publication_style(style_name: str = 'default') -> None:
function set_color_palette (line 195) | def set_color_palette(palette_name: str = 'okabe_ito') -> None:
function configure_for_journal (line 234) | def configure_for_journal(journal: str, figure_width: str = 'single') ->...
function create_style_template (line 303) | def create_style_template(output_file: str = 'publication.mplstyle') -> ...
function show_color_palettes (line 335) | def show_color_palettes() -> None:
function reset_to_default (line 367) | def reset_to_default() -> None:
FILE: scientific-skills/scikit-learn/scripts/classification_pipeline.py
function create_preprocessing_pipeline (line 23) | def create_preprocessing_pipeline(numeric_features, categorical_features):
function train_and_evaluate_model (line 62) | def train_and_evaluate_model(X, y, numeric_features, categorical_features,
FILE: scientific-skills/scikit-learn/scripts/clustering_analysis.py
function preprocess_for_clustering (line 19) | def preprocess_for_clustering(X, scale=True, pca_components=None):
function find_optimal_k_kmeans (line 51) | def find_optimal_k_kmeans(X, k_range=range(2, 11)):
function compare_clustering_algorithms (line 111) | def compare_clustering_algorithms(X, n_clusters=3):
function visualize_clusters (line 201) | def visualize_clusters(X, results, true_labels=None):
function complete_clustering_analysis (line 277) | def complete_clustering_analysis(X, true_labels=None, scale=True,
FILE: scientific-skills/scvelo/scripts/rna_velocity_workflow.py
function run_velocity_analysis (line 21) | def run_velocity_analysis(
function load_from_loom (line 179) | def load_from_loom(loom_path, processed_h5ad=None):
FILE: scientific-skills/simpy/scripts/basic_simulation_template.py
class SimulationConfig (line 13) | class SimulationConfig:
method __init__ (line 16) | def __init__(self):
class SimulationStats (line 26) | class SimulationStats:
method __init__ (line 29) | def __init__(self):
method record_arrival (line 36) | def record_arrival(self, time):
method record_service_start (line 39) | def record_service_start(self, time):
method record_departure (line 42) | def record_departure(self, time):
method record_wait_time (line 45) | def record_wait_time(self, wait_time):
method record_service_time (line 48) | def record_service_time(self, service_time):
method report (line 51) | def report(self):
function customer_process (line 72) | def customer_process(env, name, resource, stats, config):
function customer_generator (line 114) | def customer_generator(env, resource, stats, config):
function run_simulation (line 137) | def run_simulation(config):
function main (line 174) | def main():
FILE: scientific-skills/simpy/scripts/resource_monitor.py
class ResourceMonitor (line 15) | class ResourceMonitor:
method __init__ (line 26) | def __init__(self, env: simpy.Environment, resource: simpy.Resource, n...
method _patch_resource (line 49) | def _patch_resource(self):
method average_queue_length (line 100) | def average_queue_length(self) -> float:
method average_utilization (line 117) | def average_utilization(self) -> float:
method average_wait_time (line 134) | def average_wait_time(self) -> float:
method max_queue_length (line 138) | def max_queue_length(self) -> int:
method report (line 142) | def report(self):
method export_csv (line 170) | def export_csv(self, filename: str):
class MultiResourceMonitor (line 194) | class MultiResourceMonitor:
method __init__ (line 197) | def __init__(self, env: simpy.Environment):
method add_resource (line 207) | def add_resource(self, resource: simpy.Resource, name: str):
method report_all (line 219) | def report_all(self):
method summary (line 224) | def summary(self):
class ContainerMonitor (line 240) | class ContainerMonitor:
method __init__ (line 243) | def __init__(self, env: simpy.Environment, container: simpy.Container,...
method _patch_container (line 259) | def _patch_container(self):
method average_level (line 285) | def average_level(self) -> float:
method report (line 302) | def report(self):
function example_process (line 322) | def example_process(env, name, resource, duration):
FILE: scientific-skills/stable-baselines3/scripts/custom_env_template.py
class CustomEnv (line 17) | class CustomEnv(gym.Env):
method __init__ (line 32) | def __init__(self, grid_size=5, render_mode=None):
method reset (line 78) | def reset(self, seed=None, options=None):
method step (line 106) | def step(self, action):
method _get_obs (line 154) | def _get_obs(self):
method _get_info (line 170) | def _get_info(self):
method render (line 185) | def render(self):
method close (line 216) | def close(self):
function validate_environment (line 232) | def validate_environment():
function test_environment (line 244) | def test_environment():
function train_on_custom_env (line 275) | def train_on_custom_env():
FILE: scientific-skills/stable-baselines3/scripts/evaluate_agent.py
function evaluate_agent (line 19) | def evaluate_agent(
function watch_agent (line 97) | def watch_agent(
function compare_models (line 162) | def compare_models(
FILE: scientific-skills/stable-baselines3/scripts/train_rl_agent.py
function train_agent (line 24) | def train_agent(
FILE: scientific-skills/statistical-analysis/scripts/assumption_checks.py
function check_normality (line 20) | def check_normality(
function check_normality_per_group (line 95) | def check_normality_per_group(
function check_homogeneity_of_variance (line 156) | def check_homogeneity_of_variance(
function check_linearity (line 236) | def check_linearity(
function detect_outliers (line 308) | def detect_outliers(
function comprehensive_assumption_check (line 409) | def comprehensive_assumption_check(
FILE: scientific-skills/string-database/scripts/string_api.py
function string_map_ids (line 24) | def string_map_ids(identifiers: Union[str, List[str]],
function string_network (line 72) | def string_network(identifiers: Union[str, List[str]],
function string_network_image (line 125) | def string_network_image(identifiers: Union[str, List[str]],
function string_interaction_partners (line 174) | def string_interaction_partners(identifiers: Union[str, List[str]],
function string_enrichment (line 218) | def string_enrichment(identifiers: Union[str, List[str]],
function string_ppi_enrichment (line 257) | def string_ppi_enrichment(identifiers: Union[str, List[str]],
function string_homology (line 299) | def string_homology(identifiers: Union[str, List[str]],
function string_version (line 337) | def string_version() -> str:
FILE: scientific-skills/timesfm-forecasting/examples/anomaly-detection/detect_anomalies.py
function detect_context_anomalies (line 50) | def detect_context_anomalies(
function build_synthetic_future (line 98) | def build_synthetic_future(
function detect_forecast_anomalies (line 121) | def detect_forecast_anomalies(
function plot_results (line 172) | def plot_results(
function main (line 391) | def main() -> None:
FILE: scientific-skills/timesfm-forecasting/examples/covariates-forecasting/demo_covariates.py
function generate_sales_data (line 49) | def generate_sales_data() -> dict:
function create_visualization (line 132) | def create_visualization(data: dict) -> None:
function demonstrate_api (line 405) | def demonstrate_api() -> None:
function explain_xreg_modes (line 431) | def explain_xreg_modes() -> None:
function main (line 450) | def main() -> None:
FILE: scientific-skills/timesfm-forecasting/examples/global-temperature/generate_animation_data.py
function main (line 30) | def main() -> None:
FILE: scientific-skills/timesfm-forecasting/examples/global-temperature/generate_gif.py
function create_frame (line 26) | def create_frame(
function main (line 157) | def main() -> None:
FILE: scientific-skills/timesfm-forecasting/examples/global-temperature/generate_html.py
function main (line 521) | def main() -> None:
FILE: scientific-skills/timesfm-forecasting/examples/global-temperature/visualize_forecast.py
function main (line 30) | def main() -> None:
FILE: scientific-skills/timesfm-forecasting/scripts/check_system.py
class CheckResult (line 74) | class CheckResult:
method icon (line 81) | def icon(self) -> str:
method __str__ (line 84) | def __str__(self) -> str:
class SystemReport (line 89) | class SystemReport:
method passed (line 98) | def passed(self) -> bool:
method to_dict (line 101) | def to_dict(self) -> dict[str, Any]:
function _get_total_ram_gb (line 126) | def _get_total_ram_gb() -> float:
function _get_available_ram_gb (line 173) | def _get_available_ram_gb() -> float:
function check_ram (line 222) | def check_ram(profile: dict[str, Any]) -> CheckResult:
function check_gpu (line 262) | def check_gpu() -> CheckResult:
function check_disk (line 303) | def check_disk(profile: dict[str, Any]) -> CheckResult:
function check_python (line 336) | def check_python() -> CheckResult:
function check_package (line 357) | def check_package(pkg_name: str, import_name: str | None = None) -> Chec...
function recommend_batch_size (line 383) | def recommend_batch_size(report: SystemReport) -> int:
function run_checks (line 432) | def run_checks(model_version: str = "v2.5") -> SystemReport:
function print_report (line 474) | def print_report(report: SystemReport) -> None:
function main (line 491) | def main() -> None:
FILE: scientific-skills/timesfm-forecasting/scripts/forecast_csv.py
function run_preflight (line 32) | def run_preflight() -> dict:
function load_model (line 49) | def load_model(batch_size: int = 32):
function load_csv (line 78) | def load_csv(
function forecast_series (line 118) | def forecast_series(
function write_csv_output (line 144) | def write_csv_output(
function write_json_output (line 187) | def write_json_output(results: dict[str, dict], output_path: str) -> None:
function main (line 194) | def main() -> None:
FILE: scientific-skills/torch-geometric/scripts/benchmark_model.py
class GCN (line 24) | class GCN(torch.nn.Module):
method __init__ (line 25) | def __init__(self, num_features, hidden_channels, num_classes, dropout...
method forward (line 31) | def forward(self, x, edge_index, batch=None):
class GAT (line 41) | class GAT(torch.nn.Module):
method __init__ (line 42) | def __init__(self, num_features, hidden_channels, num_classes, heads=8...
method forward (line 49) | def forward(self, x, edge_index, batch=None):
class GraphSAGE (line 59) | class GraphSAGE(torch.nn.Module):
method __init__ (line 60) | def __init__(self, num_features, hidden_channels, num_classes, dropout...
method forward (line 66) | def forward(self, x, edge_index, batch=None):
function train_node_classification (line 83) | def train_node_classification(model, data, optimizer):
function test_node_classification (line 95) | def test_node_classification(model, data):
function train_graph_classification (line 109) | def train_graph_classification(model, loader, optimizer, device):
function test_graph_classification (line 127) | def test_graph_classification(model, loader, device):
function benchmark_node_classification (line 141) | def benchmark_node_classification(model_name, dataset_name, epochs, lr, ...
function benchmark_graph_classification (line 180) | def benchmark_graph_classification(model_name, dataset_name, epochs, lr,...
function run_benchmark (line 221) | def run_benchmark(args):
function main (line 289) | def main():
FILE: scientific-skills/torch-geometric/scripts/create_gnn_template.py
function generate_template (line 452) | def generate_template(model_type: str, task: str, output_path: str):
function list_templates (line 476) | def list_templates():
function main (line 488) | def main():
FILE: scientific-skills/torch-geometric/scripts/visualize_graph.py
function visualize_data (line 27) | def visualize_data(
function is_undirected (line 150) | def is_undirected(edge_index):
function plot_degree_distribution (line 163) | def plot_degree_distribution(data, output_path: Optional[str] = None):
function plot_graph_statistics (line 198) | def plot_graph_statistics(data, output_path: Optional[str] = None):
function main (line 248) | def main():
FILE: scientific-skills/treatment-plans/scripts/check_completeness.py
function read_file (line 42) | def read_file(filepath: Path) -> str:
function check_sections (line 55) | def check_sections(content: str) -> Tuple[List[bool], List[str]]:
function check_smart_goals (line 73) | def check_smart_goals(content: str) -> Tuple[bool, List[str]]:
function check_hipaa_notice (line 95) | def check_hipaa_notice(content: str) -> bool:
function check_provider_signature (line 101) | def check_provider_signature(content: str) -> bool:
function check_placeholders_remaining (line 107) | def check_placeholders_remaining(content: str) -> Tuple[int, List[str]]:
function display_results (line 127) | def display_results(filepath: Path, checklist: List[bool], missing: List...
function main (line 247) | def main():
FILE: scientific-skills/treatment-plans/scripts/generate_template.py
function get_templates_dir (line 49) | def get_templates_dir():
function list_templates (line 57) | def list_templates():
function interactive_selection (line 72) | def interactive_selection():
function get_output_filename (line 95) | def get_output_filename(template_key, custom_name=None):
function copy_template (line 108) | def copy_template(template_key, output_path):
function display_success (line 127) | def display_success(output_path, template_key):
function main (line 161) | def main():
FILE: scientific-skills/treatment-plans/scripts/timeline_generator.py
function extract_timeline_info (line 24) | def extract_timeline_info(content: str) -> Dict[str, List[Tuple[str, str...
function parse_timeframe_to_days (line 75) | def parse_timeframe_to_days(timeframe: str) -> Tuple[int, int]:
function create_text_timeline (line 117) | def create_text_timeline(timeline_data: Dict, output_file: Path = None):
function create_visual_timeline (line 161) | def create_visual_timeline(timeline_data: Dict, output_file: Path, start...
function main (line 267) | def main():
FILE: scientific-skills/uniprot-database/scripts/uniprot_client.py
function search_proteins (line 35) | def search_proteins(query: str, format: str = "json",
function get_protein (line 70) | def get_protein(accession: str, format: str = "json") -> str:
function batch_retrieve (line 92) | def batch_retrieve(accessions: List[str], format: str = "json",
function stream_results (line 109) | def stream_results(query: str, format: str = "fasta",
function map_ids (line 142) | def map_ids(ids: List[str], from_db: str, to_db: str,
function get_available_fields (line 203) | def get_available_fields() -> List[Dict]:
function get_id_mapping_databases (line 218) | def get_id_mapping_databases() -> Dict:
function main (line 233) | def main():
FILE: scientific-skills/uspto-database/scripts/patent_search.py
class PatentSearchClient (line 24) | class PatentSearchClient:
method __init__ (line 29) | def __init__(self, api_key: Optional[str] = None):
method _request (line 45) | def _request(self, endpoint: str, query: Dict, fields: Optional[List[s...
method search_patents (line 75) | def search_patents(self, query: Dict, fields: Optional[List[str]] = None,
method get_patent (line 116) | def get_patent(self, patent_number: str) -> Optional[Dict]:
method search_by_inventor (line 142) | def search_by_inventor(self, inventor_name: str, **kwargs) -> Dict:
method search_by_assignee (line 156) | def search_by_assignee(self, assignee_name: str, **kwargs) -> Dict:
method search_by_classification (line 170) | def search_by_classification(self, cpc_code: str, **kwargs) -> Dict:
method search_by_date_range (line 184) | def search_by_date_range(self, start_date: str, end_date: str, **kwarg...
method advanced_search (line 204) | def advanced_search(self, keywords: List[str], assignee: Optional[str]...
function main (line 252) | def main():
FILE: scientific-skills/uspto-database/scripts/peds_client.py
class PEDSHelper (line 29) | class PEDSHelper:
method __init__ (line 32) | def __init__(self):
method get_application (line 38) | def get_application(self, application_number: str) -> Optional[Dict]:
method get_patent (line 61) | def get_patent(self, patent_number: str) -> Optional[Dict]:
method get_transaction_history (line 78) | def get_transaction_history(self, application_number: str) -> List[Dict]:
method get_office_actions (line 93) | def get_office_actions(self, application_number: str) -> List[Dict]:
method get_status_summary (line 115) | def get_status_summary(self, application_number: str) -> Dict[str, Any]:
method analyze_prosecution (line 155) | def analyze_prosecution(self, application_number: str) -> Dict[str, Any]:
method _format_application_data (line 207) | def _format_application_data(self, raw_data: Dict) -> Dict:
function main (line 214) | def main():
FILE: scientific-skills/uspto-database/scripts/trademark_client.py
class TrademarkClient (line 23) | class TrademarkClient:
method __init__ (line 29) | def __init__(self, api_key: Optional[str] = None):
method get_trademark_by_serial (line 42) | def get_trademark_by_serial(self, serial_number: str) -> Optional[Dict]:
method get_trademark_by_registration (line 63) | def get_trademark_by_registration(self, registration_number: str) -> O...
method get_trademark_status (line 84) | def get_trademark_status(self, serial_or_registration: str) -> Dict[st...
method get_goods_and_services (line 123) | def get_goods_and_services(self, serial_or_registration: str) -> List[...
method get_owner_info (line 143) | def get_owner_info(self, serial_or_registration: str) -> List[Dict]:
method get_prosecution_history (line 163) | def get_prosecution_history(self, serial_or_registration: str) -> List...
method check_trademark_health (line 183) | def check_trademark_health(self, serial_or_registration: str) -> Dict[...
function main (line 226) | def main():
Copy disabled (too large)
Download .json
Condensed preview — 1340 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (16,613K chars).
[
{
"path": ".claude-plugin/marketplace.json",
"chars": 8135,
"preview": "{\n \"name\": \"claude-scientific-skills\",\n \"owner\": {\n \"name\": \"K-Dense Inc.\",\n \"email\": \"contact@k-dense.ai\"\n },\n"
},
{
"path": ".gitattributes",
"chars": 969,
"preview": "# Git LFS tracking for binary files\n\n# Images\n*.png filter=lfs diff=lfs merge=lfs -text\n*.jpg filter=lfs diff=lfs merge="
},
{
"path": ".github/workflows/release.yml",
"chars": 3394,
"preview": "name: Create Release\n\non:\n push:\n branches:\n - main\n paths:\n - '.claude-plugin/marketplace.json'\n work"
},
{
"path": ".gitignore",
"chars": 116,
"preview": ".claude\n.DS_Store\n\ntemp/\n\npyproject.toml\nuv.lock\n\n.venv/\n.python-version\nmain.py\n\n__pycache__/\n\n.env\n\nscan_skills.py"
},
{
"path": "LICENSE.md",
"chars": 1068,
"preview": "MIT License\n\nCopyright (c) 2025 K-Dense Inc.\n\nPermission is hereby granted, free of charge, to any person obtaining a co"
},
{
"path": "README.md",
"chars": 40750,
"preview": "# Claude Scientific Skills\n\n[](LICENSE.md)\n[![Skills"
},
{
"path": "docs/examples.md",
"chars": 112982,
"preview": "# Real-World Scientific Examples\n\nThis document provides comprehensive, practical examples demonstrating how to combine "
},
{
"path": "docs/open-source-sponsors.md",
"chars": 9385,
"preview": "# Support the Open Source Projects We Depend On\n\nClaude Scientific Skills is built on the shoulders of giants. The 139 s"
},
{
"path": "docs/scientific-skills.md",
"chars": 115678,
"preview": "# Scientific Skills\n\n## Scientific Databases\n\n- **AlphaFold DB** - Comprehensive AI-predicted protein structure database"
},
{
"path": "scientific-skills/adaptyv/SKILL.md",
"chars": 3742,
"preview": "---\nname: adaptyv\ndescription: Cloud laboratory platform for automated protein testing and validation. Use when designin"
},
{
"path": "scientific-skills/adaptyv/reference/api_reference.md",
"chars": 6665,
"preview": "# Adaptyv API Reference\n\n## Base URL\n\n```\nhttps://kq5jp7qj7wdqklhsxmovkzn4l40obksv.lambda-url.eu-central-1.on.aws\n```\n\n#"
},
{
"path": "scientific-skills/adaptyv/reference/examples.md",
"chars": 23061,
"preview": "# Code Examples\n\n## Setup and Authentication\n\n### Basic Setup\n\n```python\nimport os\nimport requests\nfrom dotenv import lo"
},
{
"path": "scientific-skills/adaptyv/reference/experiments.md",
"chars": 9926,
"preview": "# Experiment Types and Workflows\n\n## Overview\n\nAdaptyv provides multiple experimental assay types for comprehensive prot"
},
{
"path": "scientific-skills/adaptyv/reference/protein_optimization.md",
"chars": 17859,
"preview": "# Protein Sequence Optimization\n\n## Overview\n\nBefore submitting protein sequences for experimental testing, use computat"
},
{
"path": "scientific-skills/aeon/SKILL.md",
"chars": 10587,
"preview": "---\nname: aeon\ndescription: This skill should be used for time series machine learning tasks including classification, r"
},
{
"path": "scientific-skills/aeon/references/anomaly_detection.md",
"chars": 4912,
"preview": "# Anomaly Detection\n\nAeon provides anomaly detection methods for identifying unusual patterns in time series at both ser"
},
{
"path": "scientific-skills/aeon/references/classification.md",
"chars": 5377,
"preview": "# Time Series Classification\n\nAeon provides 13 categories of time series classifiers with scikit-learn compatible APIs.\n"
},
{
"path": "scientific-skills/aeon/references/clustering.md",
"chars": 3744,
"preview": "# Time Series Clustering\n\nAeon provides clustering algorithms adapted for temporal data with specialized distance metric"
},
{
"path": "scientific-skills/aeon/references/datasets_benchmarking.md",
"chars": 8871,
"preview": "# Datasets and Benchmarking\n\nAeon provides comprehensive tools for loading datasets and benchmarking time series algorit"
},
{
"path": "scientific-skills/aeon/references/distances.md",
"chars": 6408,
"preview": "# Distance Metrics\n\nAeon provides specialized distance functions for measuring similarity between time series, compatibl"
},
{
"path": "scientific-skills/aeon/references/forecasting.md",
"chars": 3879,
"preview": "# Time Series Forecasting\n\nAeon provides forecasting algorithms for predicting future time series values.\n\n## Naive and "
},
{
"path": "scientific-skills/aeon/references/networks.md",
"chars": 7849,
"preview": "# Deep Learning Networks\n\nAeon provides neural network architectures specifically designed for time series tasks. These "
},
{
"path": "scientific-skills/aeon/references/regression.md",
"chars": 3960,
"preview": "# Time Series Regression\n\nAeon provides time series regressors across 9 categories for predicting continuous values from"
},
{
"path": "scientific-skills/aeon/references/segmentation.md",
"chars": 4924,
"preview": "# Time Series Segmentation\n\nAeon provides algorithms to partition time series into regions with distinct characteristics"
},
{
"path": "scientific-skills/aeon/references/similarity_search.md",
"chars": 5240,
"preview": "# Similarity Search\n\nAeon provides tools for finding similar patterns within and across time series, including subsequen"
},
{
"path": "scientific-skills/aeon/references/transformations.md",
"chars": 7725,
"preview": "# Transformations\n\nAeon provides extensive transformation capabilities for preprocessing, feature extraction, and repres"
},
{
"path": "scientific-skills/alpha-vantage/SKILL.md",
"chars": 6241,
"preview": "---\nname: alpha-vantage\ndescription: Access real-time and historical stock market data, forex rates, cryptocurrency pric"
},
{
"path": "scientific-skills/alpha-vantage/references/commodities.md",
"chars": 3772,
"preview": "# Commodities APIs\n\nHistorical data for major commodities. All functions return `{\"name\": \"...\", \"interval\": \"...\", \"uni"
},
{
"path": "scientific-skills/alpha-vantage/references/economic-indicators.md",
"chars": 3872,
"preview": "# Economic Indicators APIs\n\nAll economic indicators return US data and follow the same response structure:\n\n```json\n{\n "
},
{
"path": "scientific-skills/alpha-vantage/references/forex-crypto.md",
"chars": 4146,
"preview": "# Forex (FX) & Cryptocurrency APIs\n\n## Foreign Exchange Rates\n\n### CURRENCY_EXCHANGE_RATE — Realtime Exchange Rate\n\nRetu"
},
{
"path": "scientific-skills/alpha-vantage/references/fundamentals.md",
"chars": 7485,
"preview": "# Fundamental Data APIs\n\n## OVERVIEW — Company Overview\n\nReturns key company information, valuation metrics, and financi"
},
{
"path": "scientific-skills/alpha-vantage/references/intelligence.md",
"chars": 4835,
"preview": "# Alpha Intelligence™ APIs\n\n## NEWS_SENTIMENT — Market News & Sentiment\n\nReturns live/historical news articles with sent"
},
{
"path": "scientific-skills/alpha-vantage/references/options.md",
"chars": 2735,
"preview": "# Options Data APIs (Premium)\n\nBoth options endpoints require a premium Alpha Vantage subscription.\n\n## REALTIME_OPTIONS"
},
{
"path": "scientific-skills/alpha-vantage/references/technical-indicators.md",
"chars": 12313,
"preview": "# Technical Indicators APIs\n\nAll technical indicators work with equities, forex pairs, and crypto. Calculated from adjus"
},
{
"path": "scientific-skills/alpha-vantage/references/time-series.md",
"chars": 4355,
"preview": "# Time Series Stock Data APIs\n\nBase URL: `https://www.alphavantage.co/query`\n\n## GLOBAL_QUOTE — Latest Price\n\nReturns th"
},
{
"path": "scientific-skills/alphafold-database/SKILL.md",
"chars": 16389,
"preview": "---\nname: alphafold-database\ndescription: Access AlphaFold 200M+ AI-predicted protein structures. Retrieve structures by"
},
{
"path": "scientific-skills/alphafold-database/references/api_reference.md",
"chars": 12517,
"preview": "# AlphaFold Database API Reference\n\nThis document provides comprehensive technical documentation for programmatic access"
},
{
"path": "scientific-skills/anndata/SKILL.md",
"chars": 10210,
"preview": "---\nname: anndata\ndescription: Data structure for annotated matrices in single-cell analysis. Use when working with .h5a"
},
{
"path": "scientific-skills/anndata/references/best_practices.md",
"chars": 12105,
"preview": "# Best Practices\n\nGuidelines for efficient and effective use of AnnData.\n\n## Memory Management\n\n### Use sparse matrices "
},
{
"path": "scientific-skills/anndata/references/concatenation.md",
"chars": 10504,
"preview": "# Concatenating AnnData Objects\n\nCombine multiple AnnData objects along either observations or variables axis.\n\n## Basic"
},
{
"path": "scientific-skills/anndata/references/data_structure.md",
"chars": 8744,
"preview": "# AnnData Object Structure\n\nThe AnnData object stores a data matrix with associated annotations, providing a flexible fr"
},
{
"path": "scientific-skills/anndata/references/io_operations.md",
"chars": 9456,
"preview": "# Input/Output Operations\n\nAnnData provides comprehensive I/O functionality for reading and writing data in various form"
},
{
"path": "scientific-skills/anndata/references/manipulation.md",
"chars": 11866,
"preview": "# Data Manipulation\n\nOperations for transforming, subsetting, and manipulating AnnData objects.\n\n## Subsetting\n\n### By i"
},
{
"path": "scientific-skills/arboreto/SKILL.md",
"chars": 6929,
"preview": "---\nname: arboreto\ndescription: Infer gene regulatory networks (GRNs) from gene expression data using scalable algorithm"
},
{
"path": "scientific-skills/arboreto/references/algorithms.md",
"chars": 4361,
"preview": "# GRN Inference Algorithms\n\nArboreto provides two algorithms for gene regulatory network (GRN) inference, both based on "
},
{
"path": "scientific-skills/arboreto/references/basic_inference.md",
"chars": 3811,
"preview": "# Basic GRN Inference with Arboreto\n\n## Input Data Requirements\n\nArboreto requires gene expression data in one of two fo"
},
{
"path": "scientific-skills/arboreto/references/distributed_computing.md",
"chars": 6734,
"preview": "# Distributed Computing with Arboreto\n\nArboreto leverages Dask for parallelized computation, enabling efficient GRN infe"
},
{
"path": "scientific-skills/arboreto/scripts/basic_grn_inference.py",
"chars": 2969,
"preview": "#!/usr/bin/env python3\n\"\"\"\nBasic GRN inference example using Arboreto.\n\nThis script demonstrates the standard workflow f"
},
{
"path": "scientific-skills/arxiv-database/SKILL.md",
"chars": 9774,
"preview": "---\nname: arxiv-database\ndescription: Search and retrieve preprints from arXiv via the Atom API. Use this skill when sea"
},
{
"path": "scientific-skills/arxiv-database/references/api_reference.md",
"chars": 12860,
"preview": "# arXiv API Reference\n\n## Overview\n\nThe arXiv API provides programmatic access to preprint metadata via an Atom XML feed"
},
{
"path": "scientific-skills/arxiv-database/scripts/arxiv_search.py",
"chars": 13600,
"preview": "#!/usr/bin/env python3\n\"\"\"\narXiv Search Tool\nSearch and retrieve preprints from arXiv via the Atom API.\nSupports keyword"
},
{
"path": "scientific-skills/astropy/SKILL.md",
"chars": 11534,
"preview": "---\nname: astropy\ndescription: Comprehensive Python library for astronomy and astrophysics. This skill should be used wh"
},
{
"path": "scientific-skills/astropy/references/coordinates.md",
"chars": 7111,
"preview": "# Astronomical Coordinates (astropy.coordinates)\n\nThe `astropy.coordinates` package provides tools for representing cele"
},
{
"path": "scientific-skills/astropy/references/cosmology.md",
"chars": 6482,
"preview": "# Cosmological Calculations (astropy.cosmology)\n\nThe `astropy.cosmology` subpackage provides tools for cosmological calc"
},
{
"path": "scientific-skills/astropy/references/fits.md",
"chars": 8670,
"preview": "# FITS File Handling (astropy.io.fits)\n\nThe `astropy.io.fits` module provides comprehensive tools for reading, writing, "
},
{
"path": "scientific-skills/astropy/references/tables.md",
"chars": 9385,
"preview": "# Table Operations (astropy.table)\n\nThe `astropy.table` module provides flexible tools for working with tabular data, wi"
},
{
"path": "scientific-skills/astropy/references/time.md",
"chars": 9083,
"preview": "# Time Handling (astropy.time)\n\nThe `astropy.time` module provides robust tools for manipulating times and dates with su"
},
{
"path": "scientific-skills/astropy/references/units.md",
"chars": 3764,
"preview": "# Units and Quantities (astropy.units)\n\nThe `astropy.units` module handles defining, converting between, and performing "
},
{
"path": "scientific-skills/astropy/references/wcs_and_other_modules.md",
"chars": 8515,
"preview": "# WCS and Other Astropy Modules\n\n## World Coordinate System (astropy.wcs)\n\nThe WCS module manages transformations betwee"
},
{
"path": "scientific-skills/benchling-integration/SKILL.md",
"chars": 13061,
"preview": "---\nname: benchling-integration\ndescription: Benchling R&D platform integration. Access registry (DNA, proteins), invent"
},
{
"path": "scientific-skills/benchling-integration/references/api_endpoints.md",
"chars": 14223,
"preview": "# Benchling REST API Endpoints Reference\n\n## Base URL\n\nAll API requests use the base URL format:\n```\nhttps://{tenant}.be"
},
{
"path": "scientific-skills/benchling-integration/references/authentication.md",
"chars": 10124,
"preview": "# Benchling Authentication Reference\n\n## Authentication Methods\n\nBenchling supports three authentication methods, each s"
},
{
"path": "scientific-skills/benchling-integration/references/sdk_reference.md",
"chars": 17618,
"preview": "# Benchling Python SDK Reference\n\n## Installation & Setup\n\n### Installation\n\n```bash\n# Stable release\npip install benchl"
},
{
"path": "scientific-skills/bgpt-paper-search/SKILL.md",
"chars": 2860,
"preview": "---\nname: bgpt-paper-search\ndescription: Search scientific papers and retrieve structured experimental data extracted fr"
},
{
"path": "scientific-skills/bindingdb-database/SKILL.md",
"chars": 12063,
"preview": "---\nname: bindingdb-database\ndescription: Query BindingDB for measured drug-target binding affinities (Ki, Kd, IC50, EC5"
},
{
"path": "scientific-skills/bindingdb-database/references/affinity_queries.md",
"chars": 5813,
"preview": "# BindingDB Affinity Query Reference\n\n## Affinity Measurement Types\n\n### Ki (Inhibition Constant)\n- **Definition**: Equi"
},
{
"path": "scientific-skills/biopython/SKILL.md",
"chars": 13828,
"preview": "---\nname: biopython\ndescription: Comprehensive molecular biology toolkit. Use for sequence manipulation, file parsing (F"
},
{
"path": "scientific-skills/biopython/references/advanced.md",
"chars": 14032,
"preview": "# Advanced Biopython Features\n\n## Sequence Motifs with Bio.motifs\n\n### Creating Motifs\n\n```python\nfrom Bio import motifs"
},
{
"path": "scientific-skills/biopython/references/alignment.md",
"chars": 8953,
"preview": "# Sequence Alignments with Bio.Align and Bio.AlignIO\n\n## Overview\n\nBio.Align provides tools for pairwise sequence alignm"
},
{
"path": "scientific-skills/biopython/references/blast.md",
"chars": 12649,
"preview": "# BLAST Operations with Bio.Blast\n\n## Overview\n\nBio.Blast provides tools for running BLAST searches (both locally and vi"
},
{
"path": "scientific-skills/biopython/references/databases.md",
"chars": 11506,
"preview": "# Database Access with Bio.Entrez\n\n## Overview\n\nBio.Entrez provides programmatic access to NCBI's Entrez databases, incl"
},
{
"path": "scientific-skills/biopython/references/phylogenetics.md",
"chars": 13835,
"preview": "# Phylogenetics with Bio.Phylo\n\n## Overview\n\nBio.Phylo provides a unified toolkit for reading, writing, analyzing, and v"
},
{
"path": "scientific-skills/biopython/references/sequence_io.md",
"chars": 7274,
"preview": "# Sequence Handling with Bio.Seq and Bio.SeqIO\n\n## Overview\n\nBio.Seq provides the `Seq` object for biological sequences "
},
{
"path": "scientific-skills/biopython/references/structure.md",
"chars": 12965,
"preview": "# Structural Bioinformatics with Bio.PDB\n\n## Overview\n\nBio.PDB provides tools for working with macromolecular 3D structu"
},
{
"path": "scientific-skills/biorxiv-database/SKILL.md",
"chars": 12551,
"preview": "---\nname: biorxiv-database\ndescription: Efficient database search tool for bioRxiv preprint server. Use this skill when "
},
{
"path": "scientific-skills/biorxiv-database/references/api_reference.md",
"chars": 6405,
"preview": "# bioRxiv API Reference\n\n## Overview\n\nThe bioRxiv API provides programmatic access to preprint metadata from the bioRxiv"
},
{
"path": "scientific-skills/biorxiv-database/scripts/biorxiv_search.py",
"chars": 14831,
"preview": "#!/usr/bin/env python3\n\"\"\"\nbioRxiv Search Tool\nA comprehensive Python tool for searching and retrieving preprints from b"
},
{
"path": "scientific-skills/bioservices/SKILL.md",
"chars": 9931,
"preview": "---\nname: bioservices\ndescription: Unified Python interface to 40+ bioinformatics services. Use when querying multiple d"
},
{
"path": "scientific-skills/bioservices/references/identifier_mapping.md",
"chars": 17511,
"preview": "# BioServices: Identifier Mapping Guide\n\nThis document provides comprehensive information about converting identifiers b"
},
{
"path": "scientific-skills/bioservices/references/services_reference.md",
"chars": 12564,
"preview": "# BioServices: Complete Services Reference\n\nThis document provides a comprehensive reference for all major services avai"
},
{
"path": "scientific-skills/bioservices/references/workflow_patterns.md",
"chars": 20118,
"preview": "# BioServices: Common Workflow Patterns\n\nThis document describes detailed multi-step workflows for common bioinformatics"
},
{
"path": "scientific-skills/bioservices/scripts/batch_id_converter.py",
"chars": 10860,
"preview": "#!/usr/bin/env python3\n\"\"\"\nBatch Identifier Converter\n\nThis script converts multiple identifiers between biological data"
},
{
"path": "scientific-skills/bioservices/scripts/compound_cross_reference.py",
"chars": 11251,
"preview": "#!/usr/bin/env python3\n\"\"\"\nCompound Cross-Database Search\n\nThis script searches for a compound by name and retrieves ide"
},
{
"path": "scientific-skills/bioservices/scripts/pathway_analysis.py",
"chars": 9529,
"preview": "#!/usr/bin/env python3\n\"\"\"\nKEGG Pathway Network Analysis\n\nThis script analyzes all pathways for an organism and extracts"
},
{
"path": "scientific-skills/bioservices/scripts/protein_analysis_workflow.py",
"chars": 12373,
"preview": "#!/usr/bin/env python3\n\"\"\"\nComplete Protein Analysis Workflow\n\nThis script performs a comprehensive protein analysis pip"
},
{
"path": "scientific-skills/brenda-database/SKILL.md",
"chars": 22162,
"preview": "---\nname: brenda-database\ndescription: Access BRENDA enzyme database via SOAP API. Retrieve kinetic parameters (Km, kcat"
},
{
"path": "scientific-skills/brenda-database/references/api_reference.md",
"chars": 15848,
"preview": "# BRENDA Database API Reference\n\n## Overview\n\nThis document provides detailed reference information for the BRENDA (BRau"
},
{
"path": "scientific-skills/brenda-database/scripts/brenda_queries.py",
"chars": 30862,
"preview": "\"\"\"\nBRENDA Database Query Utilities\n\nThis module provides high-level functions for querying and analyzing\nenzyme data fr"
},
{
"path": "scientific-skills/brenda-database/scripts/brenda_visualization.py",
"chars": 29346,
"preview": "\"\"\"\nBRENDA Database Visualization Utilities\n\nThis module provides visualization functions for BRENDA enzyme data,\ninclud"
},
{
"path": "scientific-skills/brenda-database/scripts/enzyme_pathway_builder.py",
"chars": 44849,
"preview": "\"\"\"\nEnzyme Pathway Builder for Retrosynthetic Analysis\n\nThis module provides tools for constructing enzymatic pathways a"
},
{
"path": "scientific-skills/cbioportal-database/SKILL.md",
"chars": 13086,
"preview": "---\nname: cbioportal-database\ndescription: Query cBioPortal for cancer genomics data including somatic mutations, copy n"
},
{
"path": "scientific-skills/cbioportal-database/references/study_exploration.md",
"chars": 4405,
"preview": "# cBioPortal Study Exploration Reference\n\n## Major Study Collections\n\n### TCGA (The Cancer Genome Atlas)\n\n| Study ID | C"
},
{
"path": "scientific-skills/cellxgene-census/SKILL.md",
"chars": 15439,
"preview": "---\nname: cellxgene-census\ndescription: Query the CELLxGENE Census (61M+ cells) programmatically. Use when you need expr"
},
{
"path": "scientific-skills/cellxgene-census/references/census_schema.md",
"chars": 5649,
"preview": "# CZ CELLxGENE Census Data Schema Reference\n\n## Overview\n\nThe CZ CELLxGENE Census is a versioned collection of single-ce"
},
{
"path": "scientific-skills/cellxgene-census/references/common_patterns.md",
"chars": 9935,
"preview": "# Common Query Patterns and Best Practices\n\n## Query Pattern Categories\n\n### 1. Exploratory Queries (Metadata Only)\n\nUse"
},
{
"path": "scientific-skills/chembl-database/SKILL.md",
"chars": 10250,
"preview": "---\nname: chembl-database\ndescription: Query ChEMBL bioactive molecules and drug discovery data. Search compounds by str"
},
{
"path": "scientific-skills/chembl-database/references/api_reference.md",
"chars": 6879,
"preview": "# ChEMBL Web Services API Reference\n\n## Overview\n\nChEMBL is a manually curated database of bioactive molecules with drug"
},
{
"path": "scientific-skills/chembl-database/scripts/example_queries.py",
"chars": 7160,
"preview": "#!/usr/bin/env python3\n\"\"\"\nChEMBL Database Query Examples\n\nThis script demonstrates common query patterns for the ChEMBL"
},
{
"path": "scientific-skills/cirq/SKILL.md",
"chars": 10647,
"preview": "---\nname: cirq\ndescription: Google quantum computing framework. Use when targeting Google Quantum AI hardware, designing"
},
{
"path": "scientific-skills/cirq/references/building.md",
"chars": 5910,
"preview": "# Building Quantum Circuits\n\nThis guide covers circuit construction in Cirq, including qubits, gates, operations, and ci"
},
{
"path": "scientific-skills/cirq/references/experiments.md",
"chars": 15443,
"preview": "# Running Quantum Experiments\n\nThis guide covers designing and executing quantum experiments, including parameter sweeps"
},
{
"path": "scientific-skills/cirq/references/hardware.md",
"chars": 11519,
"preview": "# Hardware Integration\n\nThis guide covers running quantum circuits on real quantum hardware through Cirq's device interf"
},
{
"path": "scientific-skills/cirq/references/noise.md",
"chars": 13352,
"preview": "# Noise Modeling and Mitigation\n\nThis guide covers noise models, noisy simulation, characterization, and error mitigatio"
},
{
"path": "scientific-skills/cirq/references/simulation.md",
"chars": 8225,
"preview": "# Simulation in Cirq\n\nThis guide covers quantum circuit simulation, including exact and noisy simulations, parameter swe"
},
{
"path": "scientific-skills/cirq/references/transformation.md",
"chars": 10138,
"preview": "# Circuit Transformations\n\nThis guide covers circuit optimization, compilation, and manipulation using Cirq's transforma"
},
{
"path": "scientific-skills/citation-management/SKILL.md",
"chars": 32592,
"preview": "---\nname: citation-management\ndescription: Comprehensive citation management for academic research. Search Google Schola"
},
{
"path": "scientific-skills/citation-management/assets/bibtex_template.bib",
"chars": 9201,
"preview": "% BibTeX Template File\n% Examples of properly formatted entries for all common types\n\n% ================================"
},
{
"path": "scientific-skills/citation-management/assets/citation_checklist.md",
"chars": 10287,
"preview": "# Citation Quality Checklist\n\nUse this checklist to ensure your citations are accurate, complete, and properly formatted"
},
{
"path": "scientific-skills/citation-management/references/bibtex_formatting.md",
"chars": 19254,
"preview": "# BibTeX Formatting Guide\n\nComprehensive guide to BibTeX entry types, required fields, formatting conventions, and best "
},
{
"path": "scientific-skills/citation-management/references/citation_validation.md",
"chars": 16042,
"preview": "# Citation Validation Guide\n\nComprehensive guide to validating citation accuracy, completeness, and formatting in BibTeX"
},
{
"path": "scientific-skills/citation-management/references/google_scholar_search.md",
"chars": 17027,
"preview": "# Google Scholar Search Guide\n\nComprehensive guide to searching Google Scholar for academic papers, including advanced s"
},
{
"path": "scientific-skills/citation-management/references/metadata_extraction.md",
"chars": 19182,
"preview": "# Metadata Extraction Guide\n\nComprehensive guide to extracting accurate citation metadata from DOIs, PMIDs, arXiv IDs, a"
},
{
"path": "scientific-skills/citation-management/references/pubmed_search.md",
"chars": 17721,
"preview": "# PubMed Search Guide\n\nComprehensive guide to searching PubMed for biomedical and life sciences literature, including Me"
},
{
"path": "scientific-skills/citation-management/scripts/doi_to_bibtex.py",
"chars": 6299,
"preview": "#!/usr/bin/env python3\n\"\"\"\nDOI to BibTeX Converter\nQuick utility to convert DOIs to BibTeX format using CrossRef API.\n\"\""
},
{
"path": "scientific-skills/citation-management/scripts/extract_metadata.py",
"chars": 20794,
"preview": "#!/usr/bin/env python3\n\"\"\"\nMetadata Extraction Tool\nExtract citation metadata from DOI, PMID, arXiv ID, or URL using var"
},
{
"path": "scientific-skills/citation-management/scripts/format_bibtex.py",
"chars": 11444,
"preview": "#!/usr/bin/env python3\n\"\"\"\nBibTeX Formatter and Cleaner\nFormat, clean, sort, and deduplicate BibTeX files.\n\"\"\"\n\nimport s"
},
{
"path": "scientific-skills/citation-management/scripts/search_google_scholar.py",
"chars": 8982,
"preview": "#!/usr/bin/env python3\n\"\"\"\nGoogle Scholar Search Tool\nSearch Google Scholar and export results.\n\nNote: This script requi"
},
{
"path": "scientific-skills/citation-management/scripts/search_pubmed.py",
"chars": 12832,
"preview": "#!/usr/bin/env python3\n\"\"\"\nPubMed Search Tool\nSearch PubMed using E-utilities API and export results.\n\"\"\"\n\nimport sys\nim"
},
{
"path": "scientific-skills/citation-management/scripts/validate_citations.py",
"chars": 17482,
"preview": "#!/usr/bin/env python3\n\"\"\"\nCitation Validation Tool\nValidate BibTeX files for accuracy, completeness, and format complia"
},
{
"path": "scientific-skills/clinical-decision-support/SKILL.md",
"chars": 26489,
"preview": "---\nname: clinical-decision-support\ndescription: Generate professional clinical decision support (CDS) documents for pha"
},
{
"path": "scientific-skills/clinical-decision-support/assets/biomarker_report_template.tex",
"chars": 13324,
"preview": "\\documentclass[10pt,letterpaper]{article}\n\n% Packages\n\\usepackage[margin=0.5in]{geometry}\n\\usepackage[utf8]{inputenc}\n\\u"
},
{
"path": "scientific-skills/clinical-decision-support/assets/clinical_pathway_template.tex",
"chars": 8311,
"preview": "\\documentclass[10pt,letterpaper,landscape]{article}\n\n% Landscape for wider flowcharts\n\\usepackage[margin=0.4in]{geometry"
},
{
"path": "scientific-skills/clinical-decision-support/assets/cohort_analysis_template.tex",
"chars": 12600,
"preview": "\\documentclass[10pt,letterpaper]{article}\n\n% Packages\n\\usepackage[margin=0.5in]{geometry}\n\\usepackage[utf8]{inputenc}\n\\u"
},
{
"path": "scientific-skills/clinical-decision-support/assets/color_schemes.tex",
"chars": 7029,
"preview": "% Clinical Decision Support Color Schemes\n% For use in LaTeX documents\n\n% =============================================="
},
{
"path": "scientific-skills/clinical-decision-support/assets/example_gbm_cohort.md",
"chars": 9310,
"preview": "# Example: GBM Molecular Subtype Cohort Analysis\n\n## Clinical Context\n\nThis example demonstrates a patient cohort analys"
},
{
"path": "scientific-skills/clinical-decision-support/assets/recommendation_strength_guide.md",
"chars": 11212,
"preview": "# Recommendation Strength Guide\n\n## GRADE Framework for Clinical Recommendations\n\n### Components of a Recommendation\n\nEv"
},
{
"path": "scientific-skills/clinical-decision-support/assets/treatment_recommendation_template.tex",
"chars": 18842,
"preview": "\\documentclass[10pt,letterpaper]{article}\n\n% Packages\n\\usepackage[margin=0.5in]{geometry}\n\\usepackage[utf8]{inputenc}\n\\u"
},
{
"path": "scientific-skills/clinical-decision-support/references/README.md",
"chars": 4849,
"preview": "# Clinical Decision Support Skill\n\nProfessional clinical decision support documents for medical professionals in pharmac"
},
{
"path": "scientific-skills/clinical-decision-support/references/biomarker_classification.md",
"chars": 28861,
"preview": "# Biomarker Classification and Interpretation Guide\n\n## Overview\n\nBiomarkers are measurable indicators of biological sta"
},
{
"path": "scientific-skills/clinical-decision-support/references/clinical_decision_algorithms.md",
"chars": 20543,
"preview": "# Clinical Decision Algorithms Guide\n\n## Overview\n\nClinical decision algorithms provide systematic, step-by-step guidanc"
},
{
"path": "scientific-skills/clinical-decision-support/references/evidence_synthesis.md",
"chars": 32021,
"preview": "# Evidence Synthesis and Guideline Integration Guide\n\n## Overview\n\nEvidence synthesis involves systematically reviewing,"
},
{
"path": "scientific-skills/clinical-decision-support/references/outcome_analysis.md",
"chars": 23811,
"preview": "# Outcome Analysis and Statistical Methods Guide\n\n## Overview\n\nRigorous outcome analysis is essential for clinical decis"
},
{
"path": "scientific-skills/clinical-decision-support/references/patient_cohort_analysis.md",
"chars": 16524,
"preview": "# Patient Cohort Analysis Guide\n\n## Overview\n\nPatient cohort analysis involves systematically studying groups of patient"
},
{
"path": "scientific-skills/clinical-decision-support/references/treatment_recommendations.md",
"chars": 19509,
"preview": "# Treatment Recommendations Guide\n\n## Overview\n\nEvidence-based treatment recommendations provide clinicians with systema"
},
{
"path": "scientific-skills/clinical-decision-support/scripts/biomarker_classifier.py",
"chars": 13310,
"preview": "#!/usr/bin/env python3\n\"\"\"\nBiomarker-Based Patient Stratification and Classification\n\nPerforms patient stratification ba"
},
{
"path": "scientific-skills/clinical-decision-support/scripts/build_decision_tree.py",
"chars": 15891,
"preview": "#!/usr/bin/env python3\n\"\"\"\nBuild Clinical Decision Tree Flowcharts in TikZ Format\n\nGenerates LaTeX/TikZ code for clinica"
},
{
"path": "scientific-skills/clinical-decision-support/scripts/create_cohort_tables.py",
"chars": 18015,
"preview": "#!/usr/bin/env python3\n\"\"\"\nGenerate Clinical Cohort Tables for Baseline Characteristics and Outcomes\n\nCreates publicatio"
},
{
"path": "scientific-skills/clinical-decision-support/scripts/generate_survival_analysis.py",
"chars": 15258,
"preview": "#!/usr/bin/env python3\n\"\"\"\nGenerate Kaplan-Meier Survival Curves for Clinical Decision Support Documents\n\nThis script cr"
},
{
"path": "scientific-skills/clinical-decision-support/scripts/validate_cds_document.py",
"chars": 12719,
"preview": "#!/usr/bin/env python3\n\"\"\"\nValidate Clinical Decision Support Documents for Quality and Completeness\n\nChecks for:\n- Evid"
},
{
"path": "scientific-skills/clinical-reports/SKILL.md",
"chars": 39686,
"preview": "---\nname: clinical-reports\ndescription: Write comprehensive clinical reports including case reports (CARE guidelines), d"
},
{
"path": "scientific-skills/clinical-reports/assets/case_report_template.md",
"chars": 9771,
"preview": "# Clinical Case Report Template\n\n## Title\n\n[Insert descriptive title that includes \"Case Report\" or \"Case Study\" and ind"
},
{
"path": "scientific-skills/clinical-reports/assets/clinical_trial_csr_template.md",
"chars": 9191,
"preview": "# Clinical Study Report (CSR) Template\n## ICH-E3 Format\n\n---\n\n# TITLE PAGE\n\n**Study Title:** [Full descriptive title inc"
},
{
"path": "scientific-skills/clinical-reports/assets/clinical_trial_sae_template.md",
"chars": 10565,
"preview": "# Serious Adverse Event (SAE) Report Template\n\n## Report Information\n\n**Report Type:** [ ] Initial Report [ ] Follow-up"
},
{
"path": "scientific-skills/clinical-reports/assets/consult_note_template.md",
"chars": 7338,
"preview": "# Consultation Note Template\n\n**Patient Name:** [Last, First] \n**Medical Record Number:** [MRN] \n**Date of Birth:** [M"
},
{
"path": "scientific-skills/clinical-reports/assets/discharge_summary_template.md",
"chars": 11982,
"preview": "# Discharge Summary Template\n\n## Patient Information\n\n**Patient Name:** [Last, First] \n**Medical Record Number:** [MRN]"
},
{
"path": "scientific-skills/clinical-reports/assets/hipaa_compliance_checklist.md",
"chars": 11798,
"preview": "# HIPAA Compliance Checklist for Clinical Reports\n\n## 18 HIPAA Identifiers - De-identification Checklist\n\nVerify that AL"
},
{
"path": "scientific-skills/clinical-reports/assets/history_physical_template.md",
"chars": 9258,
"preview": "# History and Physical Examination (H&P) Template\n\n**Patient Name:** [Last, First] \n**Medical Record Number:** [MRN] \n"
},
{
"path": "scientific-skills/clinical-reports/assets/lab_report_template.md",
"chars": 8456,
"preview": "# Laboratory Report Template\n\n## Patient Information\n\n**Patient Name:** [Last, First] \n**Medical Record Number:** [MRN]"
},
{
"path": "scientific-skills/clinical-reports/assets/pathology_report_template.md",
"chars": 6644,
"preview": "# Surgical Pathology Report Template\n\n## Patient and Specimen Information\n\n**Patient Name:** [Last, First] \n**Medical R"
},
{
"path": "scientific-skills/clinical-reports/assets/quality_checklist.md",
"chars": 9967,
"preview": "# Clinical Report Quality Assurance Checklist\n\n## General Quality Standards\n\n### Completeness\n- [ ] All required section"
},
{
"path": "scientific-skills/clinical-reports/assets/radiology_report_template.md",
"chars": 8009,
"preview": "# Radiology Report Template\n\n## Patient Information\n\n**Patient Name:** [Last, First] \n**Medical Record Number:** [MRN] "
},
{
"path": "scientific-skills/clinical-reports/assets/soap_note_template.md",
"chars": 7295,
"preview": "# SOAP Note Template\n\n## Patient Information\n\n**Patient Name:** [Last, First] or [Patient ID for teaching/research conte"
},
{
"path": "scientific-skills/clinical-reports/references/README.md",
"chars": 8511,
"preview": "# Clinical Reports Skill\n\n## Overview\n\nComprehensive skill for writing clinical reports including case reports, diagnost"
},
{
"path": "scientific-skills/clinical-reports/references/case_report_guidelines.md",
"chars": 18830,
"preview": "# Clinical Case Report Guidelines\n\n## CARE Guidelines (CAse REport)\n\nThe CARE guidelines provide a framework for transpa"
},
{
"path": "scientific-skills/clinical-reports/references/clinical_trial_reporting.md",
"chars": 20302,
"preview": "# Clinical Trial Reporting Standards\n\n## ICH-E3: Structure and Content of Clinical Study Reports\n\nThe International Coun"
},
{
"path": "scientific-skills/clinical-reports/references/data_presentation.md",
"chars": 19117,
"preview": "# Data Presentation in Clinical Reports\n\n## Tables for Clinical Data\n\n### Table Design Principles\n\n**General guidelines:"
},
{
"path": "scientific-skills/clinical-reports/references/diagnostic_reports_standards.md",
"chars": 18372,
"preview": "# Diagnostic Reports Standards\n\n## Radiology Reporting Standards\n\n### American College of Radiology (ACR) Guidelines\n\nTh"
},
{
"path": "scientific-skills/clinical-reports/references/medical_terminology.md",
"chars": 15468,
"preview": "# Medical Terminology and Coding Standards\n\n## Standard Nomenclature Systems\n\n### SNOMED CT (Systematized Nomenclature o"
},
{
"path": "scientific-skills/clinical-reports/references/patient_documentation.md",
"chars": 21255,
"preview": "# Patient Documentation Standards\n\n## SOAP Notes\n\nSOAP (Subjective, Objective, Assessment, Plan) is the standard format "
},
{
"path": "scientific-skills/clinical-reports/references/peer_review_standards.md",
"chars": 15477,
"preview": "# Peer Review Standards for Clinical Manuscripts\n\n## Overview of Clinical Manuscript Peer Review\n\n### Purpose\n\nPeer revi"
},
{
"path": "scientific-skills/clinical-reports/references/regulatory_compliance.md",
"chars": 17466,
"preview": "# Regulatory Compliance for Clinical Reports\n\n## HIPAA (Health Insurance Portability and Accountability Act)\n\n### Overvi"
},
{
"path": "scientific-skills/clinical-reports/scripts/check_deidentification.py",
"chars": 10690,
"preview": "#!/usr/bin/env python3\n\"\"\"\nCheck clinical reports for HIPAA identifiers that need removal.\n\nScans text for 18 HIPAA iden"
},
{
"path": "scientific-skills/clinical-reports/scripts/compliance_checker.py",
"chars": 2172,
"preview": "#!/usr/bin/env python3\n\"\"\"\nCheck clinical reports for regulatory compliance (HIPAA, GCP, FDA).\n\nUsage:\n python compli"
},
{
"path": "scientific-skills/clinical-reports/scripts/extract_clinical_data.py",
"chars": 2632,
"preview": "#!/usr/bin/env python3\n\"\"\"\nExtract structured clinical data from reports.\n\nUsage:\n python extract_clinical_data.py <r"
},
{
"path": "scientific-skills/clinical-reports/scripts/format_adverse_events.py",
"chars": 3047,
"preview": "#!/usr/bin/env python3\n\"\"\"\nFormat adverse event data into tables for clinical trial reports.\n\nConverts CSV or structured"
},
{
"path": "scientific-skills/clinical-reports/scripts/generate_report_template.py",
"chars": 4711,
"preview": "#!/usr/bin/env python3\n\"\"\"\nInteractive template generator for clinical reports.\n\nHelps users select and generate appropr"
},
{
"path": "scientific-skills/clinical-reports/scripts/terminology_validator.py",
"chars": 4130,
"preview": "#!/usr/bin/env python3\n\"\"\"\nValidate medical terminology and coding in clinical reports.\n\nUsage:\n python terminology_v"
},
{
"path": "scientific-skills/clinical-reports/scripts/validate_case_report.py",
"chars": 11530,
"preview": "#!/usr/bin/env python3\n\"\"\"\nValidate case reports against CARE (CAse REport) guidelines.\n\nThis script checks a clinical c"
},
{
"path": "scientific-skills/clinical-reports/scripts/validate_trial_report.py",
"chars": 2867,
"preview": "#!/usr/bin/env python3\n\"\"\"\nValidate clinical trial reports against ICH-E3 structure.\n\nChecks Clinical Study Reports (CSR"
},
{
"path": "scientific-skills/clinicaltrials-database/SKILL.md",
"chars": 14988,
"preview": "---\nname: clinicaltrials-database\ndescription: Query ClinicalTrials.gov via API v2. Search trials by condition, drug, lo"
},
{
"path": "scientific-skills/clinicaltrials-database/references/api_reference.md",
"chars": 10694,
"preview": "# ClinicalTrials.gov API v2 Reference Documentation\n\n## Overview\n\nThe ClinicalTrials.gov API v2 is a modern REST API tha"
},
{
"path": "scientific-skills/clinicaltrials-database/scripts/query_clinicaltrials.py",
"chars": 6992,
"preview": "#!/usr/bin/env python3\n\"\"\"\nClinicalTrials.gov API Query Helper\n\nA comprehensive Python script for querying the ClinicalT"
},
{
"path": "scientific-skills/clinpgx-database/SKILL.md",
"chars": 20948,
"preview": "---\nname: clinpgx-database\ndescription: Access ClinPGx pharmacogenomics data (successor to PharmGKB). Query gene-drug in"
},
{
"path": "scientific-skills/clinpgx-database/references/api_reference.md",
"chars": 17360,
"preview": "# ClinPGx API Reference\n\nComplete reference documentation for the ClinPGx REST API.\n\n## Base URL\n\n```\nhttps://api.clinpg"
},
{
"path": "scientific-skills/clinpgx-database/scripts/query_clinpgx.py",
"chars": 14295,
"preview": "#!/usr/bin/env python3\n\"\"\"\nClinPGx API Query Helper Script\n\nProvides ready-to-use functions for querying the ClinPGx dat"
},
{
"path": "scientific-skills/clinvar-database/SKILL.md",
"chars": 13228,
"preview": "---\nname: clinvar-database\ndescription: Query NCBI ClinVar for variant clinical significance. Search by gene/position, i"
},
{
"path": "scientific-skills/clinvar-database/references/api_reference.md",
"chars": 6396,
"preview": "# ClinVar API and Data Access Reference\n\n## Overview\n\nClinVar provides multiple methods for programmatic data access:\n- "
},
{
"path": "scientific-skills/clinvar-database/references/clinical_significance.md",
"chars": 9331,
"preview": "# ClinVar Clinical Significance Interpretation Guide\n\n## Overview\n\nClinVar uses standardized terminology to describe the"
},
{
"path": "scientific-skills/clinvar-database/references/data_formats.md",
"chars": 10710,
"preview": "# ClinVar Data Formats and FTP Access\n\n## Overview\n\nClinVar provides bulk data downloads in multiple formats to support "
},
{
"path": "scientific-skills/cobrapy/SKILL.md",
"chars": 12450,
"preview": "---\nname: cobrapy\ndescription: Constraint-based metabolic modeling (COBRA). FBA, FVA, gene knockouts, flux sampling, SBM"
},
{
"path": "scientific-skills/cobrapy/references/api_quick_reference.md",
"chars": 16036,
"preview": "# COBRApy API Quick Reference\n\nThis document provides quick reference for common COBRApy functions, signatures, and usag"
},
{
"path": "scientific-skills/cobrapy/references/workflows.md",
"chars": 21741,
"preview": "# COBRApy Comprehensive Workflows\n\nThis document provides detailed step-by-step workflows for common COBRApy tasks in me"
},
{
"path": "scientific-skills/consciousness-council/SKILL.md",
"chars": 8659,
"preview": "---\nname: consciousness-council\ndescription: Run a multi-perspective Mind Council deliberation on any question, decision"
},
{
"path": "scientific-skills/consciousness-council/references/advanced-configurations.md",
"chars": 5153,
"preview": "# Advanced Council Configurations\n\nReference guide for specialized Council configurations beyond the defaults.\n\n## Domai"
},
{
"path": "scientific-skills/cosmic-database/SKILL.md",
"chars": 10032,
"preview": "---\nname: cosmic-database\ndescription: Access COSMIC cancer mutation database. Query somatic mutations, Cancer Gene Cens"
},
{
"path": "scientific-skills/cosmic-database/references/cosmic_data_reference.md",
"chars": 6291,
"preview": "# COSMIC Database Reference\n\n## Overview\n\nCOSMIC (Catalogue of Somatic Mutations in Cancer) is the world's largest and m"
},
{
"path": "scientific-skills/cosmic-database/scripts/download_cosmic.py",
"chars": 7972,
"preview": "#!/usr/bin/env python3\n\"\"\"\nCOSMIC Data Download Utility\n\nThis script provides functions to download data from the COSMIC"
},
{
"path": "scientific-skills/dask/SKILL.md",
"chars": 14272,
"preview": "---\nname: dask\ndescription: Distributed computing for larger-than-RAM pandas/NumPy workflows. Use when you need to scale"
},
{
"path": "scientific-skills/dask/references/arrays.md",
"chars": 11649,
"preview": "# Dask Arrays\n\n## Overview\n\nDask Array implements NumPy's ndarray interface using blocked algorithms. It coordinates man"
},
{
"path": "scientific-skills/dask/references/bags.md",
"chars": 10869,
"preview": "# Dask Bags\n\n## Overview\n\nDask Bag implements functional operations including `map`, `filter`, `fold`, and `groupby` on "
},
{
"path": "scientific-skills/dask/references/best-practices.md",
"chars": 7317,
"preview": "# Dask Best Practices\n\n## Performance Optimization Principles\n\n### Start with Simpler Solutions First\n\nBefore implementi"
},
{
"path": "scientific-skills/dask/references/dataframes.md",
"chars": 8778,
"preview": "# Dask DataFrames\n\n## Overview\n\nDask DataFrames enable parallel processing of large tabular data by distributing work ac"
},
{
"path": "scientific-skills/dask/references/futures.md",
"chars": 12051,
"preview": "# Dask Futures\n\n## Overview\n\nDask futures extend Python's `concurrent.futures` interface, enabling immediate (non-lazy) "
},
{
"path": "scientific-skills/dask/references/schedulers.md",
"chars": 11307,
"preview": "# Dask Schedulers\n\n## Overview\n\nDask provides multiple task schedulers, each suited to different workloads. The schedule"
},
{
"path": "scientific-skills/datacommons-client/SKILL.md",
"chars": 7984,
"preview": "---\nname: datacommons-client\ndescription: Work with Data Commons, a platform providing programmatic access to public sta"
},
{
"path": "scientific-skills/datacommons-client/references/getting_started.md",
"chars": 10129,
"preview": "# Getting Started with Data Commons\n\n## Quick Start Guide\n\nThis guide provides end-to-end examples for common Data Commo"
},
{
"path": "scientific-skills/datacommons-client/references/node.md",
"chars": 6118,
"preview": "# Node Endpoint - Knowledge Graph Exploration\n\n## Purpose\n\nThe Node endpoint retrieves property relationships and values"
},
{
"path": "scientific-skills/datacommons-client/references/observation.md",
"chars": 5490,
"preview": "# Observation Endpoint - Statistical Data Queries\n\n## Purpose\n\nThe Observation API retrieves statistical observations—da"
},
{
"path": "scientific-skills/datacommons-client/references/resolve.md",
"chars": 7190,
"preview": "# Resolve Endpoint - Entity Identification\n\n## Purpose\n\nThe Resolve API identifies Data Commons IDs (DCIDs) for entities"
},
{
"path": "scientific-skills/datamol/SKILL.md",
"chars": 18861,
"preview": "---\nname: datamol\ndescription: Pythonic wrapper around RDKit with simplified interface and sensible defaults. Preferred "
}
]
// ... and 1140 more files (download for full content)
About this extraction
This page contains the full source code of the K-Dense-AI/claude-scientific-skills GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 1340 files (16.4 MB), approximately 4.0M tokens, and a symbol index with 1486 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.