Repository: abhigyanpatwari/GitNexus Branch: main Commit: 862fdf91856d Files: 1482 Total size: 3.9 MB Directory structure: gitextract_jd_m7yp2/ ├── .claude/ │ └── skills/ │ └── gitnexus/ │ ├── gitnexus-cli/ │ │ └── SKILL.md │ ├── gitnexus-debugging/ │ │ └── SKILL.md │ ├── gitnexus-exploring/ │ │ └── SKILL.md │ ├── gitnexus-guide/ │ │ └── SKILL.md │ ├── gitnexus-impact-analysis/ │ │ └── SKILL.md │ ├── gitnexus-pr-review/ │ │ └── SKILL.md │ └── gitnexus-refactoring/ │ └── SKILL.md ├── .claude-plugin/ │ └── marketplace.json ├── .cursorrules ├── .github/ │ ├── FUNDING.yml │ ├── actions/ │ │ └── setup-gitnexus/ │ │ └── action.yml │ ├── release.yml │ └── workflows/ │ ├── ci-quality.yml │ ├── ci-report.yml │ ├── ci-tests.yml │ ├── ci.yml │ ├── claude-code-review.yml │ ├── claude.yml │ └── publish.yml ├── .gitignore ├── .history/ │ └── gitnexus/ │ └── vitest.config_20260317171253.ts ├── .mcp.json ├── .sisyphus/ │ └── drafts/ │ ├── gitnexus-brainstorming.md │ └── noodlbox-comparison.md ├── .windsurfrules ├── AGENTS.md ├── CHANGELOG.md ├── CLAUDE.md ├── LICENSE ├── README.md ├── compound-engineering.local.md ├── eval/ │ ├── .gitignore │ ├── README.md │ ├── __init__.py │ ├── agents/ │ │ ├── __init__.py │ │ └── gitnexus_agent.py │ ├── analysis/ │ │ ├── __init__.py │ │ └── analyze_results.py │ ├── bridge/ │ │ ├── __init__.py │ │ ├── gitnexus_tools.sh │ │ └── mcp_bridge.py │ ├── configs/ │ │ ├── models/ │ │ │ ├── claude-haiku.yaml │ │ │ ├── claude-opus.yaml │ │ │ ├── claude-sonnet.yaml │ │ │ ├── deepseek-chat.yaml │ │ │ ├── deepseek-v3.yaml │ │ │ ├── glm-4.7.yaml │ │ │ ├── glm-5.yaml │ │ │ ├── minimax-2.5.yaml │ │ │ └── minimax-m2.1.yaml │ │ └── modes/ │ │ ├── baseline.yaml │ │ ├── native.yaml │ │ └── native_augment.yaml │ ├── environments/ │ │ ├── __init__.py │ │ └── gitnexus_docker.py │ ├── prompts/ │ │ ├── instance_baseline.jinja │ │ ├── instance_native.jinja │ │ ├── instance_native_augment.jinja │ │ ├── system_baseline.jinja │ │ ├── system_native.jinja │ │ └── 
system_native_augment.jinja │ ├── pyproject.toml │ └── run_eval.py ├── gitnexus/ │ ├── .claude/ │ │ └── settings.local.json │ ├── .npmignore │ ├── CHANGELOG.md │ ├── Dockerfile.test │ ├── README.md │ ├── hooks/ │ │ └── claude/ │ │ ├── gitnexus-hook.cjs │ │ ├── pre-tool-use.sh │ │ └── session-start.sh │ ├── package.json │ ├── scripts/ │ │ └── patch-tree-sitter-swift.cjs │ ├── skills/ │ │ ├── gitnexus-cli.md │ │ ├── gitnexus-debugging.md │ │ ├── gitnexus-exploring.md │ │ ├── gitnexus-guide.md │ │ ├── gitnexus-impact-analysis.md │ │ ├── gitnexus-pr-review.md │ │ └── gitnexus-refactoring.md │ ├── src/ │ │ ├── cli/ │ │ │ ├── ai-context.ts │ │ │ ├── analyze.ts │ │ │ ├── augment.ts │ │ │ ├── clean.ts │ │ │ ├── eval-server.ts │ │ │ ├── index.ts │ │ │ ├── lazy-action.ts │ │ │ ├── list.ts │ │ │ ├── mcp.ts │ │ │ ├── serve.ts │ │ │ ├── setup.ts │ │ │ ├── skill-gen.ts │ │ │ ├── status.ts │ │ │ ├── tool.ts │ │ │ └── wiki.ts │ │ ├── config/ │ │ │ ├── ignore-service.ts │ │ │ └── supported-languages.ts │ │ ├── core/ │ │ │ ├── augmentation/ │ │ │ │ └── engine.ts │ │ │ ├── embeddings/ │ │ │ │ ├── embedder.ts │ │ │ │ ├── embedding-pipeline.ts │ │ │ │ ├── index.ts │ │ │ │ ├── text-generator.ts │ │ │ │ └── types.ts │ │ │ ├── graph/ │ │ │ │ ├── graph.ts │ │ │ │ └── types.ts │ │ │ ├── ingestion/ │ │ │ │ ├── ast-cache.ts │ │ │ │ ├── call-processor.ts │ │ │ │ ├── call-routing.ts │ │ │ │ ├── cluster-enricher.ts │ │ │ │ ├── community-processor.ts │ │ │ │ ├── constants.ts │ │ │ │ ├── entry-point-scoring.ts │ │ │ │ ├── export-detection.ts │ │ │ │ ├── filesystem-walker.ts │ │ │ │ ├── framework-detection.ts │ │ │ │ ├── heritage-processor.ts │ │ │ │ ├── import-processor.ts │ │ │ │ ├── language-config.ts │ │ │ │ ├── mro-processor.ts │ │ │ │ ├── named-binding-extraction.ts │ │ │ │ ├── parsing-processor.ts │ │ │ │ ├── pipeline.ts │ │ │ │ ├── process-processor.ts │ │ │ │ ├── resolution-context.ts │ │ │ │ ├── resolvers/ │ │ │ │ │ ├── csharp.ts │ │ │ │ │ ├── go.ts │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── 
jvm.ts │ │ │ │ │ ├── php.ts │ │ │ │ │ ├── python.ts │ │ │ │ │ ├── ruby.ts │ │ │ │ │ ├── rust.ts │ │ │ │ │ ├── standard.ts │ │ │ │ │ └── utils.ts │ │ │ │ ├── structure-processor.ts │ │ │ │ ├── symbol-table.ts │ │ │ │ ├── tree-sitter-queries.ts │ │ │ │ ├── type-env.ts │ │ │ │ ├── type-extractors/ │ │ │ │ │ ├── c-cpp.ts │ │ │ │ │ ├── csharp.ts │ │ │ │ │ ├── go.ts │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── jvm.ts │ │ │ │ │ ├── php.ts │ │ │ │ │ ├── python.ts │ │ │ │ │ ├── ruby.ts │ │ │ │ │ ├── rust.ts │ │ │ │ │ ├── shared.ts │ │ │ │ │ ├── swift.ts │ │ │ │ │ ├── types.ts │ │ │ │ │ └── typescript.ts │ │ │ │ ├── utils.ts │ │ │ │ └── workers/ │ │ │ │ ├── parse-worker.ts │ │ │ │ └── worker-pool.ts │ │ │ ├── lbug/ │ │ │ │ ├── csv-generator.ts │ │ │ │ ├── lbug-adapter.ts │ │ │ │ └── schema.ts │ │ │ ├── search/ │ │ │ │ ├── bm25-index.ts │ │ │ │ └── hybrid-search.ts │ │ │ ├── tree-sitter/ │ │ │ │ └── parser-loader.ts │ │ │ └── wiki/ │ │ │ ├── generator.ts │ │ │ ├── graph-queries.ts │ │ │ ├── html-viewer.ts │ │ │ ├── llm-client.ts │ │ │ └── prompts.ts │ │ ├── lib/ │ │ │ └── utils.ts │ │ ├── mcp/ │ │ │ ├── compatible-stdio-transport.ts │ │ │ ├── core/ │ │ │ │ ├── embedder.ts │ │ │ │ └── lbug-adapter.ts │ │ │ ├── local/ │ │ │ │ └── local-backend.ts │ │ │ ├── resources.ts │ │ │ ├── server.ts │ │ │ ├── staleness.ts │ │ │ └── tools.ts │ │ ├── server/ │ │ │ ├── api.ts │ │ │ └── mcp-http.ts │ │ ├── storage/ │ │ │ ├── git.ts │ │ │ └── repo-manager.ts │ │ └── types/ │ │ └── pipeline.ts │ ├── test/ │ │ ├── fixtures/ │ │ │ ├── lang-resolution/ │ │ │ │ ├── cpp-ambiguous/ │ │ │ │ │ ├── handler_a.h │ │ │ │ │ ├── handler_b.h │ │ │ │ │ └── processor.h │ │ │ │ ├── cpp-assignment-chain/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Repo.h │ │ │ │ │ │ └── User.h │ │ │ │ │ └── services/ │ │ │ │ │ └── App.cpp │ │ │ │ ├── cpp-brace-init-inference/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Repo.h │ │ │ │ │ │ └── User.h │ │ │ │ │ └── services/ │ │ │ │ │ └── App.cpp │ │ │ │ ├── cpp-call-result-binding/ │ │ │ │ │ ├── 
app.cpp │ │ │ │ │ └── user.h │ │ │ │ ├── cpp-calls/ │ │ │ │ │ ├── main.cpp │ │ │ │ │ ├── one.h │ │ │ │ │ └── zero.h │ │ │ │ ├── cpp-chain-call/ │ │ │ │ │ ├── app.cpp │ │ │ │ │ ├── repo.h │ │ │ │ │ ├── service.h │ │ │ │ │ └── user.h │ │ │ │ ├── cpp-constructor-calls/ │ │ │ │ │ ├── app.cpp │ │ │ │ │ └── user.h │ │ │ │ ├── cpp-constructor-type-inference/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Repo.h │ │ │ │ │ │ └── User.h │ │ │ │ │ └── services/ │ │ │ │ │ └── App.cpp │ │ │ │ ├── cpp-deep-field-chain/ │ │ │ │ │ ├── models.h │ │ │ │ │ └── service.cpp │ │ │ │ ├── cpp-default-params/ │ │ │ │ │ └── src/ │ │ │ │ │ └── app.cpp │ │ │ │ ├── cpp-deref-range-for/ │ │ │ │ │ ├── App.cpp │ │ │ │ │ ├── Repo.h │ │ │ │ │ └── User.h │ │ │ │ ├── cpp-diamond/ │ │ │ │ │ ├── animal.h │ │ │ │ │ ├── duck.cpp │ │ │ │ │ ├── duck.h │ │ │ │ │ ├── flyer.h │ │ │ │ │ └── swimmer.h │ │ │ │ ├── cpp-field-types/ │ │ │ │ │ ├── models.h │ │ │ │ │ └── service.cpp │ │ │ │ ├── cpp-grandparent-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── A.h │ │ │ │ │ ├── B.h │ │ │ │ │ ├── C.h │ │ │ │ │ ├── Greeting.h │ │ │ │ │ └── app.cpp │ │ │ │ ├── cpp-local-shadow/ │ │ │ │ │ ├── CMakeLists.txt │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.cpp │ │ │ │ │ ├── utils.cpp │ │ │ │ │ └── utils.h │ │ │ │ ├── cpp-member-calls/ │ │ │ │ │ ├── app.cpp │ │ │ │ │ └── user.h │ │ │ │ ├── cpp-method-chain-binding/ │ │ │ │ │ ├── app.cpp │ │ │ │ │ └── models.h │ │ │ │ ├── cpp-nullable-receiver/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Repo.h │ │ │ │ │ │ └── User.h │ │ │ │ │ └── services/ │ │ │ │ │ └── App.cpp │ │ │ │ ├── cpp-overload-param-types/ │ │ │ │ │ └── service.cpp │ │ │ │ ├── cpp-parent-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── BaseModel.h │ │ │ │ │ └── User.h │ │ │ │ ├── cpp-pointer-ref-fields/ │ │ │ │ │ ├── models.h │ │ │ │ │ └── service.cpp │ │ │ │ ├── cpp-range-for/ │ │ │ │ │ ├── App.cpp │ │ │ │ │ ├── Repo.h │ │ │ │ │ └── User.h │ │ │ │ ├── cpp-receiver-resolution/ │ │ │ │ │ ├── app.cpp │ │ │ │ │ ├── repo.h │ │ │ │ │ └── user.h │ │ │ │ 
├── cpp-return-type/ │ │ │ │ │ ├── app.cpp │ │ │ │ │ └── user.h │ │ │ │ ├── cpp-return-type-inference/ │ │ │ │ │ ├── app.cpp │ │ │ │ │ ├── repo.h │ │ │ │ │ └── user.h │ │ │ │ ├── cpp-scoped-brace-init/ │ │ │ │ │ ├── main.cpp │ │ │ │ │ └── models.h │ │ │ │ ├── cpp-self-this-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── Repo.cpp │ │ │ │ │ └── User.cpp │ │ │ │ ├── cpp-smart-ptr-dispatch/ │ │ │ │ │ └── src/ │ │ │ │ │ └── app.cpp │ │ │ │ ├── cpp-structured-binding/ │ │ │ │ │ ├── App.cpp │ │ │ │ │ ├── Repo.h │ │ │ │ │ └── User.h │ │ │ │ ├── cpp-variadic-resolution/ │ │ │ │ │ ├── logger.h │ │ │ │ │ └── main.cpp │ │ │ │ ├── cpp-write-access/ │ │ │ │ │ ├── models.h │ │ │ │ │ └── service.cpp │ │ │ │ ├── csharp-alias-imports/ │ │ │ │ │ ├── CsharpAlias.csproj │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── Repo.cs │ │ │ │ │ │ └── User.cs │ │ │ │ │ └── Services/ │ │ │ │ │ └── Main.cs │ │ │ │ ├── csharp-ambiguous/ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── Handler.cs │ │ │ │ │ │ └── IProcessor.cs │ │ │ │ │ ├── Other/ │ │ │ │ │ │ ├── Handler.cs │ │ │ │ │ │ └── IProcessor.cs │ │ │ │ │ └── Services/ │ │ │ │ │ └── UserHandler.cs │ │ │ │ ├── csharp-assignment-chain/ │ │ │ │ │ ├── AssignmentChain.csproj │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── Repo.cs │ │ │ │ │ │ └── User.cs │ │ │ │ │ └── Program.cs │ │ │ │ ├── csharp-async-binding/ │ │ │ │ │ ├── Order.cs │ │ │ │ │ ├── OrderService.cs │ │ │ │ │ ├── Program.cs │ │ │ │ │ ├── User.cs │ │ │ │ │ └── UserService.cs │ │ │ │ ├── csharp-call-result-binding/ │ │ │ │ │ └── App.cs │ │ │ │ ├── csharp-calls/ │ │ │ │ │ ├── CallProj.csproj │ │ │ │ │ ├── Services/ │ │ │ │ │ │ └── UserService.cs │ │ │ │ │ └── Utils/ │ │ │ │ │ ├── OneArg.cs │ │ │ │ │ └── ZeroArg.cs │ │ │ │ ├── csharp-chain-call/ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── Repo.cs │ │ │ │ │ │ └── User.cs │ │ │ │ │ ├── Program.cs │ │ │ │ │ └── Services/ │ │ │ │ │ └── UserService.cs │ │ │ │ ├── csharp-deep-field-chain/ │ │ │ │ │ ├── Models.cs │ │ │ │ │ └── Service.cs │ │ │ │ ├── csharp-dictionary-keys-values/ │ │ 
│ │ │ ├── App.cs │ │ │ │ │ ├── Repo.cs │ │ │ │ │ └── User.cs │ │ │ │ ├── csharp-field-types/ │ │ │ │ │ ├── Models.cs │ │ │ │ │ └── Service.cs │ │ │ │ ├── csharp-foreach/ │ │ │ │ │ ├── ForeachProj.csproj │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── Repo.cs │ │ │ │ │ │ └── User.cs │ │ │ │ │ └── Program.cs │ │ │ │ ├── csharp-generic-parent-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ └── Models/ │ │ │ │ │ ├── BaseModel.cs │ │ │ │ │ ├── Repo.cs │ │ │ │ │ └── User.cs │ │ │ │ ├── csharp-grandparent-resolution/ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── A.cs │ │ │ │ │ │ ├── B.cs │ │ │ │ │ │ ├── C.cs │ │ │ │ │ │ └── Greeting.cs │ │ │ │ │ └── Services/ │ │ │ │ │ └── App.cs │ │ │ │ ├── csharp-is-pattern/ │ │ │ │ │ ├── IsPatternProj.csproj │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Repo.cs │ │ │ │ │ │ └── User.cs │ │ │ │ │ └── services/ │ │ │ │ │ └── App.cs │ │ │ │ ├── csharp-local-shadow/ │ │ │ │ │ ├── App/ │ │ │ │ │ │ └── Main.cs │ │ │ │ │ └── Utils/ │ │ │ │ │ └── Logger.cs │ │ │ │ ├── csharp-member-calls/ │ │ │ │ │ ├── MemberCallProj.csproj │ │ │ │ │ ├── Models/ │ │ │ │ │ │ └── User.cs │ │ │ │ │ └── Services/ │ │ │ │ │ └── UserService.cs │ │ │ │ ├── csharp-method-chain-binding/ │ │ │ │ │ └── App.cs │ │ │ │ ├── csharp-mixed-decl-chain/ │ │ │ │ │ ├── MixedDeclChain.csproj │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── Repo.cs │ │ │ │ │ │ └── User.cs │ │ │ │ │ └── Program.cs │ │ │ │ ├── csharp-nested-member-foreach/ │ │ │ │ │ ├── App.cs │ │ │ │ │ ├── Repo.cs │ │ │ │ │ └── User.cs │ │ │ │ ├── csharp-null-check-narrowing/ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── Repo.cs │ │ │ │ │ │ └── User.cs │ │ │ │ │ └── Services/ │ │ │ │ │ └── App.cs │ │ │ │ ├── csharp-null-conditional/ │ │ │ │ │ ├── App.cs │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── Repo.cs │ │ │ │ │ │ └── User.cs │ │ │ │ │ └── NullConditional.csproj │ │ │ │ ├── csharp-optional-params/ │ │ │ │ │ └── Services/ │ │ │ │ │ └── App.cs │ │ │ │ ├── csharp-overload-param-types/ │ │ │ │ │ └── Models/ │ │ │ │ │ └── UserService.cs │ │ │ │ ├── csharp-parent-resolution/ │ │ 
│ │ │ └── src/ │ │ │ │ │ └── Models/ │ │ │ │ │ ├── BaseModel.cs │ │ │ │ │ ├── ISerializable.cs │ │ │ │ │ └── User.cs │ │ │ │ ├── csharp-pattern-matching/ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ └── Animal.cs │ │ │ │ │ ├── PatternMatchProj.csproj │ │ │ │ │ └── Services/ │ │ │ │ │ └── AnimalService.cs │ │ │ │ ├── csharp-primary-ctors/ │ │ │ │ │ ├── App.cs │ │ │ │ │ └── Models/ │ │ │ │ │ ├── Person.cs │ │ │ │ │ └── User.cs │ │ │ │ ├── csharp-proj/ │ │ │ │ │ ├── Interfaces/ │ │ │ │ │ │ └── IRepository.cs │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── BaseEntity.cs │ │ │ │ │ │ └── User.cs │ │ │ │ │ └── Services/ │ │ │ │ │ └── UserService.cs │ │ │ │ ├── csharp-receiver-resolution/ │ │ │ │ │ ├── App.cs │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── Repo.cs │ │ │ │ │ │ └── User.cs │ │ │ │ │ └── ReceiverProj.csproj │ │ │ │ ├── csharp-recursive-pattern/ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── Repo.cs │ │ │ │ │ │ └── User.cs │ │ │ │ │ ├── Program.cs │ │ │ │ │ └── RecursivePatternProj.csproj │ │ │ │ ├── csharp-return-type/ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── Repo.cs │ │ │ │ │ │ └── User.cs │ │ │ │ │ ├── ReturnType.csproj │ │ │ │ │ └── Services/ │ │ │ │ │ └── App.cs │ │ │ │ ├── csharp-self-this-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ └── Models/ │ │ │ │ │ ├── Repo.cs │ │ │ │ │ └── User.cs │ │ │ │ ├── csharp-super-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ └── Models/ │ │ │ │ │ ├── BaseModel.cs │ │ │ │ │ ├── Repo.cs │ │ │ │ │ └── User.cs │ │ │ │ ├── csharp-switch-pattern/ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── Repo.cs │ │ │ │ │ │ └── User.cs │ │ │ │ │ ├── Program.cs │ │ │ │ │ └── SwitchPattern.csproj │ │ │ │ ├── csharp-var-foreach/ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── Repo.cs │ │ │ │ │ │ └── User.cs │ │ │ │ │ ├── Program.cs │ │ │ │ │ └── VarForeach.csproj │ │ │ │ ├── csharp-variadic-resolution/ │ │ │ │ │ ├── Services/ │ │ │ │ │ │ └── App.cs │ │ │ │ │ ├── Utils/ │ │ │ │ │ │ └── Logger.cs │ │ │ │ │ └── VariadicProj.csproj │ │ │ │ ├── csharp-write-access/ │ │ │ │ │ ├── Models.cs │ │ │ │ │ └── Service.cs │ 
│ │ │ ├── field-types/ │ │ │ │ │ ├── models.ts │ │ │ │ │ └── service.ts │ │ │ │ ├── go-ambiguous/ │ │ │ │ │ ├── go.mod │ │ │ │ │ └── internal/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ └── handler.go │ │ │ │ │ ├── other/ │ │ │ │ │ │ └── handler.go │ │ │ │ │ └── services/ │ │ │ │ │ └── user.go │ │ │ │ ├── go-assignment-chain/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.go │ │ │ │ │ └── user.go │ │ │ │ ├── go-call-result-binding/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ └── user.go │ │ │ │ ├── go-calls/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── internal/ │ │ │ │ │ ├── onearg/ │ │ │ │ │ │ └── log.go │ │ │ │ │ └── zeroarg/ │ │ │ │ │ └── log.go │ │ │ │ ├── go-chain-call/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.go │ │ │ │ │ └── user.go │ │ │ │ ├── go-constructor-type-inference/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.go │ │ │ │ │ └── user.go │ │ │ │ ├── go-deep-field-chain/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ └── models.go │ │ │ │ ├── go-field-types/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ └── models.go │ │ │ │ ├── go-for-call-expr/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.go │ │ │ │ │ └── user.go │ │ │ │ ├── go-inc-dec-write-access/ │ │ │ │ │ └── main.go │ │ │ │ ├── go-local-shadow/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── internal/ │ │ │ │ │ └── utils/ │ │ │ │ │ └── utils.go │ │ │ │ ├── go-make-builtin/ │ │ │ │ │ ├── main.go │ │ │ │ │ └── models.go │ │ │ │ ├── go-map-range/ │ │ │ │ │ ├── main.go │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.go │ │ │ │ │ └── user.go │ │ │ │ ├── 
go-member-calls/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ └── user.go │ │ │ │ ├── go-method-chain-binding/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ └── user.go │ │ │ │ ├── go-mixed-chain/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ └── models.go │ │ │ │ ├── go-multi-assign/ │ │ │ │ │ ├── app.go │ │ │ │ │ └── models.go │ │ │ │ ├── go-multi-return-inference/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.go │ │ │ │ │ └── user.go │ │ │ │ ├── go-new-builtin/ │ │ │ │ │ ├── main.go │ │ │ │ │ └── models.go │ │ │ │ ├── go-nullable-receiver/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.go │ │ │ │ │ └── user.go │ │ │ │ ├── go-parent-resolution/ │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ ├── base.go │ │ │ │ │ └── user.go │ │ │ │ ├── go-pkg/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── internal/ │ │ │ │ │ ├── auth/ │ │ │ │ │ │ └── service.go │ │ │ │ │ └── models/ │ │ │ │ │ ├── admin.go │ │ │ │ │ ├── repository.go │ │ │ │ │ └── user.go │ │ │ │ ├── go-pointer-constructor-inference/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.go │ │ │ │ │ └── user.go │ │ │ │ ├── go-receiver-resolution/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.go │ │ │ │ │ └── user.go │ │ │ │ ├── go-return-type-inference/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.go │ │ │ │ │ └── user.go │ │ │ │ ├── go-struct-literals/ │ │ │ │ │ ├── app.go │ │ │ │ │ └── user.go │ │ │ │ ├── go-type-assertion/ │ │ │ │ │ ├── main.go │ │ │ │ │ └── models.go │ │ │ │ ├── go-variadic-resolution/ │ │ │ │ │ ├── cmd/ │ │ │ │ │ │ └── 
main.go │ │ │ │ │ ├── go.mod │ │ │ │ │ └── internal/ │ │ │ │ │ └── logger/ │ │ │ │ │ └── logger.go │ │ │ │ ├── go-write-access/ │ │ │ │ │ └── main.go │ │ │ │ ├── java-ambiguous/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Handler.java │ │ │ │ │ │ └── Processor.java │ │ │ │ │ ├── other/ │ │ │ │ │ │ ├── Handler.java │ │ │ │ │ │ └── Processor.java │ │ │ │ │ └── services/ │ │ │ │ │ └── UserHandler.java │ │ │ │ ├── java-assignment-chain/ │ │ │ │ │ ├── App.java │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.java │ │ │ │ │ └── User.java │ │ │ │ ├── java-call-result-binding/ │ │ │ │ │ ├── App.java │ │ │ │ │ └── User.java │ │ │ │ ├── java-calls/ │ │ │ │ │ ├── services/ │ │ │ │ │ │ └── UserService.java │ │ │ │ │ └── util/ │ │ │ │ │ ├── OneArg.java │ │ │ │ │ └── ZeroArg.java │ │ │ │ ├── java-chain-call/ │ │ │ │ │ ├── App.java │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Repo.java │ │ │ │ │ │ └── User.java │ │ │ │ │ └── services/ │ │ │ │ │ └── UserService.java │ │ │ │ ├── java-constructor-calls/ │ │ │ │ │ ├── App.java │ │ │ │ │ └── models/ │ │ │ │ │ └── User.java │ │ │ │ ├── java-constructor-type-inference/ │ │ │ │ │ ├── App.java │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.java │ │ │ │ │ └── User.java │ │ │ │ ├── java-deep-field-chain/ │ │ │ │ │ ├── App.java │ │ │ │ │ └── models/ │ │ │ │ │ ├── Address.java │ │ │ │ │ ├── City.java │ │ │ │ │ └── User.java │ │ │ │ ├── java-enum-static-call/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── App.java │ │ │ │ │ └── Status.java │ │ │ │ ├── java-field-types/ │ │ │ │ │ ├── App.java │ │ │ │ │ └── models/ │ │ │ │ │ ├── Address.java │ │ │ │ │ └── User.java │ │ │ │ ├── java-foreach/ │ │ │ │ │ ├── App.java │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.java │ │ │ │ │ └── User.java │ │ │ │ ├── java-foreach-call-expr/ │ │ │ │ │ ├── Main.java │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.java │ │ │ │ │ └── User.java │ │ │ │ ├── java-generic-parent-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── BaseModel.java │ │ │ │ │ ├── Repo.java │ │ │ │ │ └── User.java │ │ │ │ ├── 
java-grandparent-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── A.java │ │ │ │ │ │ ├── B.java │ │ │ │ │ │ ├── C.java │ │ │ │ │ │ └── Greeting.java │ │ │ │ │ └── services/ │ │ │ │ │ └── App.java │ │ │ │ ├── java-heritage/ │ │ │ │ │ ├── interfaces/ │ │ │ │ │ │ ├── Serializable.java │ │ │ │ │ │ └── Validatable.java │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── BaseModel.java │ │ │ │ │ │ └── User.java │ │ │ │ │ └── services/ │ │ │ │ │ └── UserService.java │ │ │ │ ├── java-instanceof-pattern/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Repo.java │ │ │ │ │ │ └── User.java │ │ │ │ │ └── services/ │ │ │ │ │ └── App.java │ │ │ │ ├── java-local-shadow/ │ │ │ │ │ └── src/ │ │ │ │ │ └── main/ │ │ │ │ │ └── java/ │ │ │ │ │ └── com/ │ │ │ │ │ └── example/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ └── Main.java │ │ │ │ │ └── utils/ │ │ │ │ │ └── Logger.java │ │ │ │ ├── java-map-keys-values/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── App.java │ │ │ │ │ ├── Repo.java │ │ │ │ │ └── User.java │ │ │ │ ├── java-member-calls/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ └── User.java │ │ │ │ │ └── services/ │ │ │ │ │ └── UserService.java │ │ │ │ ├── java-method-chain-binding/ │ │ │ │ │ ├── App.java │ │ │ │ │ └── Models.java │ │ │ │ ├── java-mixed-chain/ │ │ │ │ │ ├── App.java │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Address.java │ │ │ │ │ │ ├── City.java │ │ │ │ │ │ └── User.java │ │ │ │ │ └── services/ │ │ │ │ │ └── UserService.java │ │ │ │ ├── java-named-imports/ │ │ │ │ │ └── com/ │ │ │ │ │ └── example/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ └── Main.java │ │ │ │ │ ├── models/ │ │ │ │ │ │ └── User.java │ │ │ │ │ └── other/ │ │ │ │ │ └── User.java │ │ │ │ ├── java-nullable-receiver/ │ │ │ │ │ ├── App.java │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.java │ │ │ │ │ └── User.java │ │ │ │ ├── java-optional-receiver/ │ │ │ │ │ ├── App.java │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.java │ │ │ │ │ └── User.java │ │ │ │ ├── java-overload-param-types/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── User.java │ │ │ │ │ └── UserService.java │ │ 
│ │ ├── java-parent-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── interfaces/ │ │ │ │ │ │ └── Serializable.java │ │ │ │ │ └── models/ │ │ │ │ │ ├── BaseModel.java │ │ │ │ │ └── User.java │ │ │ │ ├── java-receiver-resolution/ │ │ │ │ │ ├── App.java │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.java │ │ │ │ │ └── User.java │ │ │ │ ├── java-return-type-inference/ │ │ │ │ │ ├── App.java │ │ │ │ │ ├── models/ │ │ │ │ │ │ └── User.java │ │ │ │ │ └── services/ │ │ │ │ │ └── UserService.java │ │ │ │ ├── java-self-this-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.java │ │ │ │ │ └── User.java │ │ │ │ ├── java-super-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── BaseModel.java │ │ │ │ │ ├── Repo.java │ │ │ │ │ └── User.java │ │ │ │ ├── java-switch-pattern/ │ │ │ │ │ ├── App.java │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.java │ │ │ │ │ └── User.java │ │ │ │ ├── java-variadic-resolution/ │ │ │ │ │ └── com/ │ │ │ │ │ └── example/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ └── Main.java │ │ │ │ │ └── util/ │ │ │ │ │ └── Logger.java │ │ │ │ ├── java-virtual-dispatch/ │ │ │ │ │ └── models/ │ │ │ │ │ └── App.java │ │ │ │ ├── java-write-access/ │ │ │ │ │ ├── App.java │ │ │ │ │ └── models/ │ │ │ │ │ ├── Address.java │ │ │ │ │ └── User.java │ │ │ │ ├── javascript-chain-call/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.js │ │ │ │ │ ├── repo.js │ │ │ │ │ ├── service.js │ │ │ │ │ └── user.js │ │ │ │ ├── javascript-constructor-type-inference/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.js │ │ │ │ │ ├── repo.js │ │ │ │ │ └── user.js │ │ │ │ ├── javascript-parent-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── Base.js │ │ │ │ │ └── User.js │ │ │ │ ├── javascript-self-this-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.js │ │ │ │ │ └── User.js │ │ │ │ ├── javascript-super-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── Base.js │ │ │ │ │ ├── Repo.js │ │ │ │ │ └── User.js │ │ │ │ ├── js-call-result-binding/ │ │ │ │ │ 
├── app.js │ │ │ │ │ ├── models.js │ │ │ │ │ └── service.js │ │ │ │ ├── js-field-types/ │ │ │ │ │ ├── models.js │ │ │ │ │ └── service.js │ │ │ │ ├── js-fixpoint-for-loop/ │ │ │ │ │ ├── app.js │ │ │ │ │ └── models.js │ │ │ │ ├── js-jsdoc-async-return-type/ │ │ │ │ │ ├── app.js │ │ │ │ │ ├── repo.js │ │ │ │ │ └── user.js │ │ │ │ ├── js-jsdoc-qualified-return-type/ │ │ │ │ │ ├── app.js │ │ │ │ │ └── user.js │ │ │ │ ├── js-jsdoc-return-type/ │ │ │ │ │ ├── app.js │ │ │ │ │ ├── repo.js │ │ │ │ │ └── user.js │ │ │ │ ├── js-nullable-receiver/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.js │ │ │ │ │ ├── repo.js │ │ │ │ │ └── user.js │ │ │ │ ├── js-object-destructuring/ │ │ │ │ │ ├── app.js │ │ │ │ │ ├── models.js │ │ │ │ │ └── service.js │ │ │ │ ├── js-write-access/ │ │ │ │ │ ├── models.js │ │ │ │ │ └── service.js │ │ │ │ ├── kotlin-alias-imports/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ └── App.kt │ │ │ │ │ └── models/ │ │ │ │ │ └── Models.kt │ │ │ │ ├── kotlin-ambiguous/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Handler.kt │ │ │ │ │ │ └── Runnable.kt │ │ │ │ │ ├── other/ │ │ │ │ │ │ ├── Handler.kt │ │ │ │ │ │ └── Runnable.kt │ │ │ │ │ └── services/ │ │ │ │ │ └── UserHandler.kt │ │ │ │ ├── kotlin-assignment-chain/ │ │ │ │ │ ├── App.kt │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.kt │ │ │ │ │ └── User.kt │ │ │ │ ├── kotlin-call-result-binding/ │ │ │ │ │ └── User.kt │ │ │ │ ├── kotlin-calls/ │ │ │ │ │ ├── services/ │ │ │ │ │ │ └── UserService.kt │ │ │ │ │ └── util/ │ │ │ │ │ ├── OneArg.kt │ │ │ │ │ └── ZeroArg.kt │ │ │ │ ├── kotlin-chain-call/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── App.kt │ │ │ │ │ ├── Repo.kt │ │ │ │ │ ├── User.kt │ │ │ │ │ └── UserService.kt │ │ │ │ ├── kotlin-class-method-chain/ │ │ │ │ │ ├── App.kt │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.kt │ │ │ │ │ └── User.kt │ │ │ │ ├── kotlin-constructor-calls/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ └── App.kt │ │ │ │ │ └── models/ │ │ │ │ │ └── User.kt │ │ │ │ ├── kotlin-constructor-type-inference/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Repo.kt │ │ │ │ 
│ │ └── User.kt │ │ │ │ │ └── services/ │ │ │ │ │ └── App.kt │ │ │ │ ├── kotlin-data-class-fields/ │ │ │ │ │ ├── Models.kt │ │ │ │ │ └── Service.kt │ │ │ │ ├── kotlin-deep-field-chain/ │ │ │ │ │ ├── Models.kt │ │ │ │ │ └── Service.kt │ │ │ │ ├── kotlin-default-params/ │ │ │ │ │ └── App.kt │ │ │ │ ├── kotlin-field-types/ │ │ │ │ │ ├── Models.kt │ │ │ │ │ └── Service.kt │ │ │ │ ├── kotlin-foreach/ │ │ │ │ │ ├── App.kt │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.kt │ │ │ │ │ └── User.kt │ │ │ │ ├── kotlin-foreach-call-expr/ │ │ │ │ │ ├── Main.kt │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.kt │ │ │ │ │ └── User.kt │ │ │ │ ├── kotlin-generic-parent-resolution/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── BaseModel.kt │ │ │ │ │ ├── Repo.kt │ │ │ │ │ └── User.kt │ │ │ │ ├── kotlin-grandparent-resolution/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── A.kt │ │ │ │ │ │ ├── B.kt │ │ │ │ │ │ ├── C.kt │ │ │ │ │ │ └── Greeting.kt │ │ │ │ │ └── services/ │ │ │ │ │ └── App.kt │ │ │ │ ├── kotlin-heritage/ │ │ │ │ │ ├── interfaces/ │ │ │ │ │ │ ├── Serializable.kt │ │ │ │ │ │ └── Validatable.kt │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── BaseModel.kt │ │ │ │ │ │ └── User.kt │ │ │ │ │ └── services/ │ │ │ │ │ └── UserService.kt │ │ │ │ ├── kotlin-local-shadow/ │ │ │ │ │ └── src/ │ │ │ │ │ └── main/ │ │ │ │ │ └── kotlin/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ └── Main.kt │ │ │ │ │ └── utils/ │ │ │ │ │ └── Logger.kt │ │ │ │ ├── kotlin-map-keys-values/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── App.kt │ │ │ │ │ ├── Repo.kt │ │ │ │ │ └── User.kt │ │ │ │ ├── kotlin-member-calls/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ └── User.kt │ │ │ │ │ └── services/ │ │ │ │ │ └── UserService.kt │ │ │ │ ├── kotlin-method-chain-binding/ │ │ │ │ │ └── Models.kt │ │ │ │ ├── kotlin-null-check-narrowing/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Repo.kt │ │ │ │ │ │ └── User.kt │ │ │ │ │ └── services/ │ │ │ │ │ └── App.kt │ │ │ │ ├── kotlin-nullable-receiver/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Repo.kt │ │ │ │ │ │ └── User.kt │ │ │ │ │ └── services/ │ │ │ │ │ └── 
App.kt │ │ │ │ ├── kotlin-overload-param-types/ │ │ │ │ │ └── services/ │ │ │ │ │ └── UserService.kt │ │ │ │ ├── kotlin-parent-resolution/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── BaseModel.kt │ │ │ │ │ ├── Serializable.kt │ │ │ │ │ └── User.kt │ │ │ │ ├── kotlin-receiver-resolution/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Repo.kt │ │ │ │ │ │ └── User.kt │ │ │ │ │ └── services/ │ │ │ │ │ └── App.kt │ │ │ │ ├── kotlin-return-type/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Repo.kt │ │ │ │ │ │ └── User.kt │ │ │ │ │ └── services/ │ │ │ │ │ └── App.kt │ │ │ │ ├── kotlin-self-this-resolution/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── AppConfig.kt │ │ │ │ │ ├── Repo.kt │ │ │ │ │ └── User.kt │ │ │ │ ├── kotlin-super-resolution/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── BaseModel.kt │ │ │ │ │ ├── Repo.kt │ │ │ │ │ └── User.kt │ │ │ │ ├── kotlin-var-foreach/ │ │ │ │ │ ├── app.kt │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.kt │ │ │ │ │ └── User.kt │ │ │ │ ├── kotlin-variadic-resolution/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ └── App.kt │ │ │ │ │ └── util/ │ │ │ │ │ └── Logger.kt │ │ │ │ ├── kotlin-virtual-dispatch/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Animal.kt │ │ │ │ │ │ └── Dog.kt │ │ │ │ │ └── services/ │ │ │ │ │ └── App.kt │ │ │ │ ├── kotlin-when-complex/ │ │ │ │ │ ├── App.kt │ │ │ │ │ └── models/ │ │ │ │ │ ├── Admin.kt │ │ │ │ │ ├── Repo.kt │ │ │ │ │ └── User.kt │ │ │ │ ├── kotlin-when-pattern/ │ │ │ │ │ ├── App.kt │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.kt │ │ │ │ │ └── User.kt │ │ │ │ ├── kotlin-write-access/ │ │ │ │ │ ├── Models.kt │ │ │ │ │ └── Service.kt │ │ │ │ ├── php-alias-imports/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ │ ├── Repo.php │ │ │ │ │ │ │ └── User.php │ │ │ │ │ │ └── Services/ │ │ │ │ │ │ └── Main.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-ambiguous/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ │ ├── Dispatchable.php │ │ │ │ │ │ │ └── Handler.php │ │ │ │ │ │ ├── Other/ │ │ │ │ │ │ │ ├── Dispatchable.php │ │ │ │ │ │ │ └── Handler.php │ │ │ │ │ │ └── 
Services/ │ │ │ │ │ │ └── UserHandler.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-app/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ ├── Contracts/ │ │ │ │ │ │ │ ├── Loggable.php │ │ │ │ │ │ │ └── Repository.php │ │ │ │ │ │ ├── Enums/ │ │ │ │ │ │ │ └── UserRole.php │ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ │ ├── BaseModel.php │ │ │ │ │ │ │ └── User.php │ │ │ │ │ │ ├── Services/ │ │ │ │ │ │ │ └── UserService.php │ │ │ │ │ │ └── Traits/ │ │ │ │ │ │ ├── HasTimestamps.php │ │ │ │ │ │ └── SoftDeletes.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-assignment-chain/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ │ ├── Repo.php │ │ │ │ │ │ │ └── User.php │ │ │ │ │ │ └── Services/ │ │ │ │ │ │ └── AppService.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-call-result-binding/ │ │ │ │ │ └── App.php │ │ │ │ ├── php-calls/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ ├── Services/ │ │ │ │ │ │ │ └── UserService.php │ │ │ │ │ │ └── Utils/ │ │ │ │ │ │ ├── OneArg/ │ │ │ │ │ │ │ └── log.php │ │ │ │ │ │ └── ZeroArg/ │ │ │ │ │ │ └── log.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-constructor-calls/ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ └── User.php │ │ │ │ │ └── app.php │ │ │ │ ├── php-constructor-promotion-fields/ │ │ │ │ │ ├── Models.php │ │ │ │ │ └── Service.php │ │ │ │ ├── php-constructor-type-inference/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ │ ├── Repo.php │ │ │ │ │ │ │ └── User.php │ │ │ │ │ │ └── Services/ │ │ │ │ │ │ └── AppService.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-deep-field-chain/ │ │ │ │ │ ├── Models.php │ │ │ │ │ └── Service.php │ │ │ │ ├── php-default-params/ │ │ │ │ │ └── app.php │ │ │ │ ├── php-field-types/ │ │ │ │ │ ├── Models.php │ │ │ │ │ └── Service.php │ │ │ │ ├── php-foreach-call-expr/ │ │ │ │ │ ├── Repo.php │ │ │ │ │ ├── User.php │ │ │ │ │ └── main.php │ │ │ │ ├── php-foreach-generic/ │ │ │ │ │ ├── App.php │ │ │ │ │ ├── Repo.php │ │ │ │ │ └── User.php │ │ │ │ ├── php-foreach-loop/ │ │ │ │ │ ├── App.php │ │ │ │ │ ├── Repo.php │ │ │ │ │ └── User.php │ │ │ │ 
├── php-foreach-member-access/ │ │ │ │ │ ├── App.php │ │ │ │ │ ├── Repo.php │ │ │ │ │ └── User.php │ │ │ │ ├── php-grandparent-resolution/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ │ ├── A.php │ │ │ │ │ │ │ ├── B.php │ │ │ │ │ │ │ ├── C.php │ │ │ │ │ │ │ └── Greeting.php │ │ │ │ │ │ └── Services/ │ │ │ │ │ │ └── App.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-grouped-imports/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ │ ├── Repo.php │ │ │ │ │ │ │ └── User.php │ │ │ │ │ │ └── Services/ │ │ │ │ │ │ └── Main.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-local-shadow/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ ├── Services/ │ │ │ │ │ │ │ └── Main.php │ │ │ │ │ │ └── Utils/ │ │ │ │ │ │ └── Logger.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-member-calls/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ │ └── User.php │ │ │ │ │ │ └── Services/ │ │ │ │ │ │ └── UserService.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-method-chain-binding/ │ │ │ │ │ └── App.php │ │ │ │ ├── php-nullable-receiver/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ │ ├── Repo.php │ │ │ │ │ │ │ └── User.php │ │ │ │ │ │ └── Services/ │ │ │ │ │ │ └── AppService.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-parent-resolution/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ └── Models/ │ │ │ │ │ │ ├── BaseModel.php │ │ │ │ │ │ ├── Serializable.php │ │ │ │ │ │ └── User.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-phpdoc-attribute-return-type/ │ │ │ │ │ ├── Models.php │ │ │ │ │ └── Services.php │ │ │ │ ├── php-phpdoc-return-type/ │ │ │ │ │ ├── Models.php │ │ │ │ │ └── Services.php │ │ │ │ ├── php-property-promotion/ │ │ │ │ │ └── UserService.php │ │ │ │ ├── php-receiver-resolution/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ │ ├── Repo.php │ │ │ │ │ │ │ └── User.php │ │ │ │ │ │ └── Services/ │ │ │ │ │ │ └── AppService.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-return-type/ │ │ │ │ │ └── app/ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── Repo.php │ │ │ │ │ │ └── User.php 
│ │ │ │ │ └── Services/ │ │ │ │ │ └── UserService.php │ │ │ │ ├── php-self-this-resolution/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ └── Models/ │ │ │ │ │ │ ├── Repo.php │ │ │ │ │ │ └── User.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-super-resolution/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ └── Models/ │ │ │ │ │ │ ├── BaseModel.php │ │ │ │ │ │ ├── Repo.php │ │ │ │ │ │ └── User.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-this-receiver-disambiguation/ │ │ │ │ │ ├── AdminService.php │ │ │ │ │ ├── Models.php │ │ │ │ │ └── UserService.php │ │ │ │ ├── php-typed-properties/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ │ └── UserRepo.php │ │ │ │ │ │ └── Services/ │ │ │ │ │ │ └── UserService.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-variadic-resolution/ │ │ │ │ │ ├── app/ │ │ │ │ │ │ ├── Services/ │ │ │ │ │ │ │ └── AppService.php │ │ │ │ │ │ └── Utils/ │ │ │ │ │ │ └── Logger.php │ │ │ │ │ └── composer.json │ │ │ │ ├── php-write-access/ │ │ │ │ │ ├── models.php │ │ │ │ │ └── service.php │ │ │ │ ├── python-alias-imports/ │ │ │ │ │ ├── app.py │ │ │ │ │ └── models.py │ │ │ │ ├── python-ambiguous/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── handler.py │ │ │ │ │ ├── other/ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── handler.py │ │ │ │ │ └── services/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── user_handler.py │ │ │ │ ├── python-assignment-chain/ │ │ │ │ │ ├── app.py │ │ │ │ │ ├── repo.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-bare-import/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ └── user.py │ │ │ │ │ └── services/ │ │ │ │ │ ├── auth.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-call-result-binding/ │ │ │ │ │ ├── app.py │ │ │ │ │ ├── models.py │ │ │ │ │ └── service.py │ │ │ │ ├── python-calls/ │ │ │ │ │ ├── one.py │ │ │ │ │ ├── service.py │ │ │ │ │ └── zero.py │ │ │ │ ├── python-chain-call/ │ │ │ │ │ ├── app.py │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── repo.py │ │ │ │ │ │ └── user.py │ │ │ │ │ └── service.py │ │ │ │ ├── python-class-annotations/ │ │ │ │ │ ├── repo.py 
│ │ │ │ │ ├── service.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-constructor-calls/ │ │ │ │ │ ├── app.py │ │ │ │ │ └── models.py │ │ │ │ ├── python-constructor-type-inference/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── repo.py │ │ │ │ │ │ └── user.py │ │ │ │ │ └── services/ │ │ │ │ │ └── app.py │ │ │ │ ├── python-default-params/ │ │ │ │ │ └── app.py │ │ │ │ ├── python-dict-items-loop/ │ │ │ │ │ ├── app.py │ │ │ │ │ ├── repo.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-enumerate-loop/ │ │ │ │ │ ├── app.py │ │ │ │ │ ├── repo.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-field-type-disambig/ │ │ │ │ │ ├── address.py │ │ │ │ │ ├── service.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-field-types/ │ │ │ │ │ ├── models.py │ │ │ │ │ └── service.py │ │ │ │ ├── python-for-call-expr/ │ │ │ │ │ ├── main.py │ │ │ │ │ └── models.py │ │ │ │ ├── python-grandparent-resolution/ │ │ │ │ │ ├── app.py │ │ │ │ │ └── models/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── a.py │ │ │ │ │ ├── b.py │ │ │ │ │ ├── c.py │ │ │ │ │ └── greeting.py │ │ │ │ ├── python-local-shadow/ │ │ │ │ │ ├── app.py │ │ │ │ │ └── utils.py │ │ │ │ ├── python-match-case/ │ │ │ │ │ ├── app.py │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-member-access-for-loop/ │ │ │ │ │ ├── app.py │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-member-calls/ │ │ │ │ │ ├── app.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-method-chain-binding/ │ │ │ │ │ ├── app.py │ │ │ │ │ └── models.py │ │ │ │ ├── python-named-imports/ │ │ │ │ │ ├── app.py │ │ │ │ │ ├── format_prefix.py │ │ │ │ │ └── format_upper.py │ │ │ │ ├── python-nullable-chain/ │ │ │ │ │ ├── app.py │ │ │ │ │ ├── repo.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-nullable-receiver/ │ │ │ │ │ ├── app.py │ │ │ │ │ ├── repo.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-parent-resolution/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-pkg/ │ │ │ │ │ ├── models/ 
│ │ │ │ │ │ ├── base.py │ │ │ │ │ │ └── user.py │ │ │ │ │ ├── services/ │ │ │ │ │ │ └── auth.py │ │ │ │ │ └── utils/ │ │ │ │ │ └── helpers.py │ │ │ │ ├── python-qualified-constructor/ │ │ │ │ │ ├── main.py │ │ │ │ │ └── models.py │ │ │ │ ├── python-receiver-resolution/ │ │ │ │ │ ├── app.py │ │ │ │ │ ├── repo.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-reexport-chain/ │ │ │ │ │ ├── app.py │ │ │ │ │ └── models/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── base.py │ │ │ │ ├── python-return-type-inference/ │ │ │ │ │ ├── app.py │ │ │ │ │ ├── models.py │ │ │ │ │ └── service.py │ │ │ │ ├── python-self-this-resolution/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-static-class-methods/ │ │ │ │ │ ├── app.py │ │ │ │ │ └── service.py │ │ │ │ ├── python-super-resolution/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── base.py │ │ │ │ │ ├── repo.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-variadic-resolution/ │ │ │ │ │ ├── app.py │ │ │ │ │ └── logger.py │ │ │ │ ├── python-walrus-chain/ │ │ │ │ │ ├── app.py │ │ │ │ │ ├── repo.py │ │ │ │ │ └── user.py │ │ │ │ ├── python-walrus-operator/ │ │ │ │ │ ├── main.py │ │ │ │ │ └── models.py │ │ │ │ ├── python-write-access/ │ │ │ │ │ ├── models.py │ │ │ │ │ └── service.py │ │ │ │ ├── ruby-ambiguous/ │ │ │ │ │ ├── lib/ │ │ │ │ │ │ └── user_handler.rb │ │ │ │ │ ├── models/ │ │ │ │ │ │ └── handler.rb │ │ │ │ │ └── other/ │ │ │ │ │ └── handler.rb │ │ │ │ ├── ruby-app/ │ │ │ │ │ └── lib/ │ │ │ │ │ ├── base_model.rb │ │ │ │ │ ├── concerns/ │ │ │ │ │ │ ├── cacheable.rb │ │ │ │ │ │ ├── loggable.rb │ │ │ │ │ │ └── serializable.rb │ │ │ │ │ ├── service.rb │ │ │ │ │ └── user.rb │ │ │ │ ├── ruby-call-result-binding/ │ │ │ │ │ └── app.rb │ │ │ │ ├── ruby-calls/ │ │ │ │ │ └── lib/ │ │ │ │ │ ├── one_arg.rb │ │ │ │ │ ├── service.rb │ │ │ │ │ └── two_args.rb │ │ │ │ ├── ruby-chain-call/ │ │ │ │ │ └── lib/ │ │ │ │ │ ├── app.rb │ │ │ │ │ ├── repo.rb │ │ │ │ │ ├── user.rb │ │ │ │ │ └── user_service.rb │ │ │ │ 
├── ruby-constant-constructor/ │ │ │ │ │ ├── app.rb │ │ │ │ │ └── models.rb │ │ │ │ ├── ruby-constant-factory-call/ │ │ │ │ │ ├── admin_service.rb │ │ │ │ │ ├── app.rb │ │ │ │ │ └── user_service.rb │ │ │ │ ├── ruby-constructor-type-inference/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── repo.rb │ │ │ │ │ │ └── user.rb │ │ │ │ │ └── services/ │ │ │ │ │ └── app.rb │ │ │ │ ├── ruby-default-params/ │ │ │ │ │ └── app.rb │ │ │ │ ├── ruby-field-type-disambig/ │ │ │ │ │ ├── address.rb │ │ │ │ │ ├── service.rb │ │ │ │ │ └── user.rb │ │ │ │ ├── ruby-field-types/ │ │ │ │ │ ├── models.rb │ │ │ │ │ └── service.rb │ │ │ │ ├── ruby-for-in-loop/ │ │ │ │ │ ├── app.rb │ │ │ │ │ ├── repo.rb │ │ │ │ │ └── user.rb │ │ │ │ ├── ruby-grandparent-resolution/ │ │ │ │ │ └── lib/ │ │ │ │ │ ├── app.rb │ │ │ │ │ └── models/ │ │ │ │ │ ├── a.rb │ │ │ │ │ ├── b.rb │ │ │ │ │ ├── c.rb │ │ │ │ │ └── greeting.rb │ │ │ │ ├── ruby-local-shadow/ │ │ │ │ │ └── lib/ │ │ │ │ │ ├── app.rb │ │ │ │ │ └── utils.rb │ │ │ │ ├── ruby-member-calls/ │ │ │ │ │ └── lib/ │ │ │ │ │ ├── app.rb │ │ │ │ │ └── user.rb │ │ │ │ ├── ruby-method-chain-binding/ │ │ │ │ │ └── app.rb │ │ │ │ ├── ruby-namespaced-constructor/ │ │ │ │ │ ├── app.rb │ │ │ │ │ └── models/ │ │ │ │ │ └── user_service.rb │ │ │ │ ├── ruby-parent-resolution/ │ │ │ │ │ └── lib/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── base_model.rb │ │ │ │ │ ├── serializable.rb │ │ │ │ │ └── user.rb │ │ │ │ ├── ruby-return-type/ │ │ │ │ │ ├── app.rb │ │ │ │ │ ├── models.rb │ │ │ │ │ └── repo.rb │ │ │ │ ├── ruby-self-this-resolution/ │ │ │ │ │ └── lib/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.rb │ │ │ │ │ └── user.rb │ │ │ │ ├── ruby-super-resolution/ │ │ │ │ │ └── lib/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── base_model.rb │ │ │ │ │ ├── repo.rb │ │ │ │ │ └── user.rb │ │ │ │ ├── ruby-write-access/ │ │ │ │ │ ├── models.rb │ │ │ │ │ └── service.rb │ │ │ │ ├── ruby-yard-annotations/ │ │ │ │ │ ├── models.rb │ │ │ │ │ └── service.rb │ │ │ │ ├── ruby-yard-generics/ │ │ │ │ │ ├── models.rb │ │ │ │ │ 
└── service.rb │ │ │ │ ├── rust-alias-imports/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ └── models.rs │ │ │ │ ├── rust-ambiguous/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── handler.rs │ │ │ │ │ │ └── mod.rs │ │ │ │ │ ├── other/ │ │ │ │ │ │ ├── handler.rs │ │ │ │ │ │ └── mod.rs │ │ │ │ │ └── services/ │ │ │ │ │ └── mod.rs │ │ │ │ ├── rust-assignment-chain/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── repo.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-async-binding/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── repo.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-call-result-binding/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ └── models.rs │ │ │ │ ├── rust-calls/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── onearg/ │ │ │ │ │ │ └── mod.rs │ │ │ │ │ └── zeroarg/ │ │ │ │ │ └── mod.rs │ │ │ │ ├── rust-chain-call/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ └── models/ │ │ │ │ │ ├── mod.rs │ │ │ │ │ ├── repo.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-constructor-type-inference/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── repo.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-deep-field-chain/ │ │ │ │ │ ├── models.rs │ │ │ │ │ └── service.rs │ │ │ │ ├── rust-default-constructor/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── repo.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-err-unwrap/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── error.rs │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── repo.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-field-types/ │ │ │ │ │ ├── models.rs │ │ │ │ │ └── service.rs │ │ │ │ ├── rust-for-call-expr/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── repo.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-for-loop/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── repo.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-grouped-imports/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── helpers/ │ │ │ │ │ │ └── mod.rs │ │ │ │ │ └── main.rs │ │ │ │ ├── rust-if-let/ │ │ │ │ │ ├── main.rs │ │ │ │ │ └── models.rs │ │ │ 
│ ├── rust-if-let-unwrap/ │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── mod.rs │ │ │ │ │ │ ├── repo.rs │ │ │ │ │ │ └── user.rs │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── repo.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-iter-for-loop/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── repo.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-local-shadow/ │ │ │ │ │ ├── Cargo.toml │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ └── utils.rs │ │ │ │ ├── rust-match-unwrap/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── repo.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-member-calls/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-method-chain-binding/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ └── models.rs │ │ │ │ ├── rust-nullable-receiver/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── repo.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-option-receiver/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── repo.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-parent-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── lib.rs │ │ │ │ │ ├── serializable.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-receiver-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── repo.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-reexport-chain/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ └── models/ │ │ │ │ │ ├── handler.rs │ │ │ │ │ └── mod.rs │ │ │ │ ├── rust-return-type/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ └── models.rs │ │ │ │ ├── rust-return-type-inference/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── main.rs │ │ │ │ │ └── models.rs │ │ │ │ ├── rust-self-struct-literal/ │ │ │ │ │ ├── main.rs │ │ │ │ │ └── models.rs │ │ │ │ ├── rust-self-this-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── repo.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-struct-destructuring/ │ │ │ │ │ ├── main.rs │ │ │ │ │ ├── point.rs │ │ │ │ │ └── vec2.rs │ │ │ │ ├── rust-struct-literal-inference/ │ │ │ │ │ ├── main.rs │ │ │ │ │ └── models.rs │ │ │ │ ├── 
rust-struct-literals/ │ │ │ │ │ ├── app.rs │ │ │ │ │ └── user.rs │ │ │ │ ├── rust-traits/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── impls/ │ │ │ │ │ │ └── button.rs │ │ │ │ │ ├── main.rs │ │ │ │ │ └── traits/ │ │ │ │ │ ├── clickable.rs │ │ │ │ │ └── drawable.rs │ │ │ │ ├── rust-write-access/ │ │ │ │ │ ├── models.rs │ │ │ │ │ └── service.rs │ │ │ │ ├── swift-constructor-type-inference/ │ │ │ │ │ ├── Models/ │ │ │ │ │ │ ├── Repo.swift │ │ │ │ │ │ └── User.swift │ │ │ │ │ └── Services/ │ │ │ │ │ └── App.swift │ │ │ │ ├── swift-init-cross-file/ │ │ │ │ │ ├── User.swift │ │ │ │ │ └── main.swift │ │ │ │ ├── swift-parent-resolution/ │ │ │ │ │ └── Sources/ │ │ │ │ │ └── Models/ │ │ │ │ │ ├── BaseModel.swift │ │ │ │ │ ├── Serializable.swift │ │ │ │ │ └── User.swift │ │ │ │ ├── swift-return-type/ │ │ │ │ │ ├── App.swift │ │ │ │ │ └── Models.swift │ │ │ │ ├── swift-return-type-inference/ │ │ │ │ │ ├── App.swift │ │ │ │ │ └── Models.swift │ │ │ │ ├── swift-self-this-resolution/ │ │ │ │ │ └── Sources/ │ │ │ │ │ └── Models/ │ │ │ │ │ ├── Repo.swift │ │ │ │ │ └── User.swift │ │ │ │ ├── ts-assignment-chain/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── repo.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── ts-call-result-binding/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── models.ts │ │ │ │ │ └── service.ts │ │ │ │ ├── ts-class-field-foreach/ │ │ │ │ │ ├── app.ts │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── ts-deep-field-chain/ │ │ │ │ │ ├── models.ts │ │ │ │ │ └── service.ts │ │ │ │ ├── ts-field-type-disambig/ │ │ │ │ │ ├── address.ts │ │ │ │ │ ├── service.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── ts-fixpoint-for-loop/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ └── models.ts │ │ │ │ ├── ts-grandparent-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── base.ts │ │ │ │ │ ├── derived.ts │ │ │ │ │ ├── greeting.ts │ │ │ │ │ └── middle.ts │ │ │ │ ├── ts-method-chain-binding/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── models.ts │ │ │ │ │ └── service.ts │ │ │ │ ├── 
ts-mixed-chain/ │ │ │ │ │ ├── models.ts │ │ │ │ │ └── service.ts │ │ │ │ ├── ts-multi-hop-chain/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── repo.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── ts-null-check-narrowing/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ └── models.ts │ │ │ │ ├── ts-nullable-chain/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── repo.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── ts-nullable-receiver/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── repo.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── ts-object-destructuring/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── models.ts │ │ │ │ │ └── service.ts │ │ │ │ ├── ts-optional-params/ │ │ │ │ │ └── src/ │ │ │ │ │ └── app.ts │ │ │ │ ├── ts-overload-disambiguation/ │ │ │ │ │ └── src/ │ │ │ │ │ └── app.ts │ │ │ │ ├── ts-param-property-fields/ │ │ │ │ │ ├── models.ts │ │ │ │ │ └── service.ts │ │ │ │ ├── ts-readonly-foreach/ │ │ │ │ │ ├── app.ts │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── ts-return-type-inference/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── models.ts │ │ │ │ │ └── service.ts │ │ │ │ ├── ts-virtual-dispatch/ │ │ │ │ │ └── src/ │ │ │ │ │ └── app.ts │ │ │ │ ├── ts-write-access/ │ │ │ │ │ ├── models.ts │ │ │ │ │ └── service.ts │ │ │ │ ├── typescript-alias-imports/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ └── models.ts │ │ │ │ ├── typescript-ambiguous/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── logger.ts │ │ │ │ │ ├── models.ts │ │ │ │ │ └── service.ts │ │ │ │ ├── typescript-calls/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── one.ts │ │ │ │ │ ├── service.ts │ │ │ │ │ └── zero.ts │ │ │ │ ├── typescript-cast-constructor-inference/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── repo.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── typescript-chain-call/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Repo.ts │ │ │ │ │ │ └── User.ts │ │ │ │ │ └── services/ │ │ │ │ │ └── UserService.ts │ │ │ │ ├── typescript-constructor-calls/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts 
│ │ │ │ │ └── user.ts │ │ │ │ ├── typescript-constructor-type-inference/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── repo.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── typescript-destructured-for-of/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── repo.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── typescript-double-cast-inference/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── repo.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── typescript-for-of-call-expr/ │ │ │ │ │ ├── main.ts │ │ │ │ │ └── models/ │ │ │ │ │ ├── repo.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── typescript-generic-parent-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── Base.ts │ │ │ │ │ ├── Repo.ts │ │ │ │ │ └── User.ts │ │ │ │ ├── typescript-instanceof-narrowing/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── repo.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── typescript-local-shadow/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ └── utils.ts │ │ │ │ ├── typescript-member-access-for-loop/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.ts │ │ │ │ │ └── User.ts │ │ │ │ ├── typescript-member-calls/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── typescript-named-imports/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── format-prefix.ts │ │ │ │ │ └── format-upper.ts │ │ │ │ ├── typescript-overloaded-receiver/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── db/ │ │ │ │ │ │ ├── Cache.ts │ │ │ │ │ │ └── Database.ts │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.ts │ │ │ │ │ └── User.ts │ │ │ │ ├── typescript-parent-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── Base.ts │ │ │ │ │ └── User.ts │ │ │ │ ├── typescript-receiver-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── repo.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── typescript-reexport-chain/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── base.ts │ │ │ │ │ └── models.ts │ │ │ │ ├── typescript-reexport-type/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── 
base.ts │ │ │ │ │ └── models.ts │ │ │ │ ├── typescript-scoped-receiver/ │ │ │ │ │ └── src/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── repo.ts │ │ │ │ │ └── user.ts │ │ │ │ ├── typescript-self-this-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── Repo.ts │ │ │ │ │ └── User.ts │ │ │ │ ├── typescript-static-chain/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Repo.ts │ │ │ │ │ │ └── User.ts │ │ │ │ │ └── services/ │ │ │ │ │ └── UserService.ts │ │ │ │ ├── typescript-super-resolution/ │ │ │ │ │ └── src/ │ │ │ │ │ └── models/ │ │ │ │ │ ├── Base.ts │ │ │ │ │ ├── Repo.ts │ │ │ │ │ └── User.ts │ │ │ │ ├── typescript-typed-param-chain/ │ │ │ │ │ ├── app.ts │ │ │ │ │ ├── models/ │ │ │ │ │ │ ├── Repo.ts │ │ │ │ │ │ └── User.ts │ │ │ │ │ └── services/ │ │ │ │ │ └── UserService.ts │ │ │ │ └── typescript-variadic-resolution/ │ │ │ │ └── src/ │ │ │ │ ├── app.ts │ │ │ │ └── logger.ts │ │ │ ├── local-backend-seed.ts │ │ │ ├── mini-repo/ │ │ │ │ └── src/ │ │ │ │ ├── db.ts │ │ │ │ ├── formatter.ts │ │ │ │ ├── handler.ts │ │ │ │ ├── index.ts │ │ │ │ ├── logger.ts │ │ │ │ ├── middleware.ts │ │ │ │ └── validator.ts │ │ │ ├── sample-code/ │ │ │ │ ├── simple.c │ │ │ │ ├── simple.cpp │ │ │ │ ├── simple.cs │ │ │ │ ├── simple.go │ │ │ │ ├── simple.java │ │ │ │ ├── simple.js │ │ │ │ ├── simple.php │ │ │ │ ├── simple.py │ │ │ │ ├── simple.rs │ │ │ │ ├── simple.swift │ │ │ │ ├── simple.ts │ │ │ │ ├── simple.tsx │ │ │ │ └── swift-extension.swift │ │ │ └── search-seed.ts │ │ ├── global-setup.ts │ │ ├── helpers/ │ │ │ ├── test-db.ts │ │ │ ├── test-graph.ts │ │ │ └── test-indexed-db.ts │ │ ├── integration/ │ │ │ ├── augmentation.test.ts │ │ │ ├── cli-e2e.test.ts │ │ │ ├── csv-pipeline.test.ts │ │ │ ├── enrichment.test.ts │ │ │ ├── filesystem-walker.test.ts │ │ │ ├── has-method.test.ts │ │ │ ├── hooks-e2e.test.ts │ │ │ ├── ignore-and-skip-e2e.test.ts │ │ │ ├── lbug-core-adapter.test.ts │ │ │ ├── lbug-pool-stability.test.ts │ │ │ ├── lbug-pool.test.ts │ │ │ ├── 
local-backend-calltool.test.ts │ │ │ ├── local-backend.test.ts │ │ │ ├── parsing.test.ts │ │ │ ├── pipeline.test.ts │ │ │ ├── query-compilation.test.ts │ │ │ ├── resolvers/ │ │ │ │ ├── cpp.test.ts │ │ │ │ ├── csharp.test.ts │ │ │ │ ├── go.test.ts │ │ │ │ ├── helpers.ts │ │ │ │ ├── java.test.ts │ │ │ │ ├── javascript.test.ts │ │ │ │ ├── kotlin.test.ts │ │ │ │ ├── php.test.ts │ │ │ │ ├── python.test.ts │ │ │ │ ├── ruby.test.ts │ │ │ │ ├── rust.test.ts │ │ │ │ ├── swift.test.ts │ │ │ │ └── typescript.test.ts │ │ │ ├── search-core.test.ts │ │ │ ├── search-pool.test.ts │ │ │ ├── setup-skills.test.ts │ │ │ ├── skills-e2e.test.ts │ │ │ ├── tree-sitter-languages.test.ts │ │ │ └── worker-pool.test.ts │ │ ├── unit/ │ │ │ ├── ai-context.test.ts │ │ │ ├── ast-cache.test.ts │ │ │ ├── bm25-search.test.ts │ │ │ ├── call-form.test.ts │ │ │ ├── call-processor.test.ts │ │ │ ├── call-routing.test.ts │ │ │ ├── calltool-dispatch.test.ts │ │ │ ├── cli-commands.test.ts │ │ │ ├── cli-index-help.test.ts │ │ │ ├── cohesion-consistency.test.ts │ │ │ ├── community-processor.test.ts │ │ │ ├── compatible-stdio-transport.test.ts │ │ │ ├── csv-escaping.test.ts │ │ │ ├── embedder.test.ts │ │ │ ├── entry-point-scoring.test.ts │ │ │ ├── eval-formatters.test.ts │ │ │ ├── extract-element-type-from-string.test.ts │ │ │ ├── extract-generic-type-args.test.ts │ │ │ ├── framework-detection.test.ts │ │ │ ├── git.test.ts │ │ │ ├── graph.test.ts │ │ │ ├── has-method.test.ts │ │ │ ├── heritage-processor.test.ts │ │ │ ├── hooks.test.ts │ │ │ ├── hybrid-search.test.ts │ │ │ ├── ignore-service.test.ts │ │ │ ├── import-processor.test.ts │ │ │ ├── ingestion-utils.test.ts │ │ │ ├── language-skip.test.ts │ │ │ ├── lazy-action.test.ts │ │ │ ├── method-signature.test.ts │ │ │ ├── mro-processor.test.ts │ │ │ ├── named-binding-extraction.test.ts │ │ │ ├── parser-loader.test.ts │ │ │ ├── pipeline-exports.test.ts │ │ │ ├── process-processor.test.ts │ │ │ ├── repo-manager.test.ts │ │ │ ├── resources.test.ts │ │ │ ├── 
schema.test.ts │ │ │ ├── security.test.ts │ │ │ ├── sequential-language-availability.test.ts │ │ │ ├── server.test.ts │ │ │ ├── shared-type-extractors.test.ts │ │ │ ├── skill-gen.test.ts │ │ │ ├── staleness.test.ts │ │ │ ├── structure-processor.test.ts │ │ │ ├── suffix-index-ambiguity.test.ts │ │ │ ├── symbol-resolver.test.ts │ │ │ ├── symbol-table.test.ts │ │ │ ├── tools.test.ts │ │ │ ├── tree-sitter-queries.test.ts │ │ │ ├── type-env.test.ts │ │ │ └── utils.test.ts │ │ ├── utils/ │ │ │ └── hook-test-helpers.ts │ │ └── vitest.d.ts │ ├── tsconfig.json │ ├── tsconfig.test.json │ ├── vendor/ │ │ └── leiden/ │ │ ├── index.cjs │ │ └── utils.cjs │ └── vitest.config.ts ├── gitnexus-claude-plugin/ │ ├── .claude-plugin/ │ │ └── plugin.json │ ├── .mcp.json │ ├── hooks/ │ │ ├── gitnexus-hook.js │ │ └── hooks.json │ └── skills/ │ ├── gitnexus-cli/ │ │ ├── SKILL.md │ │ └── mcp.json │ ├── gitnexus-debugging/ │ │ ├── SKILL.md │ │ └── mcp.json │ ├── gitnexus-exploring/ │ │ ├── SKILL.md │ │ └── mcp.json │ ├── gitnexus-guide/ │ │ ├── SKILL.md │ │ └── mcp.json │ ├── gitnexus-impact-analysis/ │ │ ├── SKILL.md │ │ └── mcp.json │ ├── gitnexus-pr-review/ │ │ └── SKILL.md │ └── gitnexus-refactoring/ │ ├── SKILL.md │ └── mcp.json ├── gitnexus-cursor-integration/ │ ├── hooks/ │ │ ├── augment-shell.sh │ │ └── hooks.json │ └── skills/ │ ├── gitnexus-debugging/ │ │ └── SKILL.md │ ├── gitnexus-exploring/ │ │ └── SKILL.md │ ├── gitnexus-impact-analysis/ │ │ └── SKILL.md │ ├── gitnexus-pr-review/ │ │ └── SKILL.md │ └── gitnexus-refactoring/ │ └── SKILL.md ├── gitnexus-test-setup/ │ └── .gitignore ├── gitnexus-web/ │ ├── .gitignore │ ├── api/ │ │ └── proxy.ts │ ├── index.html │ ├── package.json │ ├── public/ │ │ └── wasm/ │ │ ├── c/ │ │ │ └── tree-sitter-c.wasm │ │ ├── cpp/ │ │ │ └── tree-sitter-cpp.wasm │ │ ├── csharp/ │ │ │ └── tree-sitter-csharp.wasm │ │ ├── go/ │ │ │ └── tree-sitter-go.wasm │ │ ├── java/ │ │ │ └── tree-sitter-java.wasm │ │ ├── javascript/ │ │ │ └── tree-sitter-javascript.wasm 
│ │ ├── php/ │ │ │ └── tree-sitter-php.wasm │ │ ├── python/ │ │ │ └── tree-sitter-python.wasm │ │ ├── ruby/ │ │ │ └── tree-sitter-ruby.wasm │ │ ├── rust/ │ │ │ └── tree-sitter-rust.wasm │ │ ├── swift/ │ │ │ └── tree-sitter-swift.wasm │ │ ├── tree-sitter.wasm │ │ └── typescript/ │ │ ├── tree-sitter-tsx.wasm │ │ └── tree-sitter-typescript.wasm │ ├── src/ │ │ ├── App.tsx │ │ ├── components/ │ │ │ ├── BackendRepoSelector.tsx │ │ │ ├── CodeReferencesPanel.tsx │ │ │ ├── DropZone.tsx │ │ │ ├── EmbeddingStatus.tsx │ │ │ ├── FileTreePanel.tsx │ │ │ ├── GraphCanvas.tsx │ │ │ ├── Header.tsx │ │ │ ├── LoadingOverlay.tsx │ │ │ ├── MarkdownRenderer.tsx │ │ │ ├── MermaidDiagram.tsx │ │ │ ├── ProcessFlowModal.tsx │ │ │ ├── ProcessesPanel.tsx │ │ │ ├── QueryFAB.tsx │ │ │ ├── RightPanel.tsx │ │ │ ├── SettingsPanel.tsx │ │ │ ├── StatusBar.tsx │ │ │ ├── ToolCallCard.tsx │ │ │ └── WebGPUFallbackDialog.tsx │ │ ├── config/ │ │ │ ├── ignore-service.ts │ │ │ └── supported-languages.ts │ │ ├── core/ │ │ │ ├── embeddings/ │ │ │ │ ├── embedder.ts │ │ │ │ ├── embedding-pipeline.ts │ │ │ │ ├── index.ts │ │ │ │ ├── text-generator.ts │ │ │ │ └── types.ts │ │ │ ├── graph/ │ │ │ │ ├── graph.ts │ │ │ │ └── types.ts │ │ │ ├── ingestion/ │ │ │ │ ├── ast-cache.ts │ │ │ │ ├── call-processor.ts │ │ │ │ ├── call-routing.ts │ │ │ │ ├── cluster-enricher.ts │ │ │ │ ├── community-processor.ts │ │ │ │ ├── entry-point-scoring.ts │ │ │ │ ├── framework-detection.ts │ │ │ │ ├── heritage-processor.ts │ │ │ │ ├── import-processor.ts │ │ │ │ ├── parsing-processor.ts │ │ │ │ ├── pipeline.ts │ │ │ │ ├── process-processor.ts │ │ │ │ ├── structure-processor.ts │ │ │ │ ├── symbol-table.ts │ │ │ │ ├── tree-sitter-queries.ts │ │ │ │ └── utils.ts │ │ │ ├── lbug/ │ │ │ │ ├── csv-generator.ts │ │ │ │ ├── lbug-adapter.ts │ │ │ │ └── schema.ts │ │ │ ├── llm/ │ │ │ │ ├── agent.ts │ │ │ │ ├── context-builder.ts │ │ │ │ ├── index.ts │ │ │ │ ├── settings-service.ts │ │ │ │ ├── tools.ts │ │ │ │ └── types.ts │ │ │ ├── search/ │ │ │ │ 
├── bm25-index.ts │ │ │ │ ├── hybrid-search.ts │ │ │ │ └── index.ts │ │ │ └── tree-sitter/ │ │ │ └── parser-loader.ts │ │ ├── hooks/ │ │ │ ├── useAppState.tsx │ │ │ ├── useBackend.ts │ │ │ ├── useSettings.ts │ │ │ └── useSigma.ts │ │ ├── index.css │ │ ├── lib/ │ │ │ ├── constants.ts │ │ │ ├── graph-adapter.ts │ │ │ ├── mermaid-generator.ts │ │ │ └── utils.ts │ │ ├── main.tsx │ │ ├── services/ │ │ │ ├── backend.ts │ │ │ ├── git-clone.ts │ │ │ ├── server-connection.ts │ │ │ └── zip.ts │ │ ├── types/ │ │ │ ├── lbug-wasm.d.ts │ │ │ └── pipeline.ts │ │ ├── vendor/ │ │ │ └── leiden/ │ │ │ ├── index.d.ts │ │ │ ├── index.js │ │ │ └── utils.js │ │ ├── vite-env.d.ts │ │ └── workers/ │ │ └── ingestion.worker.ts │ ├── tsconfig.app.json │ ├── tsconfig.json │ ├── tsconfig.node.json │ ├── vercel.json │ └── vite.config.ts ├── skills.mdm ├── type-resolution-roadmap.md └── type-resolution-system.md ================================================ FILE CONTENTS ================================================ ================================================ FILE: .claude/skills/gitnexus/gitnexus-cli/SKILL.md ================================================ --- name: gitnexus-cli description: "Use when the user needs to run GitNexus CLI commands like analyze/index a repo, check status, clean the index, generate a wiki, or list indexed repos. Examples: \"Index this repo\", \"Reanalyze the codebase\", \"Generate a wiki\"" --- # GitNexus CLI Commands All commands work via `npx` — no global install required. ## Commands ### analyze — Build or refresh the index ```bash npx gitnexus analyze ``` Run from the project root. This parses all source files, builds the knowledge graph, writes it to `.gitnexus/`, and generates CLAUDE.md / AGENTS.md context files. 
| Flag | Effect | | -------------- | ---------------------------------------------------------------- | | `--force` | Force full re-index even if up to date | | `--embeddings` | Enable embedding generation for semantic search (off by default) | **When to run:** First time in a project, after major code changes, or when `gitnexus://repo/{name}/context` reports the index is stale. In Claude Code, a PostToolUse hook runs `analyze` automatically after `git commit` and `git merge`, preserving embeddings if previously generated. ### status — Check index freshness ```bash npx gitnexus status ``` Shows whether the current repo has a GitNexus index, when it was last updated, and symbol/relationship counts. Use this to check if re-indexing is needed. ### clean — Delete the index ```bash npx gitnexus clean ``` Deletes the `.gitnexus/` directory and unregisters the repo from the global registry. Use before re-indexing if the index is corrupt or after removing GitNexus from a project. | Flag | Effect | | --------- | ------------------------------------------------- | | `--force` | Skip confirmation prompt | | `--all` | Clean all indexed repos, not just the current one | ### wiki — Generate documentation from the graph ```bash npx gitnexus wiki ``` Generates repository documentation from the knowledge graph using an LLM. Requires an API key (saved to `~/.gitnexus/config.json` on first use). | Flag | Effect | | ------------------- | ----------------------------------------- | | `--force` | Force full regeneration | | `--model <model>` | LLM model (default: minimax/minimax-m2.5) | | `--base-url <url>` | LLM API base URL | | `--api-key <key>` | LLM API key | | `--concurrency <n>` | Parallel LLM calls (default: 3) | | `--gist` | Publish wiki as a public GitHub Gist | ### list — Show all indexed repos ```bash npx gitnexus list ``` Lists all repositories registered in `~/.gitnexus/registry.json`. The MCP `list_repos` tool provides the same information. ## After Indexing 1. 
**Read `gitnexus://repo/{name}/context`** to verify the index loaded 2. Use the other GitNexus skills (`exploring`, `debugging`, `impact-analysis`, `refactoring`) for your task ## Troubleshooting - **"Not inside a git repository"**: Run from a directory inside a git repo - **Index is stale after re-analyzing**: Restart Claude Code to reload the MCP server - **Embeddings slow**: Omit `--embeddings` (it's off by default) or set `OPENAI_API_KEY` for faster API-based embedding ================================================ FILE: .claude/skills/gitnexus/gitnexus-debugging/SKILL.md ================================================ --- name: gitnexus-debugging description: "Use when the user is debugging a bug, tracing an error, or asking why something fails. Examples: \"Why is X failing?\", \"Where does this error come from?\", \"Trace this bug\"" --- # Debugging with GitNexus ## When to Use - "Why is this function failing?" - "Trace where this error comes from" - "Who calls this method?" - "This endpoint returns 500" - Investigating bugs, errors, or unexpected behavior ## Workflow ``` 1. gitnexus_query({query: "<error or symptom>"}) → Find related execution flows 2. gitnexus_context({name: "<suspect function>"}) → See callers/callees/processes 3. READ gitnexus://repo/{name}/process/{name} → Trace execution flow 4. gitnexus_cypher({query: "MATCH path..."}) → Custom traces if needed ``` > If "Index is stale" → run `npx gitnexus analyze` in terminal. 
## Checklist ``` - [ ] Understand the symptom (error message, unexpected behavior) - [ ] gitnexus_query for error text or related code - [ ] Identify the suspect function from returned processes - [ ] gitnexus_context to see callers and callees - [ ] Trace execution flow via process resource if applicable - [ ] gitnexus_cypher for custom call chain traces if needed - [ ] Read source files to confirm root cause ``` ## Debugging Patterns | Symptom | GitNexus Approach | | -------------------- | ---------------------------------------------------------- | | Error message | `gitnexus_query` for error text → `context` on throw sites | | Wrong return value | `context` on the function → trace callees for data flow | | Intermittent failure | `context` → look for external calls, async deps | | Performance issue | `context` → find symbols with many callers (hot paths) | | Recent regression | `detect_changes` to see what your changes affect | ## Tools **gitnexus_query** — find code related to error: ``` gitnexus_query({query: "payment validation error"}) → Processes: CheckoutFlow, ErrorHandling → Symbols: validatePayment, handlePaymentError, PaymentException ``` **gitnexus_context** — full context for a suspect: ``` gitnexus_context({name: "validatePayment"}) → Incoming calls: processCheckout, webhookHandler → Outgoing calls: verifyCard, fetchRates (external API!) → Processes: CheckoutFlow (step 3/7) ``` **gitnexus_cypher** — custom call chain traces: ```cypher MATCH path = (a)-[:CodeRelation {type: 'CALLS'}*1..2]->(b:Function {name: "validatePayment"}) RETURN [n IN nodes(path) | n.name] AS chain ``` ## Example: "Payment endpoint returns 500 intermittently" ``` 1. gitnexus_query({query: "payment error handling"}) → Processes: CheckoutFlow, ErrorHandling → Symbols: validatePayment, handlePaymentError 2. gitnexus_context({name: "validatePayment"}) → Outgoing calls: verifyCard, fetchRates (external API!) 3. 
READ gitnexus://repo/my-app/process/CheckoutFlow → Step 3: validatePayment → calls fetchRates (external) 4. Root cause: fetchRates calls external API without proper timeout ``` ================================================ FILE: .claude/skills/gitnexus/gitnexus-exploring/SKILL.md ================================================ --- name: gitnexus-exploring description: "Use when the user asks how code works, wants to understand architecture, trace execution flows, or explore unfamiliar parts of the codebase. Examples: \"How does X work?\", \"What calls this function?\", \"Show me the auth flow\"" --- # Exploring Codebases with GitNexus ## When to Use - "How does authentication work?" - "What's the project structure?" - "Show me the main components" - "Where is the database logic?" - Understanding code you haven't seen before ## Workflow ``` 1. READ gitnexus://repos → Discover indexed repos 2. READ gitnexus://repo/{name}/context → Codebase overview, check staleness 3. gitnexus_query({query: "<what you want to understand>"}) → Find related execution flows 4. gitnexus_context({name: "<symbol>"}) → Deep dive on specific symbol 5. READ gitnexus://repo/{name}/process/{processName} → Trace full execution flow ``` > If step 2 says "Index is stale" → run `npx gitnexus analyze` in terminal. 
## Checklist ``` - [ ] READ gitnexus://repo/{name}/context - [ ] gitnexus_query for the concept you want to understand - [ ] Review returned processes (execution flows) - [ ] gitnexus_context on key symbols for callers/callees - [ ] READ process resource for full execution traces - [ ] Read source files for implementation details ``` ## Resources | Resource | What you get | | --------------------------------------- | ------------------------------------------------------- | | `gitnexus://repo/{name}/context` | Stats, staleness warning (~150 tokens) | | `gitnexus://repo/{name}/clusters` | All functional areas with cohesion scores (~300 tokens) | | `gitnexus://repo/{name}/cluster/{name}` | Area members with file paths (~500 tokens) | | `gitnexus://repo/{name}/process/{name}` | Step-by-step execution trace (~200 tokens) | ## Tools **gitnexus_query** — find execution flows related to a concept: ``` gitnexus_query({query: "payment processing"}) → Processes: CheckoutFlow, RefundFlow, WebhookHandler → Symbols grouped by flow with file locations ``` **gitnexus_context** — 360-degree view of a symbol: ``` gitnexus_context({name: "validateUser"}) → Incoming calls: loginHandler, apiMiddleware → Outgoing calls: checkToken, getUserById → Processes: LoginFlow (step 2/5), TokenRefresh (step 1/3) ``` ## Example: "How does payment processing work?" ``` 1. READ gitnexus://repo/my-app/context → 918 symbols, 45 processes 2. gitnexus_query({query: "payment processing"}) → CheckoutFlow: processPayment → validateCard → chargeStripe → RefundFlow: initiateRefund → calculateRefund → processRefund 3. gitnexus_context({name: "processPayment"}) → Incoming: checkoutHandler, webhookHandler → Outgoing: validateCard, chargeStripe, saveTransaction 4. 
Read src/payments/processor.ts for implementation details ``` ================================================ FILE: .claude/skills/gitnexus/gitnexus-guide/SKILL.md ================================================ --- name: gitnexus-guide description: "Use when the user asks about GitNexus itself — available tools, how to query the knowledge graph, MCP resources, graph schema, or workflow reference. Examples: \"What GitNexus tools are available?\", \"How do I use GitNexus?\"" --- # GitNexus Guide Quick reference for all GitNexus MCP tools, resources, and the knowledge graph schema. ## Always Start Here For any task involving code understanding, debugging, impact analysis, or refactoring: 1. **Read `gitnexus://repo/{name}/context`** — codebase overview + check index freshness 2. **Match your task to a skill below** and **read that skill file** 3. **Follow the skill's workflow and checklist** > If step 1 warns the index is stale, run `npx gitnexus analyze` in the terminal first. ## Skills | Task | Skill to read | | -------------------------------------------- | ------------------- | | Understand architecture / "How does X work?" | `gitnexus-exploring` | | Blast radius / "What breaks if I change X?" | `gitnexus-impact-analysis` | | Trace bugs / "Why is X failing?" 
| `gitnexus-debugging` | | Rename / extract / split / refactor | `gitnexus-refactoring` | | Tools, resources, schema reference | `gitnexus-guide` (this file) | | Index, status, clean, wiki CLI commands | `gitnexus-cli` | ## Tools Reference | Tool | What it gives you | | ---------------- | ------------------------------------------------------------------------ | | `query` | Process-grouped code intelligence — execution flows related to a concept | | `context` | 360-degree symbol view — categorized refs, processes it participates in | | `impact` | Symbol blast radius — what breaks at depth 1/2/3 with confidence | | `detect_changes` | Git-diff impact — what do your current changes affect | | `rename` | Multi-file coordinated rename with confidence-tagged edits | | `cypher` | Raw graph queries (read `gitnexus://repo/{name}/schema` first) | | `list_repos` | Discover indexed repos | ## Resources Reference Lightweight reads (~100-500 tokens) for navigation: | Resource | Content | | ---------------------------------------------- | ----------------------------------------- | | `gitnexus://repo/{name}/context` | Stats, staleness check | | `gitnexus://repo/{name}/clusters` | All functional areas with cohesion scores | | `gitnexus://repo/{name}/cluster/{clusterName}` | Area members | | `gitnexus://repo/{name}/processes` | All execution flows | | `gitnexus://repo/{name}/process/{processName}` | Step-by-step trace | | `gitnexus://repo/{name}/schema` | Graph schema for Cypher | ## Graph Schema **Nodes:** File, Function, Class, Interface, Method, Community, Process **Edges (via CodeRelation.type):** CALLS, IMPORTS, EXTENDS, IMPLEMENTS, DEFINES, MEMBER_OF, STEP_IN_PROCESS ```cypher MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(f:Function {name: "myFunc"}) RETURN caller.name, caller.filePath ``` ================================================ FILE: .claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md ================================================ --- name: 
gitnexus-impact-analysis description: "Use when the user wants to know what will break if they change something, or needs safety analysis before editing code. Examples: \"Is it safe to change X?\", \"What depends on this?\", \"What will break?\"" --- # Impact Analysis with GitNexus ## When to Use - "Is it safe to change this function?" - "What will break if I modify X?" - "Show me the blast radius" - "Who uses this code?" - Before making non-trivial code changes - Before committing — to understand what your changes affect ## Workflow ``` 1. gitnexus_impact({target: "X", direction: "upstream"}) → What depends on this 2. READ gitnexus://repo/{name}/processes → Check affected execution flows 3. gitnexus_detect_changes() → Map current git changes to affected flows 4. Assess risk and report to user ``` > If "Index is stale" → run `npx gitnexus analyze` in terminal. ## Checklist ``` - [ ] gitnexus_impact({target, direction: "upstream"}) to find dependents - [ ] Review d=1 items first (these WILL BREAK) - [ ] Check high-confidence (>0.8) dependencies - [ ] READ processes to check affected execution flows - [ ] gitnexus_detect_changes() for pre-commit check - [ ] Assess risk level and report to user ``` ## Understanding Output | Depth | Risk Level | Meaning | | ----- | ---------------- | ------------------------ | | d=1 | **WILL BREAK** | Direct callers/importers | | d=2 | LIKELY AFFECTED | Indirect dependencies | | d=3 | MAY NEED TESTING | Transitive effects | ## Risk Assessment | Affected | Risk | | ------------------------------ | -------- | | <5 symbols, few processes | LOW | | 5-15 symbols, 2-5 processes | MEDIUM | | >15 symbols or many processes | HIGH | | Critical path (auth, payments) | CRITICAL | ## Tools **gitnexus_impact** — the primary tool for symbol blast radius: ``` gitnexus_impact({ target: "validateUser", direction: "upstream", minConfidence: 0.8, maxDepth: 3 }) → d=1 (WILL BREAK): - loginHandler (src/auth/login.ts:42) [CALLS, 100%] - apiMiddleware 
(src/api/middleware.ts:15) [CALLS, 100%] → d=2 (LIKELY AFFECTED): - authRouter (src/routes/auth.ts:22) [CALLS, 95%] ``` **gitnexus_detect_changes** — git-diff based impact analysis: ``` gitnexus_detect_changes({scope: "staged"}) → Changed: 5 symbols in 3 files → Affected: LoginFlow, TokenRefresh, APIMiddlewarePipeline → Risk: MEDIUM ``` ## Example: "What breaks if I change validateUser?" ``` 1. gitnexus_impact({target: "validateUser", direction: "upstream"}) → d=1: loginHandler, apiMiddleware (WILL BREAK) → d=2: authRouter, sessionManager (LIKELY AFFECTED) 2. READ gitnexus://repo/my-app/processes → LoginFlow and TokenRefresh touch validateUser 3. Risk: 2 direct callers, 2 processes = MEDIUM ``` ================================================ FILE: .claude/skills/gitnexus/gitnexus-pr-review/SKILL.md ================================================ --- name: gitnexus-pr-review description: "Use when the user wants to review a pull request, understand what a PR changes, assess risk of merging, or check for missing test coverage. Examples: \"Review this PR\", \"What does PR #42 change?\", \"Is this PR safe to merge?\"" --- # PR Review with GitNexus ## When to Use - "Review this PR" - "What does PR #42 change?" - "Is this safe to merge?" - "What's the blast radius of this PR?" - "Are there missing tests for this PR?" - Reviewing someone else's code changes before merge ## Workflow ``` 1. gh pr diff <number> → Get the raw diff 2. gitnexus_detect_changes({scope: "compare", base_ref: "main"}) → Map diff to affected flows 3. For each changed symbol: gitnexus_impact({target: "<symbol>", direction: "upstream"}) → Blast radius per change 4. gitnexus_context({name: "<key symbol>"}) → Understand callers/callees 5. READ gitnexus://repo/{name}/processes → Check affected execution flows 6. Summarize findings with risk assessment ``` > If "Index is stale" → run `npx gitnexus analyze` in terminal before reviewing. 
## Checklist ``` - [ ] Fetch PR diff (gh pr diff or git diff base...head) - [ ] gitnexus_detect_changes to map changes to affected execution flows - [ ] gitnexus_impact on each non-trivial changed symbol - [ ] Review d=1 items (WILL BREAK) — are callers updated? - [ ] gitnexus_context on key changed symbols to understand full picture - [ ] Check if affected processes have test coverage - [ ] Assess overall risk level - [ ] Write review summary with findings ``` ## Review Dimensions | Dimension | How GitNexus Helps | | --- | --- | | **Correctness** | `context` shows callers — are they all compatible with the change? | | **Blast radius** | `impact` shows d=1/d=2/d=3 dependents — anything missed? | | **Completeness** | `detect_changes` shows all affected flows — are they all handled? | | **Test coverage** | `impact({includeTests: true})` shows which tests touch changed code | | **Breaking changes** | d=1 upstream items that aren't updated in the PR = potential breakage | ## Risk Assessment | Signal | Risk | | --- | --- | | Changes touch <3 symbols, 0-1 processes | LOW | | Changes touch 3-10 symbols, 2-5 processes | MEDIUM | | Changes touch >10 symbols or many processes | HIGH | | Changes touch auth, payments, or data integrity code | CRITICAL | | d=1 callers exist outside the PR diff | Potential breakage — flag it | ## Tools **gitnexus_detect_changes** — map PR diff to affected execution flows: ``` gitnexus_detect_changes({scope: "compare", base_ref: "main"}) → Changed: 8 symbols in 4 files → Affected processes: CheckoutFlow, RefundFlow, WebhookHandler → Risk: MEDIUM ``` **gitnexus_impact** — blast radius per changed symbol: ``` gitnexus_impact({target: "validatePayment", direction: "upstream"}) → d=1 (WILL BREAK): - processCheckout (src/checkout.ts:42) [CALLS, 100%] - webhookHandler (src/webhooks.ts:15) [CALLS, 100%] → d=2 (LIKELY AFFECTED): - checkoutRouter (src/routes/checkout.ts:22) [CALLS, 95%] ``` **gitnexus_impact with tests** — check test coverage: ``` 
gitnexus_impact({target: "validatePayment", direction: "upstream", includeTests: true}) → Tests that cover this symbol: - validatePayment.test.ts [direct] - checkout.integration.test.ts [via processCheckout] ``` **gitnexus_context** — understand a changed symbol's role: ``` gitnexus_context({name: "validatePayment"}) → Incoming calls: processCheckout, webhookHandler → Outgoing calls: verifyCard, fetchRates → Processes: CheckoutFlow (step 3/7), RefundFlow (step 1/5) ``` ## Example: "Review PR #42" ``` 1. gh pr diff 42 > /tmp/pr42.diff → 4 files changed: payments.ts, checkout.ts, types.ts, utils.ts 2. gitnexus_detect_changes({scope: "compare", base_ref: "main"}) → Changed symbols: validatePayment, PaymentInput, formatAmount → Affected processes: CheckoutFlow, RefundFlow → Risk: MEDIUM 3. gitnexus_impact({target: "validatePayment", direction: "upstream"}) → d=1: processCheckout, webhookHandler (WILL BREAK) → webhookHandler is NOT in the PR diff — potential breakage! 4. gitnexus_impact({target: "PaymentInput", direction: "upstream"}) → d=1: validatePayment (in PR), createPayment (NOT in PR) → createPayment uses the old PaymentInput shape — breaking change! 5. gitnexus_context({name: "formatAmount"}) → Called by 12 functions — but change is backwards-compatible (added optional param) 6. Review summary: - MEDIUM risk — 3 changed symbols affect 2 execution flows - BUG: webhookHandler calls validatePayment but isn't updated for new signature - BUG: createPayment depends on PaymentInput type which changed - OK: formatAmount change is backwards-compatible - Tests: checkout.test.ts covers processCheckout path, but no webhook test ``` ## Review Output Format Structure your review as: ```markdown ## PR Review: <title> **Risk: LOW / MEDIUM / HIGH / CRITICAL** ### Changes Summary - <N> symbols changed across <M> files - <P> execution flows affected ### Findings 1. 
**[severity]** Description of finding - Evidence from GitNexus tools - Affected callers/flows ### Missing Coverage - Callers not updated in PR: ... - Untested flows: ... ### Recommendation APPROVE / REQUEST CHANGES / NEEDS DISCUSSION ``` ================================================ FILE: .claude/skills/gitnexus/gitnexus-refactoring/SKILL.md ================================================ --- name: gitnexus-refactoring description: "Use when the user wants to rename, extract, split, move, or restructure code safely. Examples: \"Rename this function\", \"Extract this into a module\", \"Refactor this class\", \"Move this to a separate file\"" --- # Refactoring with GitNexus ## When to Use - "Rename this function safely" - "Extract this into a module" - "Split this service" - "Move this to a new file" - Any task involving renaming, extracting, splitting, or restructuring code ## Workflow ``` 1. gitnexus_impact({target: "X", direction: "upstream"}) → Map all dependents 2. gitnexus_query({query: "X"}) → Find execution flows involving X 3. gitnexus_context({name: "X"}) → See all incoming/outgoing refs 4. Plan update order: interfaces → implementations → callers → tests ``` > If "Index is stale" → run `npx gitnexus analyze` in terminal. 
## Checklists ### Rename Symbol ``` - [ ] gitnexus_rename({symbol_name: "oldName", new_name: "newName", dry_run: true}) — preview all edits - [ ] Review graph edits (high confidence) and ast_search edits (review carefully) - [ ] If satisfied: gitnexus_rename({..., dry_run: false}) — apply edits - [ ] gitnexus_detect_changes() — verify only expected files changed - [ ] Run tests for affected processes ``` ### Extract Module ``` - [ ] gitnexus_context({name: target}) — see all incoming/outgoing refs - [ ] gitnexus_impact({target, direction: "upstream"}) — find all external callers - [ ] Define new module interface - [ ] Extract code, update imports - [ ] gitnexus_detect_changes() — verify affected scope - [ ] Run tests for affected processes ``` ### Split Function/Service ``` - [ ] gitnexus_context({name: target}) — understand all callees - [ ] Group callees by responsibility - [ ] gitnexus_impact({target, direction: "upstream"}) — map callers to update - [ ] Create new functions/services - [ ] Update callers - [ ] gitnexus_detect_changes() — verify affected scope - [ ] Run tests for affected processes ``` ## Tools **gitnexus_rename** — automated multi-file rename: ``` gitnexus_rename({symbol_name: "validateUser", new_name: "authenticateUser", dry_run: true}) → 12 edits across 8 files → 10 graph edits (high confidence), 2 ast_search edits (review) → Changes: [{file_path, edits: [{line, old_text, new_text, confidence}]}] ``` **gitnexus_impact** — map all dependents first: ``` gitnexus_impact({target: "validateUser", direction: "upstream"}) → d=1: loginHandler, apiMiddleware, testUtils → Affected Processes: LoginFlow, TokenRefresh ``` **gitnexus_detect_changes** — verify your changes after refactoring: ``` gitnexus_detect_changes({scope: "all"}) → Changed: 8 files, 12 symbols → Affected processes: LoginFlow, TokenRefresh → Risk: MEDIUM ``` **gitnexus_cypher** — custom reference queries: ```cypher MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(f:Function {name: 
"validateUser"}) RETURN caller.name, caller.filePath ORDER BY caller.filePath ``` ## Risk Rules | Risk Factor | Mitigation | | ------------------- | ----------------------------------------- | | Many callers (>5) | Use gitnexus_rename for automated updates | | Cross-area refs | Use detect_changes after to verify scope | | String/dynamic refs | gitnexus_query to find them | | External/public API | Version and deprecate properly | ## Example: Rename `validateUser` to `authenticateUser` ``` 1. gitnexus_rename({symbol_name: "validateUser", new_name: "authenticateUser", dry_run: true}) → 12 edits: 10 graph (safe), 2 ast_search (review) → Files: validator.ts, login.ts, middleware.ts, config.json... 2. Review ast_search edits (config.json: dynamic reference!) 3. gitnexus_rename({symbol_name: "validateUser", new_name: "authenticateUser", dry_run: false}) → Applied 12 edits across 8 files 4. gitnexus_detect_changes({scope: "all"}) → Affected: LoginFlow, TokenRefresh → Risk: MEDIUM — run tests for these flows ``` ================================================ FILE: .claude-plugin/marketplace.json ================================================ { "name": "gitnexus-marketplace", "owner": { "name": "GitNexus", "email": "nico@gitnexus.dev" }, "metadata": { "description": "Code intelligence powered by a knowledge graph — execution flows, blast radius, and semantic search", "homepage": "https://github.com/nicosxt/gitnexus" }, "plugins": [ { "name": "gitnexus", "version": "1.3.3", "source": "./gitnexus-claude-plugin", "description": "Code intelligence powered by a knowledge graph. Provides execution flow tracing, blast radius analysis, and augmented search across your codebase." } ] } ================================================ FILE: .cursorrules ================================================ # AI Agent Rules Follow .gitnexus/RULES.md for all project context and coding guidelines. This project uses GitNexus MCP for code intelligence. 
See .gitnexus/RULES.md for available tools and best practices. ================================================ FILE: .github/FUNDING.yml ================================================ # These are supported funding model platforms github: abhigyanpatwari ================================================ FILE: .github/actions/setup-gitnexus/action.yml ================================================ name: Setup GitNexus description: Setup Node.js 20, install dependencies, and optionally build inputs: build: description: Whether to run npm run build after install required: false default: 'false' runs: using: composite steps: - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 with: node-version: 20 cache: npm cache-dependency-path: gitnexus/package-lock.json - name: Install dependencies run: npm ci shell: bash working-directory: gitnexus - name: Build if: ${{ inputs.build == 'true' }} run: npm run build shell: bash working-directory: gitnexus ================================================ FILE: .github/release.yml ================================================ changelog: exclude: labels: - chore authors: - dependabot - dependabot[bot] categories: - title: "\U0001F6A8 Security" labels: - security - title: "\U0001F4A5 Breaking Changes" labels: - breaking - title: "\U0001F680 Features" labels: - enhancement - title: "\U0001F41B Bug Fixes" labels: - bug - title: "\U0001F3CE\uFE0F Performance" labels: - performance - title: "\U0001F9EA Tests" labels: - test - title: "\U0001F504 Refactoring" labels: - refactor - title: "\U0001F477 CI/CD" labels: - ci - title: "\U0001F4E6 Dependencies" labels: - dependencies - title: "\U0001F4DD Other Changes" labels: - "*" exclude: labels: - dependencies - ci - test - refactor - chore ================================================ FILE: .github/workflows/ci-quality.yml ================================================ name: Quality Checks on: workflow_call: jobs: typecheck: runs-on: ubuntu-latest timeout-minutes: 
// Download the `pr-meta` artifact produced by the CI run that triggered this
// workflow and store the raw zip under $RUNNER_TEMP/pr-meta/pr-meta.zip.
// Unpacking is left to the following "Extract PR metadata" shell step.
const fs = require('fs');
const path = require('path');
// List the artifacts attached to the triggering workflow run.
const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
  owner: context.repo.owner,
  repo: context.repo.repo,
  run_id: ${{ github.event.workflow_run.id }},
});
const meta = artifacts.data.artifacts.find(a => a.name === 'pr-meta');
if (!meta) {
  // Without the metadata there is no PR number to comment on, so the step is
  // marked as failed and the script stops here.
  core.setFailed('pr-meta artifact not found — skipping report');
  return;
}
// Fetch the artifact as a zip archive (returned in `zip.data`).
const zip = await github.rest.actions.downloadArtifact({
  owner: context.repo.owner,
  repo: context.repo.repo,
  artifact_id: meta.id,
  archive_format: 'zip',
});
const dest = path.join(process.env.RUNNER_TEMP, 'pr-meta');
// `recursive: true` makes this a no-op when the directory already exists.
fs.mkdirSync(dest, { recursive: true });
fs.writeFileSync(path.join(dest, 'pr-meta.zip'), Buffer.from(zip.data));
- name: Extract test reports
  if: steps.download-test-reports.outcome == 'success'
  shell: bash
  run: |
    # The download step only emits a warning (and still succeeds) when the
    # `test-reports` artifact is missing, so neither the directory nor the
    # archive is guaranteed to exist. Skip extraction in that case instead of
    # failing on `cd`; the report step already copes with missing coverage
    # and test data.
    archive="$RUNNER_TEMP/test-reports/test-reports.zip"
    if [[ ! -f "$archive" ]]; then
      echo "::warning::test-reports.zip not found — skipping extraction"
      exit 0
    fi
    cd "$RUNNER_TEMP/test-reports"
    # Best effort: a corrupt archive must not prevent the report from posting.
    unzip -o test-reports.zip || true
// Download the `test-reports` artifact of a successful `ci.yml` run on `main`
// so the PR's coverage can be compared against the base branch.
// Outputs: `found` ('true' | 'false') and, when found, `dir` (the directory
// that holds the downloaded `base.zip`).
const fs = require('fs');
const path = require('path');
// Ask for a single successful run of ci.yml on main.
// NOTE(review): presumably the API returns the newest run first — confirm.
const runs = await github.rest.actions.listWorkflowRuns({
  owner: context.repo.owner,
  repo: context.repo.repo,
  workflow_id: 'ci.yml',
  branch: 'main',
  status: 'success',
  per_page: 1,
});
if (runs.data.workflow_runs.length === 0) {
  // No baseline run available.
  core.setOutput('found', 'false');
  return;
}
const mainRunId = runs.data.workflow_runs[0].id;
const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
  owner: context.repo.owner,
  repo: context.repo.repo,
  run_id: mainRunId,
});
const testReports = artifacts.data.artifacts.find(a => a.name === 'test-reports');
if (!testReports) {
  // The baseline run has no `test-reports` artifact in the listing.
  core.setOutput('found', 'false');
  return;
}
const zip = await github.rest.actions.downloadArtifact({
  owner: context.repo.owner,
  repo: context.repo.repo,
  artifact_id: testReports.id,
  archive_format: 'zip',
});
const dest = path.join(process.env.RUNNER_TEMP, 'base-coverage');
fs.mkdirSync(dest, { recursive: true });
// The archive is unpacked by the following "Extract base coverage" step.
fs.writeFileSync(path.join(dest, 'base.zip'), Buffer.from(zip.data));
core.setOutput('found', 'true');
core.setOutput('dir', dest);
// ── Read coverage ──
/**
 * Read the aggregate coverage figures from a `coverage-summary.json` file
 * located anywhere below `dir`.
 *
 * The file is expected to contain a `total` object with `statements`,
 * `branches`, `functions` and `lines` entries, each exposing `pct`,
 * `covered` and `total`.
 *
 * The lookup walks the directory tree with `fs` instead of shelling out to
 * `find`, so paths containing spaces, `$`, quotes or backticks are handled
 * correctly and no shell is required.
 *
 * @param dir Directory to search recursively.
 * @returns An object with `stmts`/`branch`/`funcs`/`lines` percentages and
 *   `*Cov` strings of the form "covered/total". When no summary is found, or
 *   the summary is malformed, every percentage is 'N/A' and every `*Cov`
 *   string is empty (the result is never partially filled).
 */
function readCov(dir) {
  const unavailable = {
    stmts: 'N/A', branch: 'N/A', funcs: 'N/A', lines: 'N/A',
    stmtsCov: '', branchCov: '', funcsCov: '', linesCov: '',
  };

  // Depth-first search for the summary file. A match in the current
  // directory wins; sub-directories are visited in name order so the result
  // is deterministic. Symlinks are not followed (same as `find -type f`).
  function findSummary(root) {
    let entries;
    try {
      entries = fs.readdirSync(root, { withFileTypes: true });
    } catch {
      // Missing or unreadable directory: nothing to report from here.
      return undefined;
    }
    if (entries.some(e => e.isFile() && e.name === 'coverage-summary.json')) {
      return path.join(root, 'coverage-summary.json');
    }
    const subdirs = entries.filter(e => e.isDirectory()).map(e => e.name).sort();
    for (const name of subdirs) {
      const hit = findSummary(path.join(root, name));
      if (hit) return hit;
    }
    return undefined;
  }

  const file = findSummary(dir);
  if (!file) return unavailable;

  try {
    const t = JSON.parse(fs.readFileSync(file, 'utf8')).total;
    // Build the result in one go so a malformed file cannot leave it half set.
    return {
      stmts: t.statements.pct,
      branch: t.branches.pct,
      funcs: t.functions.pct,
      lines: t.lines.pct,
      stmtsCov: `${t.statements.covered}/${t.statements.total}`,
      branchCov: `${t.branches.covered}/${t.branches.total}`,
      funcsCov: `${t.functions.covered}/${t.functions.total}`,
      linesCov: `${t.lines.covered}/${t.lines.total}`,
    };
  } catch (err) {
    // Still best-effort, but leave a trace in the job log instead of
    // silently hiding a corrupt summary.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`readCov: ignoring unreadable coverage summary ${file}: ${reason}`);
    return unavailable;
  }
}
// ── Coverage delta ──
/**
 * Format the change between the PR's coverage percentage and the base
 * branch's percentage for the report table.
 *
 * @param pct     Coverage percentage of the PR, or 'N/A' when unavailable.
 * @param basePct Coverage percentage of the base branch, or 'N/A'.
 * @returns '—' when either side is unavailable or not numeric,
 *   '📈 +X.X%' for an increase, '📉 -X.X%' for a decrease, and '=' when the
 *   difference rounds to zero at one decimal place.
 */
function delta(pct, basePct) {
  if (pct === 'N/A' || basePct === 'N/A') return '—';
  const diff = Number(pct) - Number(basePct);
  // Any other non-numeric value (for example a textual "Unknown" percentage)
  // yields NaN; report it as unavailable rather than as "unchanged".
  if (!Number.isFinite(diff)) return '—';
  const d = diff.toFixed(1);
  // Compare the rounded value so that e.g. -0.04 is shown as '=' and not
  // as a decrease of "-0.0%".
  const dNum = parseFloat(d);
  if (dNum > 0) return `📈 +${d}%`;
  if (dNum < 0) return `📉 ${d}%`;
  return '=';
}
// ── Post sticky comment ──
// The report lives in a single PR comment that is tagged with a hidden HTML
// marker; later runs find that comment and update it instead of adding a
// new one.
const marker = '<!-- ci-report -->';
// Walk every page of comments: looking only at the first 100 would miss the
// existing report on busy PRs and post a duplicate on each run. (The
// issue-comments list endpoint has no `direction` parameter, so the previous
// `direction: 'desc'` had no effect.)
const comments = await github.paginate(github.rest.issues.listComments, {
  owner: context.repo.owner,
  repo: context.repo.repo,
  issue_number: prNumber,
  per_page: 100,
});
const existing = comments.find(c => c.body?.includes(marker));
const fullBody = marker + '\n' + body;
if (existing) {
  // Refresh the existing report in place.
  await github.rest.issues.updateComment({
    owner: context.repo.owner,
    repo: context.repo.repo,
    comment_id: existing.id,
    body: fullBody,
  });
} else {
  // First report for this PR.
  await github.rest.issues.createComment({
    owner: context.repo.owner,
    repo: context.repo.repo,
    issue_number: prNumber,
    body: fullBody,
  });
}
actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - uses: ./.github/actions/setup-gitnexus with: build: 'true' - name: Run all tests with coverage run: >- npx vitest run --reporter=default --reporter=json --outputFile=test-results.json --coverage --coverage.reporter=json-summary --coverage.reporter=json --coverage.reporter=text --coverage.thresholdAutoUpdate=false --coverage.reportOnFailure=true working-directory: gitnexus - name: Upload test reports if: always() uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: test-reports path: | gitnexus/coverage/coverage-summary.json gitnexus/coverage/coverage-final.json gitnexus/test-results.json retention-days: 5 cross-platform: name: ${{ matrix.os }} strategy: fail-fast: false matrix: # Ubuntu already covered by the coverage job above os: [windows-latest, macos-latest] runs-on: ${{ matrix.os }} timeout-minutes: 25 steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - uses: ./.github/actions/setup-gitnexus with: build: 'true' - run: npx vitest run working-directory: gitnexus ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: push: branches: [main] paths-ignore: ['**.md', 'docs/**', 'LICENSE'] pull_request: branches: [main] paths-ignore: ['**.md', 'docs/**', 'LICENSE'] workflow_call: concurrency: group: ci-${{ github.ref }} cancel-in-progress: true # ── Reusable workflow orchestration ───────────────────────────────── # Each concern lives in its own workflow file for maintainability: # ci-quality.yml — typecheck (tsc --noEmit) # ci-tests.yml — all tests with coverage (ubuntu) + cross-platform # ci-report.yml — PR comment (workflow_run trigger for fork write access) jobs: quality: uses: ./.github/workflows/ci-quality.yml permissions: contents: read tests: uses: ./.github/workflows/ci-tests.yml permissions: contents: read # ── Unified CI gate 
────────────────────────────────────────────── # Single required check for branch protection. ci-status: name: CI Gate needs: [quality, tests] if: always() runs-on: ubuntu-latest timeout-minutes: 5 steps: - name: Check all jobs passed shell: bash env: QUALITY: ${{ needs.quality.result }} TESTS: ${{ needs.tests.result }} run: | echo "Quality: $QUALITY" echo "Tests: $TESTS" if [[ "$QUALITY" != "success" ]] || [[ "$TESTS" != "success" ]]; then echo "::error::One or more CI jobs failed" exit 1 fi # ── PR metadata for ci-report.yml ──────────────────────────────── # Saves PR number and job results so the workflow_run-triggered # report can post comments with a write token (works for forks). save-pr-meta: name: Save PR Metadata if: always() && github.event_name == 'pull_request' needs: [quality, tests] runs-on: ubuntu-latest timeout-minutes: 5 steps: - name: Write PR metadata shell: bash env: PR_NUMBER: ${{ github.event.pull_request.number }} QUALITY: ${{ needs.quality.result }} TESTS: ${{ needs.tests.result }} run: | mkdir -p pr-meta echo "$PR_NUMBER" > pr-meta/pr-number echo "$QUALITY" > pr-meta/quality-result echo "$TESTS" > pr-meta/tests-result - name: Upload PR metadata uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: pr-meta path: pr-meta/ retention-days: 1 ================================================ FILE: .github/workflows/claude-code-review.yml ================================================ name: Claude Code Review # Uses pull_request_target so the workflow runs as defined on the default branch, # which allows access to secrets for posting review comments on fork PRs. # SECURITY: The checkout pins the fork's HEAD SHA (not the branch name) to # prevent TOCTOU races (force-push between trigger and checkout). The # claude-code-action sandboxes execution — it does NOT run arbitrary code # from the checked-out source. 
on: # Trigger only when explicitly requested: # - Add the "claude-review" label to a PR, OR # - Comment "@claude" or "/review" on a PR pull_request_target: types: [labeled] issue_comment: types: [created] # Serialize per-PR to avoid racing review comments. concurrency: group: claude-review-${{ github.event.issue.number || github.event.pull_request.number }} cancel-in-progress: false jobs: claude-review: # Run only when: # 1. The "claude-review" label is added to a non-draft PR authored by a trusted contributor, OR # 2. A trusted contributor comments "@claude" or "/review" on a PR if: | ( github.event_name == 'pull_request_target' && github.event.label.name == 'claude-review' && github.event.pull_request.draft == false && (github.event.pull_request.author_association == 'OWNER' || github.event.pull_request.author_association == 'MEMBER' || github.event.pull_request.author_association == 'COLLABORATOR') ) || ( github.event_name == 'issue_comment' && github.event.issue.pull_request && (contains(github.event.comment.body, '@claude') || contains(github.event.comment.body, '/review')) && (github.event.comment.author_association == 'OWNER' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'COLLABORATOR') ) runs-on: ubuntu-latest timeout-minutes: 30 permissions: contents: read pull-requests: write issues: read id-token: write steps: # For issue_comment triggers, resolve the PR number, head SHA, and fork repo - name: Resolve PR context id: pr uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7 with: script: | let pr; if (context.eventName === 'issue_comment') { const resp = await github.rest.pulls.get({ owner: context.repo.owner, repo: context.repo.repo, pull_number: context.payload.issue.number, }); pr = resp.data; } else { pr = context.payload.pull_request; } core.setOutput('number', pr.number); core.setOutput('sha', pr.head.sha); core.setOutput('repo', pr.head.repo.full_name); core.setOutput('branch',
pr.head.ref); - name: Checkout PR head uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: repository: ${{ steps.pr.outputs.repo }} ref: ${{ steps.pr.outputs.sha }} fetch-depth: 1 - name: Run Claude Code Review id: claude-review uses: anthropics/claude-code-action@9469d113c6afd29550c402740f22d1a97dd1209b # v1 with: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} github_token: ${{ secrets.GITHUB_TOKEN }} allowed_non_write_users: '*' show_full_output: true plugin_marketplaces: 'https://github.com/anthropics/claude-code.git' plugins: 'code-review@claude-code-plugins' prompt: '/code-review:code-review ${{ github.repository }}/pull/${{ steps.pr.outputs.number }}' ================================================ FILE: .github/workflows/claude.yml ================================================ name: Claude Code on: issue_comment: types: [created] pull_request_review_comment: types: [created] issues: types: [opened, assigned] pull_request_review: types: [submitted] # Serialize per-PR/issue to avoid racing comments. 
concurrency: group: claude-code-${{ github.event.issue.number || github.event.pull_request.number || github.event.issue.id }} cancel-in-progress: false jobs: claude: if: | ( github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude') && (github.event.comment.author_association == 'OWNER' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'COLLABORATOR') ) || ( github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude') && (github.event.comment.author_association == 'OWNER' || github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'COLLABORATOR') ) || ( github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude') && (github.event.review.author_association == 'OWNER' || github.event.review.author_association == 'MEMBER' || github.event.review.author_association == 'COLLABORATOR') ) || ( github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')) && (github.event.issue.author_association == 'OWNER' || github.event.issue.author_association == 'MEMBER' || github.event.issue.author_association == 'COLLABORATOR') ) runs-on: ubuntu-latest timeout-minutes: 30 permissions: contents: read pull-requests: write issues: write id-token: write actions: read # required for Claude to read CI results on PRs steps: # For PR-related triggers, resolve the fork repo so we can checkout correctly. 
- name: Resolve PR context id: pr uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7 with: script: | // Determine if this event is PR-related let prNumber = null; if (context.eventName === 'issue_comment' && context.payload.issue.pull_request) { prNumber = context.payload.issue.number; } else if (context.eventName === 'pull_request_review_comment') { prNumber = context.payload.pull_request.number; } else if (context.eventName === 'pull_request_review') { prNumber = context.payload.pull_request.number; } if (!prNumber) { core.setOutput('is_pr', 'false'); return; } const resp = await github.rest.pulls.get({ owner: context.repo.owner, repo: context.repo.repo, pull_number: prNumber, }); const pr = resp.data; core.setOutput('is_pr', 'true'); core.setOutput('number', String(prNumber)); core.setOutput('sha', pr.head.sha); core.setOutput('repo', pr.head.repo.full_name); core.setOutput('branch', pr.head.ref); - name: Checkout repository uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 with: repository: ${{ steps.pr.outputs.is_pr == 'true' && steps.pr.outputs.repo || github.repository }} ref: ${{ steps.pr.outputs.is_pr == 'true' && steps.pr.outputs.sha || '' }} fetch-depth: 1 - name: Run Claude Code id: claude uses: anthropics/claude-code-action@9469d113c6afd29550c402740f22d1a97dd1209b # v1 with: claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} github_token: ${{ secrets.GITHUB_TOKEN }} allowed_non_write_users: '*' show_full_output: true # This is an optional setting that allows Claude to read CI results on PRs additional_permissions: | actions: read ================================================ FILE: .github/workflows/publish.yml ================================================ name: Publish to npm on: push: tags: - 'v*' # No workflow-level permissions — scoped per job below. 
jobs: ci: uses: ./.github/workflows/ci.yml permissions: contents: read actions: read pull-requests: write publish: needs: ci runs-on: ubuntu-latest timeout-minutes: 15 permissions: contents: write id-token: write steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 with: node-version: 20 registry-url: https://registry.npmjs.org cache: npm cache-dependency-path: gitnexus/package-lock.json - run: npm ci working-directory: gitnexus - name: Verify version consistency shell: bash run: | TAG_VERSION="${GITHUB_REF#refs/tags/v}" if ! [[ "$TAG_VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+(-[a-zA-Z0-9.]+)?$ ]]; then echo "::error::Tag does not follow semver: v$TAG_VERSION" exit 1 fi PKG_VERSION=$(node -p "require('./package.json').version") if [ "$TAG_VERSION" != "$PKG_VERSION" ]; then echo "::error::Tag version (v$TAG_VERSION) does not match package.json version ($PKG_VERSION)" exit 1 fi echo "Version verified: $PKG_VERSION" working-directory: gitnexus - name: Build run: npm run build working-directory: gitnexus - name: Dry-run publish run: npm publish --dry-run working-directory: gitnexus - name: Publish to npm run: npm publish --provenance --access public working-directory: gitnexus env: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - name: Create GitHub Release uses: softprops/action-gh-release@a06a81a03ee405af7f2048a818ed3f03bbf83c7b # v2 with: generate_release_notes: true ================================================ FILE: .gitignore ================================================ # Dependencies node_modules/ # Build output dist/ # TypeScript build info *.tsbuildinfo # IDE .vscode/ .idea/ *.swp *.swo # OS .DS_Store Thumbs.db .claude/settings.local.json # Environment variables .env .env.local .env.*.local # Logs *.log npm-debug.log* # Testing coverage/ # Misc *.local .vercel .env*.local .gitnexus .claude/settings.local.json # Claude Code worktrees .claude/worktrees/ # Claude code 
skills .claude/skills/generated/ # Assets (screenshots, images) assets/ # Generated files (should not be indexed) repomix-output* # Design docs (local only) docs/plans/ gitnexus/test/fixtures/mini-repo/*.md gitnexus/test/fixtures/mini-repo/.claude gitnexus/test/fixtures/mini-repo/.gitignore # Ignore csharp generated obj and bin folders gitnexus/test/fixtures/lang-resolution/**/obj gitnexus/test/fixtures/lang-resolution/**/bin GitNexus.sln # Git worktrees .worktrees/ ================================================ FILE: .history/gitnexus/vitest.config_20260317171253.ts ================================================ import { defineConfig } from 'vitest/config'; export default defineConfig({ test: { globalSetup: ['test/global-setup.ts'], include: ['test/**/*.test.ts'], testTimeout: 30000, hookTimeout: 120000, pool: 'forks', globals: true, setupFiles: ['test/setup.ts'], teardownTimeout: 3000, dangerouslyIgnoreUnhandledErrors: true, // LadybugDB N-API destructor segfaults on fork exit — not a test failure coverage: { provider: 'v8', include: ['src/**/*.ts'], exclude: [ 'src/cli/index.ts', // CLI entry point (commander wiring) 'src/server/**', // HTTP server (requires network) 'src/core/wiki/**', // Wiki generation (requires LLM) ], // Auto-ratchet: vitest bumps thresholds when coverage exceeds them. // CI will fail if a PR drops below these floors. 
thresholds: { statements: 26, branches: 23, functions: 28, lines: 27, autoUpdate: true, }, }, }, }); ================================================ FILE: .mcp.json ================================================ { "mcpServers": { "gitnexus": { "type": "stdio", "command": "npx", "args": ["-y", "gitnexus@latest", "mcp"] } } } ================================================ FILE: .sisyphus/drafts/gitnexus-brainstorming.md ================================================ # Draft: Gitnexus Brainstorming - Clustering & Process Maps ## Initial Context - Project: **GitnexusV2** - Structure: - `gitnexus/` (Likely the core application) - `gitnexus-mcp/` (Likely a Model Context Protocol server) - Goal: Make it accurate and usable for smaller/dumber models. - Current Focus: Implementing **Clustering** and **Process Maps**. ## Findings - **Clustering**: Found `gitnexus/src/core/ingestion/cluster-enricher.ts`. - **Process Maps**: No files matched `*process*map*` yet. Searching content next. ## Open Questions - How is "process map" defined in this context? (Graph, mermaid diagram, flowchart?) - What is the input for clustering? (Code chunks, files, commits?) - What is the intended output for "smaller models"? (Simplified context, summaries?) ================================================ FILE: .sisyphus/drafts/noodlbox-comparison.md ================================================ # Draft: Gitnexus vs Noodlbox Strategy ## Objectives - Understand GitnexusV2 current state and goals. - Analyze Noodlbox capabilities from provided URL. - Compare features, architecture, and value proposition. - Provide strategic views and recommendations. ## Research Findings - [GitnexusV2]: Zero-server, browser-native (WASM), KuzuDB based. Graph + Vector hybrid search. - [Noodlbox]: CLI-first, heavy install. Has "Session Hooks" and "Search Hooks" via plugins/CLI. ## Comparison Points - **Core Philosophy**: Both bet on "Knowledge Graph + MCP" as the future. 
Noodlbox validates Gitnexus's direction. - **Architecture**: - *Noodlbox*: CLI/Binary based. Likely local server management. - *Gitnexus*: Zero-server, Browser-native (WASM). Lower friction, higher privacy. - **Features**: - *Communities/Processes*: Both have them. Noodlbox uses them for "context injection". Gitnexus uses them for "visual exploration + query". - *Impact Analysis*: Noodlbox has polished workflows (e.g., `detect_impact staged`). Gitnexus has the engine (`blastRadius`) but maybe not the specific workflow wrappers yet. - **UX/Integration**: - *Noodlbox*: "Hooks" (Session/Search) are a killer feature. Proactively injecting context into the agent's session. - *Gitnexus*: Powerful tools, but relies on agent *pulling* data? ## Strategic Views 1. **Validation**: The market direction is confirmed. You are building the right thing. 2. **Differentiation**: Lean into "Zero-Setup / Browser-Native". Noodlbox requires `noodl init` and CLI handling. Gitnexus could just *be*. 3. **Opportunity**: Steal the "Session/Search Hooks" pattern. Make the agent smarter *automatically* without the user asking "check impact". 4. **Workflow Polish**: Noodlbox's `/detect_impact staged` is a great specific use case. Gitnexus should wrap `blastRadius` into similar concrete workflows. ## Technical Feasibility (Interception) - **Cursor**: Use `.cursorrules` to "shadow" default tools. Instruct agent to ALWAYS use `gitnexus_search` instead of `grep`. - **Claude Code**: Likely uses a private plugin API for `PreToolUse`. We can't match this exactly without an official plugin, but we can approximate it with strong prompt instructions in `AGENTS.md`. - **MCP Shadowing**: Define tools with names that conflict (e.g., `grep`)? No, unsafe. Better to use "Virtual Hooks" via system prompt instructions.
================================================ FILE: .windsurfrules ================================================ # AI Agent Rules Follow .gitnexus/RULES.md for all project context and coding guidelines. This project uses GitNexus MCP for code intelligence. See .gitnexus/RULES.md for available tools and best practices. ================================================ FILE: AGENTS.md ================================================ <!-- gitnexus:start --> # GitNexus — Code Intelligence This project is indexed by GitNexus as **GitNexus** (2184 symbols, 5245 relationships, 167 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely. > If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first. ## Always Do - **MUST run impact analysis before editing any symbol.** Before modifying a function, class, or method, run `gitnexus_impact({target: "symbolName", direction: "upstream"})` and report the blast radius (direct callers, affected processes, risk level) to the user. - **MUST run `gitnexus_detect_changes()` before committing** to verify your changes only affect expected symbols and execution flows. - **MUST warn the user** if impact analysis returns HIGH or CRITICAL risk before proceeding with edits. - When exploring unfamiliar code, use `gitnexus_query({query: "concept"})` to find execution flows instead of grepping. It returns process-grouped results ranked by relevance. - When you need full context on a specific symbol — callers, callees, which execution flows it participates in — use `gitnexus_context({name: "symbolName"})`. ## When Debugging 1. `gitnexus_query({query: "<error or symptom>"})` — find execution flows related to the issue 2. `gitnexus_context({name: "<suspect function>"})` — see all callers, callees, and process participation 3. `READ gitnexus://repo/GitNexus/process/{processName}` — trace the full execution flow step by step 4. 
For regressions: `gitnexus_detect_changes({scope: "compare", base_ref: "main"})` — see what your branch changed ## When Refactoring - **Renaming**: MUST use `gitnexus_rename({symbol_name: "old", new_name: "new", dry_run: true})` first. Review the preview — graph edits are safe, text_search edits need manual review. Then run with `dry_run: false`. - **Extracting/Splitting**: MUST run `gitnexus_context({name: "target"})` to see all incoming/outgoing refs, then `gitnexus_impact({target: "target", direction: "upstream"})` to find all external callers before moving code. - After any refactor: run `gitnexus_detect_changes({scope: "all"})` to verify only expected files changed. ## Never Do - NEVER edit a function, class, or method without first running `gitnexus_impact` on it. - NEVER ignore HIGH or CRITICAL risk warnings from impact analysis. - NEVER rename symbols with find-and-replace — use `gitnexus_rename` which understands the call graph. - NEVER commit changes without running `gitnexus_detect_changes()` to check affected scope. 
## Tools Quick Reference | Tool | When to use | Command | |------|-------------|---------| | `query` | Find code by concept | `gitnexus_query({query: "auth validation"})` | | `context` | 360-degree view of one symbol | `gitnexus_context({name: "validateUser"})` | | `impact` | Blast radius before editing | `gitnexus_impact({target: "X", direction: "upstream"})` | | `detect_changes` | Pre-commit scope check | `gitnexus_detect_changes({scope: "staged"})` | | `rename` | Safe multi-file rename | `gitnexus_rename({symbol_name: "old", new_name: "new", dry_run: true})` | | `cypher` | Custom graph queries | `gitnexus_cypher({query: "MATCH ..."})` | ## Impact Risk Levels | Depth | Meaning | Action | |-------|---------|--------| | d=1 | WILL BREAK — direct callers/importers | MUST update these | | d=2 | LIKELY AFFECTED — indirect deps | Should test | | d=3 | MAY NEED TESTING — transitive | Test if critical path | ## Resources | Resource | Use for | |----------|---------| | `gitnexus://repo/GitNexus/context` | Codebase overview, check index freshness | | `gitnexus://repo/GitNexus/clusters` | All functional areas | | `gitnexus://repo/GitNexus/processes` | All execution flows | | `gitnexus://repo/GitNexus/process/{name}` | Step-by-step execution trace | ## Self-Check Before Finishing Before completing any code modification task, verify: 1. `gitnexus_impact` was run for all modified symbols 2. No HIGH/CRITICAL risk warnings were ignored 3. `gitnexus_detect_changes()` confirms changes match expected scope 4. All d=1 (WILL BREAK) dependents were updated ## Keeping the Index Fresh After committing code changes, the GitNexus index becomes stale. Re-run analyze to update it: ```bash npx gitnexus analyze ``` If the index previously included embeddings, preserve them by adding `--embeddings`: ```bash npx gitnexus analyze --embeddings ``` To check whether embeddings exist, inspect `.gitnexus/meta.json` — the `stats.embeddings` field shows the count (0 means no embeddings). 
**Running analyze without `--embeddings` will delete any previously generated embeddings.** > Claude Code users: A PostToolUse hook handles this automatically after `git commit` and `git merge`. ## CLI | Task | Read this skill file | |------|---------------------| | Understand architecture / "How does X work?" | `.claude/skills/gitnexus/gitnexus-exploring/SKILL.md` | | Blast radius / "What breaks if I change X?" | `.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md` | | Trace bugs / "Why is X failing?" | `.claude/skills/gitnexus/gitnexus-debugging/SKILL.md` | | Rename / extract / split / refactor | `.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md` | | Tools, resources, schema reference | `.claude/skills/gitnexus/gitnexus-guide/SKILL.md` | | Index, status, clean, wiki CLI commands | `.claude/skills/gitnexus/gitnexus-cli/SKILL.md` | <!-- gitnexus:end --> ================================================ FILE: CHANGELOG.md ================================================ # Changelog All notable changes to GitNexus will be documented in this file. 
## [Unreleased] ### Changed - Migrated from KuzuDB to LadybugDB v0.15 (`@ladybugdb/core`, `@ladybugdb/wasm-core`) - Renamed all internal paths from `kuzu` to `lbug` (storage: `.gitnexus/kuzu` → `.gitnexus/lbug`) - Added automatic cleanup of stale KuzuDB index files - LadybugDB v0.15 requires explicit VECTOR extension loading for semantic search ## [1.4.0] - 2026-03-13 ### Added - **Language-aware symbol resolution engine** with 3-tier resolver: exact FQN → scope-walk → guarded fuzzy fallback that refuses ambiguous matches (#238) — @magyargergo - **Method Resolution Order (MRO)** with 5 language-specific strategies: C++ leftmost-base, C#/Java class-over-interface, Python C3 linearization, Rust qualified syntax, default BFS (#238) — @magyargergo - **Constructor & struct literal resolution** across all languages — `new Foo()`, `User{...}`, C# primary constructors, target-typed new (#238) — @magyargergo - **Receiver-constrained resolution** using per-file TypeEnv — disambiguates `user.save()` vs `repo.save()` via `ownerId` matching (#238) — @magyargergo - **Heritage & ownership edges** — HAS_METHOD, OVERRIDES, Go struct embedding, Swift extension heritage, method signatures (`parameterCount`, `returnType`) (#238) — @magyargergo - **Language-specific resolver directory** (`resolvers/`) — extracted JVM, Go, C#, PHP, Rust resolvers from monolithic import-processor (#238) — @magyargergo - **Type extractor directory** (`type-extractors/`) — per-language type binding extraction with `Record<SupportedLanguages, Handler>` + `satisfies` dispatch (#238) — @magyargergo - **Export detection dispatch table** — compile-time exhaustive `Record` + `satisfies` pattern replacing switch/if chains (#238) — @magyargergo - **Language config module** (`language-config.ts`) — centralized tsconfig, go.mod, composer.json, .csproj, Swift package config loaders (#238) — @magyargergo - **Optional skill generation** via `npx gitnexus analyze --skills` — generates AI agent skills from KuzuDB 
knowledge graph (#171) — @zander-raycraft - **First-class C# support** — sibling-based modifier scanning, record/delegate/property/field/event declaration types (#163, #170, #178 via #237) — @Alice523, @benny-yamagata, @jnMetaCode - **C/C++ support fixes** — `.h` → C++ mapping, static-linkage export detection, qualified/parenthesized declarators, 48 entry point patterns (#163, #227 via #237) — @Alice523, @bitgineer - **Rust support fixes** — sibling-based `visibility_modifier` scanning for `pub` detection (#227 via #237) — @bitgineer - **Adaptive tree-sitter buffer sizing** — `Math.min(Math.max(contentLength * 2, 512KB), 32MB)` (#216 via #237) — @JasonOA888 - **Call expression matching** in tree-sitter queries (#234 via #237) — @ex-nihilo-jg - **DeepSeek model configurations** (#217) — @JasonOA888 - 282+ new unit tests, 178 integration resolver tests across 9 languages, 53 test files, 1146 total tests passing ### Fixed - Skip unavailable native Swift parsers in sequential ingestion (#188) — @Gujiassh - Heritage heuristic language-gated — no longer applies class/interface rules to wrong languages (#238) — @magyargergo - C# `base_list` distinguishes EXTENDS vs IMPLEMENTS via symbol table + `I[A-Z]` heuristic (#238) — @magyargergo - Go `qualified_type` (`models.User`) correctly unwrapped in TypeEnv (#238) — @magyargergo - Global tier no longer blocks resolution when kind/arity filtering can narrow to 1 candidate (#238) — @magyargergo ### Changed - `import-processor.ts` reduced from 1412 → 711 lines (50% reduction) via resolver and config extraction (#238) — @magyargergo - `type-env.ts` reduced from 635 → ~125 lines via type-extractor extraction (#238) — @magyargergo - CI/CD workflows hardened with security fixes and fork PR support (#222, #225) — @magyargergo ## [1.3.11] - 2026-03-08 ### Security - Fix FTS Cypher injection by escaping backslashes in search queries (#209) — @magyargergo ### Added - Auto-reindex hook that runs `gitnexus analyze` after commits and 
merges, with automatic embeddings preservation (#205) — @L1nusB - 968 integration tests (up from ~840) covering unhappy paths across search, enrichment, CLI, pipeline, worker pool, and KuzuDB (#209) — @magyargergo - Coverage auto-ratcheting so thresholds bump automatically on CI (#209) — @magyargergo - Rich CI PR report with coverage bars, test counts, and threshold tracking (#209) — @magyargergo - Modular CI workflow architecture with separate unit-test, integration-test, and orchestrator jobs (#209) — @magyargergo ### Fixed - KuzuDB native addon crashes on Linux/macOS by running integration tests in isolated vitest processes with `--pool=forks` (#209) — @magyargergo - Worker pool `MODULE_NOT_FOUND` crash when script path is invalid (#209) — @magyargergo ### Changed - Added macOS to the cross-platform CI test matrix (#208) — @magyargergo ## [1.3.10] - 2026-03-07 ### Security - **MCP transport buffer cap**: Added 10 MB `MAX_BUFFER_SIZE` limit to prevent out-of-memory attacks via oversized `Content-Length` headers or unbounded newline-delimited input - **Content-Length validation**: Reject `Content-Length` values exceeding the buffer cap before allocating memory - **Stack overflow prevention**: Replaced recursive `readNewlineMessage` with iterative loop to prevent stack overflow from consecutive empty lines - **Ambiguous prefix hardening**: Tightened `looksLikeContentLength` to require 14+ bytes before matching, preventing false framing detection on short input - **Closed transport guard**: `send()` now rejects with a clear error when called after `close()`, with proper write-error propagation ### Added - **Dual-framing MCP transport** (`CompatibleStdioServerTransport`): Auto-detects Content-Length (Codex/OpenCode) and newline-delimited JSON (Cursor/Claude Code) framing on the first message, responds in the same format (#207) - **Lazy CLI module loading**: All CLI subcommands now use `createLazyAction()` to defer heavy imports (tree-sitter, ONNX, KuzuDB) until 
invocation, significantly improving `gitnexus mcp` startup time (#207) - **Type-safe lazy actions**: `createLazyAction` uses constrained generics to validate export names against module types at compile time - **Regression test suite**: 13 unit tests covering transport framing, security hardening, buffer limits, and lazy action loading ### Fixed - **CALLS edge sourceId alignment**: `findEnclosingFunctionId` now generates IDs with `:startLine` suffix matching node creation format, fixing process detector finding 0 entry points (#194) - **LRU cache zero maxSize crash**: Guard `createASTCache` against `maxSize=0` when repos have no parseable files (#144) ### Changed - Transport constructor accepts `NodeJS.ReadableStream` / `NodeJS.WritableStream` (widened from concrete `ReadStream`/`WriteStream`) - `processReadBuffer` simplified to break on first error instead of stale-buffer retry loop ## [1.3.9] - 2026-03-06 ### Fixed - Aligned CALLS edge sourceId with node ID format in parse worker (#194) ## [1.3.8] - 2026-03-05 ### Fixed - Force-exit after analyze to prevent KuzuDB native cleanup hang (#192) ================================================ FILE: CLAUDE.md ================================================ <!-- gitnexus:start --> # GitNexus — Code Intelligence This project is indexed by GitNexus as **GitNexus** (2184 symbols, 5245 relationships, 167 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely. > If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first. ## Always Do - **MUST run impact analysis before editing any symbol.** Before modifying a function, class, or method, run `gitnexus_impact({target: "symbolName", direction: "upstream"})` and report the blast radius (direct callers, affected processes, risk level) to the user. - **MUST run `gitnexus_detect_changes()` before committing** to verify your changes only affect expected symbols and execution flows. 
- **MUST warn the user** if impact analysis returns HIGH or CRITICAL risk before proceeding with edits. - When exploring unfamiliar code, use `gitnexus_query({query: "concept"})` to find execution flows instead of grepping. It returns process-grouped results ranked by relevance. - When you need full context on a specific symbol — callers, callees, which execution flows it participates in — use `gitnexus_context({name: "symbolName"})`. ## When Debugging 1. `gitnexus_query({query: "<error or symptom>"})` — find execution flows related to the issue 2. `gitnexus_context({name: "<suspect function>"})` — see all callers, callees, and process participation 3. `READ gitnexus://repo/GitNexus/process/{processName}` — trace the full execution flow step by step 4. For regressions: `gitnexus_detect_changes({scope: "compare", base_ref: "main"})` — see what your branch changed ## When Refactoring - **Renaming**: MUST use `gitnexus_rename({symbol_name: "old", new_name: "new", dry_run: true})` first. Review the preview — graph edits are safe, text_search edits need manual review. Then run with `dry_run: false`. - **Extracting/Splitting**: MUST run `gitnexus_context({name: "target"})` to see all incoming/outgoing refs, then `gitnexus_impact({target: "target", direction: "upstream"})` to find all external callers before moving code. - After any refactor: run `gitnexus_detect_changes({scope: "all"})` to verify only expected files changed. ## Never Do - NEVER edit a function, class, or method without first running `gitnexus_impact` on it. - NEVER ignore HIGH or CRITICAL risk warnings from impact analysis. - NEVER rename symbols with find-and-replace — use `gitnexus_rename` which understands the call graph. - NEVER commit changes without running `gitnexus_detect_changes()` to check affected scope. 
## Tools Quick Reference | Tool | When to use | Command | |------|-------------|---------| | `query` | Find code by concept | `gitnexus_query({query: "auth validation"})` | | `context` | 360-degree view of one symbol | `gitnexus_context({name: "validateUser"})` | | `impact` | Blast radius before editing | `gitnexus_impact({target: "X", direction: "upstream"})` | | `detect_changes` | Pre-commit scope check | `gitnexus_detect_changes({scope: "staged"})` | | `rename` | Safe multi-file rename | `gitnexus_rename({symbol_name: "old", new_name: "new", dry_run: true})` | | `cypher` | Custom graph queries | `gitnexus_cypher({query: "MATCH ..."})` | ## Impact Risk Levels | Depth | Meaning | Action | |-------|---------|--------| | d=1 | WILL BREAK — direct callers/importers | MUST update these | | d=2 | LIKELY AFFECTED — indirect deps | Should test | | d=3 | MAY NEED TESTING — transitive | Test if critical path | ## Resources | Resource | Use for | |----------|---------| | `gitnexus://repo/GitNexus/context` | Codebase overview, check index freshness | | `gitnexus://repo/GitNexus/clusters` | All functional areas | | `gitnexus://repo/GitNexus/processes` | All execution flows | | `gitnexus://repo/GitNexus/process/{name}` | Step-by-step execution trace | ## Self-Check Before Finishing Before completing any code modification task, verify: 1. `gitnexus_impact` was run for all modified symbols 2. No HIGH/CRITICAL risk warnings were ignored 3. `gitnexus_detect_changes()` confirms changes match expected scope 4. All d=1 (WILL BREAK) dependents were updated ## Keeping the Index Fresh After committing code changes, the GitNexus index becomes stale. Re-run analyze to update it: ```bash npx gitnexus analyze ``` If the index previously included embeddings, preserve them by adding `--embeddings`: ```bash npx gitnexus analyze --embeddings ``` To check whether embeddings exist, inspect `.gitnexus/meta.json` — the `stats.embeddings` field shows the count (0 means no embeddings). 
**Running analyze without `--embeddings` will delete any previously generated embeddings.** > Claude Code users: A PostToolUse hook handles this automatically after `git commit` and `git merge`. ## CLI | Task | Read this skill file | |------|---------------------| | Understand architecture / "How does X work?" | `.claude/skills/gitnexus/gitnexus-exploring/SKILL.md` | | Blast radius / "What breaks if I change X?" | `.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md` | | Trace bugs / "Why is X failing?" | `.claude/skills/gitnexus/gitnexus-debugging/SKILL.md` | | Rename / extract / split / refactor | `.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md` | | Tools, resources, schema reference | `.claude/skills/gitnexus/gitnexus-guide/SKILL.md` | | Index, status, clean, wiki CLI commands | `.claude/skills/gitnexus/gitnexus-cli/SKILL.md` | <!-- gitnexus:end --> ================================================ FILE: LICENSE ================================================ PolyForm Noncommercial License 1.0.0 <https://polyformproject.org/licenses/noncommercial/1.0.0> ## Acceptance In order to get any license under these terms, you must agree to them as both strict obligations and conditions to all your licenses. ## Copyright License The licensor grants you a copyright license for the software to do everything you might do with the software that would otherwise infringe the licensor's copyright in it for any permitted purpose. However, you may only distribute the software according to [Distribution License](#distribution-license) and make changes or new works based on the software according to [Changes and New Works License](#changes-and-new-works-license). ## Distribution License The licensor grants you an additional copyright license to distribute copies of the software. Your license to distribute covers distributing the software with changes and new works permitted by [Changes and New Works License](#changes-and-new-works-license). 
## Notices You must ensure that anyone who gets a copy of any part of the software from you also gets a copy of these terms or the URL for them above, as well as copies of any plain-text lines beginning with `Required Notice:` that the licensor provided with the software. For example: > Required Notice: Copyright Abhigyan Patwari (https://github.com/abhigyanpatwari/GitNexus) ## Changes and New Works License The licensor grants you an additional copyright license to make changes and new works based on the software for any permitted purpose. ## Patent License The licensor grants you a patent license for the software that covers patent claims the licensor can license, or becomes able to license, that you would infringe by using the software. ## Noncommercial Purposes Any noncommercial purpose is a permitted purpose. ## Personal Uses Personal use for research, experiment, and testing for the benefit of public knowledge, personal study, private entertainment, hobby projects, amateur pursuits, or religious observance, without any anticipated commercial application, is use for a permitted purpose. ## Noncommercial Organizations Use by any charitable organization, educational institution, public research organization, public safety or health organization, environmental protection organization, or government institution is use for a permitted purpose regardless of the source of funding or obligations resulting from the funding. ## Fair Use You may have "fair use" rights for the software under the law. These terms do not limit them. ## No Other Rights These terms do not allow you to sublicense or transfer any of your licenses to anyone else, or prevent the licensor from granting licenses to anyone else. These terms do not imply any other licenses. ## Patent Defense If you make any written claim that the software infringes or contributes to infringement of any patent, your patent license for the software granted under these terms ends immediately. 
If your company makes such a claim, your patent license ends immediately for work on behalf of your company. ## Violations The first time you are notified in writing that you have violated any of these terms, or done anything with the software not covered by your licenses, your licenses can nonetheless continue if you come into full compliance with these terms, and take practical steps to correct past violations, within 32 days of receiving notice. Otherwise, all your licenses end immediately. ## No Liability ***As far as the law allows, the software comes as is, without any warranty or condition, and the licensor will not be liable to you for any damages arising out of these terms or the use or nature of the software, under any kind of legal claim.*** ## Definitions The **licensor** is the individual or entity offering these terms, and the **software** is the software the licensor makes available under these terms. **You** refers to the individual or entity agreeing to these terms. **Your company** is any legal entity, sole proprietorship, or other kind of organization that you work for, plus all organizations that have control over, are under the control of, or are under common control with that organization. **Control** means ownership of substantially all the assets of an entity, or the power to direct its management and policies by vote, contract, or otherwise. Control can be direct or indirect. **Your licenses** are all the licenses granted to you for the software under these terms. **Use** means anything you do with the software requiring one of your licenses. ================================================ FILE: README.md ================================================ # GitNexus ⚠️ **Important Notice:** GitNexus has NO official cryptocurrency, token, or coin. Any token/coin using the GitNexus name on Pump.fun or any other platform is **not affiliated with, endorsed by, or created by** this project or its maintainers.
Do not purchase any cryptocurrency claiming association with GitNexus. <div align="center"> <a href="https://trendshift.io/repositories/19809" target="_blank"> <img src="https://trendshift.io/api/badge/repositories/19809" alt="abhigyanpatwari%2FGitNexus | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/> </a> <h2>Join the official Discord to discuss ideas, issues etc!</h2> <a href="https://discord.gg/AAsRVT6fGb"> <img src="https://img.shields.io/discord/1477255801545429032?color=5865F2&logo=discord&logoColor=white" alt="Discord"/> </a> <a href="https://www.npmjs.com/package/gitnexus"> <img src="https://img.shields.io/npm/v/gitnexus.svg" alt="npm version"/> </a> <a href="https://polyformproject.org/licenses/noncommercial/1.0.0/"> <img src="https://img.shields.io/badge/License-PolyForm%20Noncommercial-blue.svg" alt="License: PolyForm Noncommercial"/> </a> </div> **Building a nervous system for agent context.** Indexes any codebase into a knowledge graph — every dependency, call chain, cluster, and execution flow — then exposes it through smart tools so AI agents never miss code. https://github.com/user-attachments/assets/172685ba-8e54-4ea7-9ad1-e31a3398da72 > *Like DeepWiki, but deeper.* DeepWiki helps you *understand* code. GitNexus lets you *analyze* it — because a knowledge graph tracks every relationship, not just descriptions. **TL;DR:** The **Web UI** is a quick way to chat with any repo. The **CLI + MCP** is how you make your AI agent actually reliable — it gives Cursor, Claude Code, and friends a deep architectural view of your codebase so they stop missing dependencies, breaking call chains, and shipping blind edits. Even smaller models get full architectural clarity, letting them compete with goliath models.
--- ## Star History [![Star History Chart](https://api.star-history.com/svg?repos=abhigyanpatwari/GitNexus&type=date&legend=top-left)](https://www.star-history.com/#abhigyanpatwari/GitNexus&type=date&legend=top-left) ## Two Ways to Use GitNexus | | **CLI + MCP** | **Web UI** | | ----------------- | -------------------------------------------------------------- | ------------------------------------------------------------ | | **What** | Index repos locally, connect AI agents via MCP | Visual graph explorer + AI chat in browser | | **For** | Daily development with Cursor, Claude Code, Windsurf, OpenCode, Codex | Quick exploration, demos, one-off analysis | | **Scale** | Full repos, any size | Limited by browser memory (~5k files), or unlimited via backend mode | | **Install** | `npm install -g gitnexus` | No install — [gitnexus.vercel.app](https://gitnexus.vercel.app) | | **Storage** | LadybugDB native (fast, persistent) | LadybugDB WASM (in-memory, per session) | | **Parsing** | Tree-sitter native bindings | Tree-sitter WASM | | **Privacy** | Everything local, no network | Everything in-browser, no server | > **Bridge mode:** `gitnexus serve` connects the two — the web UI auto-detects the local server and can browse all your CLI-indexed repos without re-uploading or re-indexing. --- ## CLI + MCP (recommended) The CLI indexes your repository and runs an MCP server that gives AI agents deep codebase awareness. ### Quick Start ```bash # Index your repo (run from repo root) npx gitnexus analyze ``` That's it. This indexes the codebase, installs agent skills, registers Claude Code hooks, and creates `AGENTS.md` / `CLAUDE.md` context files — all in one command. To configure MCP for your editor, run `npx gitnexus setup` once — or set it up manually below. ### MCP Setup `gitnexus setup` auto-detects your editors and writes the correct global MCP config. You only need to run it once.
### Editor Support | Editor | MCP | Skills | Hooks (auto-augment) | Support | | --------------------- | --- | ------ | -------------------- | -------------- | | **Claude Code** | Yes | Yes | Yes (PreToolUse + PostToolUse) | **Full** | | **Cursor** | Yes | Yes | — | MCP + Skills | | **Windsurf** | Yes | — | — | MCP | | **OpenCode** | Yes | Yes | — | MCP + Skills | | **Codex** | Yes | — | — | MCP | > **Claude Code** gets the deepest integration: MCP tools + agent skills + PreToolUse hooks that enrich searches with graph context + PostToolUse hooks that auto-reindex after commits. ### Community Integrations | Agent | Install | Source | |-------|---------|--------| | [pi](https://pi.dev) | `pi install npm:pi-gitnexus` | [pi-gitnexus](https://github.com/tintinweb/pi-gitnexus) | If you prefer manual configuration: **Claude Code** (full support — MCP + skills + hooks): ```bash claude mcp add gitnexus -- npx -y gitnexus@latest mcp ``` **Cursor** (`~/.cursor/mcp.json` — global, works for all projects): ```json { "mcpServers": { "gitnexus": { "command": "npx", "args": ["-y", "gitnexus@latest", "mcp"] } } } ``` **OpenCode** (`~/.config/opencode/config.json`): ```json { "mcp": { "gitnexus": { "command": "npx", "args": ["-y", "gitnexus@latest", "mcp"] } } } ``` **Codex** (`~/.codex/config.toml` for system scope, or `.codex/config.toml` for project scope): ```toml [mcp_servers.gitnexus] command = "npx" args = ["-y", "gitnexus@latest", "mcp"] ``` ### CLI Commands ```bash gitnexus setup # Configure MCP for your editors (one-time) gitnexus analyze [path] # Index a repository (or update stale index) gitnexus analyze --force # Force full re-index gitnexus analyze --skills # Generate repo-specific skill files from detected communities gitnexus analyze --skip-embeddings # Skip embedding generation (faster) gitnexus analyze --embeddings # Enable embedding generation (slower, better search) gitnexus analyze --verbose # Log skipped files when parsers are unavailable gitnexus mcp # Start 
MCP server (stdio) — serves all indexed repos gitnexus serve # Start local HTTP server (multi-repo) for web UI connection gitnexus list # List all indexed repositories gitnexus status # Show index status for current repo gitnexus clean # Delete index for current repo gitnexus clean --all --force # Delete all indexes gitnexus wiki [path] # Generate repository wiki from knowledge graph gitnexus wiki --model <model> # Wiki with custom LLM model (default: gpt-4o-mini) gitnexus wiki --base-url <url> # Wiki with custom LLM API base URL ``` ### What Your AI Agent Gets **7 tools** exposed via MCP: | Tool | What It Does | `repo` Param | | ------------------ | ----------------------------------------------------------------- | -------------- | | `list_repos` | Discover all indexed repositories | — | | `query` | Process-grouped hybrid search (BM25 + semantic + RRF) | Optional | | `context` | 360-degree symbol view — categorized refs, process participation | Optional | | `impact` | Blast radius analysis with depth grouping and confidence | Optional | | `detect_changes` | Git-diff impact — maps changed lines to affected processes | Optional | | `rename` | Multi-file coordinated rename with graph + text search | Optional | | `cypher` | Raw Cypher graph queries | Optional | > When only one repo is indexed, the `repo` parameter is optional. With multiple repos, specify which one: `query({query: "auth", repo: "my-app"})`. 
**Resources** for instant context: | Resource | Purpose | | ----------------------------------------- | ---------------------------------------------------- | | `gitnexus://repos` | List all indexed repositories (read this first) | | `gitnexus://repo/{name}/context` | Codebase stats, staleness check, and available tools | | `gitnexus://repo/{name}/clusters` | All functional clusters with cohesion scores | | `gitnexus://repo/{name}/cluster/{name}` | Cluster members and details | | `gitnexus://repo/{name}/processes` | All execution flows | | `gitnexus://repo/{name}/process/{name}` | Full process trace with steps | | `gitnexus://repo/{name}/schema` | Graph schema for Cypher queries | **2 MCP prompts** for guided workflows: | Prompt | What It Does | | ----------------- | ------------------------------------------------------------------------- | | `detect_impact` | Pre-commit change analysis — scope, affected processes, risk level | | `generate_map` | Architecture documentation from the knowledge graph with mermaid diagrams | **4 agent skills** installed to `.claude/skills/` automatically: - **Exploring** — Navigate unfamiliar code using the knowledge graph - **Debugging** — Trace bugs through call chains - **Impact Analysis** — Analyze blast radius before changes - **Refactoring** — Plan safe refactors using dependency mapping **Repo-specific skills** generated with `--skills`: When you run `gitnexus analyze --skills`, GitNexus detects the functional areas of your codebase (via Leiden community detection) and generates a `SKILL.md` file for each one under `.claude/skills/generated/`. Each skill describes a module's key files, entry points, execution flows, and cross-area connections — so your AI agent gets targeted context for the exact area of code you're working in. Skills are regenerated on each `--skills` run to stay current with the codebase. --- ## Multi-Repo MCP Architecture GitNexus uses a **global registry** so one MCP server can serve multiple indexed repos. 
No per-project MCP config needed — set it up once and it works everywhere. ```mermaid flowchart TD subgraph CLI [CLI Commands] Setup["gitnexus setup"] Analyze["gitnexus analyze"] Clean["gitnexus clean"] List["gitnexus list"] end subgraph Registry ["~/.gitnexus/"] RegFile["registry.json"] end subgraph Repos [Project Repos] RepoA[".gitnexus/ in repo A"] RepoB[".gitnexus/ in repo B"] end subgraph MCP [MCP Server] Server["server.ts"] Backend["LocalBackend"] Pool["Connection Pool"] ConnA["LadybugDB conn A"] ConnB["LadybugDB conn B"] end Setup -->|"writes global MCP config"| CursorConfig["~/.cursor/mcp.json"] Analyze -->|"registers repo"| RegFile Analyze -->|"stores index"| RepoA Clean -->|"unregisters repo"| RegFile List -->|"reads"| RegFile Server -->|"reads registry"| RegFile Server --> Backend Backend --> Pool Pool -->|"lazy open"| ConnA Pool -->|"lazy open"| ConnB ConnA -->|"queries"| RepoA ConnB -->|"queries"| RepoB ``` **How it works:** Each `gitnexus analyze` stores the index in `.gitnexus/` inside the repo (portable, gitignored) and registers a pointer in `~/.gitnexus/registry.json`. When an AI agent starts, the MCP server reads the registry and can serve any indexed repo. LadybugDB connections are opened lazily on first query and evicted after 5 minutes of inactivity (max 5 concurrent). If only one repo is indexed, the `repo` parameter is optional on all tools — agents don't need to change anything. --- ## Web UI (browser-based) A fully client-side graph explorer and AI chat. No server, no install — your code never leaves the browser. **Try it now:** [gitnexus.vercel.app](https://gitnexus.vercel.app) — drag & drop a ZIP and start exploring. 
<img width="2550" height="1343" alt="gitnexus_img" src="https://github.com/user-attachments/assets/cc5d637d-e0e5-48e6-93ff-5bcfdb929285" /> Or run locally: ```bash git clone https://github.com/abhigyanpatwari/gitnexus.git cd gitnexus/gitnexus-web npm install npm run dev ``` The web UI uses the same indexing pipeline as the CLI but runs entirely in WebAssembly (Tree-sitter WASM, LadybugDB WASM, in-browser embeddings). It's great for quick exploration but limited by browser memory for larger repos. **Local Backend Mode:** Run `gitnexus serve` and open the web UI locally — it auto-detects the server and shows all your indexed repos, with full AI chat support. No need to re-upload or re-index. The agent's tools (Cypher queries, search, code navigation) route through the backend HTTP API automatically. --- ## The Problem GitNexus Solves Tools like **Cursor**, **Claude Code**, **Cline**, **Roo Code**, and **Windsurf** are powerful — but they don't truly know your codebase structure. **What happens:** 1. AI edits `UserService.validate()` 2. Doesn't know 47 functions depend on its return type 3. **Breaking changes ship** ### Traditional Graph RAG vs GitNexus Traditional approaches give the LLM raw graph edges and hope it explores enough. 
GitNexus **precomputes structure at index time** — clustering, tracing, scoring — so tools return complete context in one call: ```mermaid flowchart TB subgraph Traditional["Traditional Graph RAG"] direction TB U1["User: What depends on UserService?"] U1 --> LLM1["LLM receives raw graph"] LLM1 --> Q1["Query 1: Find callers"] Q1 --> Q2["Query 2: What files?"] Q2 --> Q3["Query 3: Filter tests?"] Q3 --> Q4["Query 4: High-risk?"] Q4 --> OUT1["Answer after 4+ queries"] end subgraph GN["GitNexus Smart Tools"] direction TB U2["User: What depends on UserService?"] U2 --> TOOL["impact UserService upstream"] TOOL --> PRECOMP["Pre-structured response: 8 callers, 3 clusters, all 90%+ confidence"] PRECOMP --> OUT2["Complete answer, 1 query"] end ``` **Core innovation: Precomputed Relational Intelligence** - **Reliability** — LLM can't miss context, it's already in the tool response - **Token efficiency** — No 10-query chains to understand one function - **Model democratization** — Smaller LLMs work because tools do the heavy lifting --- ## How It Works GitNexus builds a complete knowledge graph of your codebase through a multi-phase indexing pipeline: 1. **Structure** — Walks the file tree and maps folder/file relationships 2. **Parsing** — Extracts functions, classes, methods, and interfaces using Tree-sitter ASTs 3. **Resolution** — Resolves imports, function calls, heritage, constructor inference, and `self`/`this` receiver types across files with language-aware logic 4. **Clustering** — Groups related symbols into functional communities 5. **Processes** — Traces execution flows from entry points through call chains 6. 
**Search** — Builds hybrid search indexes for fast retrieval ### Supported Languages | Language | Imports | Named Bindings | Exports | Heritage | Type Annotations | Constructor Inference | Config | Frameworks | Entry Points | |----------|---------|----------------|---------|----------|-----------------|---------------------|--------|------------|-------------| | TypeScript | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | JavaScript | ✓ | ✓ | ✓ | ✓ | — | ✓ | ✓ | ✓ | ✓ | | Python | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Java | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | — | ✓ | ✓ | | Kotlin | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | — | ✓ | ✓ | | C# | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Go | ✓ | — | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Rust | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | — | ✓ | ✓ | | PHP | ✓ | ✓ | ✓ | — | ✓ | ✓ | ✓ | ✓ | ✓ | | Ruby | ✓ | — | ✓ | ✓ | — | ✓ | — | ✓ | ✓ | | Swift | — | — | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | C | — | — | ✓ | — | ✓ | ✓ | — | ✓ | ✓ | | C++ | — | — | ✓ | ✓ | ✓ | ✓ | — | ✓ | ✓ | **Imports** — cross-file import resolution · **Named Bindings** — `import { X as Y }` / re-export tracking · **Exports** — public/exported symbol detection · **Heritage** — class inheritance, interfaces, mixins · **Type Annotations** — explicit type extraction for receiver resolution · **Constructor Inference** — infer receiver type from constructor calls (`self`/`this` resolution included for all languages) · **Config** — language toolchain config parsing (tsconfig, go.mod, etc.) 
· **Frameworks** — AST-based framework pattern detection · **Entry Points** — entry point scoring heuristics --- ## Tool Examples ### Impact Analysis ``` impact({target: "UserService", direction: "upstream", minConfidence: 0.8}) TARGET: Class UserService (src/services/user.ts) UPSTREAM (what depends on this): Depth 1 (WILL BREAK): handleLogin [CALLS 90%] -> src/api/auth.ts:45 handleRegister [CALLS 90%] -> src/api/auth.ts:78 UserController [CALLS 85%] -> src/controllers/user.ts:12 Depth 2 (LIKELY AFFECTED): authRouter [IMPORTS] -> src/routes/auth.ts ``` Options: `maxDepth`, `minConfidence`, `relationTypes` (`CALLS`, `IMPORTS`, `EXTENDS`, `IMPLEMENTS`), `includeTests` ### Process-Grouped Search ``` query({query: "authentication middleware"}) processes: - summary: "LoginFlow" priority: 0.042 symbol_count: 4 process_type: cross_community step_count: 7 process_symbols: - name: validateUser type: Function filePath: src/auth/validate.ts process_id: proc_login step_index: 2 definitions: - name: AuthConfig type: Interface filePath: src/types/auth.ts ``` ### Context (360-degree Symbol View) ``` context({name: "validateUser"}) symbol: uid: "Function:validateUser" kind: Function filePath: src/auth/validate.ts startLine: 15 incoming: calls: [handleLogin, handleRegister, UserController] imports: [authRouter] outgoing: calls: [checkPassword, createSession] processes: - name: LoginFlow (step 2/7) - name: RegistrationFlow (step 3/5) ``` ### Detect Changes (Pre-Commit) ``` detect_changes({scope: "all"}) summary: changed_count: 12 affected_count: 3 changed_files: 4 risk_level: medium changed_symbols: [validateUser, AuthService, ...] affected_processes: [LoginFlow, RegistrationFlow, ...] ``` ### Rename (Multi-File) ``` rename({symbol_name: "validateUser", new_name: "verifyUser", dry_run: true}) status: success files_affected: 5 total_edits: 8 graph_edits: 6 (high confidence) text_search_edits: 2 (review carefully) changes: [...] 
``` ### Cypher Queries ```cypher -- Find what calls auth functions with high confidence MATCH (c:Community {heuristicLabel: 'Authentication'})<-[:CodeRelation {type: 'MEMBER_OF'}]-(fn) MATCH (caller)-[r:CodeRelation {type: 'CALLS'}]->(fn) WHERE r.confidence > 0.8 RETURN caller.name, fn.name, r.confidence ORDER BY r.confidence DESC ``` --- ## Wiki Generation Generate LLM-powered documentation from your knowledge graph: ```bash # Requires an LLM API key (OPENAI_API_KEY, etc.) gitnexus wiki # Use a custom model or provider gitnexus wiki --model gpt-4o gitnexus wiki --base-url https://api.anthropic.com/v1 # Force full regeneration gitnexus wiki --force ``` The wiki generator reads the indexed graph structure, groups files into modules via LLM, generates per-module documentation pages, and creates an overview page — all with cross-references to the knowledge graph. --- ## Tech Stack | Layer | CLI | Web | | ------------------------- | ------------------------------------- | --------------------------------------- | | **Runtime** | Node.js (native) | Browser (WASM) | | **Parsing** | Tree-sitter native bindings | Tree-sitter WASM | | **Database** | LadybugDB native | LadybugDB WASM | | **Embeddings** | HuggingFace transformers.js (GPU/CPU) | transformers.js (WebGPU/WASM) | | **Search** | BM25 + semantic + RRF | BM25 + semantic + RRF | | **Agent Interface** | MCP (stdio) | LangChain ReAct agent | | **Visualization** | — | Sigma.js + Graphology (WebGL) | | **Frontend** | — | React 18, TypeScript, Vite, Tailwind v4 | | **Clustering** | Graphology | Graphology | | **Concurrency** | Worker threads + async | Web Workers + Comlink | --- ## Roadmap ### Actively Building - [ ] **LLM Cluster Enrichment** — Semantic cluster names via LLM API - [ ] **AST Decorator Detection** — Parse @Controller, @Get, etc. 
- [ ] **Incremental Indexing** — Only re-index changed files ### Recently Completed - [X] Constructor-Inferred Type Resolution, `self`/`this` Receiver Mapping - [X] Wiki Generation, Multi-File Rename, Git-Diff Impact Analysis - [X] Process-Grouped Search, 360-Degree Context, Claude Code Hooks - [X] Multi-Repo MCP, Zero-Config Setup, 13 Language Support - [X] Community Detection, Process Detection, Confidence Scoring - [X] Hybrid Search, Vector Index --- ## Security & Privacy - **CLI**: Everything runs locally on your machine. No network calls. Index stored in `.gitnexus/` (gitignored). Global registry at `~/.gitnexus/` stores only paths and metadata. - **Web**: Everything runs in your browser. No code uploaded to any server. API keys stored in localStorage only. - Open source — audit the code yourself. --- ## Acknowledgments - [Tree-sitter](https://tree-sitter.github.io/) — AST parsing - [LadybugDB](https://ladybugdb.com/) — Embedded graph database with vector support (formerly KuzuDB) - [Sigma.js](https://www.sigmajs.org/) — WebGL graph rendering - [transformers.js](https://huggingface.co/docs/transformers.js) — Browser ML - [Graphology](https://graphology.github.io/) — Graph data structures - [MCP](https://modelcontextprotocol.io/) — Model Context Protocol ================================================ FILE: compound-engineering.local.md ================================================ --- review_agents: [kieran-typescript-reviewer, pattern-recognition-specialist, architecture-strategist, data-integrity-guardian, security-sentinel, performance-oracle, code-simplicity-reviewer] plan_review_agents: [kieran-typescript-reviewer, architecture-strategist, code-simplicity-reviewer] voltagent_agents: [voltagent-lang:typescript-pro, voltagent-qa-sec:security-auditor, voltagent-data-ai:database-optimizer] --- # Review Context ## Project Overview GitNexus is a code intelligence tool that builds a knowledge graph from source code using tree-sitter AST parsing across 12 
languages and KuzuDB for graph storage. Two packages: `gitnexus/` (CLI/MCP, TypeScript) and `gitnexus-web/` (browser). ## Cross-Language Pattern Consistency (pattern-recognition-specialist) - 12 language-specific type extractors in `gitnexus/src/core/ingestion/type-extractors/` must follow identical patterns for: async unwrapping, constructor binding, namespace handling, nullable type stripping, for-loop element typing. - Past bugs: C#/Rust missing `await_expression` unwrapping that TypeScript handled correctly; PHP backslash namespace splitting inconsistent with other languages' `::` / `.` splitting. - When reviewing type extractor changes, verify the same pattern exists in ALL applicable language files — asymmetry is the #1 source of bugs. ## Data Integrity (data-integrity-guardian) - KuzuDB graph operations: schema in `gitnexus/src/core/kuzu/schema.ts`, adapter in `kuzu-adapter.ts`. - The ingestion pipeline writes symbols and relationships to the graph — changes to node/relation schemas or the ingestion pipeline can corrupt the index. - Known issue: KuzuDB `close()` hangs on Linux due to C++ destructor — use `detachKuzu()` pattern. - `lbug-adapter.ts` fallback path needs quote/newline escaping for Cypher injection prevention. ## Security (security-sentinel) - Cypher query construction in `lbug-adapter.ts` and `kuzu-adapter.ts` — watch for injection via unescaped user-provided symbol names. - CLI accepts `--repo` parameter and file paths — validate against path traversal. - MCP server exposes tools to external AI agents — all tool inputs are untrusted. ## Performance (performance-oracle) - Tree-sitter buffer size is adaptive (512KB–32MB) via `getTreeSitterBufferSize()` in `constants.ts`. - The ingestion pipeline processes entire repositories — O(n) per file with potential O(n²) in cross-file resolution. - KuzuDB batch inserts vs individual inserts matter for large repos. 
## Architecture (architecture-strategist) - Ingestion pipeline phases: structure → parsing → imports → calls → heritage → processes → type resolution. - Shared modules: `export-detection.ts`, `constants.ts`, `utils.ts` — changes here have wide blast radius. - `gitnexus-web` package drifts behind CLI — flag if a change should be mirrored. ## Voltagent Supplementary Agents Invoke these via the Agent tool alongside `/ce:review` for deeper specialist analysis. These cover gaps that compound-engineering agents don't: ### voltagent-lang:typescript-pro **When:** Changes touch type-resolution logic, generics, conditional types, or complex type-level programming in `type-env.ts`, `type-extractors/*.ts`, or `types.ts`. **Why:** The type resolution system uses advanced TypeScript patterns (discriminated unions, mapped types, recursive generics) that benefit from deep TS type-system review beyond what kieran-typescript-reviewer covers. ### voltagent-qa-sec:security-auditor **When:** Changes touch MCP tool handlers, Cypher query construction, CLI argument parsing, or any code that processes external input. **Why:** GitNexus is an MCP server — all tool inputs come from untrusted AI agents. Systematic OWASP-level audit catches injection vectors that spot-checking misses. Past finding: `lbug-adapter.ts` fallback path had unescaped newlines in Cypher queries. ### voltagent-data-ai:database-optimizer **When:** Changes touch `kuzu-adapter.ts`, `schema.ts`, `lbug-adapter.ts`, or any Cypher query construction/execution. **Why:** No CE agent specializes in graph database optimization. KuzuDB batch insert patterns, index usage, and query planning directly affect analysis speed on large repos. ## Review Tooling - Use `gitnexus_impact()` before approving changes to any symbol — check d=1 (WILL BREAK) callers. - Use `gitnexus_detect_changes({scope: "compare", base_ref: "main"})` to map PR diffs to affected execution flows. 
- Use claude-mem to surface past architectural decisions relevant to the code under review. ================================================ FILE: eval/.gitignore ================================================ # Evaluation results (large, should not be committed) results/ *.traj.json preds.json # Python __pycache__/ *.pyc *.egg-info/ .eggs/ dist/ build/ # Environment .env .venv/ ================================================ FILE: eval/README.md ================================================ # GitNexus SWE-bench Evaluation Harness Evaluate whether GitNexus code intelligence improves AI agent performance on real software engineering tasks. Runs SWE-bench instances across multiple models and compares baseline (no graph) vs GitNexus-enhanced configurations. ## What This Tests **Hypothesis**: Giving AI agents structural code intelligence (call graphs, execution flows, blast radius analysis) improves their ability to resolve real GitHub issues — measured by resolve rate, cost, and efficiency. **Evaluation modes:** | Mode | What the agent gets | |------|-------------------| | `baseline` | Standard bash tools (grep, find, cat, sed) — control group | | `native` | Baseline + explicit GitNexus tools via eval-server (~100ms) | | `native_augment` | Native tools + grep results automatically enriched with graph context (**recommended**) | > **Recommended**: Use `native_augment` mode. It mirrors the Claude Code model — the agent gets both explicit GitNexus tools (fast bash commands) AND automatic enrichment of grep results with callers, callees, and execution flows. The agent decides when to use explicit tools vs rely on enriched search output. 
**Models supported:** - Claude 3.5 Haiku, Claude Sonnet 4, Claude Opus 4 - MiniMax M1 2.5 - GLM 4.7, GLM 5 - Any model supported by litellm (add a YAML config) ## Prerequisites - Python 3.11+ - Docker (for SWE-bench containers) - Node.js 18+ (for GitNexus) - API keys for your chosen models ## Setup ```bash cd eval # Install dependencies pip install -e . # Set up API keys — copy the template and fill in your keys cp .env.example .env # Then edit .env and paste your key(s) ``` All models are routed through **OpenRouter** by default, so a single `OPENROUTER_API_KEY` is all you need. To use provider APIs directly (Anthropic, ZhipuAI, etc.), edit the model YAML in `configs/models/` and set the corresponding key in `.env`. ```bash # Pull SWE-bench Docker images (pulled on-demand, but you can pre-pull) docker pull swebench/sweb.eval.x86_64.django_1776_django-16527:latest ``` ## Quick Start ### Debug a single instance ```bash # Fastest way to verify everything works python run_eval.py debug -m claude-haiku -i django__django-16527 --subset lite ``` ### Run a single configuration ```bash # 5 instances, Claude Sonnet, native_augment mode (default) python run_eval.py single -m claude-sonnet --subset lite --slice 0:5 # Baseline comparison (no GitNexus) python run_eval.py single -m claude-sonnet --mode baseline --subset lite --slice 0:5 # Full Lite benchmark, 4 parallel workers python run_eval.py single -m claude-sonnet --subset lite -w 4 ``` ### Run the full matrix ```bash # All models x all modes python run_eval.py matrix --subset lite -w 4 # Key comparison: baseline vs native_augment python run_eval.py matrix -m claude-sonnet -m claude-haiku --modes baseline --modes native_augment --subset lite --slice 0:50 ``` ### Analyze results ```bash # Summary table python -m analysis.analyze_results summary results/ # Compare modes for a specific model python -m analysis.analyze_results compare-modes results/ -m claude-sonnet # GitNexus tool usage analysis python -m analysis.analyze_results
gitnexus-usage results/ # Export as CSV for further analysis python -m analysis.analyze_results summary results/ --format csv > results.csv # Run official SWE-bench test evaluation python -m analysis.analyze_results summary results/ --swebench-eval ``` ### List available configurations ```bash python run_eval.py list-configs ``` ## Architecture ``` eval/ run_eval.py # Main entry point (single, matrix, debug commands) agents/ gitnexus_agent.py # GitNexusAgent: extends DefaultAgent with augmentation + metrics environments/ gitnexus_docker.py # Docker env with GitNexus + eval-server + standalone tool scripts bridge/ gitnexus_tools.sh # Bash wrappers (legacy — now standalone scripts are installed directly) mcp_bridge.py # Legacy MCP bridge (kept for reference) prompts/ system_baseline.jinja # System: persona + format rules instance_baseline.jinja # Instance: task + workflow system_native.jinja # System: + GitNexus tool reference instance_native.jinja # Instance: + GitNexus debugging workflow system_native_augment.jinja # System: + GitNexus tools + grep enrichment docs instance_native_augment.jinja # Instance: + GitNexus workflow + risk assessment configs/ models/ # Per-model YAML configs modes/ # Per-mode YAML configs (baseline, native, native_augment) analysis/ analyze_results.py # Post-run comparative analysis results/ # Output directory (gitignored) ``` ## How It Works ### Template structure mini-swe-agent requires two Jinja templates: - **system_template** → system message: persona, format rules, tool reference (static) - **instance_template** → first user message: task, workflow, rules, examples (contains `{{task}}`) Each mode has a `system_{mode}.jinja` + `instance_{mode}.jinja` pair. The agent loads both automatically based on the configured mode. ### Per-instance flow 1. Docker container starts with SWE-bench instance (repo at specific commit) 2. **GitNexus setup**: Node.js + gitnexus installed, `gitnexus analyze` runs (or restores from cache) 3. 
**Eval-server starts**: `gitnexus eval-server` daemon (persistent HTTP server, keeps LadybugDB warm) 4. **Standalone tool scripts installed** in `/usr/local/bin/` — works with `subprocess.run` (no `.bashrc` needed) 5. Agent runs with the configured model + system prompt + GitNexus tools 6. Agent's patch is extracted as a git diff 7. Metrics collected: cost, tokens, tool calls, GitNexus usage, augmentation stats ### Tool architecture ``` Agent → bash command → /usr/local/bin/gitnexus-query → curl localhost:4848/tool/query (fast path: eval-server, ~100ms) → npx gitnexus query (fallback: cold CLI, ~5-10s) ``` Each tool script in `/usr/local/bin/` is standalone — no sourcing, no env inheritance needed. This is critical because mini-swe-agent runs every command via `subprocess.run` in a fresh subshell. ### Eval-server The eval-server is a lightweight HTTP daemon that: - Keeps LadybugDB warm in memory (no cold start per tool call) - Returns LLM-friendly text (not raw JSON — saves tokens) - Includes next-step hints to guide tool chaining (query → context → impact → fix) - Auto-shuts down after idle timeout ### Index caching SWE-bench repos repeat (Django has 200+ instances at different commits). The harness caches GitNexus indexes per `(repo, commit)` hash in `~/.gitnexus-eval-cache/` to avoid redundant re-indexing. ### Grep augmentation (native_augment mode) When the agent runs `grep` or `rg`, the observation is post-processed: the agent class calls `gitnexus-augment` on the search pattern and appends `[GitNexus]` annotations showing callers, callees, and execution flows for matched symbols. This mirrors the Claude Code / Cursor hook integration. 
## Adding Models Create a YAML file in `configs/models/`: ```yaml # configs/models/my-model.yaml model: model_name: "openrouter/provider/model-name" cost_tracking: "ignore_errors" # if not in litellm's cost DB model_kwargs: max_tokens: 8192 temperature: 0 ``` The model name follows [litellm conventions](https://docs.litellm.ai/docs/providers). ## Metrics Collected | Metric | Description | |--------|-------------| | Patch Rate | % of instances where agent produced a patch | | Resolve Rate | % of instances where patch passes tests (requires --swebench-eval) | | Total Cost | API cost across all instances | | Avg Cost/Instance | Cost efficiency | | API Calls | Number of LLM calls | | GN Tool Calls | How many GitNexus tools the agent used | | Augment Hits | How many grep/find results got enriched | | Augment Hit Rate | % of search commands that got useful enrichment | ================================================ FILE: eval/__init__.py ================================================ # GitNexus SWE-bench Evaluation Harness ================================================ FILE: eval/agents/__init__.py ================================================ ================================================ FILE: eval/agents/gitnexus_agent.py ================================================ """ GitNexus-Enhanced Agent for SWE-bench Evaluation Extends mini-swe-agent's DefaultAgent with: 1. Native augment mode: GitNexus tools via eval-server + grep enrichment (recommended) 2. Native mode: GitNexus tools via eval-server only 3. 
class GitNexusAgentConfig(AgentConfig):
    """Extended config for GitNexus evaluation agent.

    Adds the GitNexus-specific settings read by ``GitNexusAgent`` on top of
    whatever ``AgentConfig`` already declares.
    """
    # Evaluation mode; the default is the control group (no GitNexus).
    gitnexus_mode: GitNexusMode = GitNexusMode.BASELINE
    # Forwarded as the "timeout" of the `gitnexus-augment` command issued by
    # GitNexusAgent._maybe_augment (presumably seconds — confirm against
    # the environment's execute() contract).
    augment_timeout: float = 5.0
    # Search patterns shorter than this are never sent to `gitnexus-augment`.
    augment_min_pattern_length: int = 3
    # When true, GitNexusAgent.execute_actions counts gitnexus-* tool
    # invocations found in each action's command.
    track_gitnexus_usage: bool = True
""" def __init__(self, model: Model, env: Environment, *, config_class: type = GitNexusAgentConfig, **kwargs): mode = kwargs.get("gitnexus_mode", GitNexusMode.BASELINE) if isinstance(mode, str): mode = GitNexusMode(mode) # Load system template system_file = PROMPTS_DIR / f"system_{mode.value}.jinja" if system_file.exists() and "system_template" not in kwargs: kwargs["system_template"] = system_file.read_text() # Load instance template instance_file = PROMPTS_DIR / f"instance_{mode.value}.jinja" if instance_file.exists() and "instance_template" not in kwargs: kwargs["instance_template"] = instance_file.read_text() super().__init__(model, env, config_class=config_class, **kwargs) self.gitnexus_mode = mode self.gitnexus_metrics = GitNexusMetrics() def execute_actions(self, message: dict) -> list[dict]: """Execute actions with optional GitNexus augmentation and tracking.""" if self.config.track_gitnexus_usage: self._track_tool_usage(message) outputs = [self.env.execute(action) for action in message.get("extra", {}).get("actions", [])] # Augment grep/find observations in NATIVE_AUGMENT mode if self.gitnexus_mode == GitNexusMode.NATIVE_AUGMENT: actions = message.get("extra", {}).get("actions", []) for i, (action, output) in enumerate(zip(actions, outputs)): augmented = self._maybe_augment(action, output) if augmented: outputs[i] = augmented return self.add_messages( *self.model.format_observation_messages(message, outputs, self.get_template_vars()) ) def _maybe_augment(self, action: dict, output: dict) -> dict | None: """ If the action is a search command (grep, find, rg, ag), augment the output with GitNexus knowledge graph context. 
""" command = action.get("command", "") if not command: return None pattern = self._extract_search_pattern(command) if not pattern or len(pattern) < self.config.augment_min_pattern_length: return None start = time.time() try: augment_result = self.env.execute({ "command": f'gitnexus-augment "{pattern}" 2>&1 || true', "timeout": self.config.augment_timeout, }) elapsed = time.time() - start self.gitnexus_metrics.augmentation_calls += 1 self.gitnexus_metrics.augmentation_time += elapsed augment_text = augment_result.get("output", "").strip() if augment_text and "[GitNexus]" in augment_text: original_output = output.get("output", "") output = dict(output) output["output"] = f"{original_output}\n\n{augment_text}" self.gitnexus_metrics.augmentation_hits += 1 return output except Exception as e: logger.debug(f"Augmentation failed for pattern '{pattern}': {e}") self.gitnexus_metrics.augmentation_errors += 1 return None @staticmethod def _extract_search_pattern(command: str) -> str | None: """Extract the search pattern from a grep/find/rg command.""" patterns = [ r'(?:grep|rg|ag)\s+(?:-[a-zA-Z]*\s+)*["\']([^"\']+)["\']', r'(?:grep|rg|ag)\s+(?:-[a-zA-Z]*\s+)*(\S+)', ] for pat in patterns: match = re.search(pat, command) if match: result = match.group(1) if result.startswith("/") or result.startswith("."): continue if result.startswith("-"): continue return result return None def _track_tool_usage(self, message: dict): """Track which GitNexus tools the agent uses.""" for action in message.get("extra", {}).get("actions", []): command = action.get("command", "") if "gitnexus-query" in command: self.gitnexus_metrics.tool_calls["query"] += 1 elif "gitnexus-context" in command: self.gitnexus_metrics.tool_calls["context"] += 1 elif "gitnexus-impact" in command: self.gitnexus_metrics.tool_calls["impact"] += 1 elif "gitnexus-cypher" in command: self.gitnexus_metrics.tool_calls["cypher"] += 1 elif "gitnexus-overview" in command: self.gitnexus_metrics.tool_calls["overview"] += 1 def 
class GitNexusMetrics:
    """Accumulates per-instance GitNexus counters and timings for an eval run."""

    def __init__(self):
        # One counter per explicit GitNexus tool, all starting at zero.
        tool_names = ("query", "context", "impact", "cypher", "overview")
        self.tool_calls: dict[str, int] = dict.fromkeys(tool_names, 0)

        # Grep-augmentation bookkeeping.
        self.augmentation_calls: int = 0
        self.augmentation_hits: int = 0
        self.augmentation_errors: int = 0
        self.augmentation_time: float = 0.0

        # Time attributed to indexing.
        self.index_time: float = 0.0

    @property
    def total_tool_calls(self) -> int:
        """Sum of all per-tool counters."""
        total = 0
        for count in self.tool_calls.values():
            total += count
        return total

    def to_dict(self) -> dict:
        """Return a plain-dict snapshot; timings are rounded to 2 decimals."""
        snapshot: dict = {}
        snapshot["tool_calls"] = dict(self.tool_calls)
        snapshot["total_tool_calls"] = self.total_tool_calls
        snapshot["augmentation_calls"] = self.augmentation_calls
        snapshot["augmentation_hits"] = self.augmentation_hits
        snapshot["augmentation_errors"] = self.augmentation_errors
        snapshot["augmentation_time_seconds"] = round(self.augmentation_time, 2)
        snapshot["index_time_seconds"] = round(self.index_time, 2)
        return snapshot
def parse_run_id(run_id: str) -> tuple[str, str]:
    """
    Split a run id of the form ``<model>_<mode>`` into ``(model, mode)``.

    Recognised modes are the current evaluation modes (``baseline``,
    ``native``, ``native_augment``) plus the legacy names (``mcp``,
    ``augment``, ``full``) so older result directories still parse.

    Model names may themselves contain ``_`` or ``-`` (e.g. ``minimax-2.5``),
    and one mode (``native_augment``) contains an underscore, so the mode is
    matched as a *suffix*, longest candidate first. Splitting on the last
    ``_`` would mis-parse ``x_native_augment`` as model ``x_native`` with
    mode ``augment``.

    Returns:
        ``(model, mode)``; ``(run_id, "unknown")`` when no known mode suffix
        is present.
    """
    known_modes = ("baseline", "native", "native_augment", "mcp", "augment", "full")

    # Longest first so "_native_augment" wins over "_augment".
    for mode in sorted(known_modes, key=len, reverse=True):
        suffix = f"_{mode}"
        if run_id.endswith(suffix):
            return run_id[: -len(suffix)], mode

    return run_id, "unknown"
trajectories costs = [] api_calls = [] gn_tool_calls = [] gn_augment_hits = [] gn_augment_calls = [] for instance_id, traj in trajectories.items(): info = traj.get("info", {}) model_stats = info.get("model_stats", {}) costs.append(model_stats.get("instance_cost", 0)) api_calls.append(model_stats.get("api_calls", 0)) gn = info.get("gitnexus", {}).get("metrics", {}) if gn: gn_tool_calls.append(gn.get("total_tool_calls", 0)) gn_augment_hits.append(gn.get("augmentation_hits", 0)) gn_augment_calls.append(gn.get("augmentation_calls", 0)) # Also try summary-level metrics if not costs and summary: results = summary.get("results", []) for r in results: costs.append(r.get("cost", 0)) api_calls.append(r.get("n_calls", 0)) gn = r.get("gitnexus_metrics", {}) if gn: gn_tool_calls.append(gn.get("total_tool_calls", 0)) gn_augment_hits.append(gn.get("augmentation_hits", 0)) gn_augment_calls.append(gn.get("augmentation_calls", 0)) total_cost = sum(costs) total_calls = sum(api_calls) return { "n_instances": n_instances, "n_with_patch": n_with_patch, "patch_rate": n_with_patch / max(n_instances, 1), "total_cost": total_cost, "avg_cost": total_cost / max(n_instances, 1), "total_api_calls": total_calls, "avg_api_calls": total_calls / max(n_instances, 1), "total_gn_tool_calls": sum(gn_tool_calls), "avg_gn_tool_calls": sum(gn_tool_calls) / max(len(gn_tool_calls), 1) if gn_tool_calls else 0, "total_augment_hits": sum(gn_augment_hits), "total_augment_calls": sum(gn_augment_calls), "augment_hit_rate": sum(gn_augment_hits) / max(sum(gn_augment_calls), 1) if gn_augment_calls else 0, } def run_swebench_evaluation(results_dir: Path, run_id: str, subset: str = "lite") -> dict | None: """ Run the official SWE-bench evaluation on predictions. 
Requires: pip install swebench """ preds_path = results_dir / run_id / "preds.json" if not preds_path.exists(): return None dataset_mapping = { "lite": "princeton-nlp/SWE-Bench_Lite", "verified": "princeton-nlp/SWE-Bench_Verified", "full": "princeton-nlp/SWE-Bench", } try: eval_output = results_dir / run_id / "swebench_eval" cmd = [ sys.executable, "-m", "swebench.harness.run_evaluation", "--dataset_name", dataset_mapping.get(subset, subset), "--predictions_path", str(preds_path), "--max_workers", "4", "--run_id", run_id, "--output_dir", str(eval_output), ] logger.info(f"Running SWE-bench evaluation for {run_id}...") result = subprocess.run(cmd, capture_output=True, text=True, timeout=600) if result.returncode == 0: # Parse evaluation results report_path = eval_output / run_id / "results.json" if report_path.exists(): return json.loads(report_path.read_text()) logger.error(f"SWE-bench eval failed: {result.stderr[:500]}") return None except Exception as e: logger.error(f"SWE-bench eval error: {e}") return None # ─── CLI Commands ─────────────────────────────────────────────────────────── @app.command() def summary( results_dir: str = typer.Argument(..., help="Path to results directory"), format: str = typer.Option("table", "--format", help="Output format: table, markdown, json, csv"), swebench_eval: bool = typer.Option(False, "--swebench-eval", help="Run official SWE-bench test evaluation"), subset: str = typer.Option("lite", "--subset", help="SWE-bench subset (for --swebench-eval)"), ): """Generate comparative analysis of evaluation results.""" results_path = Path(results_dir) if not results_path.exists(): console.print(f"[red]Results directory not found: {results_path}[/red]") raise typer.Exit(1) runs = load_run_results(results_path) if not runs: console.print("[yellow]No evaluation results found[/yellow]") raise typer.Exit(0) console.print(f"\n[bold]Found {len(runs)} evaluation runs[/bold]\n") # Compute metrics per run all_metrics = {} for run_id, run_data in 
@app.command()
def compare_modes(
    results_dir: str = typer.Argument(..., help="Path to results directory"),
    model: str = typer.Option(..., "-m", "--model", help="Model to compare across modes"),
):
    """Compare modes for a specific model (baseline vs native vs native_augment)."""
    # Column order. The current evaluation modes come first (they mirror
    # GitNexusMode in eval/agents/gitnexus_agent.py); the legacy names follow
    # so result directories produced by older harness versions still render.
    mode_order = ("baseline", "native", "native_augment", "mcp", "augment", "full")

    results_path = Path(results_dir)
    if not results_path.exists():
        # Same guard as `summary`; otherwise load_run_results raises on iterdir().
        console.print(f"[red]Results directory not found: {results_path}[/red]")
        raise typer.Exit(1)

    runs = load_run_results(results_path)

    # Keep only the runs whose parsed model matches, keyed by their mode.
    metrics = {}
    for run_id, run_data in runs.items():
        run_model, mode = parse_run_id(run_id)
        if run_model == model:
            metrics[mode] = compute_metrics(run_data)

    if not metrics:
        console.print(f"[yellow]No results found for model: {model}[/yellow]")
        raise typer.Exit(1)

    console.print(f"\n[bold]Mode comparison for {model}[/bold]\n")

    present_modes = [mode for mode in mode_order if mode in metrics]

    # Build the comparison table: one column per mode that has results.
    table = Table(title=f"Mode Comparison: {model}")
    table.add_column("Metric", style="bold")
    for mode in present_modes:
        table.add_column(mode, justify="right")

    # Format spec -> renderer; unlisted specs (e.g. "d") fall back to str().
    formatters = {
        ".1%": lambda v: f"{v:.1%}",
        "$.4f": lambda v: f"${v:.4f}",
        ".1f": lambda v: f"{v:.1f}",
    }

    rows = [
        ("Instances", "n_instances", "d"),
        ("With Patch", "n_with_patch", "d"),
        ("Patch Rate", "patch_rate", ".1%"),
        ("Total Cost", "total_cost", "$.4f"),
        ("Avg Cost", "avg_cost", "$.4f"),
        ("Total API Calls", "total_api_calls", "d"),
        ("Avg API Calls", "avg_api_calls", ".1f"),
        ("GN Tool Calls", "total_gn_tool_calls", "d"),
        ("Augment Hits", "total_augment_hits", "d"),
        ("Augment Hit Rate", "augment_hit_rate", ".1%"),
    ]

    for label, key, fmt in rows:
        render = formatters.get(fmt, str)
        table.add_row(label, *(render(metrics[mode].get(key, 0)) for mode in present_modes))

    # Relative change versus baseline, printed as console lines above the table.
    if "baseline" in metrics:
        baseline_cost = metrics["baseline"]["avg_cost"]
        baseline_calls = metrics["baseline"]["avg_api_calls"]

        table.add_section()
        for mode in present_modes:
            if mode == "baseline":
                continue

            mode_cost = metrics[mode]["avg_cost"]
            mode_calls = metrics[mode]["avg_api_calls"]

            # max(...) floors the denominator so a zero baseline cannot divide by zero.
            cost_delta = ((mode_cost - baseline_cost) / max(baseline_cost, 0.001)) * 100
            calls_delta = ((mode_calls - baseline_calls) / max(baseline_calls, 1)) * 100

            cost_str = f"{cost_delta:+.1f}%"
            calls_str = f"{calls_delta:+.1f}%"

            # Color-code: negative is good (cheaper/fewer calls)
            cost_color = "green" if cost_delta < 0 else "red"
            calls_color = "green" if calls_delta < 0 else "red"

            console.print(f" {mode} vs baseline: cost [{cost_color}]{cost_str}[/{cost_color}], calls [{calls_color}]{calls_str}[/{calls_color}]")

    console.print(table)
sorted(runs.items()): _, mode = parse_run_id(run_id) if mode == "baseline": continue # Aggregate tool calls across trajectories tool_totals: dict[str, int] = {"query": 0, "context": 0, "impact": 0, "cypher": 0, "overview": 0} augment_hits = 0 for traj in run_data.get("trajectories", {}).values(): gn = traj.get("info", {}).get("gitnexus", {}).get("metrics", {}) for tool, count in gn.get("tool_calls", {}).items(): tool_totals[tool] = tool_totals.get(tool, 0) + count augment_hits += gn.get("augmentation_hits", 0) # Also check summary for r in run_data.get("summary", {}).get("results", []): gn = r.get("gitnexus_metrics", {}) for tool, count in gn.get("tool_calls", {}).items(): tool_totals[tool] = tool_totals.get(tool, 0) + count augment_hits += gn.get("augmentation_hits", 0) total = sum(tool_totals.values()) if total > 0 or augment_hits > 0: table.add_row( run_id, str(tool_totals.get("query", 0)), str(tool_totals.get("context", 0)), str(tool_totals.get("impact", 0)), str(tool_totals.get("cypher", 0)), str(total), str(augment_hits), ) console.print(table) # ─── Output Formatters ───────────────────────────────────────────────────── def _print_table(all_metrics: dict): """Print rich table summary.""" table = Table(title="Evaluation Results") table.add_column("Run", style="bold") table.add_column("Model") table.add_column("Mode") table.add_column("N", justify="right") table.add_column("Patched", justify="right") table.add_column("Rate", justify="right") table.add_column("Cost", justify="right") table.add_column("Calls", justify="right") table.add_column("GN Tools", justify="right") for run_id, m in sorted(all_metrics.items()): resolved_str = "" if "resolve_rate" in m: resolved_str = f" ({m['resolve_rate']:.0%})" table.add_row( run_id, m["model"], m["mode"], str(m["n_instances"]), str(m["n_with_patch"]), f"{m['patch_rate']:.0%}{resolved_str}", f"${m['total_cost']:.2f}", str(m["total_api_calls"]), str(m["total_gn_tool_calls"]) if m["total_gn_tool_calls"] > 0 else "-", ) 
console.print(table) def _print_markdown(all_metrics: dict): """Print markdown table.""" print("| Run | Model | Mode | N | Patched | Rate | Cost | Calls | GN Tools |") print("|-----|-------|------|---|---------|------|------|-------|----------|") for run_id, m in sorted(all_metrics.items()): gn = str(m["total_gn_tool_calls"]) if m["total_gn_tool_calls"] > 0 else "-" print(f"| {run_id} | {m['model']} | {m['mode']} | {m['n_instances']} | {m['n_with_patch']} | {m['patch_rate']:.0%} | ${m['total_cost']:.2f} | {m['total_api_calls']} | {gn} |") def _print_csv(all_metrics: dict): """Print CSV output.""" print("run_id,model,mode,n_instances,n_with_patch,patch_rate,total_cost,avg_cost,total_api_calls,avg_api_calls,total_gn_tool_calls,total_augment_hits,augment_hit_rate") for run_id, m in sorted(all_metrics.items()): print( f"{run_id},{m['model']},{m['mode']},{m['n_instances']},{m['n_with_patch']}," f"{m['patch_rate']:.4f},{m['total_cost']:.4f},{m['avg_cost']:.4f}," f"{m['total_api_calls']},{m['avg_api_calls']:.1f},{m['total_gn_tool_calls']}," f"{m['total_augment_hits']},{m['augment_hit_rate']:.4f}" ) if __name__ == "__main__": logging.basicConfig(level=logging.INFO) app() ================================================ FILE: eval/bridge/__init__.py ================================================ ================================================ FILE: eval/bridge/gitnexus_tools.sh ================================================ #!/bin/bash # GitNexus CLI tool wrappers for SWE-bench evaluation # # These functions call the GitNexus eval-server (HTTP daemon) for near-instant # tool responses. The eval-server keeps KuzuDB warm in memory. # # If the eval-server is not running, falls back to direct CLI commands. 
#
# Usage:
#   gitnexus-query "how does authentication work"
#   gitnexus-context "validateUser"
#   gitnexus-impact "AuthService" upstream
#   gitnexus-cypher "MATCH (n:Function) RETURN n.name LIMIT 10"
#   gitnexus-overview

GITNEXUS_EVAL_PORT="${GITNEXUS_EVAL_PORT:-4848}"
GITNEXUS_EVAL_URL="http://127.0.0.1:${GITNEXUS_EVAL_PORT}"

# Escape a string so it can be embedded between double quotes in a JSON
# document: backslash, double quote, newline, carriage return and tab.
# Without this, any argument containing a quote (e.g. a Cypher query with
# {type: "CALLS"}) produces invalid JSON, or lets the caller inject extra keys.
# Other control characters are passed through unchanged.
_gitnexus_json_escape() {
    local s="$1"
    s=${s//\\/\\\\}        # must run first so later escapes are not doubled
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    printf '%s' "$s"
}

# _gitnexus_call <tool> <json_body>
# POST the JSON body to the eval-server; if that fails or returns nothing,
# fall back to the equivalent `npx gitnexus` CLI command.
_gitnexus_call() {
    local tool="$1"
    shift
    local json_body="$1"

    # Try eval-server first (fastest path — KuzuDB stays warm)
    local result
    result=$(curl -sf -X POST "${GITNEXUS_EVAL_URL}/tool/${tool}" \
        -H "Content-Type: application/json" \
        -d "${json_body}" 2>/dev/null)

    if [ $? -eq 0 ] && [ -n "$result" ]; then
        echo "$result"
        return 0
    fi

    # Fallback: direct CLI (cold start, slower but always works).
    # The arguments are recovered from the JSON body with python3.
    case "$tool" in
        query)
            local q=$(echo "$json_body" | python3 -c "import sys,json; print(json.load(sys.stdin).get('query',''))" 2>/dev/null)
            npx gitnexus query "$q" 2>&1
            ;;
        context)
            local n=$(echo "$json_body" | python3 -c "import sys,json; print(json.load(sys.stdin).get('name',''))" 2>/dev/null)
            npx gitnexus context "$n" 2>&1
            ;;
        impact)
            local t=$(echo "$json_body" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('target',''))" 2>/dev/null)
            local d=$(echo "$json_body" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('direction','upstream'))" 2>/dev/null)
            npx gitnexus impact "$t" --direction "$d" 2>&1
            ;;
        cypher)
            local cq=$(echo "$json_body" | python3 -c "import sys,json; print(json.load(sys.stdin).get('query',''))" 2>/dev/null)
            npx gitnexus cypher "$cq" 2>&1
            ;;
        *)
            echo "Unknown tool: $tool" >&2
            return 1
            ;;
    esac
}

# gitnexus-query <query> [task_context] [goal]
gitnexus-query() {
    local query="$1"
    local task_context="${2:-}"
    local goal="${3:-}"

    if [ -z "$query" ]; then
        echo "Usage: gitnexus-query <query> [task_context] [goal]"
        echo "Search the code knowledge graph for execution flows related to a concept."
        echo ""
        echo "Examples:"
        echo ' gitnexus-query "authentication flow"'
        echo ' gitnexus-query "database connection" "fixing connection pool leak"'
        return 1
    fi

    # Optional keys are only added when the caller supplied them.
    local args
    args="{\"query\": \"$(_gitnexus_json_escape "$query")\""
    [ -n "$task_context" ] && args="$args, \"task_context\": \"$(_gitnexus_json_escape "$task_context")\""
    [ -n "$goal" ] && args="$args, \"goal\": \"$(_gitnexus_json_escape "$goal")\""
    args="$args}"

    _gitnexus_call query "$args"
}

# gitnexus-context <symbol_name> [file_path]
gitnexus-context() {
    local name="$1"
    local file_path="${2:-}"

    if [ -z "$name" ]; then
        echo "Usage: gitnexus-context <symbol_name> [file_path]"
        echo "Get a 360-degree view of a code symbol: callers, callees, processes, file location."
        echo ""
        echo "Examples:"
        echo ' gitnexus-context "validateUser"'
        echo ' gitnexus-context "AuthService" "src/auth/service.py"'
        return 1
    fi

    local args
    args="{\"name\": \"$(_gitnexus_json_escape "$name")\""
    [ -n "$file_path" ] && args="$args, \"file_path\": \"$(_gitnexus_json_escape "$file_path")\""
    args="$args}"

    _gitnexus_call context "$args"
}

# gitnexus-impact <symbol_name> [upstream|downstream]
gitnexus-impact() {
    local target="$1"
    local direction="${2:-upstream}"

    if [ -z "$target" ]; then
        echo "Usage: gitnexus-impact <symbol_name> [upstream|downstream]"
        echo "Analyze the blast radius of changing a code symbol."
        echo ""
        echo " upstream = what depends on this (what breaks if you change it)"
        echo " downstream = what this depends on (what it uses)"
        echo ""
        echo "Examples:"
        echo ' gitnexus-impact "AuthService" upstream'
        echo ' gitnexus-impact "validateUser" downstream'
        return 1
    fi

    _gitnexus_call impact "{\"target\": \"$(_gitnexus_json_escape "$target")\", \"direction\": \"$(_gitnexus_json_escape "$direction")\"}"
}

# gitnexus-cypher <cypher_query>
gitnexus-cypher() {
    local query="$1"

    if [ -z "$query" ]; then
        echo "Usage: gitnexus-cypher <cypher_query>"
        echo "Execute a raw Cypher query against the code knowledge graph."
        echo ""
        echo "Schema: Nodes: File, Function, Class, Method, Interface, Community, Process"
        echo "Edges via CodeRelation.type: CALLS, IMPORTS, EXTENDS, IMPLEMENTS, DEFINES, MEMBER_OF, STEP_IN_PROCESS"
        echo ""
        echo "Examples:"
        echo " gitnexus-cypher 'MATCH (a)-[:CodeRelation {type: \"CALLS\"}]->(b:Function {name: \"save\"}) RETURN a.name, a.filePath'"
        echo " gitnexus-cypher 'MATCH (n:Class) RETURN n.name, n.filePath LIMIT 20'"
        return 1
    fi

    # Cypher routinely contains double quotes, so escaping is mandatory here.
    _gitnexus_call cypher "{\"query\": \"$(_gitnexus_json_escape "$query")\"}"
}

# gitnexus-overview (no arguments)
gitnexus-overview() {
    echo "=== Code Knowledge Graph Overview ==="
    _gitnexus_call list_repos '{}'
}

# Export functions so they're available in subshells
export -f _gitnexus_json_escape 2>/dev/null
export -f _gitnexus_call 2>/dev/null
export -f gitnexus-query 2>/dev/null
export -f gitnexus-context 2>/dev/null
export -f gitnexus-impact 2>/dev/null
export -f gitnexus-cypher 2>/dev/null
export -f gitnexus-overview 2>/dev/null
Usage: bridge = MCPBridge(repo_path="/path/to/repo") bridge.start() result = bridge.call_tool("query", {"query": "authentication"}) bridge.stop() """ def __init__(self, repo_path: str | None = None): self.repo_path = repo_path or os.getcwd() self.process: subprocess.Popen | None = None self._request_id = 0 self._lock = threading.Lock() self._started = False def start(self) -> bool: """Start the GitNexus MCP server subprocess.""" if self._started: return True try: # Find gitnexus binary gitnexus_bin = self._find_gitnexus() if not gitnexus_bin: logger.error("GitNexus not found. Install with: npm install -g gitnexus") return False self.process = subprocess.Popen( [gitnexus_bin, "mcp"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=self.repo_path, text=False, ) # Send initialize request init_result = self._send_request("initialize", { "protocolVersion": "2024-11-05", "capabilities": {}, "clientInfo": {"name": "gitnexus-eval", "version": "0.1.0"}, }) if init_result is None: logger.error("MCP server failed to initialize") self.stop() return False # Send initialized notification self._send_notification("notifications/initialized", {}) self._started = True logger.info("MCP bridge started successfully") return True except Exception as e: logger.error(f"Failed to start MCP bridge: {e}") self.stop() return False def stop(self): """Stop the MCP server subprocess.""" if self.process: try: self.process.stdin.close() self.process.terminate() self.process.wait(timeout=5) except Exception: try: self.process.kill() except Exception: pass self.process = None self._started = False def call_tool(self, tool_name: str, arguments: dict[str, Any] | None = None) -> dict[str, Any] | None: """ Call a GitNexus MCP tool and return the result. Returns the tool result content or None on error. 
""" if not self._started: logger.error("MCP bridge not started") return None result = self._send_request("tools/call", { "name": tool_name, "arguments": arguments or {}, }) if result is None: return None # Extract text content from MCP response content = result.get("content", []) if content and isinstance(content, list): texts = [item.get("text", "") for item in content if item.get("type") == "text"] return {"text": "\n".join(texts), "raw": content} return {"text": "", "raw": content} def list_tools(self) -> list[dict]: """List available MCP tools.""" result = self._send_request("tools/list", {}) if result: return result.get("tools", []) return [] def read_resource(self, uri: str) -> str | None: """Read an MCP resource by URI.""" result = self._send_request("resources/read", {"uri": uri}) if result: contents = result.get("contents", []) if contents: return contents[0].get("text", "") return None def _find_gitnexus(self) -> str | None: """Find the gitnexus CLI binary.""" # Check if npx is available (preferred - uses local install) for cmd in ["npx"]: try: result = subprocess.run( [cmd, "gitnexus", "--version"], capture_output=True, text=True, timeout=15, cwd=self.repo_path, ) if result.returncode == 0: return cmd # Will use "npx gitnexus mcp" except Exception: continue # Check for global install try: result = subprocess.run( ["gitnexus", "--version"], capture_output=True, text=True, timeout=10, ) if result.returncode == 0: return "gitnexus" except Exception: pass return None def _next_id(self) -> int: with self._lock: self._request_id += 1 return self._request_id def _send_request(self, method: str, params: dict) -> dict | None: """Send a JSON-RPC request and wait for response.""" if not self.process or not self.process.stdin or not self.process.stdout: return None request_id = self._next_id() request = { "jsonrpc": "2.0", "id": request_id, "method": method, "params": params, } try: message = json.dumps(request) # MCP uses Content-Length header framing header = 
f"Content-Length: {len(message.encode('utf-8'))}\r\n\r\n" self.process.stdin.write(header.encode("utf-8")) self.process.stdin.write(message.encode("utf-8")) self.process.stdin.flush() # Read response response = self._read_response(timeout=30) if response and response.get("id") == request_id: if "error" in response: logger.error(f"MCP error: {response['error']}") return None return response.get("result") return None except Exception as e: logger.error(f"MCP request failed: {e}") return None def _send_notification(self, method: str, params: dict): """Send a JSON-RPC notification (no response expected).""" if not self.process or not self.process.stdin: return notification = { "jsonrpc": "2.0", "method": method, "params": params, } try: message = json.dumps(notification) header = f"Content-Length: {len(message.encode('utf-8'))}\r\n\r\n" self.process.stdin.write(header.encode("utf-8")) self.process.stdin.write(message.encode("utf-8")) self.process.stdin.flush() except Exception as e: logger.error(f"MCP notification failed: {e}") def _read_response(self, timeout: float = 30) -> dict | None: """Read a JSON-RPC response from the MCP server.""" if not self.process or not self.process.stdout: return None start = time.time() try: while time.time() - start < timeout: # Read Content-Length header header_line = b"" while True: byte = self.process.stdout.read(1) if not byte: return None header_line += byte if header_line.endswith(b"\r\n\r\n"): break if header_line.endswith(b"\n\n"): break # Parse content length header_str = header_line.decode("utf-8").strip() content_length = None for line in header_str.split("\r\n"): if line.lower().startswith("content-length:"): content_length = int(line.split(":")[1].strip()) break if content_length is None: continue # Read body body = self.process.stdout.read(content_length) if not body: return None message = json.loads(body.decode("utf-8")) # Skip notifications (no id), return responses if "id" in message: return message return None except 
Exception as e: logger.error(f"Error reading MCP response: {e}") return None class MCPToolCLI: """ CLI wrapper that exposes MCP tools as simple command-line calls. Used by the bash wrapper scripts inside Docker containers. Usage from bash: python -m bridge.mcp_bridge query '{"query": "authentication"}' python -m bridge.mcp_bridge context '{"name": "validateUser"}' """ def __init__(self): self.bridge = MCPBridge() def run(self, tool_name: str, args_json: str = "{}") -> int: """Run a single tool call and print the result.""" try: args = json.loads(args_json) except json.JSONDecodeError: # Try to parse as simple key=value pairs args = self._parse_simple_args(args_json) if not self.bridge.start(): print("ERROR: Failed to start GitNexus MCP bridge", file=sys.stderr) return 1 try: result = self.bridge.call_tool(tool_name, args) if result: print(result.get("text", "")) return 0 else: print("No results", file=sys.stderr) return 1 finally: self.bridge.stop() @staticmethod def _parse_simple_args(args_str: str) -> dict: """Parse 'key=value key2=value2' style arguments.""" args = {} for part in args_str.split(): if "=" in part: key, value = part.split("=", 1) args[key] = value return args if __name__ == "__main__": if len(sys.argv) < 2: print("Usage: python -m bridge.mcp_bridge <tool_name> [args_json]", file=sys.stderr) print("Tools: query, context, impact, cypher, list_repos, detect_changes, rename", file=sys.stderr) sys.exit(1) tool = sys.argv[1] args_json = sys.argv[2] if len(sys.argv) > 2 else "{}" cli = MCPToolCLI() sys.exit(cli.run(tool, args_json)) ================================================ FILE: eval/configs/models/claude-haiku.yaml ================================================ # Claude Haiku 4.5 — fast, cheap, good baseline # Via OpenRouter (set OPENROUTER_API_KEY in .env) model: model_name: "openrouter/anthropic/claude-haiku-4.5" cost_tracking: "ignore_errors" model_kwargs: max_tokens: 8192 temperature: 0 ================================================ 
FILE: eval/configs/models/claude-opus.yaml ================================================ # Claude Opus 4 — most capable, highest cost # Via OpenRouter (set OPENROUTER_API_KEY in .env) # To use Anthropic directly, change to: anthropic/claude-opus-4-20250514 model: model_name: "openrouter/anthropic/claude-opus-4" cost_tracking: "ignore_errors" model_kwargs: max_tokens: 16384 temperature: 0 ================================================ FILE: eval/configs/models/claude-sonnet.yaml ================================================ # Claude Sonnet 4 — strong all-around model # Via OpenRouter (set OPENROUTER_API_KEY in .env) # To use Anthropic directly, change to: anthropic/claude-sonnet-4-20250514 model: model_name: "openrouter/anthropic/claude-sonnet-4" cost_tracking: "ignore_errors" model_kwargs: max_tokens: 16384 temperature: 0 ================================================ FILE: eval/configs/models/deepseek-chat.yaml ================================================ model: deepseek-ai/deepseek-chat provider: openrouter cost: input: 0.14 # per 1M tokens output: 0.28 # per 1M tokens # Native DeepSeek API (direct) api_key: null base_url: null # For OpenRouter, uncomment below and comment out direct config above # api_key: \${OPENROUTER_API_KEY} # base_url: https://openrouter.ai/api/v1 ================================================ FILE: eval/configs/models/deepseek-v3.yaml ================================================ model: deepseek-ai/DeepSeek-V3 provider: openrouter cost: input: 0.27 # per 1M tokens output: 1.10 # per 1M tokens # Native DeepSeek API (direct) # Get your API key at: https://platform.deepseek.com/ # Or use OpenRouter with: OPENROUTER_API_KEY api_key: null base_url: null # For OpenRouter, uncomment below and comment out direct config above # api_key: \${OPENROUTER_API_KEY} # base_url: https://openrouter.ai/api/v1 ================================================ FILE: eval/configs/models/glm-4.7.yaml 
================================================ # GLM 4.7 — via OpenRouter (set OPENROUTER_API_KEY in .env) model: model_name: "openrouter/zhipuai/glm-4.7" cost_tracking: "ignore_errors" model_kwargs: max_tokens: 8192 temperature: 0 ================================================ FILE: eval/configs/models/glm-5.yaml ================================================ # GLM 5 — via OpenRouter (set OPENROUTER_API_KEY in .env) model: model_name: "openrouter/zhipuai/glm-5" cost_tracking: "ignore_errors" model_kwargs: max_tokens: 8192 temperature: 0 ================================================ FILE: eval/configs/models/minimax-2.5.yaml ================================================ # MiniMax M1 2.5 — via OpenRouter (set OPENROUTER_API_KEY in .env) model: model_name: "openrouter/minimax/minimax-m1-2.5" cost_tracking: "ignore_errors" model_kwargs: max_tokens: 8192 temperature: 0 ================================================ FILE: eval/configs/models/minimax-m2.1.yaml ================================================ # MiniMax M2.5 — via OpenRouter (set OPENROUTER_API_KEY in .env) # Uses text-based model class because MiniMax doesn't support tool_calls natively. # The action_regex tells mini-swe-agent to parse ```bash blocks from responses. 
model: model_class: litellm_textbased model_name: "openrouter/minimax/minimax-m2.5" action_regex: "```(?:bash|mswea_bash_command)\\s*\\n(.*?)\\n```" cost_tracking: "ignore_errors" model_kwargs: max_tokens: 8192 temperature: 0 ================================================ FILE: eval/configs/modes/baseline.yaml ================================================ # Baseline mode — no GitNexus, pure mini-swe-agent (control group) agent: agent_class: "eval.agents.gitnexus_agent.GitNexusAgent" gitnexus_mode: "baseline" step_limit: 30 cost_limit: 3.0 environment: environment_class: "docker" ================================================ FILE: eval/configs/modes/native.yaml ================================================ # Native mode — GitNexus tools only, no grep enrichment # # Explicit tools: gitnexus-query, gitnexus-context, gitnexus-impact, gitnexus-cypher # Available as fast bash commands (~100ms via eval-server) # # Use this mode to isolate the value of explicit tools without grep augmentation. agent: agent_class: "eval.agents.gitnexus_agent.GitNexusAgent" gitnexus_mode: "native" step_limit: 30 cost_limit: 3.0 track_gitnexus_usage: true environment: environment_class: "eval.environments.gitnexus_docker.GitNexusDockerEnvironment" enable_gitnexus: true skip_embeddings: true gitnexus_timeout: 120 eval_server_port: 4848 ================================================ FILE: eval/configs/modes/native_augment.yaml ================================================ # Native + Augment mode — the primary evaluation mode # # Combines two capabilities (mirroring the Claude Code model): # 1. Explicit GitNexus tools: gitnexus-query, gitnexus-context, gitnexus-impact, gitnexus-cypher # Available as fast bash commands (~100ms via eval-server) # 2. Automatic grep enrichment: grep/rg results are transparently augmented with # [GitNexus] annotations showing callers, callees, and execution flows # # The agent decides when to use explicit tools vs rely on enriched grep results. 
agent: agent_class: "eval.agents.gitnexus_agent.GitNexusAgent" gitnexus_mode: "native_augment" step_limit: 30 cost_limit: 3.0 augment_timeout: 5.0 augment_min_pattern_length: 3 track_gitnexus_usage: true environment: environment_class: "eval.environments.gitnexus_docker.GitNexusDockerEnvironment" enable_gitnexus: true skip_embeddings: true gitnexus_timeout: 120 eval_server_port: 4848 ================================================ FILE: eval/environments/__init__.py ================================================ ================================================ FILE: eval/environments/gitnexus_docker.py ================================================ """ GitNexus Docker Environment for SWE-bench Evaluation Extends mini-swe-agent's Docker environment to: 1. Install GitNexus (Node.js + npm + gitnexus package) 2. Run `gitnexus analyze` on the repository 3. Start the eval-server daemon (persistent HTTP server with warm KuzuDB) 4. Install standalone tool scripts in /usr/local/bin/ (works with subprocess.run) 5. Cache indexes per (repo, base_commit) to avoid re-indexing IMPORTANT: mini-swe-agent runs every command with subprocess.run in a fresh subshell. This means .bashrc is NOT sourced, exported functions are NOT available, and env vars don't persist. The tool scripts must be standalone executables in $PATH. Architecture: Agent bash cmd → /usr/local/bin/gitnexus-query → curl localhost:4848/tool/query → eval-server → KuzuDB Fallback: → npx gitnexus query (cold start, slower) Tool call latency: ~50-100ms via eval-server, ~5-10s via CLI fallback. """ import hashlib import json import logging import shutil import time from pathlib import Path from minisweagent.environments.docker import DockerEnvironment logger = logging.getLogger("gitnexus_docker") DEFAULT_CACHE_DIR = Path.home() / ".gitnexus-eval-cache" EVAL_SERVER_PORT = 4848 # Standalone tool scripts installed into /usr/local/bin/ inside the container. 
# Each script calls the eval-server via curl, with a CLI fallback. # These are standalone — no sourcing, no env inheritance needed. TOOL_SCRIPT_QUERY = r'''#!/bin/bash PORT="${GITNEXUS_EVAL_PORT:-__PORT__}" query="$1"; task_ctx="${2:-}"; goal="${3:-}" [ -z "$query" ] && echo "Usage: gitnexus-query <query> [task_context] [goal]" && exit 1 args="{\"query\": \"$query\"" [ -n "$task_ctx" ] && args="$args, \"task_context\": \"$task_ctx\"" [ -n "$goal" ] && args="$args, \"goal\": \"$goal\"" args="$args}" result=$(curl -sf -X POST "http://127.0.0.1:${PORT}/tool/query" -H "Content-Type: application/json" -d "$args" 2>/dev/null) if [ $? -eq 0 ] && [ -n "$result" ]; then echo "$result"; exit 0; fi cd /testbed && npx gitnexus query "$query" 2>&1 ''' TOOL_SCRIPT_CONTEXT = r'''#!/bin/bash PORT="${GITNEXUS_EVAL_PORT:-__PORT__}" name="$1"; file_path="${2:-}" [ -z "$name" ] && echo "Usage: gitnexus-context <symbol_name> [file_path]" && exit 1 args="{\"name\": \"$name\"" [ -n "$file_path" ] && args="$args, \"file_path\": \"$file_path\"" args="$args}" result=$(curl -sf -X POST "http://127.0.0.1:${PORT}/tool/context" -H "Content-Type: application/json" -d "$args" 2>/dev/null) if [ $? -eq 0 ] && [ -n "$result" ]; then echo "$result"; exit 0; fi cd /testbed && npx gitnexus context "$name" 2>&1 ''' TOOL_SCRIPT_IMPACT = r'''#!/bin/bash PORT="${GITNEXUS_EVAL_PORT:-__PORT__}" target="$1"; direction="${2:-upstream}" [ -z "$target" ] && echo "Usage: gitnexus-impact <symbol_name> [upstream|downstream]" && exit 1 result=$(curl -sf -X POST "http://127.0.0.1:${PORT}/tool/impact" -H "Content-Type: application/json" -d "{\"target\": \"$target\", \"direction\": \"$direction\"}" 2>/dev/null) if [ $? 
-eq 0 ] && [ -n "$result" ]; then echo "$result"; exit 0; fi cd /testbed && npx gitnexus impact "$target" --direction "$direction" 2>&1 ''' TOOL_SCRIPT_CYPHER = r'''#!/bin/bash PORT="${GITNEXUS_EVAL_PORT:-__PORT__}" query="$1" [ -z "$query" ] && echo "Usage: gitnexus-cypher <cypher_query>" && exit 1 result=$(curl -sf -X POST "http://127.0.0.1:${PORT}/tool/cypher" -H "Content-Type: application/json" -d "{\"query\": \"$query\"}" 2>/dev/null) if [ $? -eq 0 ] && [ -n "$result" ]; then echo "$result"; exit 0; fi cd /testbed && npx gitnexus cypher "$query" 2>&1 ''' TOOL_SCRIPT_AUGMENT = r'''#!/bin/bash cd /testbed && npx gitnexus augment "$1" 2>&1 || true ''' TOOL_SCRIPT_OVERVIEW = r'''#!/bin/bash PORT="${GITNEXUS_EVAL_PORT:-__PORT__}" echo "=== Code Knowledge Graph Overview ===" result=$(curl -sf -X POST "http://127.0.0.1:${PORT}/tool/list_repos" -H "Content-Type: application/json" -d "{}" 2>/dev/null) if [ $? -eq 0 ] && [ -n "$result" ]; then echo "$result"; exit 0; fi cd /testbed && npx gitnexus list 2>&1 ''' class GitNexusDockerEnvironment(DockerEnvironment): """ Docker environment with GitNexus pre-installed, indexed, and eval-server running. Setup flow: 1. Start Docker container (base SWE-bench image) 2. Install Node.js + gitnexus inside the container 3. Run `gitnexus analyze` (or restore from cache) 4. Start `gitnexus eval-server` daemon (keeps KuzuDB warm) 5. Install standalone tool scripts in /usr/local/bin/ 6. 
Agent runs with near-instant GitNexus tool calls """ def __init__( self, *, enable_gitnexus: bool = True, cache_dir: str | Path | None = None, skip_embeddings: bool = True, gitnexus_timeout: int = 120, eval_server_port: int = EVAL_SERVER_PORT, **kwargs, ): super().__init__(**kwargs) self.enable_gitnexus = enable_gitnexus self.cache_dir = Path(cache_dir) if cache_dir else DEFAULT_CACHE_DIR self.skip_embeddings = skip_embeddings self.gitnexus_timeout = gitnexus_timeout self.eval_server_port = eval_server_port self.index_time: float = 0.0 self._gitnexus_ready = False def start(self) -> dict: """Start the container and set up GitNexus.""" result = super().start() if self.enable_gitnexus: try: self._setup_gitnexus() except Exception as e: logger.warning(f"GitNexus setup failed, continuing without it: {e}") self._gitnexus_ready = False return result def _setup_gitnexus(self): """Install and configure GitNexus in the container.""" start = time.time() self._ensure_nodejs() self._install_gitnexus() self._index_repository() self._start_eval_server() self._install_tools() self.index_time = time.time() - start self._gitnexus_ready = True logger.info(f"GitNexus setup completed in {self.index_time:.1f}s") def _ensure_nodejs(self): """Ensure Node.js >= 18 is available in the container.""" check = self.execute({"command": "node --version 2>/dev/null || echo 'NOT_FOUND'"}) output = check.get("output", "").strip() if "NOT_FOUND" in output: logger.info("Installing Node.js in container...") install_cmds = [ "apt-get update -qq", "apt-get install -y -qq curl ca-certificates", "curl -fsSL https://deb.nodesource.com/setup_20.x | bash -", "apt-get install -y -qq nodejs", ] for cmd in install_cmds: result = self.execute({"command": cmd, "timeout": 60}) if result.get("returncode", 1) != 0: raise RuntimeError(f"Failed to install Node.js: {result.get('output', '')}") else: logger.info(f"Node.js already available: {output}") def _install_gitnexus(self): """Install the gitnexus npm package 
globally.""" check = self.execute({"command": "npx gitnexus --version 2>/dev/null || echo 'NOT_FOUND'"}) if "NOT_FOUND" in check.get("output", ""): logger.info("Installing gitnexus...") result = self.execute({ "command": "npm install -g gitnexus", "timeout": 60, }) if result.get("returncode", 1) != 0: raise RuntimeError(f"Failed to install gitnexus: {result.get('output', '')}") def _index_repository(self): """Run gitnexus analyze on the repo, using cache if available.""" repo_info = self._get_repo_info() cache_key = self._make_cache_key(repo_info) cache_path = self.cache_dir / cache_key if cache_path.exists(): logger.info(f"Restoring GitNexus index from cache: {cache_key}") self._restore_cache(cache_path) return logger.info("Running gitnexus analyze...") skip_flag = "--skip-embeddings" if self.skip_embeddings else "" result = self.execute({ "command": f"cd /testbed && npx gitnexus analyze . {skip_flag} 2>&1", "timeout": self.gitnexus_timeout, }) if result.get("returncode", 1) != 0: output = result.get("output", "") if "error" in output.lower() and "indexed" not in output.lower(): raise RuntimeError(f"gitnexus analyze failed: {output[-500:]}") self._save_cache(cache_path, repo_info) def _start_eval_server(self): """Start the GitNexus eval-server daemon in the background.""" logger.info(f"Starting eval-server on port {self.eval_server_port}...") self.execute({ "command": ( f"nohup npx gitnexus eval-server --port {self.eval_server_port} " f"--idle-timeout 600 " f"> /tmp/gitnexus-eval-server.log 2>&1 &" ), "timeout": 5, }) # Wait for the server to be ready (up to 15s for KuzuDB init) for i in range(30): time.sleep(0.5) health = self.execute({ "command": f"curl -sf http://127.0.0.1:{self.eval_server_port}/health 2>/dev/null || echo 'NOT_READY'", "timeout": 3, }) output = health.get("output", "").strip() if "NOT_READY" not in output and "ok" in output: logger.info(f"Eval-server ready after {(i + 1) * 0.5:.1f}s") return log_output = self.execute({ "command": "cat 
/tmp/gitnexus-eval-server.log 2>/dev/null | tail -20", }) logger.warning( f"Eval-server didn't become ready in 15s. " f"Tools will fall back to direct CLI.\n" f"Server log: {log_output.get('output', 'N/A')}" ) def _install_tools(self): """ Install standalone GitNexus tool scripts in /usr/local/bin/. Each script is a self-contained bash script that: 1. Calls the eval-server via curl (fast path, ~100ms) 2. Falls back to direct CLI if eval-server is unavailable These are standalone executables — no sourcing, env inheritance, or .bashrc needed. This is critical because mini-swe-agent runs every command via subprocess.run in a fresh subshell. Uses heredocs with quoted delimiter to avoid all quoting/escaping issues. """ port = str(self.eval_server_port) tools = { "gitnexus-query": TOOL_SCRIPT_QUERY, "gitnexus-context": TOOL_SCRIPT_CONTEXT, "gitnexus-impact": TOOL_SCRIPT_IMPACT, "gitnexus-cypher": TOOL_SCRIPT_CYPHER, "gitnexus-augment": TOOL_SCRIPT_AUGMENT, "gitnexus-overview": TOOL_SCRIPT_OVERVIEW, } for name, script in tools.items(): script_content = script.replace("__PORT__", port).strip() # Use heredoc with quoted delimiter — prevents all variable expansion and quoting issues self.execute({ "command": f"cat << 'GITNEXUS_SCRIPT_EOF' > /usr/local/bin/{name}\n{script_content}\nGITNEXUS_SCRIPT_EOF\nchmod +x /usr/local/bin/{name}", "timeout": 5, }) logger.info(f"Installed {len(tools)} GitNexus tool scripts in /usr/local/bin/") def _get_repo_info(self) -> dict: """Get repository identity info from the container.""" repo_result = self.execute({ "command": "cd /testbed && basename $(git remote get-url origin 2>/dev/null || basename $(pwd)) .git" }) commit_result = self.execute({"command": "cd /testbed && git rev-parse HEAD 2>/dev/null || echo unknown"}) return { "repo": repo_result.get("output", "unknown").strip(), "commit": commit_result.get("output", "unknown").strip(), } @staticmethod def _make_cache_key(repo_info: dict) -> str: """Create a deterministic cache key from 
repo info.""" content = f"{repo_info['repo']}:{repo_info['commit']}" return hashlib.sha256(content.encode()).hexdigest()[:16] def _save_cache(self, cache_path: Path, repo_info: dict): """Save the GitNexus index to the host cache directory.""" try: cache_path.mkdir(parents=True, exist_ok=True) find_result = self.execute({ "command": "find /root/.gitnexus -name 'kuzu' -type d 2>/dev/null | head -1" }) gitnexus_dir = find_result.get("output", "").strip() if gitnexus_dir: parent = str(Path(gitnexus_dir).parent) self.execute({ "command": f"cd {parent} && tar czf /tmp/gitnexus-cache.tar.gz .", "timeout": 30, }) container_id = getattr(self, "_container_id", None) or getattr(self, "container_id", None) if container_id: import subprocess as sp sp.run( ["docker", "cp", f"{container_id}:/tmp/gitnexus-cache.tar.gz", str(cache_path / "index.tar.gz")], check=True, capture_output=True, ) (cache_path / "metadata.json").write_text(json.dumps(repo_info, indent=2)) logger.info(f"Cached GitNexus index: {cache_path}") except Exception as e: logger.warning(f"Failed to cache GitNexus index: {e}") if cache_path.exists(): shutil.rmtree(cache_path, ignore_errors=True) def _restore_cache(self, cache_path: Path): """Restore a cached GitNexus index into the container.""" try: cache_tarball = cache_path / "index.tar.gz" if not cache_tarball.exists(): logger.warning("Cache tarball not found, re-indexing") self._index_repository() return container_id = getattr(self, "_container_id", None) or getattr(self, "container_id", None) if container_id: import subprocess as sp self.execute({"command": "mkdir -p /root/.gitnexus"}) storage_result = self.execute({ "command": "npx gitnexus list 2>/dev/null | grep -o '/root/.gitnexus/[^ ]*' | head -1 || echo '/root/.gitnexus/repos/default'" }) storage_path = storage_result.get("output", "").strip() or "/root/.gitnexus/repos/default" self.execute({"command": f"mkdir -p {storage_path}"}) sp.run( ["docker", "cp", str(cache_tarball), 
f"{container_id}:/tmp/gitnexus-cache.tar.gz"], check=True, capture_output=True, ) self.execute({ "command": f"cd {storage_path} && tar xzf /tmp/gitnexus-cache.tar.gz", "timeout": 30, }) logger.info("GitNexus index restored from cache") except Exception as e: logger.warning(f"Failed to restore cache, re-indexing: {e}") self._index_repository() def stop(self) -> dict: """Stop the container, shutting down eval-server first.""" if self._gitnexus_ready: try: self.execute({ "command": f"curl -sf -X POST http://127.0.0.1:{self.eval_server_port}/shutdown 2>/dev/null || true", "timeout": 3, }) except Exception: pass return super().stop() def get_template_vars(self) -> dict: """Add GitNexus-specific template variables.""" base_vars = super().get_template_vars() base_vars["gitnexus_ready"] = self._gitnexus_ready base_vars["gitnexus_index_time"] = self.index_time return base_vars def serialize(self) -> dict: """Include GitNexus environment info in serialization.""" base = super().serialize() base.setdefault("info", {})["gitnexus_env"] = { "enabled": self.enable_gitnexus, "ready": self._gitnexus_ready, "index_time_seconds": round(self.index_time, 2), "skip_embeddings": self.skip_embeddings, "eval_server_port": self.eval_server_port, } return base ================================================ FILE: eval/prompts/instance_baseline.jinja ================================================ Please solve this issue: {{task}} You can execute bash commands and edit files to implement the necessary changes. ## Recommended Workflow This workflows should be done step-by-step so that you can iterate on your changes and any possible problems. 1. Analyze the codebase by finding and reading relevant files 2. Create a script to reproduce the issue 3. Edit the source code to resolve the issue 4. Verify your fix works by running your script again 5. Test edge cases to ensure your fix is robust 6. 
Submit your changes and finish your work by issuing the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`. Do not combine it with any other command. After this command, you cannot continue working on this task. ## Important Rules 1. Every response must contain exactly one action 2. The action must be enclosed in triple backticks 3. Directory or environment variable changes are not persistent. Every action is executed in a new subshell. However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files <system_info> {{system}} {{release}} {{version}} {{machine}} </system_info> ## Formatting your response Here is an example of a correct response: <example_response> THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase. ```mswea_bash_command ls -la ``` </example_response> ## Useful command examples ### Create a new file: ```bash cat <<'EOF' > newfile.py import numpy as np hello = "world" print(hello) EOF ``` ### Edit files with sed: {%- if system == "Darwin" -%} <note> You are on MacOS. For all the below examples, you need to use `sed -i ''` instead of `sed -i`. </note> {%- endif -%} ```bash # Replace all occurrences sed -i 's/old_string/new_string/g' filename.py # Replace only first occurrence sed -i 's/old_string/new_string/' filename.py # Replace all occurrences in lines 1-10 sed -i '1,10s/old_string/new_string/g' filename.py ``` ### View file content: ```bash # View specific lines with numbers nl -ba filename.py | sed -n '10,20p' ``` ### Any other command you want to run ```bash anything ``` ================================================ FILE: eval/prompts/instance_native.jinja ================================================ Please solve this issue: {{task}} You can execute bash commands and edit files to implement the necessary changes. 
## Recommended Workflow Work step-by-step so you can iterate on your changes and catch problems early. 1. **Understand the issue** — read the problem statement, identify the symptom and affected area 2. **Find the relevant code** — use `gitnexus-query "<feature area>"` to find execution flows, or `grep` for specific strings 3. **Understand the suspect** — use `gitnexus-context "<symbol>"` to see all callers and callees, then `cat` to read the source 4. **Check blast radius** — before editing shared code, run `gitnexus-impact "<symbol>" upstream` to see what depends on it 5. **Implement the fix** — make minimal, targeted changes 6. **Verify** — run relevant tests, check edge cases 7. **Submit** — issue: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT` Do not combine it with any other command. After this command, you cannot continue working on this task. ## Debugging Patterns | Symptom | Approach | |---------|----------| | Error message / exception | `gitnexus-query` for error text → `gitnexus-context` on throw sites | | Wrong return value | `gitnexus-context` on the function → trace callees for data flow | | Missing feature / incomplete behavior | `gitnexus-query` for feature area → find the execution flow → locate the gap | | Need to understand callers | `gitnexus-context` — graph-complete, finds callers grep would miss | ## Risk Assessment Before editing shared code, check the blast radius: | Impact | Risk | Action | |--------|------|--------| | <5 symbols at d=1 | Low | Fix with confidence | | 5-15 symbols at d=1 | Medium | Fix carefully, run broader tests | | >15 symbols at d=1 | High | Minimal change, run full test suite | ## Important Rules 1. Every response must contain exactly one action 2. The action must be enclosed in triple backticks 3. Directory or environment variable changes are not persistent. Every action is executed in a new subshell. 
However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files 4. Make minimal, targeted changes. Don't refactor unrelated code. 5. GitNexus tools are ~100ms. Use them when they save you multiple grep iterations. <system_info> {{system}} {{release}} {{version}} {{machine}} </system_info> ## Formatting your response Here is an example of a correct response: <example_response> THOUGHT: The issue mentions a problem with form field validation. Let me search the code knowledge graph for the relevant execution flows to understand how validation works in this codebase. ```mswea_bash_command gitnexus-query "form field validation" ``` </example_response> ## Useful command examples ### Create a new file: ```bash cat <<'EOF' > newfile.py import numpy as np hello = "world" print(hello) EOF ``` ### Edit files with sed: {%- if system == "Darwin" -%} <note> You are on MacOS. For all the below examples, you need to use `sed -i ''` instead of `sed -i`. </note> {%- endif -%} ```bash # Replace all occurrences sed -i 's/old_string/new_string/g' filename.py # Replace only first occurrence sed -i 's/old_string/new_string/' filename.py # Replace all occurrences in lines 1-10 sed -i '1,10s/old_string/new_string/g' filename.py ``` ### View file content: ```bash # View specific lines with numbers nl -ba filename.py | sed -n '10,20p' ``` ### Any other command you want to run ```bash anything ``` ================================================ FILE: eval/prompts/instance_native_augment.jinja ================================================ Please solve this issue: {{task}} You can execute bash commands and edit files to implement the necessary changes. ## Recommended Workflow Work step-by-step so you can iterate on your changes and catch problems early. 1. **Understand the issue** — read the problem statement, identify the symptom and affected area 2. 
**Find the relevant code** — use `gitnexus-query "<feature area>"` to find execution flows, or `grep` for specific strings 3. **Understand the suspect** — use `gitnexus-context "<symbol>"` to see all callers and callees, then `cat` to read the source 4. **Check blast radius** — before editing shared code, run `gitnexus-impact "<symbol>" upstream` to see what depends on it 5. **Implement the fix** — make minimal, targeted changes 6. **Verify** — run relevant tests, check edge cases 7. **Submit** — issue: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT` Do not combine it with any other command. After this command, you cannot continue working on this task. ## Debugging Patterns | Symptom | Approach | |---------|----------| | Error message / exception | `gitnexus-query` for error text → `gitnexus-context` on throw sites | | Wrong return value | `gitnexus-context` on the function → trace callees for data flow | | Missing feature / incomplete behavior | `gitnexus-query` for feature area → find the execution flow → locate the gap | | Need to understand callers | `gitnexus-context` — graph-complete, finds callers grep would miss | ## Risk Assessment Before editing shared code, check the blast radius: | Impact | Risk | Action | |--------|------|--------| | <5 symbols at d=1 | Low | Fix with confidence | | 5-15 symbols at d=1 | Medium | Fix carefully, run broader tests | | >15 symbols at d=1 | High | Minimal change, run full test suite | ## Important Rules 1. Every response must contain exactly one action 2. The action must be enclosed in triple backticks 3. Directory or environment variable changes are not persistent. Every action is executed in a new subshell. However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files 4. Make minimal, targeted changes. Don't refactor unrelated code. 5. GitNexus tools are ~100ms. Use them when they save you multiple grep iterations. 6. 
When grep results show `[GitNexus]` enrichments, use those for navigation. <system_info> {{system}} {{release}} {{version}} {{machine}} </system_info> ## Formatting your response Here is an example of a correct response: <example_response> THOUGHT: The issue mentions a problem with form field validation. Let me search the code knowledge graph for the relevant execution flows to understand how validation works in this codebase. ```mswea_bash_command gitnexus-query "form field validation" ``` </example_response> ## Useful command examples ### Create a new file: ```bash cat <<'EOF' > newfile.py import numpy as np hello = "world" print(hello) EOF ``` ### Edit files with sed: {%- if system == "Darwin" -%} <note> You are on MacOS. For all the below examples, you need to use `sed -i ''` instead of `sed -i`. </note> {%- endif -%} ```bash # Replace all occurrences sed -i 's/old_string/new_string/g' filename.py # Replace only first occurrence sed -i 's/old_string/new_string/' filename.py # Replace all occurrences in lines 1-10 sed -i '1,10s/old_string/new_string/g' filename.py ``` ### View file content: ```bash # View specific lines with numbers nl -ba filename.py | sed -n '10,20p' ``` ### Any other command you want to run ```bash anything ``` ================================================ FILE: eval/prompts/system_baseline.jinja ================================================ You are a helpful assistant that can interact with a computer to solve software engineering tasks. Your response must contain exactly ONE bash code block with ONE command (or commands connected with && or ||). Include a THOUGHT section before your command where you explain your reasoning process. Format your response as shown in the example below. <example_response> Your reasoning and analysis here. Explain why you want to perform the action. ```mswea_bash_command your_command_here ``` </example_response> Failure to follow these rules will cause your response to be rejected.
================================================ FILE: eval/prompts/system_native.jinja ================================================ You are a helpful assistant that can interact with a computer to solve software engineering tasks. Your response must contain exactly ONE bash code block with ONE command (or commands connected with && or ||). Include a THOUGHT section before your command where you explain your reasoning process. Format your response as shown in the example below. <example_response> Your reasoning and analysis here. Explain why you want to perform the action. ```mswea_bash_command your_command_here ``` </example_response> Failure to follow these rules will cause your response to be rejected. ## Code Intelligence You have **GitNexus** — a knowledge graph over this entire codebase. It knows every function call chain, class hierarchy, execution flow, and symbol relationship. These are fast bash commands (~100ms). Use them when useful, skip them when a simple grep suffices. ### GitNexus Commands **gitnexus-query "<concept>"** — Find execution flows related to a concept. Returns ranked execution flow traces with participating symbols and file locations. ```bash gitnexus-query "form field validation" ``` **gitnexus-context "<symbol>" ["<file_path>"]** — 360-degree view of a symbol. Returns ALL callers, ALL callees, and execution flows. Graph-complete — finds callers that grep misses. ```bash gitnexus-context "BoundField" "django/forms/boundfield.py" ``` **gitnexus-impact "<symbol>" [upstream|downstream]** — Blast radius analysis. What breaks if you change this: d=1 WILL BREAK, d=2 LIKELY AFFECTED, d=3 MAY NEED TESTING. ```bash gitnexus-impact "BoundField" upstream ``` **gitnexus-cypher "<query>"** — Raw Cypher query against the code graph. ```bash gitnexus-cypher 'MATCH (a)-[:CodeRelation {type: "CALLS"}]->(b:Function {name: "clean"}) RETURN a.name, a.filePath' ``` ### When to Use What | I need to...
| Use | |---|---| | Understand how a feature works end-to-end | `gitnexus-query` | | Find ALL callers of a function | `gitnexus-context` | | Know what breaks if I change something | `gitnexus-impact` upstream | | Find a string literal or error message | `grep` | | Read source code | `cat` / `nl -ba` | ================================================ FILE: eval/prompts/system_native_augment.jinja ================================================ You are a helpful assistant that can interact with a computer to solve software engineering tasks. Your response must contain exactly ONE bash code block with ONE command (or commands connected with && or ||). Include a THOUGHT section before your command where you explain your reasoning process. Format your response as shown in the example below. <example_response> Your reasoning and analysis here. Explain why you want to perform the action. ```mswea_bash_command your_command_here ``` </example_response> Failure to follow these rules will cause your response to be rejected. ## Code Intelligence You have **GitNexus** — a knowledge graph over this entire codebase. It knows every function call chain, class hierarchy, execution flow, and symbol relationship. These are fast bash commands (~100ms). Use them when useful, skip them when a simple grep suffices. Your `grep` results are also automatically enriched with `[GitNexus]` annotations showing callers, callees, and execution flows for matched symbols. Pay attention to these — they often point you to the right code without extra tool calls. ### GitNexus Commands **gitnexus-query "<concept>"** — Find execution flows related to a concept. Returns ranked execution flow traces with participating symbols and file locations. ```bash gitnexus-query "form field validation" ``` **gitnexus-context "<symbol>" ["<file_path>"]** — 360-degree view of a symbol. Returns ALL callers, ALL callees, and execution flows. Graph-complete — finds callers that grep misses.
```bash gitnexus-context "BoundField" "django/forms/boundfield.py" ``` **gitnexus-impact "<symbol>" [upstream|downstream]** — Blast radius analysis. What breaks if you change this: d=1 WILL BREAK, d=2 LIKELY AFFECTED, d=3 MAY NEED TESTING. ```bash gitnexus-impact "BoundField" upstream ``` **gitnexus-cypher "<query>"** — Raw Cypher query against the code graph. ```bash gitnexus-cypher 'MATCH (a)-[:CodeRelation {type: "CALLS"}]->(b:Function {name: "clean"}) RETURN a.name, a.filePath' ``` ### When to Use What | I need to... | Use | |---|---| | Understand how a feature works end-to-end | `gitnexus-query` | | Find ALL callers of a function | `gitnexus-context` | | Know what breaks if I change something | `gitnexus-impact` upstream | | Find a string literal or error message | `grep` | | Read source code | `cat` / `nl -ba` | ================================================ FILE: eval/pyproject.toml ================================================ [project] name = "gitnexus-swebench-eval" version = "0.1.0" description = "SWE-bench evaluation harness with GitNexus code intelligence integration" readme = "README.md" requires-python = ">=3.11" dependencies = [ "mini-swe-agent>=2.0.0", "litellm>=1.50.0", "datasets>=3.0.0", "typer>=0.12.0", "rich>=13.0.0", "pyyaml>=6.0", "pandas>=2.0.0", "tabulate>=0.9.0", "python-dotenv>=1.0.0", ] [project.optional-dependencies] dev = [ "pytest>=8.0.0", "ruff>=0.5.0", ] [project.scripts] gitnexus-eval = "run_eval:app" gitnexus-eval-analyze = "analysis.analyze_results:app" [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] packages = ["agents", "environments", "analysis", "bridge"] extra-files = ["run_eval.py"] [tool.ruff] line-length = 120 target-version = "py311" ================================================ FILE: eval/run_eval.py ================================================ #!/usr/bin/env python3 """ GitNexus SWE-bench Evaluation Runner Main entry point for running SWE-bench 
evaluations with and without GitNexus. Supports running a single
configuration or a full matrix of models x modes.

Usage:
    # Single run (default: native_augment mode — GitNexus tools + grep enrichment)
    python run_eval.py single -m claude-sonnet --subset lite --slice 0:5

    # Baseline comparison (no GitNexus)
    python run_eval.py single -m claude-sonnet --mode baseline --subset lite --slice 0:5

    # Matrix run (all models x all modes)
    python run_eval.py matrix --subset lite --slice 0:50 --workers 4

    # Single instance for debugging
    python run_eval.py debug -m claude-haiku -i django__django-16527
"""

import concurrent.futures
import json
import logging
import os
import threading
import time
import traceback
from itertools import product
from pathlib import Path
from typing import Any

import typer
import yaml
from rich.console import Console
from rich.live import Live
from rich.table import Table

# Load .env file from eval/ directory
# Minimal KEY=VALUE reader that runs once at import time:
#   - blank lines and lines starting with "#" are skipped;
#   - lines without "=" are ignored; only the first "=" separates key from value;
#   - key and value are whitespace-trimmed, but quotes around a value are kept as-is;
#   - entries with an empty value are dropped.
_env_file = Path(__file__).parent / ".env"
if _env_file.exists():
    for line in _env_file.read_text().splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        if "=" in line:
            key, _, value = line.partition("=")
            key, value = key.strip(), value.strip()
            if value and key not in os.environ:  # Don't override existing env vars
                os.environ[key] = value

# Shared module-level handles: logger for diagnostics, rich console for user-facing
# output, and the Typer application that the @app.command() functions register on.
logger = logging.getLogger("gitnexus_eval")
console = Console()
app = typer.Typer(rich_markup_mode="rich", add_completion=False)

# Directory paths
# Everything is resolved relative to the directory containing this file.
EVAL_DIR = Path(__file__).parent
CONFIGS_DIR = EVAL_DIR / "configs"
MODELS_DIR = CONFIGS_DIR / "models"
MODES_DIR = CONFIGS_DIR / "modes"
DEFAULT_OUTPUT_DIR = EVAL_DIR / "results"

# Available models and modes (discovered from config files)
# Computed once at import time: the stem of every "*.yaml" file in the directory
# becomes a selectable name, in sorted order.
AVAILABLE_MODELS = sorted([p.stem for p in MODELS_DIR.glob("*.yaml")])
AVAILABLE_MODES = sorted([p.stem for p in MODES_DIR.glob("*.yaml")])

# SWE-bench dataset mapping (same as mini-swe-agent)
# Keys are the --subset shorthands; callers look values up with
# DATASET_MAPPING.get(subset, subset), so an unknown subset is used verbatim.
DATASET_MAPPING = {
    "full": "princeton-nlp/SWE-Bench",
    "verified": "princeton-nlp/SWE-Bench_Verified",
    "lite":
"princeton-nlp/SWE-Bench_Lite", } _output_lock = threading.Lock() def load_yaml_config(path: Path) -> dict: """Load a YAML config file.""" with open(path) as f: return yaml.safe_load(f) or {} def merge_configs(*configs: dict) -> dict: """Recursively merge multiple config dicts (later values win).""" result = {} for config in configs: for key, value in config.items(): if key in result and isinstance(result[key], dict) and isinstance(value, dict): result[key] = merge_configs(result[key], value) else: result[key] = value return result def build_config(model_name: str, mode_name: str) -> dict: """Build a complete config from model + mode YAML files.""" model_file = MODELS_DIR / f"{model_name}.yaml" mode_file = MODES_DIR / f"{mode_name}.yaml" if not model_file.exists(): raise FileNotFoundError(f"Model config not found: {model_file}") if not mode_file.exists(): raise FileNotFoundError(f"Mode config not found: {mode_file}") model_config = load_yaml_config(model_file) mode_config = load_yaml_config(mode_file) return merge_configs(mode_config, model_config) def load_instances(subset: str, split: str, slice_spec: str = "", filter_spec: str = "") -> list[dict]: """Load SWE-bench instances.""" from datasets import load_dataset import re dataset_path = DATASET_MAPPING.get(subset, subset) logger.info(f"Loading dataset: {dataset_path}, split: {split}") instances = list(load_dataset(dataset_path, split=split)) if filter_spec: instances = [i for i in instances if re.match(filter_spec, i["instance_id"])] if slice_spec: values = [int(x) if x else None for x in slice_spec.split(":")] instances = instances[slice(*values)] logger.info(f"Loaded {len(instances)} instances") return instances def get_swebench_docker_image(instance: dict) -> str: """Get Docker image name for a SWE-bench instance.""" image_name = instance.get("image_name") if image_name is None: iid = instance["instance_id"] id_docker = iid.replace("__", "_1776_") image_name = 
f"docker.io/swebench/sweb.eval.x86_64.{id_docker}:latest".lower()
    return image_name


def process_instance(
    instance: dict,
    config: dict,
    output_dir: Path,
    model_name: str,
    mode_name: str,
) -> dict:
    """
    Process a single SWE-bench instance with the given config.

    Creates ``<output_dir>/<model_name>_<mode_name>/<instance_id>/``, builds the
    model, the Docker environment and the agent from ``config``, runs the agent on
    the instance's ``problem_statement`` and records the outcome in ``preds.json``.
    Exceptions raised while building or running are caught and recorded in the
    result (``exit_status`` becomes the exception class name, plus ``error`` and
    ``traceback`` keys) rather than propagated.

    Returns result dict with instance_id, exit_status, submission, metrics.
    """
    from minisweagent.models import get_model

    instance_id = instance["instance_id"]
    run_id = f"{model_name}_{mode_name}"
    instance_dir = output_dir / run_id / instance_id
    instance_dir.mkdir(parents=True, exist_ok=True)

    # Defaults describe an instance that never ran; fields are overwritten below.
    result = {
        "instance_id": instance_id,
        "model": model_name,
        "mode": mode_name,
        "exit_status": None,
        "submission": "",
        "cost": 0.0,
        "n_calls": 0,
        "gitnexus_metrics": {},
    }

    # Stays None until the agent is constructed, so the cleanup code can tell
    # whether there is a trajectory to save.
    agent = None
    try:
        # Build model
        model = get_model(config=config.get("model", {}))

        # Build environment
        # Copy the section so popping "environment_class" does not mutate the shared config.
        env_config = dict(config.get("environment", {}))
        env_class_name = env_config.pop("environment_class", "docker")

        if env_class_name == "eval.environments.gitnexus_docker.GitNexusDockerEnvironment":
            from environments.gitnexus_docker import GitNexusDockerEnvironment
            env_config["image"] = get_swebench_docker_image(instance)
            env = GitNexusDockerEnvironment(**env_config)
        else:
            # Any other value (including the default "docker") selects the plain Docker environment.
            from minisweagent.environments.docker import DockerEnvironment
            env = DockerEnvironment(image=get_swebench_docker_image(instance), **env_config)

        # Build agent
        agent_config = dict(config.get("agent", {}))
        # The key is popped so it is not forwarded as a keyword argument; the popped
        # value itself is not used — GitNexusAgent is always instantiated.
        agent_class_name = agent_config.pop("agent_class", "eval.agents.gitnexus_agent.GitNexusAgent")
        from agents.gitnexus_agent import GitNexusAgent

        traj_path = instance_dir / f"{instance_id}.traj.json"
        agent_config["output_path"] = traj_path
        agent = GitNexusAgent(model, env, **agent_config)

        # Run
        logger.info(f"[{run_id}] Starting {instance_id}")
        info = agent.run(instance["problem_statement"])

        result["exit_status"] = info.get("exit_status")
        result["cost"] = agent.cost
        result["n_calls"] = agent.n_calls
        result["gitnexus_metrics"] = agent.gitnexus_metrics.to_dict()

        # Extract git diff patch from the
container (SWE-bench needs the model_patch) try: patch_output = env.execute({"command": "cd /testbed && git diff"}) result["submission"] = patch_output.get("output", "").strip() except Exception as patch_err: logger.warning(f"[{run_id}] Failed to extract patch: {patch_err}") result["submission"] = info.get("submission", "") except Exception as e: logger.error(f"[{run_id}] Error on {instance_id}: {e}") result["exit_status"] = type(e).__name__ result["error"] = str(e) result["traceback"] = traceback.format_exc() finally: if agent: agent.save( instance_dir / f"{instance_id}.traj.json", {"instance_id": instance_id, "run_id": run_id}, ) # Update predictions file _update_preds(output_dir / run_id / "preds.json", instance_id, model_name, result) return result def _update_preds(preds_path: Path, instance_id: str, model_name: str, result: dict): """Thread-safe update of predictions file.""" with _output_lock: preds_path.parent.mkdir(parents=True, exist_ok=True) data = {} if preds_path.exists(): data = json.loads(preds_path.read_text()) data[instance_id] = { "model_name_or_path": model_name, "instance_id": instance_id, "model_patch": result.get("submission", ""), } preds_path.write_text(json.dumps(data, indent=2)) def run_configuration( model_name: str, mode_name: str, instances: list[dict], output_dir: Path, workers: int = 1, redo_existing: bool = False, ) -> list[dict]: """Run a single (model, mode) configuration across all instances.""" config = build_config(model_name, mode_name) run_id = f"{model_name}_{mode_name}" run_dir = output_dir / run_id # Skip existing instances if not redo_existing and (run_dir / "preds.json").exists(): existing = set(json.loads((run_dir / "preds.json").read_text()).keys()) instances = [i for i in instances if i["instance_id"] not in existing] if not instances: logger.info(f"[{run_id}] All instances already completed, skipping") return [] console.print(f" [bold]{run_id}[/bold]: {len(instances)} instances, {workers} workers") results = [] if 
workers <= 1: for instance in instances: result = process_instance(instance, config, output_dir, model_name, mode_name) results.append(result) else: with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor: futures = { executor.submit( process_instance, instance, config, output_dir, model_name, mode_name ): instance["instance_id"] for instance in instances } for future in concurrent.futures.as_completed(futures): try: results.append(future.result()) except Exception as e: iid = futures[future] logger.error(f"[{run_id}] Uncaught error for {iid}: {e}") # Save run summary summary = { "run_id": run_id, "model": model_name, "mode": mode_name, "config": config, "total_instances": len(results), "completed": sum(1 for r in results if r["exit_status"] not in [None, "error"]), "total_cost": sum(r.get("cost", 0) for r in results), "total_api_calls": sum(r.get("n_calls", 0) for r in results), "results": results, } (run_dir / "summary.json").mkdir(parents=True, exist_ok=True) if not run_dir.exists() else None run_dir.mkdir(parents=True, exist_ok=True) (run_dir / "summary.json").write_text(json.dumps(summary, indent=2, default=str)) return results # ─── CLI Commands ─────────────────────────────────────────────────────────── @app.command() def single( model: str = typer.Option(..., "-m", "--model", help=f"Model config name. Available: {', '.join(AVAILABLE_MODELS)}"), mode: str = typer.Option("native_augment", "--mode", help=f"Evaluation mode. 
Available: {', '.join(AVAILABLE_MODES)}"),
    subset: str = typer.Option("lite", "--subset", help="SWE-bench subset: lite, verified, full"),
    split: str = typer.Option("dev", "--split", help="Dataset split"),
    slice_spec: str = typer.Option("", "--slice", help="Slice spec (e.g., '0:5')"),
    filter_spec: str = typer.Option("", "--filter", help="Filter instance IDs by regex"),
    workers: int = typer.Option(1, "-w", "--workers", help="Parallel workers"),
    output: str = typer.Option(str(DEFAULT_OUTPUT_DIR), "-o", "--output", help="Output directory"),
    redo: bool = typer.Option(False, "--redo", help="Redo existing instances"),
):
    """Run a single (model, mode) configuration on SWE-bench.

    Loads the selected instances, runs them through ``run_configuration`` and prints
    a summary table of the returned results.
    """
    output_dir = Path(output)
    instances = load_instances(subset, split, slice_spec, filter_spec)

    console.print(f"\n[bold]Running evaluation:[/bold] {model} + {mode}")
    console.print(f" Instances: {len(instances)}")
    console.print(f" Output: {output_dir}\n")

    results = run_configuration(model, mode, instances, output_dir, workers, redo)

    # Print summary
    _print_summary(results, model, mode)


@app.command()
def matrix(
    models: list[str] = typer.Option(AVAILABLE_MODELS, "-m", "--models", help="Models to evaluate (comma-separated or repeated)"),
    modes: list[str] = typer.Option(AVAILABLE_MODES, "--modes", help="Modes to evaluate"),
    subset: str = typer.Option("lite", "--subset", help="SWE-bench subset"),
    split: str = typer.Option("dev", "--split", help="Dataset split"),
    slice_spec: str = typer.Option("", "--slice", help="Slice spec"),
    filter_spec: str = typer.Option("", "--filter", help="Filter instances by regex"),
    workers: int = typer.Option(1, "-w", "--workers", help="Parallel workers per config"),
    output: str = typer.Option(str(DEFAULT_OUTPUT_DIR), "-o", "--output", help="Output directory"),
    redo: bool = typer.Option(False, "--redo", help="Redo existing instances"),
):
    """Run the full evaluation matrix: all models x all modes.

    Both ``models`` and ``modes`` default to every configuration discovered on disk.
    The same instance list is loaded once and reused for every (model, mode) pair.
    """
    output_dir = Path(output)
    instances = load_instances(subset, split,
slice_spec, filter_spec) combos = list(product(models, modes)) console.print(f"\n[bold]Matrix evaluation:[/bold] {len(models)} models x {len(modes)} modes = {len(combos)} configs") console.print(f" Models: {', '.join(models)}") console.print(f" Modes: {', '.join(modes)}") console.print(f" Instances per config: {len(instances)}") console.print(f" Total runs: {len(combos) * len(instances)}") console.print(f" Output: {output_dir}\n") all_results = {} for model_name, mode_name in combos: run_id = f"{model_name}_{mode_name}" console.print(f"\n[bold cyan]━━━ {run_id} ━━━[/bold cyan]") results = run_configuration(model_name, mode_name, instances, output_dir, workers, redo) all_results[run_id] = results # Print comparative summary _print_matrix_summary(all_results) # Save master summary master = { "timestamp": time.time(), "models": models, "modes": modes, "subset": subset, "n_instances": len(instances), "runs": { run_id: { "total": len(results), "cost": sum(r.get("cost", 0) for r in results), "api_calls": sum(r.get("n_calls", 0) for r in results), } for run_id, results in all_results.items() }, } output_dir.mkdir(parents=True, exist_ok=True) (output_dir / "matrix_summary.json").write_text(json.dumps(master, indent=2, default=str)) console.print(f"\n[green]Results saved to {output_dir}[/green]") @app.command() def debug( model: str = typer.Option("claude-haiku", "-m", "--model", help="Model config name"), mode: str = typer.Option("native_augment", "--mode", help="Evaluation mode"), instance_id: str = typer.Option(..., "-i", "--instance", help="SWE-bench instance ID"), subset: str = typer.Option("lite", "--subset", help="SWE-bench subset"), split: str = typer.Option("dev", "--split"), output: str = typer.Option(str(DEFAULT_OUTPUT_DIR / "debug"), "-o", "--output"), ): """Debug a single SWE-bench instance.""" from datasets import load_dataset dataset_path = DATASET_MAPPING.get(subset, subset) instances = {inst["instance_id"]: inst for inst in load_dataset(dataset_path, 
split=split)}

    # Unknown id: report it and exit with status 1 instead of raising KeyError.
    if instance_id not in instances:
        console.print(f"[red]Instance '{instance_id}' not found in {subset}/{split}[/red]")
        raise typer.Exit(1)

    instance = instances[instance_id]
    config = build_config(model, mode)
    output_dir = Path(output)

    console.print(f"\n[bold]Debug run:[/bold] {model} + {mode}")
    console.print(f" Instance: {instance_id}")
    # Only the first 200 characters of the problem statement are echoed.
    console.print(f" Problem: {instance['problem_statement'][:200]}...\n")

    # Bypasses run_configuration: the instance always runs, with no resume check.
    result = process_instance(instance, config, output_dir, model, mode)
    _print_summary([result], model, mode)


@app.command()
def list_configs():
    """List available model and mode configurations.

    Re-reads each discovered YAML file to show the model's ``model.model_name`` and
    the mode's ``agent.gitnexus_mode``, then prints the size of the full matrix.
    """
    console.print("\n[bold]Available Models:[/bold]")
    for name in AVAILABLE_MODELS:
        config = load_yaml_config(MODELS_DIR / f"{name}.yaml")
        # Missing section or key is displayed as "unknown".
        model_name = config.get("model", {}).get("model_name", "unknown")
        console.print(f" {name:<20} {model_name}")

    console.print("\n[bold]Available Modes:[/bold]")
    for name in AVAILABLE_MODES:
        config = load_yaml_config(MODES_DIR / f"{name}.yaml")
        # Missing section or key is displayed as "baseline".
        gn_mode = config.get("agent", {}).get("gitnexus_mode", "baseline")
        console.print(f" {name:<20} gitnexus_mode={gn_mode}")

    console.print(f"\n[bold]Matrix:[/bold] {len(AVAILABLE_MODELS)} models x {len(AVAILABLE_MODES)} modes = {len(AVAILABLE_MODELS) * len(AVAILABLE_MODES)} configurations")


# ─── Summary Output ────────────────────────────────────────────────────────


def _print_summary(results: list[dict], model: str, mode: str):
    """Print a summary table for a single run.

    Prints a notice and returns early when ``results`` is empty. Otherwise renders a
    two-column table of totals and per-instance averages; GitNexus rows are added
    only when their totals are positive.
    """
    if not results:
        console.print("[yellow]No results to display[/yellow]")
        return

    table = Table(title=f"{model} + {mode}")
    table.add_column("Metric", style="bold")
    table.add_column("Value")

    total = len(results)
    # "Completed" here means the instance produced a non-empty submission (patch).
    completed = sum(1 for r in results if r.get("submission"))
    total_cost = sum(r.get("cost", 0) for r in results)
    total_calls = sum(r.get("n_calls", 0) for r in results)

    table.add_row("Instances", str(total))
    table.add_row("Completed", f"{completed}/{total}")
    table.add_row("Total Cost", f"${total_cost:.4f}")
table.add_row("Total API Calls", str(total_calls)) table.add_row("Avg Cost/Instance", f"${total_cost / max(total, 1):.4f}") table.add_row("Avg Calls/Instance", f"{total_calls / max(total, 1):.1f}") # GitNexus-specific metrics gn_tool_calls = sum( r.get("gitnexus_metrics", {}).get("total_tool_calls", 0) for r in results ) gn_augment_hits = sum( r.get("gitnexus_metrics", {}).get("augmentation_hits", 0) for r in results ) if gn_tool_calls > 0: table.add_row("GitNexus Tool Calls", str(gn_tool_calls)) if gn_augment_hits > 0: table.add_row("Augmentation Hits", str(gn_augment_hits)) console.print(table) def _print_matrix_summary(all_results: dict[str, list[dict]]): """Print a comparative matrix summary.""" table = Table(title="Evaluation Matrix Summary") table.add_column("Configuration", style="bold") table.add_column("Instances") table.add_column("Completed") table.add_column("Cost") table.add_column("API Calls") table.add_column("GN Tools") for run_id, results in sorted(all_results.items()): total = len(results) completed = sum(1 for r in results if r.get("submission")) cost = sum(r.get("cost", 0) for r in results) calls = sum(r.get("n_calls", 0) for r in results) gn_calls = sum(r.get("gitnexus_metrics", {}).get("total_tool_calls", 0) for r in results) table.add_row( run_id, str(total), f"{completed}/{total}", f"${cost:.2f}", str(calls), str(gn_calls) if gn_calls > 0 else "-", ) console.print(table) if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(message)s") app() ================================================ FILE: gitnexus/.claude/settings.local.json ================================================ { "permissions": { "allow": [ "mcp__plugin_claude-mem_mcp-search__get_observations" ] } } ================================================ FILE: gitnexus/.npmignore ================================================ # Source (dist/ is the compiled output) src/ tsconfig.json # Dev files *.ts !dist/**/*.d.ts .git/ .gitignore 
node_modules/ # Package lock (consumers use their own) package-lock.json # IDE .vscode/ .idea/ ================================================ FILE: gitnexus/CHANGELOG.md ================================================ # Changelog All notable changes to GitNexus will be documented in this file. ## [1.4.7] - 2026-03-19 ### Added - **Phase 8 field/property type resolution** — ACCESSES edges with `declaredType` for field reads/writes (#354) - **Phase 9 return-type variable binding** — call-result variable binding across 11 languages (#379) - `extractPendingAssignment` in per-language type extractors captures `let x = getUser()` patterns - Unified fixpoint loop resolves variable types from function return types after initial walk - Field access on call-result variables: `user.name` resolves `name` via return type's class definition - Method-call-result chaining: `user.getProfile().bio` resolves through intermediate return types - 22 new test fixtures covering call-result and method-chain binding across all supported languages - Integration tests added for all 10 language resolver suites - **ACCESSES edge type** with read/write field access tracking (#372) - **Python `enumerate()` for-loop support** with nested tuple patterns (#356) - **MCP tool/resource descriptions** updated to reflect Phase 9 ACCESSES edge semantics and `declaredType` property ### Fixed - **mcp**: server crashes under parallel tool calls (#326, #349) - **parsing**: undefined error on languages missing from call routers (#364) - **web**: add missing Kotlin entries to `Record<SupportedLanguages>` maps - **rust**: `await` expression unwrapping in `extractPendingAssignment` for async call-result binding - **tests**: update property edge and write access expectations across multiple language tests - **docs**: corrected stale "single-pass" claims in type-resolution-system.md to reflect walk+fixpoint architecture ### Changed - **Upgrade `@ladybugdb/core` to 0.15.2** and remove segfault workarounds (#374) 
- **type-resolution-roadmap.md** overhauled — completed phases condensed to summaries, Phases 10–14 added with full engineering specs ## [1.4.6] - 2026-03-18 ### Added - **Phase 7 type resolution** — return-aware loop inference for call-expression iterables (#341) - `ReturnTypeLookup` interface with `lookupReturnType` / `lookupRawReturnType` split - `ForLoopExtractorContext` context object replacing positional `(node, env)` signature - Call-expression iterable resolution across 8 languages (TS/JS, Java, Kotlin, C#, Go, Rust, Python, PHP) - PHP `$this->property` foreach via `@var` class property scan (Strategy C) - PHP `function_call_expression` and `member_call_expression` foreach paths - `extractElementTypeFromString` as canonical raw-string container unwrapper in `shared.ts` - `extractReturnTypeName` deduplicated from `call-processor.ts` into `shared.ts` (137 lines removed) - `SKIP_SUBTREE_TYPES` performance optimization with documented `template_string` exclusion - `pendingCallResults` infrastructure (dormant — Phase 9 work) ### Fixed - **impact**: return structured error + partial results instead of crashing (#345) - **impact**: add `HAS_METHOD` and `OVERRIDES` to `VALID_RELATION_TYPES` (#350) - **cli**: write tool output to stdout via fd 1 instead of stderr (#346) - **postinstall**: add permission fix for CLI and hook scripts (#348) - **workflow**: use prefixed temporary branch name for fork PRs to prevent overwriting real branches - **test**: add `--repo` to CLI e2e tool tests for multi-repo environment - **php**: add `declaration_list` type guard on `findClassPropertyElementType` fallback - **docs**: correct `pendingCallResults` description in roadmap and system docs ### Chore - Add `.worktrees/` to `.gitignore` ## [1.4.5] - 2026-03-17 ### Added - **Ruby language support** for CLI and web (#111) - **TypeEnvironment API** with constructor inference, self/this/super resolution (#274) - **Return type inference** with doc-comment parsing (JSDoc, PHPDoc, YARD) 
and per-language type extractors (#284) - **Phase 4 type resolution** — nullable unwrapping, for-loop typing, assignment chain propagation (#310) - **Phase 5 type resolution** — chained calls, pattern matching, class-as-receiver (#315) - **Phase 6 type resolution** — for-loop Tier 1c, pattern matching, container descriptors, 10-language coverage (#318) - Container descriptor table for generic type argument resolution (Map keys vs values) - Method-aware for-loop extractors with integration tests for all languages - Recursive pattern binding (C# `is` patterns, Kotlin `when/is` smart casts) - Class field declaration unwrapping for C#/Java - PHP `$this->property` foreach member access - C++ pointer dereference range-for - Java `this.data.values()` field access patterns - Position-indexed when/is bindings for branch-local narrowing - **Type resolution system documentation** with architecture guide and roadmap - `.gitignore` and `.gitnexusignore` support during file discovery (#231) - Codex MCP configuration documentation in README (#236) - `skipGraphPhases` pipeline option to skip MRO/community/process phases for faster test runs - `hookTimeout: 120000` in vitest config for CI beforeAll hooks ### Changed - **Migrated from KuzuDB to LadybugDB v0.15** (#275) - Dynamically discover and install agent skills in CLI (#270) ### Performance - Worker pool threshold — skip worker creation for small repos (<15 files or <512KB total) - AST walk pruning via `SKIP_SUBTREE_TYPES` for leaf-only nodes (string, comment, number literals) - Pre-computed `interestingNodeTypes` set — single Set.has() replaces 3 checks per AST node - `fastStripNullable` — skip full nullable parsing for simple identifiers (90%+ case) - Replace `.children?.find()` with manual for loops in `extractFunctionName` to eliminate array allocations ### Fixed - Same-directory Python import resolution (#328) - Ruby method-level call resolution, HAS_METHOD edges, and dispatch table (#278) - C++ fixture file casing for 
case-sensitive CI - Template string incorrectly included in AST pruning set (contains interpolated expressions) ## [1.4.0] - Previous release ================================================ FILE: gitnexus/Dockerfile.test ================================================ FROM node:22-bookworm WORKDIR /app RUN apt-get update && apt-get install -y python3 make g++ && rm -rf /var/lib/apt/lists/* COPY . . RUN npm ci --ignore-scripts \ && node scripts/patch-tree-sitter-swift.cjs \ && (npm rebuild 2>&1 || true) \ && cd node_modules/tree-sitter-kotlin && npx --yes node-gyp rebuild 2>&1 CMD ["npx", "vitest", "run", "test/integration", "--reporter=verbose"] ================================================ FILE: gitnexus/README.md ================================================ # GitNexus **Graph-powered code intelligence for AI agents.** Index any codebase into a knowledge graph, then query it via MCP or CLI. Works with **Cursor**, **Claude Code**, **Windsurf**, **Cline**, **OpenCode**, and any MCP-compatible tool. [![npm version](https://img.shields.io/npm/v/gitnexus.svg)](https://www.npmjs.com/package/gitnexus) [![License: PolyForm Noncommercial](https://img.shields.io/badge/License-PolyForm%20Noncommercial-blue.svg)](https://polyformproject.org/licenses/noncommercial/1.0.0/) --- ## Why? AI coding tools don't understand your codebase structure. They edit a function without knowing 47 other functions depend on it. GitNexus fixes this by **precomputing every dependency, call chain, and relationship** into a queryable graph. **Three commands to give your AI agent full codebase awareness.** ## Quick Start ```bash # Index your repo (run from repo root) npx gitnexus analyze ``` That's it. This indexes the codebase, installs agent skills, registers Claude Code hooks, and creates `AGENTS.md` / `CLAUDE.md` context files — all in one command. To configure MCP for your editor, run `npx gitnexus setup` once — or set it up manually below. 
`gitnexus setup` auto-detects your editors and writes the correct global MCP config. You only need to run it once. ### Editor Support | Editor | MCP | Skills | Hooks (auto-augment) | Support | |--------|-----|--------|---------------------|---------| | **Claude Code** | Yes | Yes | Yes (PreToolUse) | **Full** | | **Cursor** | Yes | Yes | — | MCP + Skills | | **Windsurf** | Yes | — | — | MCP | | **OpenCode** | Yes | Yes | — | MCP + Skills | > **Claude Code** gets the deepest integration: MCP tools + agent skills + PreToolUse hooks that automatically enrich grep/glob/bash calls with knowledge graph context. ### Community Integrations | Agent | Install | Source | |-------|---------|--------| | [pi](https://pi.dev) | `pi install npm:pi-gitnexus` | [pi-gitnexus](https://github.com/tintinweb/pi-gitnexus) | ## MCP Setup (manual) If you prefer to configure manually instead of using `gitnexus setup`: ### Claude Code (full support — MCP + skills + hooks) ```bash claude mcp add gitnexus -- npx -y gitnexus@latest mcp ``` ### Cursor / Windsurf Add to `~/.cursor/mcp.json` (global — works for all projects): ```json { "mcpServers": { "gitnexus": { "command": "npx", "args": ["-y", "gitnexus@latest", "mcp"] } } } ``` ### OpenCode Add to `~/.config/opencode/config.json`: ```json { "mcp": { "gitnexus": { "command": "npx", "args": ["-y", "gitnexus@latest", "mcp"] } } } ``` ## How It Works GitNexus builds a complete knowledge graph of your codebase through a multi-phase indexing pipeline: 1. **Structure** — Walks the file tree and maps folder/file relationships 2. **Parsing** — Extracts functions, classes, methods, and interfaces using Tree-sitter ASTs 3. **Resolution** — Resolves imports and function calls across files with language-aware logic 4. **Clustering** — Groups related symbols into functional communities 5. **Processes** — Traces execution flows from entry points through call chains 6. 
**Search** — Builds hybrid search indexes for fast retrieval The result is a **LadybugDB graph database** stored locally in `.gitnexus/` with full-text search and semantic embeddings. ## MCP Tools Your AI agent gets these tools automatically: | Tool | What It Does | `repo` Param | |------|-------------|--------------| | `list_repos` | Discover all indexed repositories | — | | `query` | Process-grouped hybrid search (BM25 + semantic + RRF) | Optional | | `context` | 360-degree symbol view — categorized refs, process participation | Optional | | `impact` | Blast radius analysis with depth grouping and confidence | Optional | | `detect_changes` | Git-diff impact — maps changed lines to affected processes | Optional | | `rename` | Multi-file coordinated rename with graph + text search | Optional | | `cypher` | Raw Cypher graph queries | Optional | > With one indexed repo, the `repo` param is optional. With multiple, specify which: `query({query: "auth", repo: "my-app"})`. ## MCP Resources | Resource | Purpose | |----------|---------| | `gitnexus://repos` | List all indexed repositories (read first) | | `gitnexus://repo/{name}/context` | Codebase stats, staleness check, and available tools | | `gitnexus://repo/{name}/clusters` | All functional clusters with cohesion scores | | `gitnexus://repo/{name}/cluster/{name}` | Cluster members and details | | `gitnexus://repo/{name}/processes` | All execution flows | | `gitnexus://repo/{name}/process/{name}` | Full process trace with steps | | `gitnexus://repo/{name}/schema` | Graph schema for Cypher queries | ## MCP Prompts | Prompt | What It Does | |--------|-------------| | `detect_impact` | Pre-commit change analysis — scope, affected processes, risk level | | `generate_map` | Architecture documentation from the knowledge graph with mermaid diagrams | ## CLI Commands ```bash gitnexus setup # Configure MCP for your editors (one-time) gitnexus analyze [path] # Index a repository (or update stale index) gitnexus analyze --force 
# Force full re-index gitnexus analyze --embeddings # Enable embedding generation (slower, better search) gitnexus analyze --verbose # Log skipped files when parsers are unavailable gitnexus mcp # Start MCP server (stdio) — serves all indexed repos gitnexus serve # Start local HTTP server (multi-repo) for web UI gitnexus list # List all indexed repositories gitnexus status # Show index status for current repo gitnexus clean # Delete index for current repo gitnexus clean --all --force # Delete all indexes gitnexus wiki [path] # Generate LLM-powered docs from knowledge graph gitnexus wiki --model <model> # Wiki with custom LLM model (default: gpt-4o-mini) ``` ## Multi-Repo Support GitNexus supports indexing multiple repositories. Each `gitnexus analyze` registers the repo in a global registry (`~/.gitnexus/registry.json`). The MCP server serves all indexed repos automatically. ## Supported Languages TypeScript, JavaScript, Python, Java, C, C++, C#, Go, Rust, PHP, Kotlin, Swift, Ruby ### Language Feature Matrix | Language | Imports | Named Bindings | Exports | Heritage | Type Annotations | Constructor Inference | Config | Frameworks | Entry Points | |----------|---------|----------------|---------|----------|-----------------|---------------------|--------|------------|-------------| | TypeScript | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | JavaScript | ✓ | ✓ | ✓ | ✓ | — | ✓ | ✓ | ✓ | ✓ | | Python | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Java | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | — | ✓ | ✓ | | Kotlin | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | — | ✓ | ✓ | | C# | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Go | ✓ | — | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Rust | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | — | ✓ | ✓ | | PHP | ✓ | ✓ | ✓ | — | ✓ | ✓ | ✓ | ✓ | ✓ | | Ruby | ✓ | — | ✓ | ✓ | — | ✓ | — | ✓ | ✓ | | Swift | — | — | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | C | — | — | ✓ | — | ✓ | ✓ | — | ✓ | ✓ | | C++ | — | — | ✓ | ✓ | ✓ | ✓ | — | ✓ | ✓ | **Imports** — cross-file import resolution · **Named Bindings** — `import { X as Y }` / re-export 
tracking · **Exports** — public/exported symbol detection · **Heritage** — class inheritance, interfaces, mixins · **Type Annotations** — explicit type extraction for receiver resolution · **Constructor Inference** — infer receiver type from constructor calls (`self`/`this` resolution included for all languages) · **Config** — language toolchain config parsing (tsconfig, go.mod, etc.) · **Frameworks** — AST-based framework pattern detection · **Entry Points** — entry point scoring heuristics ## Agent Skills GitNexus ships with skill files that teach AI agents how to use the tools effectively: - **Exploring** — Navigate unfamiliar code using the knowledge graph - **Debugging** — Trace bugs through call chains - **Impact Analysis** — Analyze blast radius before changes - **Refactoring** — Plan safe refactors using dependency mapping Installed automatically by both `gitnexus analyze` (per-repo) and `gitnexus setup` (global). ## Requirements - Node.js >= 18 - Git repository (uses git for commit tracking) ## Privacy - All processing happens locally on your machine - No code is sent to any server - Index stored in `.gitnexus/` inside your repo (gitignored) - Global registry at `~/.gitnexus/` stores only paths and metadata ## Web UI GitNexus also has a browser-based UI at [gitnexus.vercel.app](https://gitnexus.vercel.app) — 100% client-side, your code never leaves the browser. **Local Backend Mode:** Run `gitnexus serve` and open the web UI locally — it auto-detects the server and shows all your indexed repos, with full AI chat support. No need to re-upload or re-index. The agent's tools (Cypher queries, search, code navigation) route through the backend HTTP API automatically. ## License [PolyForm Noncommercial 1.0.0](https://polyformproject.org/licenses/noncommercial/1.0.0/) Free for non-commercial use. Contact for commercial licensing. 
================================================ FILE: gitnexus/hooks/claude/gitnexus-hook.cjs ================================================ #!/usr/bin/env node /** * GitNexus Claude Code Hook * * PreToolUse — intercepts Grep/Glob/Bash searches and augments * with graph context from the GitNexus index. * PostToolUse — detects stale index after git mutations and notifies * the agent to reindex. * * NOTE: SessionStart hooks are broken on Windows (Claude Code bug). * Session context is injected via CLAUDE.md / skills instead. */ const fs = require('fs'); const path = require('path'); const { spawnSync } = require('child_process'); /** * Read JSON input from stdin synchronously. */ function readInput() { try { const data = fs.readFileSync(0, 'utf-8'); return JSON.parse(data); } catch { return {}; } } /** * Find the .gitnexus directory by walking up from startDir. * Returns the path to .gitnexus/ or null if not found. */ function findGitNexusDir(startDir) { let dir = startDir || process.cwd(); for (let i = 0; i < 5; i++) { const candidate = path.join(dir, '.gitnexus'); if (fs.existsSync(candidate)) return candidate; const parent = path.dirname(dir); if (parent === dir) break; dir = parent; } return null; } /** * Extract search pattern from tool input. */ function extractPattern(toolName, toolInput) { if (toolName === 'Grep') { return toolInput.pattern || null; } if (toolName === 'Glob') { const raw = toolInput.pattern || ''; const match = raw.match(/[*\/]([a-zA-Z][a-zA-Z0-9_-]{2,})/); return match ? 
match[1] : null; } if (toolName === 'Bash') { const cmd = toolInput.command || ''; if (!/\brg\b|\bgrep\b/.test(cmd)) return null; const tokens = cmd.split(/\s+/); let foundCmd = false; let skipNext = false; const flagsWithValues = new Set(['-e', '-f', '-m', '-A', '-B', '-C', '-g', '--glob', '-t', '--type', '--include', '--exclude']); for (const token of tokens) { if (skipNext) { skipNext = false; continue; } if (!foundCmd) { if (/\brg$|\bgrep$/.test(token)) foundCmd = true; continue; } if (token.startsWith('-')) { if (flagsWithValues.has(token)) skipNext = true; continue; } const cleaned = token.replace(/['"]/g, ''); return cleaned.length >= 3 ? cleaned : null; } return null; } return null; } /** * Resolve the gitnexus CLI path. * 1. Relative path (works when script is inside npm package) * 2. require.resolve (works when gitnexus is globally installed) * 3. Fall back to npx (returns empty string) */ function resolveCliPath() { let cliPath = path.resolve(__dirname, '..', '..', 'dist', 'cli', 'index.js'); if (!fs.existsSync(cliPath)) { try { cliPath = require.resolve('gitnexus/dist/cli/index.js'); } catch { cliPath = ''; } } return cliPath; } /** * Spawn a gitnexus CLI command synchronously. * Returns the stderr output (KuzuDB captures stdout at OS level). */ function runGitNexusCli(cliPath, args, cwd, timeout) { const isWin = process.platform === 'win32'; if (cliPath) { return spawnSync( process.execPath, [cliPath, ...args], { encoding: 'utf-8', timeout, cwd, stdio: ['pipe', 'pipe', 'pipe'] } ); } // On Windows, invoke npx.cmd directly (no shell needed) return spawnSync( isWin ? 'npx.cmd' : 'npx', ['-y', 'gitnexus', ...args], { encoding: 'utf-8', timeout: timeout + 5000, cwd, stdio: ['pipe', 'pipe', 'pipe'] } ); } /** * PreToolUse handler — augment searches with graph context. 
*/ function handlePreToolUse(input) { const cwd = input.cwd || process.cwd(); if (!path.isAbsolute(cwd)) return; if (!findGitNexusDir(cwd)) return; const toolName = input.tool_name || ''; const toolInput = input.tool_input || {}; if (toolName !== 'Grep' && toolName !== 'Glob' && toolName !== 'Bash') return; const pattern = extractPattern(toolName, toolInput); if (!pattern || pattern.length < 3) return; const cliPath = resolveCliPath(); let result = ''; try { const child = runGitNexusCli(cliPath, ['augment', '--', pattern], cwd, 7000); if (!child.error && child.status === 0) { result = child.stderr || ''; } } catch { /* graceful failure */ } if (result && result.trim()) { sendHookResponse('PreToolUse', result.trim()); } } /** * Emit a PostToolUse hook response with additional context for the agent. */ function sendHookResponse(hookEventName, message) { console.log(JSON.stringify({ hookSpecificOutput: { hookEventName, additionalContext: message } })); } /** * PostToolUse handler — detect index staleness after git mutations. * * Instead of spawning a full `gitnexus analyze` synchronously (which blocks * the agent for up to 120s and risks KuzuDB corruption on timeout), we do a * lightweight staleness check: compare `git rev-parse HEAD` against the * lastCommit stored in `.gitnexus/meta.json`. If they differ, notify the * agent so it can decide when to reindex. 
*/ function handlePostToolUse(input) { const toolName = input.tool_name || ''; if (toolName !== 'Bash') return; const command = (input.tool_input || {}).command || ''; if (!/\bgit\s+(commit|merge|rebase|cherry-pick|pull)(\s|$)/.test(command)) return; // Only proceed if the command succeeded const toolOutput = input.tool_output || {}; if (toolOutput.exit_code !== undefined && toolOutput.exit_code !== 0) return; const cwd = input.cwd || process.cwd(); if (!path.isAbsolute(cwd)) return; const gitNexusDir = findGitNexusDir(cwd); if (!gitNexusDir) return; // Compare HEAD against last indexed commit — skip if unchanged let currentHead = ''; try { const headResult = spawnSync('git', ['rev-parse', 'HEAD'], { encoding: 'utf-8', timeout: 3000, cwd, stdio: ['pipe', 'pipe', 'pipe'], }); currentHead = (headResult.stdout || '').trim(); } catch { return; } if (!currentHead) return; let lastCommit = ''; let hadEmbeddings = false; try { const meta = JSON.parse(fs.readFileSync(path.join(gitNexusDir, 'meta.json'), 'utf-8')); lastCommit = meta.lastCommit || ''; hadEmbeddings = (meta.stats && meta.stats.embeddings > 0); } catch { /* no meta — treat as stale */ } // If HEAD matches last indexed commit, no reindex needed if (currentHead && currentHead === lastCommit) return; const analyzeCmd = `npx gitnexus analyze${hadEmbeddings ? ' --embeddings' : ''}`; sendHookResponse('PostToolUse', `GitNexus index is stale (last indexed: ${lastCommit ? lastCommit.slice(0, 7) : 'never'}). 
` + `Run \`${analyzeCmd}\` to update the knowledge graph.` ); } // Dispatch map for hook events const handlers = { PreToolUse: handlePreToolUse, PostToolUse: handlePostToolUse, }; function main() { try { const input = readInput(); const handler = handlers[input.hook_event_name || '']; if (handler) handler(input); } catch (err) { if (process.env.GITNEXUS_DEBUG) { console.error('GitNexus hook error:', (err.message || '').slice(0, 200)); } } } main(); ================================================ FILE: gitnexus/hooks/claude/pre-tool-use.sh ================================================ #!/bin/bash # GitNexus PreToolUse hook for Claude Code # Intercepts Grep/Glob/Bash searches and augments with graph context. # Receives JSON on stdin with { tool_name, tool_input, cwd, ... } # Returns JSON with additionalContext for graph-enriched results. INPUT=$(cat) TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name // empty' 2>/dev/null) CWD=$(echo "$INPUT" | jq -r '.cwd // empty' 2>/dev/null) # Extract search pattern based on tool type PATTERN="" case "$TOOL_NAME" in Grep) PATTERN=$(echo "$INPUT" | jq -r '.tool_input.pattern // empty' 2>/dev/null) ;; Glob) # Glob patterns are file paths, not search terms — extract meaningful part RAW=$(echo "$INPUT" | jq -r '.tool_input.pattern // empty' 2>/dev/null) # Strip glob syntax to get the meaningful name (e.g., "**/*.ts" → skip, "auth*.ts" → "auth") PATTERN=$(echo "$RAW" | sed -n 's/.*[*\/]\([a-zA-Z][a-zA-Z0-9_-]*\).*/\1/p') ;; Bash) CMD=$(echo "$INPUT" | jq -r '.tool_input.command // empty' 2>/dev/null) # Only augment grep/rg commands if echo "$CMD" | grep -qE '\brg\b|\bgrep\b'; then # Extract pattern from rg/grep if echo "$CMD" | grep -qE '\brg\b'; then PATTERN=$(echo "$CMD" | sed -n "s/.*\brg\s\+\(--[^ ]*\s\+\)*['\"]\\?\([^'\";\| >]*\\).*/\2/p") elif echo "$CMD" | grep -qE '\bgrep\b'; then PATTERN=$(echo "$CMD" | sed -n "s/.*\bgrep\s\+\(-[^ ]*\s\+\)*['\"]\\?\([^'\";\| >]*\\).*/\2/p") fi fi ;; *) # Not a search tool — skip exit 0 ;; esac 
# Skip if pattern too short or empty if [ -z "$PATTERN" ] || [ ${#PATTERN} -lt 3 ]; then exit 0 fi # Check if we're in a GitNexus-indexed repo dir="${CWD:-$PWD}" found=false for i in 1 2 3 4 5; do if [ -d "$dir/.gitnexus" ]; then found=true break fi parent="$(dirname "$dir")" [ "$parent" = "$dir" ] && break dir="$parent" done if [ "$found" = false ]; then exit 0 fi # Run gitnexus augment — must be fast (<500ms target) # augment writes to stderr (KuzuDB captures stdout at OS level), so capture stderr and discard stdout RESULT=$(cd "$CWD" && npx -y gitnexus augment "$PATTERN" 2>&1 1>/dev/null) if [ -n "$RESULT" ]; then ESCAPED=$(echo "$RESULT" | jq -Rs .) jq -n --argjson ctx "$ESCAPED" '{ hookSpecificOutput: { hookEventName: "PreToolUse", additionalContext: $ctx } }' else exit 0 fi ================================================ FILE: gitnexus/hooks/claude/session-start.sh ================================================ #!/bin/bash # GitNexus SessionStart hook for Claude Code # Fires on session startup. Stdout is injected into Claude's context. # Checks if the current directory has a GitNexus index. dir="$PWD" found=false for i in 1 2 3 4 5; do if [ -d "$dir/.gitnexus" ]; then found=true break fi parent="$(dirname "$dir")" [ "$parent" = "$dir" ] && break dir="$parent" done if [ "$found" = false ]; then exit 0 fi # Inject GitNexus context — this stdout goes directly into Claude's context cat << 'EOF' ## GitNexus Code Intelligence This codebase is indexed by GitNexus, providing a knowledge graph with execution flows, relationships, and semantic search. 
**Available MCP Tools:** - `query` — Process-grouped code intelligence (execution flows related to a concept) - `context` — 360-degree symbol view (categorized refs, process participation) - `impact` — Blast radius analysis (what breaks if you change a symbol) - `detect_changes` — Git-diff impact analysis (what do your changes affect) - `rename` — Multi-file coordinated rename with confidence tags - `cypher` — Raw graph queries - `list_repos` — Discover indexed repos **Quick Start:** READ `gitnexus://repo/{name}/context` for codebase overview, then use `query` to find execution flows. **Resources:** `gitnexus://repo/{name}/context` (overview), `/processes` (execution flows), `/schema` (for Cypher) EOF exit 0 ================================================ FILE: gitnexus/package.json ================================================ { "name": "gitnexus", "version": "1.4.7", "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.", "author": "Abhigyan Patwari", "license": "PolyForm-Noncommercial-1.0.0", "homepage": "https://github.com/abhigyanpatwari/GitNexus#readme", "repository": { "type": "git", "url": "git+https://github.com/abhigyanpatwari/GitNexus.git", "directory": "gitnexus" }, "bugs": { "url": "https://github.com/abhigyanpatwari/GitNexus/issues" }, "keywords": [ "mcp", "model-context-protocol", "code-intelligence", "knowledge-graph", "cursor", "claude", "ai-agent", "gitnexus", "static-analysis", "codebase-indexing" ], "type": "module", "bin": { "gitnexus": "dist/cli/index.js" }, "files": [ "dist", "hooks", "scripts", "skills", "vendor" ], "scripts": { "build": "tsc", "dev": "tsx watch src/cli/index.ts", "test": "vitest run", "test:unit": "vitest run test/unit", "test:integration": "vitest run test/integration", "test:watch": "vitest", "test:coverage": "vitest run --coverage", "prepare": "npm run build", "postinstall": "node scripts/patch-tree-sitter-swift.cjs", "prepack": "npm run build && chmod +x 
dist/cli/index.js" }, "dependencies": { "@huggingface/transformers": "^3.0.0", "@modelcontextprotocol/sdk": "^1.0.0", "cli-progress": "^3.12.0", "commander": "^12.0.0", "cors": "^2.8.5", "express": "^4.19.2", "glob": "^11.0.0", "graphology": "^0.25.4", "graphology-indices": "^0.17.0", "graphology-utils": "^2.3.0", "@ladybugdb/core": "^0.15.2", "ignore": "^7.0.5", "lru-cache": "^11.0.0", "mnemonist": "^0.39.0", "pandemonium": "^2.4.0", "tree-sitter": "^0.21.0", "tree-sitter-c": "^0.21.0", "tree-sitter-c-sharp": "^0.21.0", "tree-sitter-cpp": "^0.22.0", "tree-sitter-go": "^0.21.0", "tree-sitter-java": "^0.21.0", "tree-sitter-javascript": "^0.21.0", "tree-sitter-php": "^0.23.12", "tree-sitter-python": "^0.21.0", "tree-sitter-ruby": "^0.23.1", "tree-sitter-rust": "^0.21.0", "tree-sitter-typescript": "^0.21.0", "uuid": "^13.0.0" }, "optionalDependencies": { "tree-sitter-kotlin": "^0.3.8", "tree-sitter-swift": "^0.6.0" }, "devDependencies": { "@types/cli-progress": "^3.11.6", "@types/cors": "^2.8.17", "@types/express": "^4.17.21", "@types/node": "^20.0.0", "@types/uuid": "^10.0.0", "@vitest/coverage-v8": "^4.0.18", "tsx": "^4.0.0", "typescript": "^5.4.5", "vitest": "^4.0.18" }, "engines": { "node": ">=18.0.0" } } ================================================ FILE: gitnexus/scripts/patch-tree-sitter-swift.cjs ================================================ #!/usr/bin/env node /** * WORKAROUND: tree-sitter-swift@0.6.0 binding.gyp build failure * * Background: * tree-sitter-swift@0.6.0's binding.gyp contains an "actions" array that * invokes `tree-sitter generate` to regenerate parser.c from grammar.js. * This is intended for grammar developers, but the published npm package * already ships pre-generated parser files (parser.c, scanner.c), so the * actions are unnecessary for consumers. Since consumers don't have * tree-sitter-cli installed, the actions always fail during `npm install`. 
* * Why we can't just upgrade: * tree-sitter-swift@0.7.1 fixes this (removes postinstall, ships prebuilds), * but it requires tree-sitter@^0.22.1. The upstream project pins tree-sitter * to ^0.21.0 and all other grammar packages depend on that version. * Upgrading tree-sitter would be a separate breaking change. * * How this workaround works: * 1. tree-sitter-swift's own postinstall fails (npm warns but continues) * 2. This script runs as gitnexus's postinstall * 3. It removes the "actions" array from binding.gyp * 4. It rebuilds the native binding with the cleaned binding.gyp * * TODO: Remove this script when tree-sitter is upgraded to ^0.22.x, * which allows using tree-sitter-swift@0.7.1+ directly. */ const fs = require('fs'); const path = require('path'); const { execSync } = require('child_process'); const swiftDir = path.join(__dirname, '..', 'node_modules', 'tree-sitter-swift'); const bindingPath = path.join(swiftDir, 'binding.gyp'); try { if (!fs.existsSync(bindingPath)) { process.exit(0); } const content = fs.readFileSync(bindingPath, 'utf8'); let needsRebuild = false; if (content.includes('"actions"')) { // Strip Python-style comments (#) before JSON parsing const cleaned = content.replace(/#[^\n]*/g, ''); const gyp = JSON.parse(cleaned); if (gyp.targets && gyp.targets[0] && gyp.targets[0].actions) { delete gyp.targets[0].actions; fs.writeFileSync(bindingPath, JSON.stringify(gyp, null, 2) + '\n'); console.log('[tree-sitter-swift] Patched binding.gyp (removed actions array)'); needsRebuild = true; } } // Check if native binding exists const bindingNode = path.join(swiftDir, 'build', 'Release', 'tree_sitter_swift_binding.node'); if (!fs.existsSync(bindingNode)) { needsRebuild = true; } if (needsRebuild) { console.log('[tree-sitter-swift] Rebuilding native binding...'); execSync('npx node-gyp rebuild', { cwd: swiftDir, stdio: 'pipe', timeout: 120000, }); console.log('[tree-sitter-swift] Native binding built successfully'); } } catch (err) { 
console.warn('[tree-sitter-swift] Could not build native binding:', err.message); console.warn('[tree-sitter-swift] You may need to manually run: cd node_modules/tree-sitter-swift && npx node-gyp rebuild'); } ================================================ FILE: gitnexus/skills/gitnexus-cli.md ================================================ --- name: gitnexus-cli description: "Use when the user needs to run GitNexus CLI commands like analyze/index a repo, check status, clean the index, generate a wiki, or list indexed repos. Examples: \"Index this repo\", \"Reanalyze the codebase\", \"Generate a wiki\"" --- # GitNexus CLI Commands All commands work via `npx` — no global install required. ## Commands ### analyze — Build or refresh the index ```bash npx gitnexus analyze ``` Run from the project root. This parses all source files, builds the knowledge graph, writes it to `.gitnexus/`, and generates CLAUDE.md / AGENTS.md context files. | Flag | Effect | | -------------- | ---------------------------------------------------------------- | | `--force` | Force full re-index even if up to date | | `--embeddings` | Enable embedding generation for semantic search (off by default) | **When to run:** First time in a project, after major code changes, or when `gitnexus://repo/{name}/context` reports the index is stale. In Claude Code, a PostToolUse hook runs `analyze` automatically after `git commit` and `git merge`, preserving embeddings if previously generated. ### status — Check index freshness ```bash npx gitnexus status ``` Shows whether the current repo has a GitNexus index, when it was last updated, and symbol/relationship counts. Use this to check if re-indexing is needed. ### clean — Delete the index ```bash npx gitnexus clean ``` Deletes the `.gitnexus/` directory and unregisters the repo from the global registry. Use before re-indexing if the index is corrupt or after removing GitNexus from a project. 
| Flag | Effect | | --------- | ------------------------------------------------- | | `--force` | Skip confirmation prompt | | `--all` | Clean all indexed repos, not just the current one | ### wiki — Generate documentation from the graph ```bash npx gitnexus wiki ``` Generates repository documentation from the knowledge graph using an LLM. Requires an API key (saved to `~/.gitnexus/config.json` on first use). | Flag | Effect | | ------------------- | ----------------------------------------- | | `--force` | Force full regeneration | | `--model <model>` | LLM model (default: minimax/minimax-m2.5) | | `--base-url <url>` | LLM API base URL | | `--api-key <key>` | LLM API key | | `--concurrency <n>` | Parallel LLM calls (default: 3) | | `--gist` | Publish wiki as a public GitHub Gist | ### list — Show all indexed repos ```bash npx gitnexus list ``` Lists all repositories registered in `~/.gitnexus/registry.json`. The MCP `list_repos` tool provides the same information. ## After Indexing 1. **Read `gitnexus://repo/{name}/context`** to verify the index loaded 2. Use the other GitNexus skills (`exploring`, `debugging`, `impact-analysis`, `refactoring`) for your task ## Troubleshooting - **"Not inside a git repository"**: Run from a directory inside a git repo - **Index is stale after re-analyzing**: Restart Claude Code to reload the MCP server - **Embeddings slow**: Omit `--embeddings` (it's off by default) or set `OPENAI_API_KEY` for faster API-based embedding ================================================ FILE: gitnexus/skills/gitnexus-debugging.md ================================================ --- name: gitnexus-debugging description: "Use when the user is debugging a bug, tracing an error, or asking why something fails. Examples: \"Why is X failing?\", \"Where does this error come from?\", \"Trace this bug\"" --- # Debugging with GitNexus ## When to Use - "Why is this function failing?" - "Trace where this error comes from" - "Who calls this method?" 
- "This endpoint returns 500" - Investigating bugs, errors, or unexpected behavior ## Workflow ``` 1. gitnexus_query({query: "<error or symptom>"}) → Find related execution flows 2. gitnexus_context({name: "<suspect>"}) → See callers/callees/processes 3. READ gitnexus://repo/{name}/process/{name} → Trace execution flow 4. gitnexus_cypher({query: "MATCH path..."}) → Custom traces if needed ``` > If "Index is stale" → run `npx gitnexus analyze` in terminal. ## Checklist ``` - [ ] Understand the symptom (error message, unexpected behavior) - [ ] gitnexus_query for error text or related code - [ ] Identify the suspect function from returned processes - [ ] gitnexus_context to see callers and callees - [ ] Trace execution flow via process resource if applicable - [ ] gitnexus_cypher for custom call chain traces if needed - [ ] Read source files to confirm root cause ``` ## Debugging Patterns | Symptom | GitNexus Approach | | -------------------- | ---------------------------------------------------------- | | Error message | `gitnexus_query` for error text → `context` on throw sites | | Wrong return value | `context` on the function → trace callees for data flow | | Intermittent failure | `context` → look for external calls, async deps | | Performance issue | `context` → find symbols with many callers (hot paths) | | Recent regression | `detect_changes` to see what your changes affect | ## Tools **gitnexus_query** — find code related to error: ``` gitnexus_query({query: "payment validation error"}) → Processes: CheckoutFlow, ErrorHandling → Symbols: validatePayment, handlePaymentError, PaymentException ``` **gitnexus_context** — full context for a suspect: ``` gitnexus_context({name: "validatePayment"}) → Incoming calls: processCheckout, webhookHandler → Outgoing calls: verifyCard, fetchRates (external API!) 
→ Processes: CheckoutFlow (step 3/7) ``` **gitnexus_cypher** — custom call chain traces: ```cypher MATCH path = (a)-[:CodeRelation {type: 'CALLS'}*1..2]->(b:Function {name: "validatePayment"}) RETURN [n IN nodes(path) | n.name] AS chain ``` ## Example: "Payment endpoint returns 500 intermittently" ``` 1. gitnexus_query({query: "payment error handling"}) → Processes: CheckoutFlow, ErrorHandling → Symbols: validatePayment, handlePaymentError 2. gitnexus_context({name: "validatePayment"}) → Outgoing calls: verifyCard, fetchRates (external API!) 3. READ gitnexus://repo/my-app/process/CheckoutFlow → Step 3: validatePayment → calls fetchRates (external) 4. Root cause: fetchRates calls external API without proper timeout ``` ================================================ FILE: gitnexus/skills/gitnexus-exploring.md ================================================ --- name: gitnexus-exploring description: "Use when the user asks how code works, wants to understand architecture, trace execution flows, or explore unfamiliar parts of the codebase. Examples: \"How does X work?\", \"What calls this function?\", \"Show me the auth flow\"" --- # Exploring Codebases with GitNexus ## When to Use - "How does authentication work?" - "What's the project structure?" - "Show me the main components" - "Where is the database logic?" - Understanding code you haven't seen before ## Workflow ``` 1. READ gitnexus://repos → Discover indexed repos 2. READ gitnexus://repo/{name}/context → Codebase overview, check staleness 3. gitnexus_query({query: "<what you want to understand>"}) → Find related execution flows 4. gitnexus_context({name: "<symbol>"}) → Deep dive on specific symbol 5. READ gitnexus://repo/{name}/process/{processName} → Trace full execution flow ``` > If step 2 says "Index is stale" → run `npx gitnexus analyze` in terminal.
## Checklist ``` - [ ] READ gitnexus://repo/{name}/context - [ ] gitnexus_query for the concept you want to understand - [ ] Review returned processes (execution flows) - [ ] gitnexus_context on key symbols for callers/callees - [ ] READ process resource for full execution traces - [ ] Read source files for implementation details ``` ## Resources | Resource | What you get | | ---------------------------------------------- | ------------------------------------------------------- | | `gitnexus://repo/{name}/context` | Stats, staleness warning (~150 tokens) | | `gitnexus://repo/{name}/clusters` | All functional areas with cohesion scores (~300 tokens) | | `gitnexus://repo/{name}/cluster/{clusterName}` | Area members with file paths (~500 tokens) | | `gitnexus://repo/{name}/process/{processName}` | Step-by-step execution trace (~200 tokens) | ## Tools **gitnexus_query** — find execution flows related to a concept: ``` gitnexus_query({query: "payment processing"}) → Processes: CheckoutFlow, RefundFlow, WebhookHandler → Symbols grouped by flow with file locations ``` **gitnexus_context** — 360-degree view of a symbol: ``` gitnexus_context({name: "validateUser"}) → Incoming calls: loginHandler, apiMiddleware → Outgoing calls: checkToken, getUserById → Processes: LoginFlow (step 2/5), TokenRefresh (step 1/3) ``` ## Example: "How does payment processing work?" ``` 1. READ gitnexus://repo/my-app/context → 918 symbols, 45 processes 2. gitnexus_query({query: "payment processing"}) → CheckoutFlow: processPayment → validateCard → chargeStripe → RefundFlow: initiateRefund → calculateRefund → processRefund 3. gitnexus_context({name: "processPayment"}) → Incoming: checkoutHandler, webhookHandler → Outgoing: validateCard, chargeStripe, saveTransaction 4.
Read src/payments/processor.ts for implementation details ``` ================================================ FILE: gitnexus/skills/gitnexus-guide.md ================================================ --- name: gitnexus-guide description: "Use when the user asks about GitNexus itself — available tools, how to query the knowledge graph, MCP resources, graph schema, or workflow reference. Examples: \"What GitNexus tools are available?\", \"How do I use GitNexus?\"" --- # GitNexus Guide Quick reference for all GitNexus MCP tools, resources, and the knowledge graph schema. ## Always Start Here For any task involving code understanding, debugging, impact analysis, or refactoring: 1. **Read `gitnexus://repo/{name}/context`** — codebase overview + check index freshness 2. **Match your task to a skill below** and **read that skill file** 3. **Follow the skill's workflow and checklist** > If step 1 warns the index is stale, run `npx gitnexus analyze` in the terminal first. ## Skills | Task | Skill to read | | -------------------------------------------- | ------------------- | | Understand architecture / "How does X work?" | `gitnexus-exploring` | | Blast radius / "What breaks if I change X?" | `gitnexus-impact-analysis` | | Trace bugs / "Why is X failing?" 
| `gitnexus-debugging` | | Rename / extract / split / refactor | `gitnexus-refactoring` | | Tools, resources, schema reference | `gitnexus-guide` (this file) | | Index, status, clean, wiki CLI commands | `gitnexus-cli` | ## Tools Reference | Tool | What it gives you | | ---------------- | ------------------------------------------------------------------------ | | `query` | Process-grouped code intelligence — execution flows related to a concept | | `context` | 360-degree symbol view — categorized refs, processes it participates in | | `impact` | Symbol blast radius — what breaks at depth 1/2/3 with confidence | | `detect_changes` | Git-diff impact — what do your current changes affect | | `rename` | Multi-file coordinated rename with confidence-tagged edits | | `cypher` | Raw graph queries (read `gitnexus://repo/{name}/schema` first) | | `list_repos` | Discover indexed repos | ## Resources Reference Lightweight reads (~100-500 tokens) for navigation: | Resource | Content | | ---------------------------------------------- | ----------------------------------------- | | `gitnexus://repo/{name}/context` | Stats, staleness check | | `gitnexus://repo/{name}/clusters` | All functional areas with cohesion scores | | `gitnexus://repo/{name}/cluster/{clusterName}` | Area members | | `gitnexus://repo/{name}/processes` | All execution flows | | `gitnexus://repo/{name}/process/{processName}` | Step-by-step trace | | `gitnexus://repo/{name}/schema` | Graph schema for Cypher | ## Graph Schema **Nodes:** File, Function, Class, Interface, Method, Community, Process **Edges (via CodeRelation.type):** CALLS, IMPORTS, EXTENDS, IMPLEMENTS, DEFINES, MEMBER_OF, STEP_IN_PROCESS ```cypher MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(f:Function {name: "myFunc"}) RETURN caller.name, caller.filePath ``` ================================================ FILE: gitnexus/skills/gitnexus-impact-analysis.md ================================================ --- name: gitnexus-impact-analysis 
description: "Use when the user wants to know what will break if they change something, or needs safety analysis before editing code. Examples: \"Is it safe to change X?\", \"What depends on this?\", \"What will break?\"" --- # Impact Analysis with GitNexus ## When to Use - "Is it safe to change this function?" - "What will break if I modify X?" - "Show me the blast radius" - "Who uses this code?" - Before making non-trivial code changes - Before committing — to understand what your changes affect ## Workflow ``` 1. gitnexus_impact({target: "X", direction: "upstream"}) → What depends on this 2. READ gitnexus://repo/{name}/processes → Check affected execution flows 3. gitnexus_detect_changes() → Map current git changes to affected flows 4. Assess risk and report to user ``` > If "Index is stale" → run `npx gitnexus analyze` in terminal. ## Checklist ``` - [ ] gitnexus_impact({target, direction: "upstream"}) to find dependents - [ ] Review d=1 items first (these WILL BREAK) - [ ] Check high-confidence (>0.8) dependencies - [ ] READ processes to check affected execution flows - [ ] gitnexus_detect_changes() for pre-commit check - [ ] Assess risk level and report to user ``` ## Understanding Output | Depth | Risk Level | Meaning | | ----- | ---------------- | ------------------------ | | d=1 | **WILL BREAK** | Direct callers/importers | | d=2 | LIKELY AFFECTED | Indirect dependencies | | d=3 | MAY NEED TESTING | Transitive effects | ## Risk Assessment | Affected | Risk | | ------------------------------ | -------- | | <5 symbols, few processes | LOW | | 5-15 symbols, 2-5 processes | MEDIUM | | >15 symbols or many processes | HIGH | | Critical path (auth, payments) | CRITICAL | ## Tools **gitnexus_impact** — the primary tool for symbol blast radius: ``` gitnexus_impact({ target: "validateUser", direction: "upstream", minConfidence: 0.8, maxDepth: 3 }) → d=1 (WILL BREAK): - loginHandler (src/auth/login.ts:42) [CALLS, 100%] - apiMiddleware (src/api/middleware.ts:15) 
[CALLS, 100%] → d=2 (LIKELY AFFECTED): - authRouter (src/routes/auth.ts:22) [CALLS, 95%] ``` **gitnexus_detect_changes** — git-diff based impact analysis: ``` gitnexus_detect_changes({scope: "staged"}) → Changed: 5 symbols in 3 files → Affected: LoginFlow, TokenRefresh, APIMiddlewarePipeline → Risk: MEDIUM ``` ## Example: "What breaks if I change validateUser?" ``` 1. gitnexus_impact({target: "validateUser", direction: "upstream"}) → d=1: loginHandler, apiMiddleware (WILL BREAK) → d=2: authRouter, sessionManager (LIKELY AFFECTED) 2. READ gitnexus://repo/my-app/processes → LoginFlow and TokenRefresh touch validateUser 3. Risk: 2 direct callers, 2 processes = MEDIUM ``` ================================================ FILE: gitnexus/skills/gitnexus-pr-review.md ================================================ --- name: gitnexus-pr-review description: "Use when the user wants to review a pull request, understand what a PR changes, assess risk of merging, or check for missing test coverage. Examples: \"Review this PR\", \"What does PR #42 change?\", \"Is this PR safe to merge?\"" --- # PR Review with GitNexus ## When to Use - "Review this PR" - "What does PR #42 change?" - "Is this safe to merge?" - "What's the blast radius of this PR?" - "Are there missing tests for this PR?" - Reviewing someone else's code changes before merge ## Workflow ``` 1. gh pr diff <number> → Get the raw diff 2. gitnexus_detect_changes({scope: "compare", base_ref: "main"}) → Map diff to affected flows 3. For each changed symbol: gitnexus_impact({target: "<symbol>", direction: "upstream"}) → Blast radius per change 4. gitnexus_context({name: "<key symbol>"}) → Understand callers/callees 5. READ gitnexus://repo/{name}/processes → Check affected execution flows 6. Summarize findings with risk assessment ``` > If "Index is stale" → run `npx gitnexus analyze` in terminal before reviewing. 
## Checklist ``` - [ ] Fetch PR diff (gh pr diff or git diff base...head) - [ ] gitnexus_detect_changes to map changes to affected execution flows - [ ] gitnexus_impact on each non-trivial changed symbol - [ ] Review d=1 items (WILL BREAK) — are callers updated? - [ ] gitnexus_context on key changed symbols to understand full picture - [ ] Check if affected processes have test coverage - [ ] Assess overall risk level - [ ] Write review summary with findings ``` ## Review Dimensions | Dimension | How GitNexus Helps | | --- | --- | | **Correctness** | `context` shows callers — are they all compatible with the change? | | **Blast radius** | `impact` shows d=1/d=2/d=3 dependents — anything missed? | | **Completeness** | `detect_changes` shows all affected flows — are they all handled? | | **Test coverage** | `impact({includeTests: true})` shows which tests touch changed code | | **Breaking changes** | d=1 upstream items that aren't updated in the PR = potential breakage | ## Risk Assessment | Signal | Risk | | --- | --- | | Changes touch <3 symbols, 0-1 processes | LOW | | Changes touch 3-10 symbols, 2-5 processes | MEDIUM | | Changes touch >10 symbols or many processes | HIGH | | Changes touch auth, payments, or data integrity code | CRITICAL | | d=1 callers exist outside the PR diff | Potential breakage — flag it | ## Tools **gitnexus_detect_changes** — map PR diff to affected execution flows: ``` gitnexus_detect_changes({scope: "compare", base_ref: "main"}) → Changed: 8 symbols in 4 files → Affected processes: CheckoutFlow, RefundFlow, WebhookHandler → Risk: MEDIUM ``` **gitnexus_impact** — blast radius per changed symbol: ``` gitnexus_impact({target: "validatePayment", direction: "upstream"}) → d=1 (WILL BREAK): - processCheckout (src/checkout.ts:42) [CALLS, 100%] - webhookHandler (src/webhooks.ts:15) [CALLS, 100%] → d=2 (LIKELY AFFECTED): - checkoutRouter (src/routes/checkout.ts:22) [CALLS, 95%] ``` **gitnexus_impact with tests** — check test coverage: ``` 
gitnexus_impact({target: "validatePayment", direction: "upstream", includeTests: true}) → Tests that cover this symbol: - validatePayment.test.ts [direct] - checkout.integration.test.ts [via processCheckout] ``` **gitnexus_context** — understand a changed symbol's role: ``` gitnexus_context({name: "validatePayment"}) → Incoming calls: processCheckout, webhookHandler → Outgoing calls: verifyCard, fetchRates → Processes: CheckoutFlow (step 3/7), RefundFlow (step 1/5) ``` ## Example: "Review PR #42" ``` 1. gh pr diff 42 > /tmp/pr42.diff → 4 files changed: payments.ts, checkout.ts, types.ts, utils.ts 2. gitnexus_detect_changes({scope: "compare", base_ref: "main"}) → Changed symbols: validatePayment, PaymentInput, formatAmount → Affected processes: CheckoutFlow, RefundFlow → Risk: MEDIUM 3. gitnexus_impact({target: "validatePayment", direction: "upstream"}) → d=1: processCheckout, webhookHandler (WILL BREAK) → webhookHandler is NOT in the PR diff — potential breakage! 4. gitnexus_impact({target: "PaymentInput", direction: "upstream"}) → d=1: validatePayment (in PR), createPayment (NOT in PR) → createPayment uses the old PaymentInput shape — breaking change! 5. gitnexus_context({name: "formatAmount"}) → Called by 12 functions — but change is backwards-compatible (added optional param) 6. Review summary: - MEDIUM risk — 3 changed symbols affect 2 execution flows - BUG: webhookHandler calls validatePayment but isn't updated for new signature - BUG: createPayment depends on PaymentInput type which changed - OK: formatAmount change is backwards-compatible - Tests: checkout.test.ts covers processCheckout path, but no webhook test ``` ## Review Output Format Structure your review as: ```markdown ## PR Review: <title> **Risk: LOW / MEDIUM / HIGH / CRITICAL** ### Changes Summary - <N> symbols changed across <M> files - <P> execution flows affected ### Findings 1. 
**[severity]** Description of finding - Evidence from GitNexus tools - Affected callers/flows ### Missing Coverage - Callers not updated in PR: ... - Untested flows: ... ### Recommendation APPROVE / REQUEST CHANGES / NEEDS DISCUSSION ``` ================================================ FILE: gitnexus/skills/gitnexus-refactoring.md ================================================ --- name: gitnexus-refactoring description: "Use when the user wants to rename, extract, split, move, or restructure code safely. Examples: \"Rename this function\", \"Extract this into a module\", \"Refactor this class\", \"Move this to a separate file\"" --- # Refactoring with GitNexus ## When to Use - "Rename this function safely" - "Extract this into a module" - "Split this service" - "Move this to a new file" - Any task involving renaming, extracting, splitting, or restructuring code ## Workflow ``` 1. gitnexus_impact({target: "X", direction: "upstream"}) → Map all dependents 2. gitnexus_query({query: "X"}) → Find execution flows involving X 3. gitnexus_context({name: "X"}) → See all incoming/outgoing refs 4. Plan update order: interfaces → implementations → callers → tests ``` > If "Index is stale" → run `npx gitnexus analyze` in terminal. 
## Checklists ### Rename Symbol ``` - [ ] gitnexus_rename({symbol_name: "oldName", new_name: "newName", dry_run: true}) — preview all edits - [ ] Review graph edits (high confidence) and ast_search edits (review carefully) - [ ] If satisfied: gitnexus_rename({..., dry_run: false}) — apply edits - [ ] gitnexus_detect_changes() — verify only expected files changed - [ ] Run tests for affected processes ``` ### Extract Module ``` - [ ] gitnexus_context({name: target}) — see all incoming/outgoing refs - [ ] gitnexus_impact({target, direction: "upstream"}) — find all external callers - [ ] Define new module interface - [ ] Extract code, update imports - [ ] gitnexus_detect_changes() — verify affected scope - [ ] Run tests for affected processes ``` ### Split Function/Service ``` - [ ] gitnexus_context({name: target}) — understand all callees - [ ] Group callees by responsibility - [ ] gitnexus_impact({target, direction: "upstream"}) — map callers to update - [ ] Create new functions/services - [ ] Update callers - [ ] gitnexus_detect_changes() — verify affected scope - [ ] Run tests for affected processes ``` ## Tools **gitnexus_rename** — automated multi-file rename: ``` gitnexus_rename({symbol_name: "validateUser", new_name: "authenticateUser", dry_run: true}) → 12 edits across 8 files → 10 graph edits (high confidence), 2 ast_search edits (review) → Changes: [{file_path, edits: [{line, old_text, new_text, confidence}]}] ``` **gitnexus_impact** — map all dependents first: ``` gitnexus_impact({target: "validateUser", direction: "upstream"}) → d=1: loginHandler, apiMiddleware, testUtils → Affected Processes: LoginFlow, TokenRefresh ``` **gitnexus_detect_changes** — verify your changes after refactoring: ``` gitnexus_detect_changes({scope: "all"}) → Changed: 8 files, 12 symbols → Affected processes: LoginFlow, TokenRefresh → Risk: MEDIUM ``` **gitnexus_cypher** — custom reference queries: ```cypher MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(f:Function {name: 
"validateUser"}) RETURN caller.name, caller.filePath ORDER BY caller.filePath ``` ## Risk Rules | Risk Factor | Mitigation | | ------------------- | ----------------------------------------- | | Many callers (>5) | Use gitnexus_rename for automated updates | | Cross-area refs | Use detect_changes after to verify scope | | String/dynamic refs | gitnexus_query to find them | | External/public API | Version and deprecate properly | ## Example: Rename `validateUser` to `authenticateUser` ``` 1. gitnexus_rename({symbol_name: "validateUser", new_name: "authenticateUser", dry_run: true}) → 12 edits: 10 graph (safe), 2 ast_search (review) → Files: validator.ts, login.ts, middleware.ts, config.json... 2. Review ast_search edits (config.json: dynamic reference!) 3. gitnexus_rename({symbol_name: "validateUser", new_name: "authenticateUser", dry_run: false}) → Applied 12 edits across 8 files 4. gitnexus_detect_changes({scope: "all"}) → Affected: LoginFlow, TokenRefresh → Risk: MEDIUM — run tests for these flows ``` ================================================ FILE: gitnexus/src/cli/ai-context.ts ================================================ /** * AI Context Generator * * Creates AGENTS.md and CLAUDE.md with full inline GitNexus context. * AGENTS.md is the standard read by Cursor, Windsurf, OpenCode, Cline, etc. * CLAUDE.md is for Claude Code which only reads that file. 
// ESM equivalent of __dirname
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/**
 * Optional counters describing an indexed repository.
 * In this file only `nodes`, `edges` and `processes` are read (by generateGitNexusContent);
 * the other fields are declared but not consumed in the visible code.
 */
interface RepoStats {
  files?: number;
  nodes?: number;
  edges?: number;
  communities?: number;
  clusters?: number; // Aggregated cluster count (what tools show)
  processes?: number;
}

// HTML-comment delimiters that wrap the generated section; upsertGitNexusSection
// searches for these to decide between replacing an existing section and appending.
const GITNEXUS_START_MARKER = '<!-- gitnexus:start -->';
const GITNEXUS_END_MARKER = '<!-- gitnexus:end -->';

/**
 * Generate the full GitNexus context content.
 *
 * Design principles (learned from real agent behavior and industry research):
 * - Inline critical workflows — skills are skipped 56% of the time (Vercel eval data)
 * - Use RFC 2119 language (MUST, NEVER, ALWAYS) — models follow imperative rules
 * - Three-tier boundaries (Always/When/Never) — proven to change model behavior
 * - Keep under 120 lines — adherence degrades past 150 lines
 * - Exact tool commands with parameters — vague directives get ignored
 * - Self-review checklist — forces model to verify its own work
 *
 * @param projectName - Name interpolated into the intro sentence and the `gitnexus://repo/...` resource URIs.
 * @param stats - Repository counters; missing or zero `nodes`/`edges`/`processes` are rendered as 0.
 * @param generatedSkills - Optional generated skills; each one adds a row to the skills table.
 * @returns The section text, beginning with GITNEXUS_START_MARKER and ending with GITNEXUS_END_MARKER.
 */
function generateGitNexusContent(projectName: string, stats: RepoStats, generatedSkills?: GeneratedSkillInfo[]): string {
  // One table row per generated skill; empty string when none were supplied so the
  // skills table below is emitted without extra rows.
  const generatedRows = (generatedSkills && generatedSkills.length > 0)
    ? generatedSkills.map(s =>
        `| Work in the ${s.label} area (${s.symbolCount} symbols) | \`.claude/skills/generated/${s.name}/SKILL.md\` |`
      ).join('\n')
    : '';

  // Fixed task → skill-file table; generated rows (if any) are appended on a new line.
  const skillsTable = `| Task | Read this skill file | |------|---------------------| | Understand architecture / "How does X work?" | \`.claude/skills/gitnexus/gitnexus-exploring/SKILL.md\` | | Blast radius / "What breaks if I change X?" | \`.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md\` | | Trace bugs / "Why is X failing?"
| \`.claude/skills/gitnexus/gitnexus-debugging/SKILL.md\` | | Rename / extract / split / refactor | \`.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md\` | | Tools, resources, schema reference | \`.claude/skills/gitnexus/gitnexus-guide/SKILL.md\` | | Index, status, clean, wiki CLI commands | \`.claude/skills/gitnexus/gitnexus-cli/SKILL.md\` |${generatedRows ? '\n' + generatedRows : ''}`;

  // The whole section is wrapped in the start/end markers so it can be located and replaced later.
  return `${GITNEXUS_START_MARKER} # GitNexus — Code Intelligence This project is indexed by GitNexus as **${projectName}** (${stats.nodes || 0} symbols, ${stats.edges || 0} relationships, ${stats.processes || 0} execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely. > If any GitNexus tool warns the index is stale, run \`npx gitnexus analyze\` in terminal first. ## Always Do - **MUST run impact analysis before editing any symbol.** Before modifying a function, class, or method, run \`gitnexus_impact({target: "symbolName", direction: "upstream"})\` and report the blast radius (direct callers, affected processes, risk level) to the user. - **MUST run \`gitnexus_detect_changes()\` before committing** to verify your changes only affect expected symbols and execution flows. - **MUST warn the user** if impact analysis returns HIGH or CRITICAL risk before proceeding with edits. - When exploring unfamiliar code, use \`gitnexus_query({query: "concept"})\` to find execution flows instead of grepping. It returns process-grouped results ranked by relevance. - When you need full context on a specific symbol — callers, callees, which execution flows it participates in — use \`gitnexus_context({name: "symbolName"})\`. ## When Debugging 1. \`gitnexus_query({query: "<error or symptom>"})\` — find execution flows related to the issue 2. \`gitnexus_context({name: "<suspect function>"})\` — see all callers, callees, and process participation 3. \`READ gitnexus://repo/${projectName}/process/{processName}\` — trace the full execution flow step by step 4.
For regressions: \`gitnexus_detect_changes({scope: "compare", base_ref: "main"})\` — see what your branch changed ## When Refactoring - **Renaming**: MUST use \`gitnexus_rename({symbol_name: "old", new_name: "new", dry_run: true})\` first. Review the preview — graph edits are safe, text_search edits need manual review. Then run with \`dry_run: false\`. - **Extracting/Splitting**: MUST run \`gitnexus_context({name: "target"})\` to see all incoming/outgoing refs, then \`gitnexus_impact({target: "target", direction: "upstream"})\` to find all external callers before moving code. - After any refactor: run \`gitnexus_detect_changes({scope: "all"})\` to verify only expected files changed. ## Never Do - NEVER edit a function, class, or method without first running \`gitnexus_impact\` on it. - NEVER ignore HIGH or CRITICAL risk warnings from impact analysis. - NEVER rename symbols with find-and-replace — use \`gitnexus_rename\` which understands the call graph. - NEVER commit changes without running \`gitnexus_detect_changes()\` to check affected scope.
## Tools Quick Reference | Tool | When to use | Command | |------|-------------|---------| | \`query\` | Find code by concept | \`gitnexus_query({query: "auth validation"})\` | | \`context\` | 360-degree view of one symbol | \`gitnexus_context({name: "validateUser"})\` | | \`impact\` | Blast radius before editing | \`gitnexus_impact({target: "X", direction: "upstream"})\` | | \`detect_changes\` | Pre-commit scope check | \`gitnexus_detect_changes({scope: "staged"})\` | | \`rename\` | Safe multi-file rename | \`gitnexus_rename({symbol_name: "old", new_name: "new", dry_run: true})\` | | \`cypher\` | Custom graph queries | \`gitnexus_cypher({query: "MATCH ..."})\` | ## Impact Risk Levels | Depth | Meaning | Action | |-------|---------|--------| | d=1 | WILL BREAK — direct callers/importers | MUST update these | | d=2 | LIKELY AFFECTED — indirect deps | Should test | | d=3 | MAY NEED TESTING — transitive | Test if critical path | ## Resources | Resource | Use for | |----------|---------| | \`gitnexus://repo/${projectName}/context\` | Codebase overview, check index freshness | | \`gitnexus://repo/${projectName}/clusters\` | All functional areas | | \`gitnexus://repo/${projectName}/processes\` | All execution flows | | \`gitnexus://repo/${projectName}/process/{name}\` | Step-by-step execution trace | ## Self-Check Before Finishing Before completing any code modification task, verify: 1. \`gitnexus_impact\` was run for all modified symbols 2. No HIGH/CRITICAL risk warnings were ignored 3. \`gitnexus_detect_changes()\` confirms changes match expected scope 4. All d=1 (WILL BREAK) dependents were updated ## Keeping the Index Fresh After committing code changes, the GitNexus index becomes stale.
Re-run analyze to update it: \`\`\`bash npx gitnexus analyze \`\`\` If the index previously included embeddings, preserve them by adding \`--embeddings\`: \`\`\`bash npx gitnexus analyze --embeddings \`\`\` To check whether embeddings exist, inspect \`.gitnexus/meta.json\` — the \`stats.embeddings\` field shows the count (0 means no embeddings). **Running analyze without \`--embeddings\` will delete any previously generated embeddings.** > Claude Code users: A PostToolUse hook handles this automatically after \`git commit\` and \`git merge\`. ## CLI ${skillsTable} ${GITNEXUS_END_MARKER}`;
}
/**
 * Check whether a path is accessible.
 *
 * @param filePath - Path probed with `fs.access`.
 * @returns `true` when `fs.access` resolves, `false` when it rejects for any reason.
 */
async function fileExists(filePath: string): Promise<boolean> {
  try {
    await fs.access(filePath);
    return true;
  } catch {
    return false;
  }
}

/**
 * Create or update the GitNexus section in a file.
 * - If the file doesn't exist: create it with the GitNexus content
 * - If the file exists with a GitNexus section: replace that section in place
 * - If the file exists without a (complete) GitNexus section: append the content
 *
 * @param filePath - File to create or modify.
 * @param content - Full section text, expected to include both markers.
 * @returns Which of the three actions was taken.
 */
async function upsertGitNexusSection(
  filePath: string,
  content: string
): Promise<'created' | 'updated' | 'appended'> {
  const exists = await fileExists(filePath);

  if (!exists) {
    await fs.writeFile(filePath, content, 'utf-8');
    return 'created';
  }

  const existingContent = await fs.readFile(filePath, 'utf-8');

  // Look for the end marker only AFTER the start marker. Searching from the top of the
  // file would pick up a stray end marker that precedes the section (e.g. one quoted in
  // user-written text), fail the ordering check, and append a duplicate section.
  const startIdx = existingContent.indexOf(GITNEXUS_START_MARKER);
  const endIdx = startIdx === -1
    ? -1
    : existingContent.indexOf(GITNEXUS_END_MARKER, startIdx + GITNEXUS_START_MARKER.length);

  if (startIdx !== -1 && endIdx !== -1) {
    // Replace existing section, keeping everything outside the markers
    const before = existingContent.substring(0, startIdx);
    const after = existingContent.substring(endIdx + GITNEXUS_END_MARKER.length);
    const newContent = before + content + after;
    await fs.writeFile(filePath, newContent.trim() + '\n', 'utf-8');
    return 'updated';
  }

  // Append new section. An empty or whitespace-only file gets the section alone,
  // without the blank-line separator that only makes sense after existing text.
  const trimmed = existingContent.trim();
  const newContent = trimmed.length > 0
    ? trimmed + '\n\n' + content + '\n'
    : content + '\n';
  await fs.writeFile(filePath, newContent, 'utf-8');
  return 'appended';
}
/**
 * Install GitNexus skills to .claude/skills/gitnexus/
 * Works natively with Claude Code, Cursor, and GitHub Copilot
 *
 * For each skill, writes `<repoPath>/.claude/skills/gitnexus/<name>/SKILL.md`. The content is
 * read from the package's `skills/<name>.md`; if that file cannot be read, a minimal skill
 * file is generated from the name and description below.
 *
 * @param repoPath - Repository root under which `.claude/skills/gitnexus/` is created.
 * @returns Names of the skills that were written. A skill whose installation throws is
 *          logged with `console.warn` and omitted instead of failing the whole run.
 */
async function installSkills(repoPath: string): Promise<string[]> {
  const skillsDir = path.join(repoPath, '.claude', 'skills', 'gitnexus');
  const installedSkills: string[] = [];

  // Skill definitions bundled with the package
  const skills = [
    {
      name: 'gitnexus-exploring',
      description: 'Use when the user asks how code works, wants to understand architecture, trace execution flows, or explore unfamiliar parts of the codebase. Examples: "How does X work?", "What calls this function?", "Show me the auth flow"',
    },
    {
      name: 'gitnexus-debugging',
      description: 'Use when the user is debugging a bug, tracing an error, or asking why something fails. Examples: "Why is X failing?", "Where does this error come from?", "Trace this bug"',
    },
    {
      name: 'gitnexus-impact-analysis',
      description: 'Use when the user wants to know what will break if they change something, or needs safety analysis before editing code. Examples: "Is it safe to change X?", "What depends on this?", "What will break?"',
    },
    {
      name: 'gitnexus-refactoring',
      description: 'Use when the user wants to rename, extract, split, move, or restructure code safely. Examples: "Rename this function", "Extract this into a module", "Refactor this class", "Move this to a separate file"',
    },
    {
      name: 'gitnexus-guide',
      description: 'Use when the user asks about GitNexus itself — available tools, how to query the knowledge graph, MCP resources, graph schema, or workflow reference. Examples: "What GitNexus tools are available?", "How do I use GitNexus?"',
    },
    {
      name: 'gitnexus-cli',
      description: 'Use when the user needs to run GitNexus CLI commands like analyze/index a repo, check status, clean the index, generate a wiki, or list indexed repos. Examples: "Index this repo", "Reanalyze the codebase", "Generate a wiki"',
    },
  ];

  for (const skill of skills) {
    const skillDir = path.join(skillsDir, skill.name);
    const skillPath = path.join(skillDir, 'SKILL.md');

    try {
      // Create skill directory
      await fs.mkdir(skillDir, { recursive: true });

      // Try to read from package skills directory
      const packageSkillPath = path.join(__dirname, '..', '..', 'skills', `${skill.name}.md`);
      let skillContent: string;

      try {
        skillContent = await fs.readFile(packageSkillPath, 'utf-8');
      } catch {
        // Fallback: generate minimal skill content.
        // The description contains `: ` and double quotes, which are not valid in an
        // unquoted YAML scalar, so it is emitted as a double-quoted scalar. JSON string
        // syntax is valid YAML here and matches how the bundled skill files quote it.
        const title = skill.name.charAt(0).toUpperCase() + skill.name.slice(1);
        skillContent = [
          '---',
          `name: ${skill.name}`,
          `description: ${JSON.stringify(skill.description)}`,
          '---',
          '',
          `# ${title}`,
          '',
          skill.description,
          '',
          'Use GitNexus tools to accomplish this task.',
          '',
        ].join('\n');
      }

      await fs.writeFile(skillPath, skillContent, 'utf-8');
      installedSkills.push(skill.name);
    } catch (err) {
      // Skip on error, don't fail the whole process
      console.warn(`Warning: Could not install skill ${skill.name}:`, err);
    }
  }

  return installedSkills;
}
/**
 * Generate AI context files after indexing.
 *
 * Writes the GitNexus section into AGENTS.md and then CLAUDE.md under `repoPath`
 * (creating, updating or appending as needed), then installs the bundled skills.
 *
 * @param repoPath - Repository root that receives the files.
 * @param _storagePath - Accepted but not used by this function.
 * @param projectName - Project name interpolated into the generated section.
 * @param stats - Repository counters interpolated into the generated section.
 * @param generatedSkills - Optional generated skills listed in the section's skills table.
 * @returns Human-readable descriptions of what was written, in write order.
 */
export async function generateAIContextFiles(
  repoPath: string,
  _storagePath: string,
  projectName: string,
  stats: RepoStats,
  generatedSkills?: GeneratedSkillInfo[]
): Promise<{ files: string[] }> {
  const section = generateGitNexusContent(projectName, stats, generatedSkills);
  const files: string[] = [];

  // AGENTS.md is the standard for Cursor, Windsurf, OpenCode, Cline, etc.;
  // CLAUDE.md is for Claude Code. Written one after the other, in this order.
  for (const fileName of ['AGENTS.md', 'CLAUDE.md']) {
    const outcome = await upsertGitNexusSection(path.join(repoPath, fileName), section);
    files.push(`${fileName} (${outcome})`);
  }

  // Install skills to .claude/skills/gitnexus/ and report them only if any were written
  const skillNames = await installSkills(repoPath);
  if (skillNames.length > 0) {
    files.push(`.claude/skills/gitnexus/ (${skillNames.length} skills)`);
  }

  return { files };
}
// disposeEmbedder intentionally not called — ONNX Runtime segfaults on cleanup (see #38)
import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, getGlobalRegistryPath, cleanupOldKuzuFiles } from '../storage/repo-manager.js';
import { getCurrentCommit, isGitRepo, getGitRoot } from '../storage/git.js';
import { generateAIContextFiles } from './ai-context.js';
import { generateSkillFiles, type GeneratedSkillInfo } from './skill-gen.js';
import fs from 'fs/promises';

/** Target V8 old-space size, in megabytes, for the analyze process. */
const HEAP_MB = 8192;
/** Node flag that requests HEAP_MB of old-space heap. */
const HEAP_FLAG = `--max-old-space-size=${HEAP_MB}`;

/**
 * Re-exec the process with an 8GB heap if we're currently below that.
 *
 * No re-exec happens when NODE_OPTIONS already carries a
 * `--max-old-space-size` setting, or when the current V8 heap limit is
 * already at least 90% of HEAP_MB.
 *
 * @returns `true` when a child process was spawned (it has already run to
 *          completion, and its exit status was copied to `process.exitCode`
 *          on failure), so the caller should return immediately;
 *          `false` when the current process should carry on.
 */
function ensureHeap(): boolean {
  const nodeOpts = process.env.NODE_OPTIONS || '';
  if (nodeOpts.includes('--max-old-space-size')) return false;

  const v8Heap = v8.getHeapStatistics().heap_size_limit;
  if (v8Heap >= HEAP_MB * 1024 * 1024 * 0.9) return false;

  try {
    // Synchronous: the child inherits stdio and runs the same argv with the bigger heap.
    execFileSync(process.execPath, [HEAP_FLAG, ...process.argv.slice(1)], {
      stdio: 'inherit',
      env: { ...process.env, NODE_OPTIONS: `${nodeOpts} ${HEAP_FLAG}`.trim() },
    });
  } catch (e: any) {
    // execFileSync throws on non-zero exit; propagate the child's status.
    process.exitCode = e.status ?? 1;
  }
  return true;
}

/** Options accepted by {@link analyzeCommand}. */
export interface AnalyzeOptions {
  /** Re-index even when the stored commit matches HEAD; also disables the embedding cache. */
  force?: boolean;
  /** Run the embedding pipeline (skipped automatically above EMBEDDING_NODE_LIMIT nodes). */
  embeddings?: boolean;
  /** Generate skill files; also bypasses the "already up to date" shortcut. */
  skills?: boolean;
  /** Sets GITNEXUS_VERBOSE=1 in the environment for the rest of the run. */
  verbose?: boolean;
}

/** Threshold: auto-skip embeddings for repos with more nodes than this */
const EMBEDDING_NODE_LIMIT = 50_000;

/** Progress-bar labels keyed by the phase id reported by the pipeline; unknown ids are shown as-is. */
const PHASE_LABELS: Record<string, string> = {
  extracting: 'Scanning files',
  structure: 'Building structure',
  parsing: 'Parsing code',
  imports: 'Resolving imports',
  calls: 'Tracing calls',
  heritage: 'Extracting inheritance',
  communities: 'Detecting communities',
  processes: 'Detecting processes',
  complete: 'Pipeline complete',
  lbug: 'Loading into LadybugDB',
  fts: 'Creating search indexes',
  embeddings: 'Generating embeddings',
  done: 'Done',
};

/**
 * `gitnexus analyze [path]` — index a repository into LadybugDB.
 *
 * Steps, in order: ensure a large heap (may re-exec), resolve the repo root,
 * skip if the index is already at the current commit, optionally cache
 * existing embeddings, run the ingestion pipeline, rebuild the database,
 * create FTS indexes, restore/generate embeddings, save metadata, register
 * the repo, write AI context files, print a summary, then force-exit.
 *
 * While the progress bar is active, console.log/warn/error are replaced by a
 * bar-aware logger; they are restored just before the summary is printed.
 *
 * @param inputPath Repository path; defaults to the git root of the cwd.
 * @param options   See {@link AnalyzeOptions}.
 */
export const analyzeCommand = async (
  inputPath?: string,
  options?: AnalyzeOptions
) => {
  // If a re-exec happened, the child already did all the work.
  if (ensureHeap()) return;

  if (options?.verbose) {
    process.env.GITNEXUS_VERBOSE = '1';
  }

  console.log('\n GitNexus Analyzer\n');

  let repoPath: string;
  if (inputPath) {
    repoPath = path.resolve(inputPath);
  } else {
    const gitRoot = getGitRoot(process.cwd());
    if (!gitRoot) {
      console.log(' Not inside a git repository\n');
      process.exitCode = 1;
      return;
    }
    repoPath = gitRoot;
  }

  if (!isGitRepo(repoPath)) {
    console.log(' Not a git repository\n');
    process.exitCode = 1;
    return;
  }

  const { storagePath, lbugPath } = getStoragePaths(repoPath);

  // Clean up stale KuzuDB files from before the LadybugDB migration.
  // If kuzu existed but lbug doesn't, we're doing a migration re-index — say so.
  const kuzuResult = await cleanupOldKuzuFiles(storagePath);
  if (kuzuResult.found && kuzuResult.needsReindex) {
    console.log(' Migrating from KuzuDB to LadybugDB — rebuilding index...\n');
  }

  const currentCommit = getCurrentCommit(repoPath);
  const existingMeta = await loadMeta(storagePath);

  // Shortcut: nothing to do unless forced, skills were requested, or HEAD moved.
  if (existingMeta && !options?.force && !options?.skills && existingMeta.lastCommit === currentCommit) {
    console.log(' Already up to date\n');
    return;
  }

  if (process.env.GITNEXUS_NO_GITIGNORE) {
    console.log(' GITNEXUS_NO_GITIGNORE is set — skipping .gitignore (still reading .gitnexusignore)\n');
  }

  // Single progress bar for entire pipeline
  const bar = new cliProgress.SingleBar({
    format: ' {bar} {percentage}% | {phase}',
    barCompleteChar: '\u2588',
    barIncompleteChar: '\u2591',
    hideCursor: true,
    barGlue: '',
    autopadding: true,
    clearOnComplete: false,
    stopOnComplete: false,
  }, cliProgress.Presets.shades_grey);

  bar.start(100, 0, { phase: 'Initializing...' });

  // Graceful SIGINT handling — clean up resources and exit
  let aborted = false;
  const sigintHandler = () => {
    if (aborted) process.exit(1); // Second Ctrl-C: force exit
    aborted = true;
    bar.stop();
    console.log('\n Interrupted — cleaning up...');
    closeLbug().catch(() => {}).finally(() => process.exit(130));
  };
  process.on('SIGINT', sigintHandler);

  // Route all console output through bar.log() so the bar doesn't stamp itself
  // multiple times when other code writes to stdout/stderr mid-render.
  const origLog = console.log.bind(console);
  const origWarn = console.warn.bind(console);
  const origError = console.error.bind(console);
  const barLog = (...args: any[]) => {
    // Clear the bar line, print the message, then let the next bar.update redraw
    process.stdout.write('\x1b[2K\r');
    origLog(args.map(a => (typeof a === 'string' ? a : String(a))).join(' '));
  };
  // NOTE: warn and error are redirected to the same stdout-based logger until restored below.
  console.log = barLog;
  console.warn = barLog;
  console.error = barLog;

  // Track elapsed time per phase — both updateBar and the interval use the
  // same format so they don't flicker against each other.
  let lastPhaseLabel = 'Initializing...';
  let phaseStart = Date.now();

  /** Update bar with phase label + elapsed seconds (shown after 3s). */
  const updateBar = (value: number, phaseLabel: string) => {
    if (phaseLabel !== lastPhaseLabel) {
      lastPhaseLabel = phaseLabel;
      phaseStart = Date.now();
    }
    const elapsed = Math.round((Date.now() - phaseStart) / 1000);
    const display = elapsed >= 3 ? `${phaseLabel} (${elapsed}s)` : phaseLabel;
    bar.update(value, { phase: display });
  };

  // Tick elapsed seconds for phases with infrequent progress callbacks
  // (e.g. CSV streaming, FTS indexing). Uses the same display format as
  // updateBar so there's no flickering.
  const elapsedTimer = setInterval(() => {
    const elapsed = Math.round((Date.now() - phaseStart) / 1000);
    if (elapsed >= 3) {
      bar.update({ phase: `${lastPhaseLabel} (${elapsed}s)` });
    }
  }, 1000);

  const t0Global = Date.now();

  // ── Cache embeddings from existing index before rebuild ────────────
  let cachedEmbeddingNodeIds = new Set<string>();
  let cachedEmbeddings: Array<{ nodeId: string; embedding: number[] }> = [];

  if (options?.embeddings && existingMeta && !options?.force) {
    try {
      updateBar(0, 'Caching embeddings...');
      await initLbug(lbugPath);
      const cached = await loadCachedEmbeddings();
      cachedEmbeddingNodeIds = cached.embeddingNodeIds;
      cachedEmbeddings = cached.embeddings;
      await closeLbug();
    } catch {
      // Caching is best-effort: on any failure just make sure the DB is closed.
      try { await closeLbug(); } catch {}
    }
  }

  // ── Phase 1: Full Pipeline (0–60%) ─────────────────────────────────
  const pipelineResult = await runPipelineFromRepo(repoPath, (progress) => {
    const phaseLabel = PHASE_LABELS[progress.phase] || progress.phase;
    // Pipeline percent (0–100) is mapped onto the first 60% of the bar.
    const scaled = Math.round(progress.percent * 0.6);
    updateBar(scaled, phaseLabel);
  });

  // ── Phase 2: LadybugDB (60–85%) ──────────────────────────────────────
  updateBar(60, 'Loading into LadybugDB...');

  // Start from an empty database: close any open handle and remove the old files.
  await closeLbug();
  const lbugFiles = [lbugPath, `${lbugPath}.wal`, `${lbugPath}.lock`];
  for (const f of lbugFiles) {
    try { await fs.rm(f, { recursive: true, force: true }); } catch {}
  }

  const t0Lbug = Date.now();
  await initLbug(lbugPath);
  let lbugMsgCount = 0;
  const lbugResult = await loadGraphToLbug(pipelineResult.graph, pipelineResult.repoPath, storagePath, (msg) => {
    lbugMsgCount++;
    // The total number of messages is unknown, so progress approaches 84% asymptotically.
    const progress = Math.min(84, 60 + Math.round((lbugMsgCount / (lbugMsgCount + 10)) * 24));
    updateBar(progress, msg);
  });
  const lbugTime = ((Date.now() - t0Lbug) / 1000).toFixed(1);
  const lbugWarnings = lbugResult.warnings;

  // ── Phase 3: FTS (85–90%) ─────────────────────────────────────────
  updateBar(85, 'Creating search indexes...');

  const t0Fts = Date.now();
  try {
    await createFTSIndex('File', 'file_fts', ['name', 'content']);
    await createFTSIndex('Function', 'function_fts', ['name', 'content']);
    await createFTSIndex('Class', 'class_fts', ['name', 'content']);
    await createFTSIndex('Method', 'method_fts', ['name', 'content']);
    await createFTSIndex('Interface', 'interface_fts', ['name', 'content']);
  } catch (e: any) {
    // Non-fatal — FTS is best-effort
    // (the first failure also skips the remaining index creations)
  }
  const ftsTime = ((Date.now() - t0Fts) / 1000).toFixed(1);

  // ── Phase 3.5: Re-insert cached embeddings ────────────────────────
  if (cachedEmbeddings.length > 0) {
    updateBar(88, `Restoring ${cachedEmbeddings.length} cached embeddings...`);
    const EMBED_BATCH = 200;
    for (let i = 0; i < cachedEmbeddings.length; i += EMBED_BATCH) {
      const batch = cachedEmbeddings.slice(i, i + EMBED_BATCH);
      const paramsList = batch.map(e => ({ nodeId: e.nodeId, embedding: e.embedding }));
      try {
        await executeWithReusedStatement(
          `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`,
          paramsList,
        );
      } catch { /* some may fail if node was removed, that's fine */ }
    }
  }

  // ── Phase 4: Embeddings (90–98%) ──────────────────────────────────
  const stats = await getLbugStats();
  let embeddingTime = '0.0';
  let embeddingSkipped = true;
  let embeddingSkipReason = 'off (use --embeddings to enable)';

  if (options?.embeddings) {
    if (stats.nodes > EMBEDDING_NODE_LIMIT) {
      embeddingSkipReason = `skipped (${stats.nodes.toLocaleString()} nodes > ${EMBEDDING_NODE_LIMIT.toLocaleString()} limit)`;
    } else {
      embeddingSkipped = false;
    }
  }

  if (!embeddingSkipped) {
    updateBar(90, 'Loading embedding model...');
    const t0Emb = Date.now();
    // Lazy import: the embedding stack is only loaded when embeddings actually run.
    const { runEmbeddingPipeline } = await import('../core/embeddings/embedding-pipeline.js');
    await runEmbeddingPipeline(
      executeQuery,
      executeWithReusedStatement,
      (progress) => {
        const scaled = 90 + Math.round((progress.percent / 100) * 8);
        const label = progress.phase === 'loading-model' ? 'Loading embedding model...' : `Embedding ${progress.nodesProcessed || 0}/${progress.totalNodes || '?'}`;
        updateBar(scaled, label);
      },
      {},
      // Ids restored from the cache above, passed so the pipeline can take them into account.
      cachedEmbeddingNodeIds.size > 0 ? cachedEmbeddingNodeIds : undefined,
    );
    embeddingTime = ((Date.now() - t0Emb) / 1000).toFixed(1);
  }

  // ── Phase 5: Finalize (98–100%) ───────────────────────────────────
  updateBar(98, 'Saving metadata...');

  // Count embeddings in the index (cached + newly generated)
  let embeddingCount = 0;
  try {
    const embResult = await executeQuery(`MATCH (e:CodeEmbedding) RETURN count(e) AS cnt`);
    embeddingCount = embResult?.[0]?.cnt ?? 0;
  } catch { /* table may not exist if embeddings never ran */ }

  const meta = {
    repoPath,
    lastCommit: currentCommit,
    indexedAt: new Date().toISOString(),
    stats: {
      files: pipelineResult.totalFileCount,
      nodes: stats.nodes,
      edges: stats.edges,
      communities: pipelineResult.communityResult?.stats.totalCommunities,
      processes: pipelineResult.processResult?.stats.totalProcesses,
      embeddings: embeddingCount,
    },
  };
  await saveMeta(storagePath, meta);
  await registerRepo(repoPath, meta);
  await addToGitignore(repoPath);

  const projectName = path.basename(repoPath);
  // Communities are merged by label; only merged groups with at least 5 symbols count as clusters.
  let aggregatedClusterCount = 0;
  if (pipelineResult.communityResult?.communities) {
    const groups = new Map<string, number>();
    for (const c of pipelineResult.communityResult.communities) {
      const label = c.heuristicLabel || c.label || 'Unknown';
      groups.set(label, (groups.get(label) || 0) + c.symbolCount);
    }
    aggregatedClusterCount = Array.from(groups.values()).filter(count => count >= 5).length;
  }

  let generatedSkills: GeneratedSkillInfo[] = [];
  if (options?.skills && pipelineResult.communityResult) {
    updateBar(99, 'Generating skill files...');
    const skillResult = await generateSkillFiles(repoPath, projectName, pipelineResult);
    generatedSkills = skillResult.skills;
  }

  const aiContext = await generateAIContextFiles(repoPath, storagePath, projectName, {
    files: pipelineResult.totalFileCount,
    nodes: stats.nodes,
    edges: stats.edges,
    communities: pipelineResult.communityResult?.stats.totalCommunities,
    clusters: aggregatedClusterCount,
    processes: pipelineResult.processResult?.stats.totalProcesses,
  }, generatedSkills);

  await closeLbug();
  // Note: we intentionally do NOT call disposeEmbedder() here.
  // ONNX Runtime's native cleanup segfaults on macOS and some Linux configs.
  // Since the process exits immediately after, Node.js reclaims everything.

  const totalTime = ((Date.now() - t0Global) / 1000).toFixed(1);

  // Tear down everything installed for the progress display before printing the summary.
  clearInterval(elapsedTimer);
  process.removeListener('SIGINT', sigintHandler);

  console.log = origLog;
  console.warn = origWarn;
  console.error = origError;

  bar.update(100, { phase: 'Done' });
  bar.stop();

  // ── Summary ───────────────────────────────────────────────────────
  const embeddingsCached = cachedEmbeddings.length > 0;
  console.log(`\n Repository indexed successfully (${totalTime}s)${embeddingsCached ? ` [${cachedEmbeddings.length} embeddings cached]` : ''}\n`);
  console.log(` ${stats.nodes.toLocaleString()} nodes | ${stats.edges.toLocaleString()} edges | ${pipelineResult.communityResult?.stats.totalCommunities || 0} clusters | ${pipelineResult.processResult?.stats.totalProcesses || 0} flows`);
  console.log(` LadybugDB ${lbugTime}s | FTS ${ftsTime}s | Embeddings ${embeddingSkipped ? embeddingSkipReason : embeddingTime + 's'}`);
  console.log(` ${repoPath}`);

  if (aiContext.files.length > 0) {
    console.log(` Context: ${aiContext.files.join(', ')}`);
  }

  // Show a quiet summary if some edge types needed fallback insertion
  if (lbugWarnings.length > 0) {
    // Each warning is expected to contain "(<n> edges)"; warnings without it add 0.
    const totalFallback = lbugWarnings.reduce((sum, w) => {
      const m = w.match(/\((\d+) edges\)/);
      return sum + (m ? parseInt(m[1]) : 0);
    }, 0);
    console.log(` Note: ${totalFallback} edges across ${lbugWarnings.length} types inserted via fallback (schema will be updated in next release)`);
  }

  // No global registry file yet: point the user at `gitnexus setup`.
  try {
    await fs.access(getGlobalRegistryPath());
  } catch {
    console.log('\n Tip: Run `gitnexus setup` to configure MCP for your editor.');
  }

  console.log('');

  // LadybugDB's native module holds open handles that prevent Node from exiting.
  // ONNX Runtime also registers native atexit hooks that segfault on some
  // platforms (#38, #40). Force-exit to ensure clean termination.
  process.exit(0);
};

================================================
FILE: gitnexus/src/cli/augment.ts
================================================
/**
 * Augment CLI Command
 *
 * Fast-path command for platform hooks.
 * Shells out from Claude Code PreToolUse / Cursor beforeShellExecution hooks.
 *
 * Usage: gitnexus augment <pattern>
 * Writes the enriched text to stderr (not stdout — see the note inside
 * augmentCommand for why).
 *
 * Performance: Must cold-start fast (<500ms).
 * Skips unnecessary initialization (no web server, no full DB warmup).
 */

import { augment } from '../core/augmentation/engine.js';

/**
 * Look up knowledge-graph context for a search pattern and emit it on stderr.
 *
 * Never fails the caller: patterns shorter than 3 characters and any error
 * raised by the augmentation engine both end the process with exit code 0.
 *
 * @param pattern Search pattern supplied by the hook.
 */
export async function augmentCommand(pattern: string): Promise<void> {
  if (!pattern || pattern.length < 3) {
    process.exit(0);
  }

  try {
    const result = await augment(pattern, process.cwd());

    if (result) {
      // IMPORTANT: Write to stderr, NOT stdout.
      // LadybugDB's native module captures stdout fd at OS level during init,
      // which makes stdout permanently broken in subprocess contexts.
      // stderr is never captured, so it works reliably everywhere.
      // The hook reads from the subprocess's stderr.
      process.stderr.write(result + '\n');
    }
  } catch {
    // Graceful failure — never break the calling hook
    process.exit(0);
  }
}

================================================
FILE: gitnexus/src/cli/clean.ts
================================================
/**
 * Clean Command
 *
 * Removes the .gitnexus index from the current repository.
 * Also unregisters it from the global registry.
*/ import fs from 'fs/promises'; import { findRepo, unregisterRepo, listRegisteredRepos } from '../storage/repo-manager.js'; export const cleanCommand = async (options?: { force?: boolean; all?: boolean }) => { // --all flag: clean all indexed repos if (options?.all) { if (!options?.force) { const entries = await listRegisteredRepos(); if (entries.length === 0) { console.log('No indexed repositories found.'); return; } console.log(`This will delete GitNexus indexes for ${entries.length} repo(s):`); for (const entry of entries) { console.log(` - ${entry.name} (${entry.path})`); } console.log('\nRun with --force to confirm deletion.'); return; } const entries = await listRegisteredRepos(); for (const entry of entries) { try { await fs.rm(entry.storagePath, { recursive: true, force: true }); await unregisterRepo(entry.path); console.log(`Deleted: ${entry.name} (${entry.storagePath})`); } catch (err) { console.error(`Failed to delete ${entry.name}:`, err); } } return; } // Default: clean current repo const cwd = process.cwd(); const repo = await findRepo(cwd); if (!repo) { console.log('No indexed repository found in this directory.'); return; } const repoName = repo.repoPath.split(/[/\\]/).pop() || repo.repoPath; if (!options?.force) { console.log(`This will delete the GitNexus index for: ${repoName}`); console.log(` Path: ${repo.storagePath}`); console.log('\nRun with --force to confirm deletion.'); return; } try { await fs.rm(repo.storagePath, { recursive: true, force: true }); await unregisterRepo(repo.repoPath); console.log(`Deleted: ${repo.storagePath}`); } catch (err) { console.error('Failed to delete:', err); } }; ================================================ FILE: gitnexus/src/cli/eval-server.ts ================================================ /** * Eval Server — Lightweight HTTP server for SWE-bench evaluation * * Keeps LadybugDB warm in memory so tool calls from the agent are near-instant. 
/*
 * Designed to run inside Docker containers during SWE-bench evaluation.
 *
 * KEY DESIGN: Returns LLM-friendly text, not raw JSON.
 * Raw JSON wastes tokens and is hard for models to parse. The text formatter
 * converts structured results into compact, readable output that models
 * can immediately act on. Next-step hints guide the agent through a
 * productive tool-chaining workflow (query → context → impact → fix).
 *
 * Architecture:
 * Agent bash cmd → curl localhost:PORT/tool/query → eval-server → LocalBackend → format → text
 *
 * Usage:
 * gitnexus eval-server # default port 4848
 * gitnexus eval-server --port 4848 # explicit port
 * gitnexus eval-server --idle-timeout 300 # auto-shutdown after 300s idle
 *
 * API:
 * POST /tool/:name — Call a tool. Body is JSON arguments. Returns formatted text.
 * GET /health — Health check. Returns {"status":"ok","repos":[...]}
 * POST /shutdown — Graceful shutdown.
 */

import http from 'http';
import { writeSync } from 'node:fs';
import { LocalBackend } from '../mcp/local/local-backend.js';

/** CLI options for the eval server; both values arrive as strings from the command line. */
export interface EvalServerOptions {
  port?: string;
  idleTimeout?: string;
}

// ─── Text Formatters ──────────────────────────────────────────────────
// Convert structured JSON results into compact, LLM-friendly text.
// Design: minimize tokens, maximize actionability.
//
// Every formatter accepts the raw tool result (untyped) and returns a string.
// A null/undefined result is reported as an error line instead of throwing.

/** Message returned by the formatters when a tool produced no result object at all. */
const NO_RESULT_MESSAGE = 'Error: tool returned no result';

/**
 * Format a `query` result: execution flows with up to 6 symbols each,
 * followed by up to 8 standalone definitions.
 */
export function formatQueryResult(result: any): string {
  if (result == null) return NO_RESULT_MESSAGE;
  if (result.error) return `Error: ${result.error}`;

  const lines: string[] = [];
  const processes = result.processes || [];
  const symbols = result.process_symbols || [];
  const defs = result.definitions || [];

  if (processes.length === 0 && defs.length === 0) {
    return 'No matching execution flows found. Try a different search term or use grep.';
  }

  lines.push(`Found ${processes.length} execution flow(s):\n`);

  for (let i = 0; i < processes.length; i++) {
    const p = processes[i];
    lines.push(`${i + 1}. ${p.summary} (${p.step_count} steps, ${p.symbol_count} symbols)`);

    // Show symbols belonging to this process
    const procSymbols = symbols.filter((s: any) => s.process_id === p.id);
    for (const s of procSymbols.slice(0, 6)) {
      const loc = s.startLine ? `:${s.startLine}` : '';
      lines.push(` ${s.type} ${s.name} → ${s.filePath}${loc}`);
    }
    if (procSymbols.length > 6) {
      lines.push(` ... and ${procSymbols.length - 6} more`);
    }
    lines.push('');
  }

  if (defs.length > 0) {
    lines.push(`Standalone definitions:`);
    for (const d of defs.slice(0, 8)) {
      lines.push(` ${d.type || 'Symbol'} ${d.name} → ${d.filePath || '?'}`);
    }
    if (defs.length > 8) lines.push(` ... and ${defs.length - 8} more`);
  }

  return lines.join('\n').trim();
}

/** Maximum references listed per relation type by formatContextResult. */
const MAX_REFS_PER_RELATION = 10;

/**
 * Append one direction of a symbol's references (incoming or outgoing) to `lines`.
 * Nothing is appended when the groups contain no references. Lists longer than
 * MAX_REFS_PER_RELATION are cut and followed by an "... and N more" line so the
 * reader knows the listing is incomplete.
 */
function pushRefSection(lines: string[], title: string, arrow: string, groups: Record<string, unknown>): void {
  const total = Object.values(groups).reduce(
    (sum: number, refs) => sum + (Array.isArray(refs) ? refs.length : 0),
    0,
  );
  if (total === 0) return;

  lines.push(`${title} (${total}):`);
  for (const [relType, refs] of Object.entries(groups)) {
    if (!Array.isArray(refs)) continue;
    for (const ref of refs.slice(0, MAX_REFS_PER_RELATION)) {
      lines.push(` ${arrow} [${relType}] ${ref.kind} ${ref.name} → ${ref.filePath}`);
    }
    if (refs.length > MAX_REFS_PER_RELATION) {
      lines.push(` ... and ${refs.length - MAX_REFS_PER_RELATION} more [${relType}]`);
    }
  }
  lines.push('');
}

/**
 * Format a `context` result: the symbol's location, who references it, what it
 * references, the execution flows it takes part in, and its source if present.
 * An `ambiguous` result is rendered as a list of candidates to choose from.
 */
export function formatContextResult(result: any): string {
  if (result == null) return NO_RESULT_MESSAGE;
  if (result.error) return `Error: ${result.error}`;

  if (result.status === 'ambiguous') {
    const lines = [`Multiple symbols named '${result.candidates?.[0]?.name || '?'}'. Disambiguate with file path:\n`];
    for (const c of result.candidates || []) {
      lines.push(` ${c.kind} ${c.name} → ${c.filePath}:${c.line || '?'} (uid: ${c.uid})`);
    }
    lines.push(`\nRe-run: gitnexus-context "${result.candidates?.[0]?.name}" "<file_path>"`);
    return lines.join('\n');
  }

  const sym = result.symbol;
  if (!sym) return 'Symbol not found.';

  const lines: string[] = [];
  const loc = sym.startLine ? `:${sym.startLine}-${sym.endLine}` : '';
  lines.push(`${sym.kind} ${sym.name} → ${sym.filePath}${loc}`);
  lines.push('');

  // Incoming refs (who calls/imports/extends this)
  pushRefSection(lines, 'Called/imported by', '←', result.incoming || {});

  // Outgoing refs (what this calls/imports)
  pushRefSection(lines, 'Calls/imports', '→', result.outgoing || {});

  // Processes
  const procs = result.processes || [];
  if (procs.length > 0) {
    lines.push(`Participates in ${procs.length} execution flow(s):`);
    for (const p of procs) {
      lines.push(` • ${p.name} (step ${p.step_index}/${p.step_count})`);
    }
  }

  if (sym.content) {
    lines.push('');
    lines.push(`Source:`);
    lines.push(sym.content);
  }

  return lines.join('\n').trim();
}

/**
 * Format an `impact` result: a blast-radius headline followed by the affected
 * symbols grouped by depth 1–3 (up to 12 per depth).
 */
export function formatImpactResult(result: any): string {
  if (result == null) return NO_RESULT_MESSAGE;
  if (result.error) {
    const suggestion = result.suggestion ? `\nSuggestion: ${result.suggestion}` : '';
    return `Error: ${result.error}${suggestion}`;
  }

  const target = result.target;
  const direction = result.direction;
  const byDepth = result.byDepth || {};
  const total = result.impactedCount || 0;

  if (total === 0) {
    return `${target?.name || '?'}: No ${direction} dependencies found. This symbol appears isolated.`;
  }

  const lines: string[] = [];
  const dirLabel = direction === 'upstream' ? 'depends on this (will break if changed)' : 'this depends on';
  lines.push(`Blast radius for ${target?.kind || ''} ${target?.name} (${direction}): ${total} symbol(s) ${dirLabel}`);
  if (result.partial) {
    lines.push('⚠️ Partial results — graph traversal was interrupted. Deeper impacts may exist.');
  }
  lines.push('');

  const depthLabels: Record<number, string> = {
    1: 'WILL BREAK (direct)',
    2: 'LIKELY AFFECTED (indirect)',
    3: 'MAY NEED TESTING (transitive)',
  };

  for (const depth of [1, 2, 3]) {
    const items = byDepth[depth];
    if (!items || items.length === 0) continue;

    lines.push(`d=${depth}: ${depthLabels[depth] || ''} (${items.length})`);
    for (const item of items.slice(0, 12)) {
      // Confidence is only shown when it is below 1.
      const conf = item.confidence < 1 ? ` (conf: ${item.confidence})` : '';
      lines.push(` ${item.type} ${item.name} → ${item.filePath} [${item.relationType}]${conf}`);
    }
    if (items.length > 12) {
      lines.push(` ... and ${items.length - 12} more`);
    }
    lines.push('');
  }

  return lines.join('\n').trim();
}

/**
 * Render one Cypher result cell. Objects and arrays are serialised as JSON
 * (with bigint values converted to strings, which JSON.stringify cannot handle)
 * so they do not collapse to "[object Object]"; everything else uses String().
 */
function formatCypherCell(value: unknown): string {
  if (value !== null && typeof value === 'object') {
    return JSON.stringify(value, (_key, v) => (typeof v === 'bigint' ? v.toString() : v));
  }
  return String(value);
}

/**
 * Format a `cypher` result. Row arrays become a "key: value | key: value"
 * listing (first 30 rows, columns taken from the first row); strings pass
 * through unchanged; any other value is pretty-printed as JSON.
 */
export function formatCypherResult(result: any): string {
  if (result == null) return NO_RESULT_MESSAGE;
  if (result.error) return `Error: ${result.error}`;

  if (Array.isArray(result)) {
    if (result.length === 0) return 'Query returned 0 rows.';

    // Format as simple table
    const keys = Object.keys(result[0] ?? {});
    const lines: string[] = [`${result.length} row(s):\n`];
    for (const row of result.slice(0, 30)) {
      const parts = keys.map(k => `${k}: ${formatCypherCell(row?.[k])}`);
      lines.push(` ${parts.join(' | ')}`);
    }
    if (result.length > 30) {
      lines.push(` ... ${result.length - 30} more rows`);
    }
    return lines.join('\n');
  }

  return typeof result === 'string' ? result : JSON.stringify(result, null, 2);
}

/**
 * Format a `detect_changes` result: summary counts, up to 15 changed symbols,
 * and up to 10 affected execution flows.
 */
export function formatDetectChangesResult(result: any): string {
  if (result == null) return NO_RESULT_MESSAGE;
  if (result.error) return `Error: ${result.error}`;

  const summary = result.summary || {};
  const lines: string[] = [];

  if (summary.changed_count === 0) {
    return 'No changes detected.';
  }

  lines.push(`Changes: ${summary.changed_files || 0} files, ${summary.changed_count || 0} symbols`);
  lines.push(`Affected processes: ${summary.affected_count || 0}`);
  lines.push(`Risk level: ${summary.risk_level || 'unknown'}\n`);

  const changed = result.changed_symbols || [];
  if (changed.length > 0) {
    lines.push(`Changed symbols:`);
    for (const s of changed.slice(0, 15)) {
      lines.push(` ${s.type} ${s.name} → ${s.filePath}`);
    }
    if (changed.length > 15) lines.push(` ... and ${changed.length - 15} more`);
    lines.push('');
  }

  const affected = result.affected_processes || [];
  if (affected.length > 0) {
    lines.push(`Affected execution flows:`);
    for (const p of affected.slice(0, 10)) {
      const steps = (p.changed_steps || []).map((s: any) => s.symbol).join(', ');
      lines.push(` • ${p.name} (${p.step_count} steps) — changed: ${steps}`);
    }
  }

  return lines.join('\n').trim();
}

/**
 * Format a `list_repos` result. Missing statistics are shown as "?";
 * a genuine count of 0 is shown as 0.
 */
export function formatListReposResult(result: any): string {
  if (!Array.isArray(result) || result.length === 0) {
    return 'No indexed repositories.';
  }

  const lines = ['Indexed repositories:\n'];
  for (const r of result) {
    const stats = r.stats || {};
    // `??` rather than `||`: an index with 0 processes must not be displayed as unknown.
    lines.push(` ${r.name} — ${stats.nodes ?? '?'} symbols, ${stats.edges ?? '?'} relationships, ${stats.processes ?? '?'} flows`);
    lines.push(` Path: ${r.path}`);
    lines.push(` Indexed: ${r.indexedAt}`);
  }
  return lines.join('\n');
}
/**
 * Format a tool result as compact, LLM-friendly text.
 *
 * Dispatches on the tool name to the matching formatter; results of unknown
 * tools are returned as-is when they are strings and as indented JSON otherwise.
 */
function formatToolResult(toolName: string, result: any): string {
  switch (toolName) {
    case 'query': return formatQueryResult(result);
    case 'context': return formatContextResult(result);
    case 'impact': return formatImpactResult(result);
    case 'cypher': return formatCypherResult(result);
    case 'detect_changes': return formatDetectChangesResult(result);
    case 'list_repos': return formatListReposResult(result);
    default: return typeof result === 'string' ? result : JSON.stringify(result, null, 2);
  }
}

// ─── Next-Step Hints ──────────────────────────────────────────────────
// Guide the agent to the logical next tool call.
// Critical for tool chaining: query → context → impact → fix.

/**
 * Return the hint appended to a tool's formatted output, or an empty string
 * for tools that have no suggested follow-up.
 */
function getNextStepHint(toolName: string): string {
  switch (toolName) {
    case 'query':
      return '\n---\nNext: Pick a symbol above and run gitnexus-context "<name>" to see all its callers, callees, and execution flows.';
    case 'context':
      return '\n---\nNext: To check what breaks if you change this, run gitnexus-impact "<name>" upstream';
    case 'impact':
      return '\n---\nNext: Review d=1 items first (WILL BREAK). Read the source with cat to understand the code, then make your fix.';
    case 'cypher':
      return '\n---\nNext: To explore a result symbol in depth, run gitnexus-context "<name>"';
    case 'detect_changes':
      return '\n---\nNext: Run gitnexus-context "<symbol>" on high-risk changed symbols to check their callers.';
    default:
      return '';
  }
}

// ─── Server ───────────────────────────────────────────────────────────

/** Largest delay setTimeout honours; larger (or NaN) delays are coerced to 1 ms by Node. */
const MAX_TIMER_DELAY_MS = 2_147_483_647;

/**
 * `gitnexus eval-server` — start the evaluation HTTP server on 127.0.0.1.
 *
 * Routes: `GET /health`, `POST /shutdown`, `POST /tool/:name` (JSON body with
 * the tool arguments, plain-text response). Status messages go to stderr; a
 * single `GITNEXUS_EVAL_SERVER_READY:<port>` line is written to fd 1 once the
 * server is listening.
 *
 * Exits the process with code 1 when the port is invalid or no indexed
 * repository is available, and with code 0 on shutdown, idle timeout,
 * SIGINT or SIGTERM.
 *
 * @param options.port        Port to listen on (default 4848).
 * @param options.idleTimeout Seconds without requests before auto-shutdown;
 *                            0, negative or non-numeric values disable it.
 */
export async function evalServerCommand(options?: EvalServerOptions): Promise<void> {
  const port = parseInt(options?.port || '4848', 10);
  if (Number.isNaN(port) || port < 0 || port > 65535) {
    console.error(`GitNexus eval-server: Invalid port: ${options?.port}`);
    process.exit(1);
  }

  // A non-numeric value parses to NaN, which would slip past the `<= 0` guard
  // below and make setTimeout fire after 1 ms; treat it as "disabled" instead.
  const parsedIdleTimeout = parseInt(options?.idleTimeout || '0', 10);
  const idleTimeoutSec = Number.isNaN(parsedIdleTimeout) ? 0 : parsedIdleTimeout;

  const backend = new LocalBackend();
  const ok = await backend.init();

  if (!ok) {
    console.error('GitNexus eval-server: No indexed repositories found. Run: gitnexus analyze');
    process.exit(1);
  }

  const repos = await backend.listRepos();
  console.error(`GitNexus eval-server: ${repos.length} repo(s) loaded: ${repos.map(r => r.name).join(', ')}`);

  let idleTimer: ReturnType<typeof setTimeout> | null = null;

  /** (Re)start the idle countdown; called on startup and on every request. */
  function resetIdleTimer() {
    if (idleTimeoutSec <= 0) return;
    if (idleTimer) clearTimeout(idleTimer);
    idleTimer = setTimeout(async () => {
      console.error('GitNexus eval-server: Idle timeout reached, shutting down');
      await backend.disconnect();
      process.exit(0);
      // Clamp so very large timeouts do not overflow the timer and fire immediately.
    }, Math.min(idleTimeoutSec * 1000, MAX_TIMER_DELAY_MS));
  }

  const server = http.createServer(async (req, res) => {
    resetIdleTimer();

    try {
      // Health check
      if (req.method === 'GET' && req.url === '/health') {
        res.setHeader('Content-Type', 'application/json');
        res.writeHead(200);
        res.end(JSON.stringify({ status: 'ok', repos: repos.map(r => r.name) }));
        return;
      }

      // Shutdown
      if (req.method === 'POST' && req.url === '/shutdown') {
        res.setHeader('Content-Type', 'application/json');
        res.writeHead(200);
        res.end(JSON.stringify({ status: 'shutting_down' }));
        // Short delay so the response can be sent before the process exits.
        setTimeout(async () => {
          await backend.disconnect();
          server.close();
          process.exit(0);
        }, 100);
        return;
      }

      // Tool calls: POST /tool/:name
      const toolMatch = req.url?.match(/^\/tool\/(\w+)$/);
      if (req.method === 'POST' && toolMatch) {
        const toolName = toolMatch[1];
        const body = await readBody(req);

        // An empty body means "no arguments".
        let args: Record<string, any> = {};
        if (body.trim()) {
          try {
            args = JSON.parse(body);
          } catch {
            res.setHeader('Content-Type', 'text/plain');
            res.writeHead(400);
            res.end('Error: Invalid JSON body');
            return;
          }
        }

        // Call tool, format result as text, append next-step hint
        const result = await backend.callTool(toolName, args);
        const formatted = formatToolResult(toolName, result);
        const hint = getNextStepHint(toolName);

        res.setHeader('Content-Type', 'text/plain');
        res.writeHead(200);
        res.end(formatted + hint);
        return;
      }

      // 404
      res.setHeader('Content-Type', 'text/plain');
      res.writeHead(404);
      res.end('Not found. Use POST /tool/:name or GET /health');
    } catch (err: any) {
      res.setHeader('Content-Type', 'text/plain');
      res.writeHead(500);
      res.end(`Error: ${err.message || 'Internal error'}`);
    }
  });

  server.listen(port, '127.0.0.1', () => {
    console.error(`GitNexus eval-server: listening on http://127.0.0.1:${port}`);
    console.error(` POST /tool/query — search execution flows`);
    console.error(` POST /tool/context — 360-degree symbol view`);
    console.error(` POST /tool/impact — blast radius analysis`);
    console.error(` POST /tool/cypher — raw Cypher query`);
    console.error(` GET /health — health check`);
    console.error(` POST /shutdown — graceful shutdown`);
    if (idleTimeoutSec > 0) {
      console.error(` Auto-shutdown after ${idleTimeoutSec}s idle`);
    }
    try {
      // Use fd 1 directly — LadybugDB captures process.stdout (#324)
      writeSync(1, `GITNEXUS_EVAL_SERVER_READY:${port}\n`);
    } catch {
      // stdout may not be available (e.g., broken pipe)
    }
  });

  resetIdleTimer();

  const shutdown = async () => {
    console.error('GitNexus eval-server: shutting down...');
    await backend.disconnect();
    server.close();
    process.exit(0);
  };
  process.on('SIGINT', shutdown);
  process.on('SIGTERM', shutdown);
}

/** Upper bound, in bytes, on a request body accepted by the server. */
export const MAX_BODY_SIZE = 1024 * 1024; // 1MB

/**
 * Collect a request body as a UTF-8 string.
 *
 * The request is destroyed with an error — which rejects the returned
 * promise — as soon as more than MAX_BODY_SIZE bytes have been received.
 */
function readBody(req: http.IncomingMessage): Promise<string> {
  return new Promise((resolve, reject) => {
    const chunks: Buffer[] = [];
    let totalSize = 0;
    req.on('data', (chunk: Buffer) => {
      totalSize += chunk.length;
      if (totalSize > MAX_BODY_SIZE) {
        req.destroy(new Error('Request body too large (max 1MB)'));
        return;
      }
      chunks.push(chunk);
    });
    req.on('end', () => resolve(Buffer.concat(chunks).toString('utf-8')));
    req.on('error', reject);
  });
}

================================================
FILE: gitnexus/src/cli/index.ts
================================================
#!/usr/bin/env node

// Heap re-spawn removed — only analyze.ts needs the 8GB heap (via its own ensureHeap()).
// Removing it from here improves MCP server startup time significantly.
import { Command } from 'commander';
import { createRequire } from 'node:module';
import { createLazyAction } from './lazy-action.js';

// package.json is read through a require() created for this ES module; only
// its `version` field is used below.
const _require = createRequire(import.meta.url);
const pkg = _require('../../package.json');

// Root commander program. Each sub-command hands createLazyAction a loader for
// its implementation module plus the name of the export to call, so the module
// is imported only when that command is actually invoked.
const program = new Command();

program
  .name('gitnexus')
  .description('GitNexus local CLI and MCP server')
  .version(pkg.version);

// setup: no arguments or options.
program
  .command('setup')
  .description('One-time setup: configure MCP for Cursor, Claude Code, OpenCode')
  .action(createLazyAction(() => import('./setup.js'), 'setupCommand'));

// analyze: optional [path]; the help text also documents GITNEXUS_NO_GITIGNORE.
program
  .command('analyze [path]')
  .description('Index a repository (full analysis)')
  .option('-f, --force', 'Force full re-index even if up to date')
  .option('--embeddings', 'Enable embedding generation for semantic search (off by default)')
  .option('--skills', 'Generate repo-specific skill files from detected communities')
  .option('-v, --verbose', 'Enable verbose ingestion warnings (default: false)')
  .addHelpText('after', '\nEnvironment variables:\n GITNEXUS_NO_GITIGNORE=1 Skip .gitignore parsing (still reads .gitnexusignore)')
  .action(createLazyAction(() => import('./analyze.js'), 'analyzeCommand'));

// serve: --port defaults to the string '4747'; --host has no commander default.
program
  .command('serve')
  .description('Start local HTTP server for web UI connection')
  .option('-p, --port <port>', 'Port number', '4747')
  .option('--host <host>', 'Bind address (default: 127.0.0.1, use 0.0.0.0 for remote access)')
  .action(createLazyAction(() => import('./serve.js'), 'serveCommand'));

program
  .command('mcp')
  .description('Start MCP server (stdio) — serves all indexed repos')
  .action(createLazyAction(() => import('./mcp.js'), 'mcpCommand'));

program
  .command('list')
  .description('List all indexed repositories')
  .action(createLazyAction(() => import('./list.js'), 'listCommand'));

program
  .command('status')
  .description('Show index status for current repo')
  .action(createLazyAction(() => import('./status.js'), 'statusCommand'));

program
  .command('clean')
  .description('Delete GitNexus index for current repo')
  .option('-f, --force', 'Skip confirmation prompt')
  .option('--all', 'Clean all indexed repos')
  .action(createLazyAction(() => import('./clean.js'), 'cleanCommand'));

// wiki: --concurrency is registered with the string default '3'; the other
// defaults quoted in the descriptions are not set here.
program
  .command('wiki [path]')
  .description('Generate repository wiki from knowledge graph')
  .option('-f, --force', 'Force full regeneration even if up to date')
  .option('--model <model>', 'LLM model name (default: minimax/minimax-m2.5)')
  .option('--base-url <url>', 'LLM API base URL (default: OpenAI)')
  .option('--api-key <key>', 'LLM API key (saved to ~/.gitnexus/config.json)')
  .option('--concurrency <n>', 'Parallel LLM calls (default: 3)', '3')
  .option('--gist', 'Publish wiki as a public GitHub Gist after generation')
  .action(createLazyAction(() => import('./wiki.js'), 'wikiCommand'));

// augment: <pattern> is a required positional argument.
program
  .command('augment <pattern>')
  .description('Augment a search pattern with knowledge graph context (used by hooks)')
  .action(createLazyAction(() => import('./augment.js'), 'augmentCommand'));

// ─── Direct Tool Commands (no MCP overhead) ────────────────────────
// These invoke LocalBackend directly for use in eval, scripts, and CI.
// The four commands below all load './tool.js' lazily and differ only in the
// export they invoke and the options they accept.

// query: <search_query> is required; no option here has a commander default.
program
  .command('query <search_query>')
  .description('Search the knowledge graph for execution flows related to a concept')
  .option('-r, --repo <name>', 'Target repository (omit if only one indexed)')
  .option('-c, --context <text>', 'Task context to improve ranking')
  .option('-g, --goal <text>', 'What you want to find')
  .option('-l, --limit <n>', 'Max processes to return (default: 5)')
  .option('--content', 'Include full symbol source code')
  .action(createLazyAction(() => import('./tool.js'), 'queryCommand'));

// context: [name] is optional because a symbol can also be addressed by --uid.
program
  .command('context [name]')
  .description('360-degree view of a code symbol: callers, callees, processes')
  .option('-r, --repo <name>', 'Target repository')
  .option('-u, --uid <uid>', 'Direct symbol UID (zero-ambiguity lookup)')
  .option('-f, --file <path>', 'File path to disambiguate common names')
  .option('--content', 'Include full symbol source code')
  .action(createLazyAction(() => import('./tool.js'), 'contextCommand'));

// impact: --direction is the only option registered with a default ('upstream').
program
  .command('impact <target>')
  .description('Blast radius analysis: what breaks if you change a symbol')
  .option('-d, --direction <dir>', 'upstream (dependants) or downstream (dependencies)', 'upstream')
  .option('-r, --repo <name>', 'Target repository')
  .option('--depth <n>', 'Max relationship depth (default: 3)')
  .option('--include-tests', 'Include test files in results')
  .action(createLazyAction(() => import('./tool.js'), 'impactCommand'));

// cypher: <query> is passed as a single required positional argument.
program
  .command('cypher <query>')
  .description('Execute raw Cypher query against the knowledge graph')
  .option('-r, --repo <name>', 'Target repository')
  .action(createLazyAction(() => import('./tool.js'), 'cypherCommand'));

// ─── Eval Server (persistent daemon for SWE-bench) ─────────────────
/**
 * Type guard: true when `value` can be invoked as a function.
 */
function isCallable(value: unknown): value is (...args: unknown[]) => unknown {
  return typeof value === 'function';
}

/**
 * Wrap a dynamic import so the module is loaded only when the action runs.
 *
 * The generic constraints tie `exportName` to the keys of the loaded module,
 * so a misspelled export name is rejected at compile time when the loader is a
 * concrete `import()`.
 *
 * @param loader - Produces the module; called on every invocation of the action.
 * @param exportName - Name of the export to call with the action's arguments.
 * @returns An async action that forwards its arguments to the export and
 *          awaits it. It rejects with "Lazy action export not found" when the
 *          export is not a function.
 */
export function createLazyAction<
  TModule extends Record<string, unknown>,
  TKey extends string & keyof TModule,
>(
  loader: () => Promise<TModule>,
  exportName: TKey,
): (...args: unknown[]) => Promise<void> {
  return async (...forwarded: unknown[]): Promise<void> => {
    const loaded = await loader();
    const candidate = loaded[exportName];

    if (isCallable(candidate)) {
      await candidate(...forwarded);
      return;
    }

    throw new Error(`Lazy action export not found: ${exportName}`);
  };
}
0} files, ${stats.nodes ?? 0} symbols, ${stats.edges ?? 0} edges`); if (stats.communities) console.log(` Clusters: ${stats.communities}`); if (stats.processes) console.log(` Processes: ${stats.processes}`); console.log(''); } }; ================================================ FILE: gitnexus/src/cli/mcp.ts ================================================ /** * MCP Command * * Starts the MCP server in standalone mode. * Loads all indexed repos from the global registry. * No longer depends on cwd — works from any directory. */ import { startMCPServer } from '../mcp/server.js'; import { LocalBackend } from '../mcp/local/local-backend.js'; export const mcpCommand = async () => { // Prevent unhandled errors from crashing the MCP server process. // LadybugDB lock conflicts and transient errors should degrade gracefully. process.on('uncaughtException', (err) => { console.error(`GitNexus MCP: uncaught exception — ${err.message}`); // Process is in an undefined state after uncaughtException — exit after flushing setTimeout(() => process.exit(1), 100); }); process.on('unhandledRejection', (reason) => { const msg = reason instanceof Error ? reason.message : String(reason); console.error(`GitNexus MCP: unhandled rejection — ${msg}`); }); // Initialize multi-repo backend from registry. // The server starts even with 0 repos — tools call refreshRepos() lazily, // so repos indexed after the server starts are discovered automatically. const backend = new LocalBackend(); await backend.init(); const repos = await backend.listRepos(); if (repos.length === 0) { console.error('GitNexus: No indexed repos yet. 
/**
 * Build the MCP server launch entry written into each editor's config.
 *
 * The server is always started as `npx -y gitnexus@latest mcp`. On Windows the
 * same command is routed through `cmd /c`, because npx is a .cmd script there.
 *
 * @returns A fresh `{ command, args }` object on every call.
 */
function getMcpEntry() {
  const npxArgs = ['-y', 'gitnexus@latest', 'mcp'];

  if (process.platform !== 'win32') {
    return { command: 'npx', args: npxArgs };
  }

  return { command: 'cmd', args: ['/c', 'npx', ...npxArgs] };
}
/**
 * Merge the gitnexus entry into an existing MCP config JSON object.
 *
 * Anything that is not a plain object (null, a primitive, an array) is treated
 * as an empty config, and the same applies to its `mcpServers` member. An array
 * is rejected explicitly: properties added to an array are dropped by
 * JSON.stringify, so the gitnexus entry would never reach the file.
 *
 * @param existing - Parsed config, or null when no config exists yet.
 * @returns A new config object; `existing` itself is not modified.
 */
function mergeMcpConfig(existing: any): any {
  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === 'object' && value !== null && !Array.isArray(value);

  const base = isPlainObject(existing) ? existing : {};
  const servers = isPlainObject(base.mcpServers) ? base.mcpServers : {};

  return { ...base, mcpServers: { ...servers, gitnexus: getMcpEntry() } };
}

/**
 * Read and parse a JSON file.
 *
 * @returns The parsed value, or null when the file does not exist or is empty
 *          (there is nothing to preserve in either case).
 * @throws When the file exists but cannot be read or is not valid JSON. The
 *         callers in this file write the config back after merging, so
 *         reporting "no config" here would make them overwrite a file whose
 *         contents they never saw.
 */
async function readJsonFile(filePath: string): Promise<any | null> {
  let raw: string;
  try {
    raw = await fs.readFile(filePath, 'utf-8');
  } catch (err: unknown) {
    const code = typeof err === 'object' && err !== null ? (err as { code?: unknown }).code : undefined;
    if (code === 'ENOENT') return null;
    throw err;
  }

  // A leading byte-order mark is legal in a UTF-8 file but not in JSON text.
  const text = raw.replace(/^\uFEFF/, '');
  if (text.trim() === '') return null;

  try {
    return JSON.parse(text);
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`${filePath} is not valid JSON (${reason}); leaving it untouched`);
  }
}

/**
 * Write JSON to a file, creating parent directories if needed.
 * Output is indented with two spaces and ends with a newline.
 */
async function writeJsonFile(filePath: string, data: any): Promise<void> {
  await fs.mkdir(path.dirname(filePath), { recursive: true });
  await fs.writeFile(filePath, JSON.stringify(data, null, 2) + '\n', 'utf-8');
}

/**
 * Check if a directory exists.
 * Any stat failure (missing path, permission error, ...) counts as "no".
 */
async function dirExists(dirPath: string): Promise<boolean> {
  try {
    const stat = await fs.stat(dirPath);
    return stat.isDirectory();
  } catch {
    return false;
  }
}

// ─── Editor-specific setup ─────────────────────────────────────────

/**
 * Register the gitnexus MCP server in ~/.cursor/mcp.json.
 *
 * Records "skipped" when ~/.cursor does not exist, "configured" on success and
 * an "errors" entry when the config cannot be read, merged or written.
 */
async function setupCursor(result: SetupResult): Promise<void> {
  const cursorDir = path.join(os.homedir(), '.cursor');
  if (!(await dirExists(cursorDir))) {
    result.skipped.push('Cursor (not installed)');
    return;
  }

  const mcpPath = path.join(cursorDir, 'mcp.json');
  try {
    const existing = await readJsonFile(mcpPath);
    const updated = mergeMcpConfig(existing);
    await writeJsonFile(mcpPath, updated);
    result.configured.push('Cursor');
  } catch (err: unknown) {
    result.errors.push(`Cursor: ${err instanceof Error ? err.message : String(err)}`);
  }
}
/**
 * Install GitNexus skills to ~/.claude/skills/ for Claude Code.
 *
 * Does nothing when ~/.claude does not exist. Records a "configured" entry only
 * when at least one skill was installed, and an "errors" entry when the
 * installer throws.
 */
async function installClaudeCodeSkills(result: SetupResult): Promise<void> {
  const claudeDir = path.join(os.homedir(), '.claude');
  if (!(await dirExists(claudeDir))) return;

  const skillsDir = path.join(claudeDir, 'skills');
  try {
    const installed = await installSkillsTo(skillsDir);
    if (installed.length > 0) {
      result.configured.push(`Claude Code skills (${installed.length} skills → ~/.claude/skills/)`);
    }
  } catch (err: unknown) {
    result.errors.push(`Claude Code skills: ${err instanceof Error ? err.message : String(err)}`);
  }
}

/**
 * Install GitNexus hooks to ~/.claude/settings.json for Claude Code.
 * Merges hook config without overwriting existing hooks.
 *
 * Steps:
 *  1. Copy hooks/claude/gitnexus-hook.cjs from the package into
 *     ~/.claude/hooks/gitnexus/, rewriting its `cliPath` line to the absolute
 *     path of this installation's CLI entry point.
 *  2. Add PreToolUse and PostToolUse entries running that script, unless an
 *     entry whose command mentions 'gitnexus-hook' is already registered.
 *
 * Does nothing when ~/.claude does not exist. Any failure is recorded in
 * `result.errors`. That includes the case where the script could not be copied
 * and no earlier copy exists, because registering a hook that points at a
 * missing file would only add a failing command to settings.json.
 */
async function installClaudeCodeHooks(result: SetupResult): Promise<void> {
  const claudeDir = path.join(os.homedir(), '.claude');
  if (!(await dirExists(claudeDir))) return;

  const settingsPath = path.join(claudeDir, 'settings.json');

  // Source hooks bundled within the gitnexus package (hooks/claude/)
  const pluginHooksPath = path.join(__dirname, '..', '..', 'hooks', 'claude');

  // Copy unified hook script to ~/.claude/hooks/gitnexus/
  const destHooksDir = path.join(claudeDir, 'hooks', 'gitnexus');

  try {
    await fs.mkdir(destHooksDir, { recursive: true });

    const src = path.join(pluginHooksPath, 'gitnexus-hook.cjs');
    const dest = path.join(destHooksDir, 'gitnexus-hook.cjs');

    try {
      const content = await fs.readFile(src, 'utf-8');
      // Inject resolved CLI path so the copied hook can find the CLI
      // even when it's no longer inside the npm package tree
      const resolvedCli = path.join(__dirname, '..', 'cli', 'index.js');
      const normalizedCli = path.resolve(resolvedCli).replace(/\\/g, '/');
      const jsonCli = JSON.stringify(normalizedCli);
      // A function replacer inserts the text verbatim; a replacement *string*
      // would expand `$&`, `$$`, ... if they occur in the install path.
      const patched = content.replace(
        "let cliPath = path.resolve(__dirname, '..', '..', 'dist', 'cli', 'index.js');",
        () => `let cliPath = ${jsonCli};`,
      );
      await fs.writeFile(dest, patched, 'utf-8');
    } catch (copyErr: unknown) {
      // Carry on only when a previously installed script is still in place.
      let hasPreviousCopy = false;
      try {
        hasPreviousCopy = (await fs.stat(dest)).isFile();
      } catch {
        hasPreviousCopy = false;
      }
      if (!hasPreviousCopy) {
        const reason = copyErr instanceof Error ? copyErr.message : String(copyErr);
        throw new Error(`hook script could not be installed (${reason})`);
      }
    }

    const hookPath = dest.replace(/\\/g, '/');
    const hookCmd = `node "${hookPath.replace(/"/g, '\\"')}"`;

    // Merge hook config into ~/.claude/settings.json
    const existing = (await readJsonFile(settingsPath)) || {};
    if (!existing.hooks) existing.hooks = {};

    // NOTE: SessionStart hooks are broken on Windows (Claude Code bug #23576).
    // Session context is delivered via CLAUDE.md / skills instead.

    // Helper: add a hook entry if one with 'gitnexus-hook' isn't already registered
    interface HookEntry { hooks?: Array<{ command?: string }> }
    const ensureHookEntry = (
      eventName: string,
      matcher: string,
      timeout: number,
      statusMessage: string,
    ): void => {
      if (!existing.hooks[eventName]) existing.hooks[eventName] = [];
      const entries = existing.hooks[eventName];
      if (!Array.isArray(entries)) {
        throw new Error(`settings.json hooks.${eventName} is not an array`);
      }
      const hasHook = entries.some(
        (h: HookEntry) => h?.hooks?.some(
          hh => typeof hh?.command === 'string' && hh.command.includes('gitnexus-hook'),
        ),
      );
      if (!hasHook) {
        entries.push({
          matcher,
          hooks: [{ type: 'command', command: hookCmd, timeout, statusMessage }],
        });
      }
    };

    ensureHookEntry('PreToolUse', 'Grep|Glob|Bash', 10, 'Enriching with GitNexus graph context...');
    ensureHookEntry('PostToolUse', 'Bash', 10, 'Checking GitNexus index freshness...');

    await writeJsonFile(settingsPath, existing);
    result.configured.push('Claude Code hooks (PreToolUse, PostToolUse)');
  } catch (err: unknown) {
    result.errors.push(`Claude Code hooks: ${err instanceof Error ? err.message : String(err)}`);
  }
}
// ─── Skill Installation ───────────────────────────────────────────

/**
 * Install the skills bundled with the package into a target directory.
 * Each skill ends up at {targetDir}/{skillName}/SKILL.md.
 *
 * Two source layouts under the package's skills/ folder are recognised:
 *  - Flat file:  skills/{name}.md       → written as {name}/SKILL.md
 *  - Directory:  skills/{name}/SKILL.md → whole directory copied recursively
 * When both exist for the same name, the directory layout wins.
 *
 * Failures are swallowed per skill: a skill that cannot be copied is simply
 * left out of the result. If the sources cannot be listed, nothing is installed.
 *
 * @param targetDir - Directory that receives one sub-directory per skill.
 * @returns Names of the skills that were installed, directory skills first.
 */
async function installSkillsTo(targetDir: string): Promise<string[]> {
  const skillsRoot = path.join(__dirname, '..', '..', 'skills');

  let flatMatches: string[] = [];
  let directoryMatches: string[] = [];
  try {
    [flatMatches, directoryMatches] = await Promise.all([
      glob('*.md', { cwd: skillsRoot }),
      glob('*/SKILL.md', { cwd: skillsRoot }),
    ]);
  } catch {
    return [];
  }

  // skill name → source layout; directory skills are registered first so a
  // flat file with the same name never replaces them.
  const layoutBySkill = new Map<string, 'directory' | 'flat'>();
  for (const match of directoryMatches) {
    layoutBySkill.set(path.dirname(match), 'directory');
  }
  for (const match of flatMatches) {
    const name = path.basename(match, '.md');
    if (layoutBySkill.has(name)) continue;
    layoutBySkill.set(name, 'flat');
  }

  const installed: string[] = [];
  for (const [skillName, layout] of layoutBySkill) {
    const destination = path.join(targetDir, skillName);
    try {
      if (layout === 'directory') {
        await copyDirRecursive(path.join(skillsRoot, skillName), destination);
      } else {
        const markdown = await fs.readFile(path.join(skillsRoot, `${skillName}.md`), 'utf-8');
        await fs.mkdir(destination, { recursive: true });
        await fs.writeFile(path.join(destination, 'SKILL.md'), markdown, 'utf-8');
      }
      installed.push(skillName);
    } catch {
      // Source skill not found — skip
    }
  }

  return installed;
}
/**
 * Recursively copy a directory tree.
 *
 * `dest` (and any missing parents) is created first; existing files with the
 * same name are overwritten by fs.copyFile. Entries that are not directories
 * are copied as files.
 */
async function copyDirRecursive(src: string, dest: string): Promise<void> {
  await fs.mkdir(dest, { recursive: true });
  const entries = await fs.readdir(src, { withFileTypes: true });
  for (const entry of entries) {
    const srcPath = path.join(src, entry.name);
    const destPath = path.join(dest, entry.name);
    if (entry.isDirectory()) {
      await copyDirRecursive(srcPath, destPath);
    } else {
      await fs.copyFile(srcPath, destPath);
    }
  }
}

/**
 * Install global Cursor skills to ~/.cursor/skills/ (one sub-directory per skill).
 * Does nothing when ~/.cursor does not exist.
 */
async function installCursorSkills(result: SetupResult): Promise<void> {
  const cursorDir = path.join(os.homedir(), '.cursor');
  if (!(await dirExists(cursorDir))) return;

  const skillsDir = path.join(cursorDir, 'skills');
  try {
    const installed = await installSkillsTo(skillsDir);
    if (installed.length > 0) {
      result.configured.push(`Cursor skills (${installed.length} skills → ~/.cursor/skills/)`);
    }
  } catch (err: unknown) {
    result.errors.push(`Cursor skills: ${err instanceof Error ? err.message : String(err)}`);
  }
}

/**
 * Install global OpenCode skills to ~/.config/opencode/skill/ (one sub-directory per skill).
 * Does nothing when ~/.config/opencode does not exist.
 */
async function installOpenCodeSkills(result: SetupResult): Promise<void> {
  const opencodeDir = path.join(os.homedir(), '.config', 'opencode');
  if (!(await dirExists(opencodeDir))) return;

  const skillsDir = path.join(opencodeDir, 'skill');
  try {
    const installed = await installSkillsTo(skillsDir);
    if (installed.length > 0) {
      result.configured.push(`OpenCode skills (${installed.length} skills → ~/.config/opencode/skill/)`);
    }
  } catch (err: unknown) {
    result.errors.push(`OpenCode skills: ${err instanceof Error ? err.message : String(err)}`);
  }
}

// ─── Main command ──────────────────────────────────────────────────

/**
 * `gitnexus setup`: configure every detected editor and print a report.
 *
 * Order of work: ensure the global GitNexus directory exists, write MCP config
 * per editor, then install skills and Claude Code hooks. Each step appends to a
 * shared SetupResult, which is printed as Configured / Skipped / Errors lists
 * followed by a two-line summary.
 */
export const setupCommand = async () => {
  console.log('');
  console.log(' GitNexus Setup');
  console.log(' ==============');
  console.log('');

  // Ensure global directory exists
  const globalDir = getGlobalDir();
  await fs.mkdir(globalDir, { recursive: true });

  const result: SetupResult = {
    configured: [],
    skipped: [],
    errors: [],
  };

  // Detect and configure each editor's MCP
  await setupCursor(result);
  await setupClaudeCode(result);
  await setupOpenCode(result);

  // Install global skills for platforms that support them
  await installClaudeCodeSkills(result);
  await installClaudeCodeHooks(result);
  await installCursorSkills(result);
  await installOpenCodeSkills(result);

  // Print results
  if (result.configured.length > 0) {
    console.log(' Configured:');
    for (const name of result.configured) {
      console.log(` + ${name}`);
    }
  }

  if (result.skipped.length > 0) {
    console.log('');
    console.log(' Skipped:');
    for (const name of result.skipped) {
      console.log(` - ${name}`);
    }
  }

  if (result.errors.length > 0) {
    console.log('');
    console.log(' Errors:');
    for (const err of result.errors) {
      console.log(` ! ${err}`);
    }
  }

  // Summary: entries are classified by the wording the installers above use.
  // Hook installs are neither MCP targets nor skills, so they appear only in
  // the "Configured" list and not on the MCP line.
  const skillEntries = result.configured.filter(entry => entry.includes('skills'));
  const mcpEntries = result.configured.filter(
    entry => !entry.includes('skills') && !entry.includes('hooks'),
  );

  console.log('');
  console.log(' Summary:');
  console.log(` MCP configured for: ${mcpEntries.join(', ') || 'none'}`);
  console.log(` Skills installed to: ${skillEntries.join(', ') || 'none'}`);
  console.log('');
  console.log(' Next steps:');
  console.log(' 1. cd into any git repo');
  console.log(' 2. Run: gitnexus analyze');
  console.log(' 3. Open the repo in your editor — MCP is ready!');
  console.log('');
};
*/ import fs from 'fs/promises'; import path from 'path'; import { PipelineResult } from '../types/pipeline.js'; import { CommunityNode, CommunityMembership } from '../core/ingestion/community-processor.js'; import { ProcessNode } from '../core/ingestion/process-processor.js'; import { GraphNode, KnowledgeGraph } from '../core/graph/types.js'; // ============================================================================ // TYPES // ============================================================================ export interface GeneratedSkillInfo { name: string; label: string; symbolCount: number; fileCount: number; } interface AggregatedCommunity { label: string; rawIds: string[]; symbolCount: number; cohesion: number; } interface MemberSymbol { id: string; name: string; label: string; filePath: string; startLine: number; isExported: boolean; } interface FileInfo { relativePath: string; symbols: string[]; } interface CrossConnection { targetLabel: string; count: number; } // ============================================================================ // MAIN EXPORT // ============================================================================ /** * @brief Generate repo-specific skill files from detected communities * @param {string} repoPath - Absolute path to the repository root * @param {string} projectName - Human-readable project name * @param {PipelineResult} pipelineResult - In-memory pipeline data with communities, processes, graph * @returns {Promise<{ skills: GeneratedSkillInfo[], outputPath: string }>} Generated skill metadata */ export const generateSkillFiles = async ( repoPath: string, projectName: string, pipelineResult: PipelineResult ): Promise<{ skills: GeneratedSkillInfo[]; outputPath: string }> => { const { communityResult, processResult, graph } = pipelineResult; const outputDir = path.join(repoPath, '.claude', 'skills', 'generated'); if (!communityResult || !communityResult.memberships.length) { console.log('\n Skills: no communities detected, 
skipping skill generation'); return { skills: [], outputPath: outputDir }; } console.log('\n Generating repo-specific skills...'); // Step 1: Build communities from memberships (not the filtered communities array). // The community processor skips singletons from its communities array but memberships // include ALL assignments. For repos with sparse CALLS edges, the communities array // can be empty while memberships still has useful groupings. const communities = communityResult.communities.length > 0 ? communityResult.communities : buildCommunitiesFromMemberships(communityResult.memberships, graph, repoPath); const aggregated = aggregateCommunities(communities); // Step 2: Filter to significant communities // Keep communities with >= 3 symbols after aggregation. const significant = aggregated .filter(c => c.symbolCount >= 3) .sort((a, b) => b.symbolCount - a.symbolCount) .slice(0, 20); if (significant.length === 0) { console.log('\n Skills: no significant communities found (all below 3-symbol threshold)'); return { skills: [], outputPath: outputDir }; } // Step 3: Build lookup maps const membershipsByComm = buildMembershipMap(communityResult.memberships); const nodeIdToCommunityLabel = buildNodeCommunityLabelMap( communityResult.memberships, communities ); // Step 4: Clear and recreate output directory try { await fs.rm(outputDir, { recursive: true, force: true }); } catch { /* may not exist */ } await fs.mkdir(outputDir, { recursive: true }); // Step 5: Generate skill files const skills: GeneratedSkillInfo[] = []; const usedNames = new Set<string>(); for (const community of significant) { // Gather member symbols const members = gatherMembers(community.rawIds, membershipsByComm, graph); if (members.length === 0) continue; // Gather file info const files = gatherFiles(members, repoPath); // Gather entry points const entryPoints = gatherEntryPoints(members); // Gather execution flows const flows = gatherFlows(community.rawIds, processResult?.processes || []); // 
// ============================================================================
// FALLBACK COMMUNITY BUILDER
// ============================================================================

/**
 * @brief Build CommunityNode-like objects from raw memberships when the community
 *        processor's communities array is empty (all singletons were filtered out)
 *
 * Label: the most frequent immediate parent folder among the members' file
 * paths, capitalised, ignoring generic folder names (src, lib, core, ...).
 * Falls back to `Cluster_<id>` when no usable folder is found.
 *
 * Cohesion: share of relationships leaving a member node that end at another
 * member of the same community; 1.0 when no relationship starts in the
 * community. All communities are scored in a single pass over the graph's
 * relationships. Rescanning the whole list per community is quadratic in the
 * case this fallback exists for, where almost every node is its own community.
 *
 * @param {CommunityMembership[]} memberships - All node-to-community assignments
 * @param {KnowledgeGraph} graph - The knowledge graph for resolving node metadata
 * @param {string} repoPath - Repository root; currently unused, kept so callers
 *        do not need to change
 * @returns {CommunityNode[]} Synthetic community nodes sorted by symbolCount descending
 */
const buildCommunitiesFromMemberships = (
  memberships: CommunityMembership[],
  graph: KnowledgeGraph,
  repoPath: string
): CommunityNode[] => {
  // Folder names too generic to serve as a community label.
  const genericFolders = new Set(['src', 'lib', 'core', 'utils', 'common', 'shared', 'helpers']);

  // communityId -> member nodeIds (insertion order), and the reverse lookup.
  const groups = new Map<string, string[]>();
  const communityIdsByNode = new Map<string, Set<string>>();
  for (const m of memberships) {
    const members = groups.get(m.communityId);
    if (members) {
      members.push(m.nodeId);
    } else {
      groups.set(m.communityId, [m.nodeId]);
    }

    const owners = communityIdsByNode.get(m.nodeId);
    if (owners) {
      owners.add(m.communityId);
    } else {
      communityIdsByNode.set(m.nodeId, new Set([m.communityId]));
    }
  }

  // One pass over all relationships: per community, count edges that start
  // inside it (total) and those that also end inside it (internal).
  const edgeStats = new Map<string, { internal: number; total: number }>();
  if (groups.size > 0) {
    graph.forEachRelationship(rel => {
      const sourceCommunities = communityIdsByNode.get(rel.sourceId);
      if (!sourceCommunities) return;
      const targetCommunities = communityIdsByNode.get(rel.targetId);

      for (const commId of sourceCommunities) {
        let stats = edgeStats.get(commId);
        if (!stats) {
          stats = { internal: 0, total: 0 };
          edgeStats.set(commId, stats);
        }
        stats.total++;
        if (targetCommunities?.has(commId)) stats.internal++;
      }
    });
  }

  const communities: CommunityNode[] = [];
  for (const [commId, nodeIds] of groups) {
    // Derive a heuristic label from the most common parent directory
    const folderCounts = new Map<string, number>();
    for (const nodeId of nodeIds) {
      const node = graph.getNode(nodeId);
      if (!node?.properties.filePath) continue;

      const normalized = node.properties.filePath.replace(/\\/g, '/');
      const parts = normalized.split('/').filter(Boolean);
      if (parts.length < 2) continue;

      const folder = parts[parts.length - 2];
      if (genericFolders.has(folder.toLowerCase())) continue;
      folderCounts.set(folder, (folderCounts.get(folder) || 0) + 1);
    }

    // First folder to reach the highest count wins ties.
    let bestFolder = '';
    let bestCount = 0;
    for (const [folder, count] of folderCounts) {
      if (count > bestCount) {
        bestCount = count;
        bestFolder = folder;
      }
    }

    const label = bestFolder
      ? bestFolder.charAt(0).toUpperCase() + bestFolder.slice(1)
      : `Cluster_${commId.replace('comm_', '')}`;

    const stats = edgeStats.get(commId);
    const cohesion = stats && stats.total > 0
      ? Math.min(1.0, stats.internal / stats.total)
      : 1.0;

    communities.push({
      id: commId,
      label,
      heuristicLabel: label,
      cohesion,
      symbolCount: nodeIds.length,
    });
  }

  return communities.sort((a, b) => b.symbolCount - a.symbolCount);
};
// ============================================================================
// LOOKUP MAP BUILDERS
// ============================================================================

/**
 * @brief Group membership records by community
 * @param {CommunityMembership[]} memberships - All membership records
 * @returns {Map<string, string[]>} communityId -> nodeIds, in record order
 */
const buildMembershipMap = (memberships: CommunityMembership[]): Map<string, string[]> => {
  const nodeIdsByCommunity = new Map<string, string[]>();

  memberships.forEach(({ communityId, nodeId }) => {
    const bucket = nodeIdsByCommunity.get(communityId);
    if (bucket) {
      bucket.push(nodeId);
      return;
    }
    nodeIdsByCommunity.set(communityId, [nodeId]);
  });

  return nodeIdsByCommunity;
};

/**
 * @brief Resolve every member node to the display label of its community
 *
 * A community's label is its heuristicLabel, else its label, else 'Unknown'.
 * Memberships that point at a community missing from `communities` are left
 * out; if a node appears in several memberships the last one wins.
 *
 * @param {CommunityMembership[]} memberships - All membership records
 * @param {CommunityNode[]} communities - Community nodes with labels
 * @returns {Map<string, string>} nodeId -> community label
 */
const buildNodeCommunityLabelMap = (
  memberships: CommunityMembership[],
  communities: CommunityNode[]
): Map<string, string> => {
  const labelByCommunity = new Map<string, string>();
  communities.forEach(community => {
    const displayLabel = community.heuristicLabel || community.label || 'Unknown';
    labelByCommunity.set(community.id, displayLabel);
  });

  const labelByNode = new Map<string, string>();
  for (const { nodeId, communityId } of memberships) {
    const displayLabel = labelByCommunity.get(communityId);
    if (!displayLabel) continue;
    labelByNode.set(nodeId, displayLabel);
  }

  return labelByNode;
};
/**
 * @brief Collect the files touched by a set of members, with the symbol names found in each
 * @param {MemberSymbol[]} members - Member symbols (entries without a filePath are skipped)
 * @param {string} repoPath - Repository root used to compute relative paths
 * @returns {FileInfo[]} One entry per relative path, sorted by symbol count descending
 */
const gatherFiles = (members: MemberSymbol[], repoPath: string): FileInfo[] => {
  const symbolsByFile = new Map<string, string[]>();

  members
    .filter(member => Boolean(member.filePath))
    .forEach(member => {
      const relativePath = toRelativePath(member.filePath, repoPath);
      const names = symbolsByFile.get(relativePath) ?? [];
      names.push(member.name);
      // Re-setting an existing key keeps its original insertion position
      symbolsByFile.set(relativePath, names);
    });

  const files: FileInfo[] = [];
  for (const [relativePath, symbols] of symbolsByFile) {
    files.push({ relativePath, symbols });
  }
  files.sort((left, right) => right.symbols.length - left.symbols.length);
  return files;
};
/**
 * @brief Select the execution flows that pass through this aggregated community
 * @param {string[]} rawIds - Raw community IDs for this aggregated community
 * @param {ProcessNode[]} processes - All detected processes (not modified)
 * @returns {ProcessNode[]} New array of processes sharing at least one community
 *          with rawIds, longest (highest stepCount) first
 */
const gatherFlows = (rawIds: string[], processes: ProcessNode[]): ProcessNode[] => {
  const ownCommunityIds = new Set(rawIds);
  const touching: ProcessNode[] = [];

  for (const flow of processes) {
    let intersects = false;
    for (const communityId of flow.communities) {
      if (ownCommunityIds.has(communityId)) {
        intersects = true;
        break;
      }
    }
    if (intersects) {
      touching.push(flow);
    }
  }

  touching.sort((left, right) => right.stepCount - left.stepCount);
  return touching;
};
Array.from(targetCounts.entries()) .map(([targetLabel, count]) => ({ targetLabel, count })) .sort((a, b) => b.count - a.count); }; // ============================================================================ // MARKDOWN RENDERING // ============================================================================ /** * @brief Render SKILL.md content for a single community * @param {AggregatedCommunity} community - The aggregated community data * @param {string} projectName - Project name for the description * @param {MemberSymbol[]} members - All member symbols * @param {FileInfo[]} files - File info with symbol names * @param {MemberSymbol[]} entryPoints - Exported entry point symbols * @param {ProcessNode[]} flows - Execution flows touching this community * @param {CrossConnection[]} connections - Cross-community connections * @param {string} kebabName - Kebab-case name for the skill * @returns {string} Full SKILL.md content */ const renderSkillMarkdown = ( community: AggregatedCommunity, projectName: string, members: MemberSymbol[], files: FileInfo[], entryPoints: MemberSymbol[], flows: ProcessNode[], connections: CrossConnection[], kebabName: string ): string => { const cohesionPct = Math.round(community.cohesion * 100); // Dominant directory: most common top-level directory const dominantDir = getDominantDirectory(files); // Top symbol names for "When to Use" const topNames = entryPoints.slice(0, 3).map(e => e.name); if (topNames.length === 0) { // Fallback to any members topNames.push(...members.slice(0, 3).map(m => m.name)); } const lines: string[] = []; // Frontmatter lines.push('---'); lines.push(`name: ${kebabName}`); lines.push(`description: "Skill for the ${community.label} area of ${projectName}. 
${community.symbolCount} symbols across ${files.length} files."`); lines.push('---'); lines.push(''); // Title lines.push(`# ${community.label}`); lines.push(''); lines.push(`${community.symbolCount} symbols | ${files.length} files | Cohesion: ${cohesionPct}%`); lines.push(''); // When to Use lines.push('## When to Use'); lines.push(''); if (dominantDir) { lines.push(`- Working with code in \`${dominantDir}/\``); } if (topNames.length > 0) { lines.push(`- Understanding how ${topNames.join(', ')} work`); } lines.push(`- Modifying ${community.label.toLowerCase()}-related functionality`); lines.push(''); // Key Files (top 10) lines.push('## Key Files'); lines.push(''); lines.push('| File | Symbols |'); lines.push('|------|---------|'); for (const f of files.slice(0, 10)) { const symbolList = f.symbols.slice(0, 5).join(', '); const suffix = f.symbols.length > 5 ? ` (+${f.symbols.length - 5})` : ''; lines.push(`| \`${f.relativePath}\` | ${symbolList}${suffix} |`); } lines.push(''); // Entry Points (top 5) if (entryPoints.length > 0) { lines.push('## Entry Points'); lines.push(''); lines.push('Start here when exploring this area:'); lines.push(''); for (const ep of entryPoints.slice(0, 5)) { lines.push(`- **\`${ep.name}\`** (${ep.label}) \u2014 \`${ep.filePath}:${ep.startLine}\``); } lines.push(''); } // Key Symbols (top 20, exported first, then by type) lines.push('## Key Symbols'); lines.push(''); lines.push('| Symbol | Type | File | Line |'); lines.push('|--------|------|------|------|'); const sortedMembers = [...members].sort((a, b) => { if (a.isExported !== b.isExported) return a.isExported ? 
-1 : 1; return a.label.localeCompare(b.label); }); for (const m of sortedMembers.slice(0, 20)) { lines.push(`| \`${m.name}\` | ${m.label} | \`${m.filePath}\` | ${m.startLine} |`); } lines.push(''); // Execution Flows if (flows.length > 0) { lines.push('## Execution Flows'); lines.push(''); lines.push('| Flow | Type | Steps |'); lines.push('|------|------|-------|'); for (const f of flows.slice(0, 10)) { lines.push(`| \`${f.heuristicLabel}\` | ${f.processType} | ${f.stepCount} |`); } lines.push(''); } // Connected Areas if (connections.length > 0) { lines.push('## Connected Areas'); lines.push(''); lines.push('| Area | Connections |'); lines.push('|------|-------------|'); for (const c of connections.slice(0, 8)) { lines.push(`| ${c.targetLabel} | ${c.count} calls |`); } lines.push(''); } // How to Explore const firstEntry = entryPoints.length > 0 ? entryPoints[0].name : (members.length > 0 ? members[0].name : community.label); lines.push('## How to Explore'); lines.push(''); lines.push(`1. \`gitnexus_context({name: "${firstEntry}"})\` \u2014 see callers and callees`); lines.push(`2. \`gitnexus_query({query: "${community.label.toLowerCase()}"})\` \u2014 find related execution flows`); lines.push('3. 
/**
 * @brief Convert an absolute or repo-relative file path to a clean relative path
 *
 * The repository root is stripped only when it matches on a path-segment
 * boundary, so a sibling directory that merely shares the root's prefix
 * (e.g. `/work/app-legacy` next to `/work/app`) is not truncated.
 *
 * @param {string} filePath - The file path from the graph node
 * @param {string} repoPath - Repository root path (a trailing slash is tolerated)
 * @returns {string} Relative path using forward slashes, without a leading slash
 */
const toRelativePath = (filePath: string, repoPath: string): string => {
  // Normalize to forward slashes for cross-platform consistency
  const normalizedFile = filePath.replace(/\\/g, '/');
  // Drop trailing slashes so "/repo" and "/repo/" behave the same
  const normalizedRepo = repoPath.replace(/\\/g, '/').replace(/\/+$/, '');

  const isInsideRepo =
    normalizedRepo !== '' &&
    (normalizedFile === normalizedRepo || normalizedFile.startsWith(`${normalizedRepo}/`));

  if (isInsideRepo) {
    return normalizedFile.slice(normalizedRepo.length).replace(/^\//, '');
  }

  // Already relative or under a different root
  return normalizedFile.replace(/^\//, '');
};
/**
 * Print the indexing status of the repository containing the current
 * working directory.
 *
 * Output cases:
 *  - not a git repository
 *  - no index found (distinguishing a leftover KuzuDB index from no index at all)
 *  - indexed: repository path, index time, indexed vs. current commit, freshness
 */
export const statusCommand = async () => {
  const workingDir = process.cwd();

  if (!isGitRepo(workingDir)) {
    console.log('Not a git repository.');
    return;
  }

  const indexed = await findRepo(workingDir);

  if (indexed) {
    const headCommit = getCurrentCommit(indexed.repoPath);
    const freshness = headCommit === indexed.meta.lastCommit
      ? '✅ up-to-date'
      : '⚠️ stale (re-run gitnexus analyze)';

    const report = [
      `Repository: ${indexed.repoPath}`,
      `Indexed: ${new Date(indexed.meta.indexedAt).toLocaleString()}`,
      `Indexed commit: ${indexed.meta.lastCommit?.slice(0, 7)}`,
      `Current commit: ${headCommit?.slice(0, 7)}`,
      `Status: ${freshness}`,
    ];
    for (const line of report) {
      console.log(line);
    }
    return;
  }

  // No registered index: check whether a KuzuDB index from an older version is still on disk
  const repoRoot = getGitRoot(workingDir) ?? workingDir;
  const { storagePath } = getStoragePaths(repoRoot);
  const hasLegacyIndex = await hasKuzuIndex(storagePath);

  const hint = hasLegacyIndex
    ? [
        'Repository has a stale KuzuDB index from a previous version.',
        'Run: gitnexus analyze (rebuilds the index with LadybugDB)',
      ]
    : ['Repository not indexed.', 'Run: gitnexus analyze'];
  for (const line of hint) {
    console.log(line);
  }
};
/**
 * Return the module-wide LocalBackend, creating and initializing it on first use.
 *
 * The new instance is stored in `_backend` before `init()` resolves. When
 * `init()` reports failure an error is printed and the process exits with code 1.
 */
async function getBackend(): Promise<LocalBackend> {
  const cached = _backend;
  if (cached) {
    return cached;
  }

  const fresh = new LocalBackend();
  _backend = fresh;

  const initialized = await fresh.init();
  if (!initialized) {
    console.error('GitNexus: No indexed repositories found. Run: gitnexus analyze');
    process.exit(1);
  }
  return fresh;
}
/**
 * CLI entry point for `gitnexus query <search_query>`.
 *
 * Validates the query text, forwards the request to the backend's `query`
 * tool, and writes the result via `output()`.
 *
 * @param queryText - Search text; blank input prints usage and exits with code 1.
 * @param options - Optional flags. `limit` arrives as a string and is parsed as
 *   a base-10 integer; a value that does not parse to a number is ignored so
 *   the backend applies its own default instead of receiving `NaN`.
 */
export async function queryCommand(queryText: string, options?: {
  repo?: string;
  context?: string;
  goal?: string;
  limit?: string;
  content?: boolean;
}): Promise<void> {
  if (!queryText?.trim()) {
    console.error('Usage: gitnexus query <search_query>');
    process.exit(1);
  }

  // Always parse with an explicit radix, matching the other numeric CLI options
  const parsedLimit = options?.limit ? Number.parseInt(options.limit, 10) : undefined;
  const limit = parsedLimit !== undefined && Number.isNaN(parsedLimit) ? undefined : parsedLimit;

  const backend = await getBackend();
  const result = await backend.callTool('query', {
    query: queryText,
    task_context: options?.context,
    goal: options?.goal,
    limit,
    include_content: options?.content ?? false,
    repo: options?.repo,
  });
  output(result);
}
/**
 * CLI entry point for `gitnexus cypher <cypher_query>`.
 *
 * Rejects a blank query with a usage message (exit code 1); otherwise passes
 * the query to the backend's `cypher` tool and writes the result via `output()`.
 *
 * @param query - Cypher query text.
 * @param options - Optional flags; `repo` is forwarded to the backend unchanged.
 */
export async function cypherCommand(query: string, options?: {
  repo?: string;
}): Promise<void> {
  const hasQuery = Boolean(query?.trim());
  if (!hasQuery) {
    console.error('Usage: gitnexus cypher <cypher_query>');
    process.exit(1);
  }

  const toolArgs = { query, repo: options?.repo };
  const backend = await getBackend();
  output(await backend.callTool('cypher', toolArgs));
}
/**
 * Prompt the user for a single line of input via stdin.
 *
 * Visible mode (default, or whenever stdin is not a TTY) uses a readline
 * question. Hidden mode (`hide` set and stdin is a TTY) reads stdin in raw
 * mode and echoes `*` per character, so secrets such as API keys are not shown.
 *
 * @param question - Text written to stdout before input is read.
 * @param hide - Mask the typed characters when stdin is a TTY.
 * @returns The entered text with surrounding whitespace removed.
 */
function prompt(question: string, hide = false): Promise<string> {
  return new Promise((resolve) => {
    if (!hide || !process.stdin.isTTY) {
      const rl = readline.createInterface({
        input: process.stdin,
        output: process.stdout,
      });
      rl.question(question, (answer) => {
        rl.close();
        resolve(answer.trim());
      });
      return;
    }

    // Hidden mode: no readline interface is created here, so the raw-mode
    // handler below is the only consumer of keystrokes on the TTY.
    const stdin = process.stdin;
    process.stdout.write(question);
    let input = '';
    stdin.setRawMode(true);
    stdin.resume();
    stdin.setEncoding('utf-8');

    // Undo everything the hidden branch changed on stdin
    const restoreTerminal = (): void => {
      stdin.setRawMode(false);
      stdin.removeListener('data', onData);
      stdin.pause();
    };

    const onData = (chunk: string): void => {
      // A single 'data' event can carry several characters (e.g. a pasted
      // key, possibly followed by a newline), so handle them one at a time.
      for (const char of chunk) {
        if (char === '\n' || char === '\r' || char === '\u0004') {
          restoreTerminal();
          process.stdout.write('\n');
          resolve(input.trim());
          return;
        }
        if (char === '\u0003') {
          // Ctrl+C
          restoreTerminal();
          process.exit(1);
          return;
        }
        if (char === '\u007F' || char === '\b') {
          // Backspace: drop the last character and erase one mask symbol
          if (input.length > 0) {
            input = input.slice(0, -1);
            process.stdout.write('\b \b');
          }
          continue;
        }
        input += char;
        process.stdout.write('*');
      }
    };

    stdin.on('data', onData);
  });
}
interactive fallback) ───────────────── // Save any CLI overrides immediately if (options?.apiKey || options?.model || options?.baseUrl) { const existing = await loadCLIConfig(); const updates: Record<string, string> = {}; if (options.apiKey) updates.apiKey = options.apiKey; if (options.model) updates.model = options.model; if (options.baseUrl) updates.baseUrl = options.baseUrl; await saveCLIConfig({ ...existing, ...updates }); console.log(' Config saved to ~/.gitnexus/config.json\n'); } const savedConfig = await loadCLIConfig(); const hasSavedConfig = !!(savedConfig.apiKey && savedConfig.baseUrl); const hasCLIOverrides = !!(options?.apiKey || options?.model || options?.baseUrl); let llmConfig = await resolveLLMConfig({ model: options?.model, baseUrl: options?.baseUrl, apiKey: options?.apiKey, }); // Run interactive setup if no saved config and no CLI flags provided // (even if env vars exist — let user explicitly choose their provider) if (!hasSavedConfig && !hasCLIOverrides) { if (!process.stdin.isTTY) { if (!llmConfig.apiKey) { console.log(' Error: No LLM API key found.'); console.log(' Set OPENAI_API_KEY or GITNEXUS_API_KEY environment variable,'); console.log(' or pass --api-key <key>.\n'); process.exitCode = 1; return; } // Non-interactive with env var — just use it } else { console.log(' No LLM configured. Let\'s set it up.\n'); console.log(' Supports OpenAI, OpenRouter, or any OpenAI-compatible API.\n'); // Provider selection console.log(' [1] OpenAI (api.openai.com)'); console.log(' [2] OpenRouter (openrouter.ai)'); console.log(' [3] Custom endpoint\n'); const choice = await prompt(' Select provider (1/2/3): '); let baseUrl: string; let defaultModel: string; if (choice === '2') { baseUrl = 'https://openrouter.ai/api/v1'; defaultModel = 'minimax/minimax-m2.5'; } else if (choice === '3') { baseUrl = await prompt(' Base URL (e.g. http://localhost:11434/v1): '); if (!baseUrl) { console.log('\n No URL provided. 
Aborting.\n'); process.exitCode = 1; return; } defaultModel = 'gpt-4o-mini'; } else { baseUrl = 'https://api.openai.com/v1'; defaultModel = 'gpt-4o-mini'; } // Model const modelInput = await prompt(` Model (default: ${defaultModel}): `); const model = modelInput || defaultModel; // API key — pre-fill hint if env var exists const envKey = process.env.GITNEXUS_API_KEY || process.env.OPENAI_API_KEY || ''; let key: string; if (envKey) { const masked = envKey.slice(0, 6) + '...' + envKey.slice(-4); const useEnv = await prompt(` Use existing env key (${masked})? (Y/n): `); if (!useEnv || useEnv.toLowerCase() === 'y' || useEnv.toLowerCase() === 'yes') { key = envKey; } else { key = await prompt(' API key: ', true); } } else { key = await prompt(' API key: ', true); } if (!key) { console.log('\n No key provided. Aborting.\n'); process.exitCode = 1; return; } // Save await saveCLIConfig({ apiKey: key, baseUrl, model }); console.log(' Config saved to ~/.gitnexus/config.json\n'); llmConfig = { ...llmConfig, apiKey: key, baseUrl, model }; } } // ── Setup progress bar with elapsed timer ────────────────────────── const bar = new cliProgress.SingleBar({ format: ' {bar} {percentage}% | {phase}', barCompleteChar: '\u2588', barIncompleteChar: '\u2591', hideCursor: true, barGlue: '', autopadding: true, clearOnComplete: false, stopOnComplete: false, }, cliProgress.Presets.shades_grey); bar.start(100, 0, { phase: 'Initializing...' }); const t0 = Date.now(); let lastPhase = ''; let phaseStart = t0; // Tick elapsed time every second while stuck on the same phase const elapsedTimer = setInterval(() => { if (lastPhase) { const elapsed = Math.round((Date.now() - phaseStart) / 1000); if (elapsed >= 3) { bar.update({ phase: `${lastPhase} (${elapsed}s)` }); } } }, 1000); // ── Run generator ─────────────────────────────────────────────────── const wikiOptions: WikiOptions = { force: options?.force, model: options?.model, baseUrl: options?.baseUrl, concurrency: options?.concurrency ? 
parseInt(options.concurrency, 10) : undefined, }; const generator = new WikiGenerator( repoPath, storagePath, lbugPath, llmConfig, wikiOptions, (phase, percent, detail) => { const label = detail || phase; if (label !== lastPhase) { lastPhase = label; phaseStart = Date.now(); } bar.update(percent, { phase: label }); }, ); try { const result = await generator.run(); clearInterval(elapsedTimer); bar.update(100, { phase: 'Done' }); bar.stop(); const elapsed = ((Date.now() - t0) / 1000).toFixed(1); const wikiDir = path.join(storagePath, 'wiki'); const viewerPath = path.join(wikiDir, 'index.html'); if (result.mode === 'up-to-date' && !options?.force) { console.log('\n Wiki is already up to date.'); console.log(` Viewer: ${viewerPath}\n`); await maybePublishGist(viewerPath, options?.gist); return; } console.log(`\n Wiki generated successfully (${elapsed}s)\n`); console.log(` Mode: ${result.mode}`); console.log(` Pages: ${result.pagesGenerated}`); console.log(` Output: ${wikiDir}`); console.log(` Viewer: ${viewerPath}`); if (result.failedModules && result.failedModules.length > 0) { console.log(`\n Failed modules (${result.failedModules.length}):`); for (const mod of result.failedModules) { console.log(` - ${mod}`); } console.log(' Re-run to retry failed modules (pages will be regenerated).'); } console.log(''); await maybePublishGist(viewerPath, options?.gist); } catch (err: any) { clearInterval(elapsedTimer); bar.stop(); if (err.message?.includes('No source files')) { console.log(`\n ${err.message}\n`); } else if (err.message?.includes('API key') || err.message?.includes('API error')) { console.log(`\n LLM Error: ${err.message}\n`); // Offer to reconfigure on auth-related failures const isAuthError = err.message?.includes('401') || err.message?.includes('403') || err.message?.includes('502') || err.message?.includes('authenticate') || err.message?.includes('Unauthorized'); if (isAuthError && process.stdin.isTTY) { const answer = await new Promise<string>((resolve) => { 
/**
 * Upload the wiki HTML file as a public GitHub Gist using the `gh` CLI.
 *
 * @param htmlPath - Path of the HTML file handed to `gh gist create`.
 * @returns The gist URL plus a githack viewer URL built from the user and gist
 *   id found in it (the gist URL itself when that pattern does not match), or
 *   `null` when `gh` fails or prints no gist.github.com URL.
 */
function publishGist(htmlPath: string): { url: string; rawUrl: string } | null {
  let stdout: string;
  try {
    stdout = execFileSync('gh', [
      'gist', 'create', htmlPath,
      '--desc', 'Repository Wiki — generated by GitNexus',
      '--public',
    ], { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] }).trim();
  } catch {
    return null;
  }

  // Pick the first output line that carries the gist URL
  const gistUrl = stdout.split('\n').find(line => line.includes('gist.github.com'));
  if (gistUrl === undefined) {
    return null;
  }

  // gist URL format: https://gist.github.com/{user}/{id}
  const parsed = /gist\.github\.com\/([^/]+)\/([a-f0-9]+)/.exec(gistUrl);
  const rawUrl = parsed
    ? `https://gistcdn.githack.com/${parsed[1]}/${parsed[2]}/raw/index.html`
    : gistUrl;

  return { url: gistUrl.trim(), rawUrl };
}
/**
 * Names that are always excluded from indexing.
 *
 * `shouldIgnorePath` tests every path segment (directories and the file name)
 * against this set, and `isHardcodedIgnoredDirectory` tests a single name.
 */
const DEFAULT_IGNORE_LIST = new Set([
  // Version Control
  '.git',
  '.svn',
  '.hg',
  '.bzr',

  // IDEs & Editors
  '.idea',
  '.vscode',
  '.vs',
  '.eclipse',
  '.settings',
  '.DS_Store',
  'Thumbs.db',

  // Dependencies
  'node_modules',
  'bower_components',
  'jspm_packages',
  'vendor', // PHP/Go
  // 'packages' removed - commonly used for monorepo source code (lerna, pnpm, yarn workspaces)
  'venv',
  '.venv',
  'env',
  '.env',
  '__pycache__',
  '.pytest_cache',
  '.mypy_cache',
  'site-packages',
  '.tox',
  'eggs',
  '.eggs',
  'lib64',
  'parts',
  'sdist',
  'wheels',

  // Build Outputs
  'dist',
  'build',
  'out',
  'output',
  'bin',
  'obj',
  'target', // Java/Rust
  '.next',
  '.nuxt',
  '.output',
  '.vercel',
  '.netlify',
  '.serverless',
  '_build',
  // NOTE(review): contains a '/', so it can never equal a single path segment
  // in shouldIgnorePath; 'build' above already covers the directory itself.
  'public/build',
  '.parcel-cache',
  '.turbo',
  '.svelte-kit',

  // Test & Coverage
  'coverage',
  '.nyc_output',
  'htmlcov',
  '.coverage',
  '__tests__', // Often just test files
  '__mocks__',
  '.jest',

  // Logs & Temp
  'logs',
  'log',
  'tmp',
  'temp',
  'cache',
  '.cache',
  '.tmp',
  '.temp',

  // Generated/Compiled ('.serverless' is listed once, under Build Outputs)
  '.generated',
  'generated',
  'auto-generated',
  '.terraform',

  // Documentation (optional - might want to keep)
  // 'docs',
  // 'documentation',

  // Misc
  '.husky',
  '.github', // GitHub config, not code
  '.circleci',
  '.gitlab',
  'fixtures', // Test fixtures
  'snapshots', // Jest snapshots
  '__snapshots__',
]);
// Files to ignore by exact name.
// shouldIgnorePath looks up both the file name as given and its lowercased
// form, so all-lowercase entries match regardless of case, while entries
// containing uppercase letters (e.g. 'LICENSE', 'Gemfile.lock') only match
// when the file name has exactly that casing.
const IGNORED_FILES = new Set([
  // Dependency lock files
  'package-lock.json',
  'yarn.lock',
  'pnpm-lock.yaml',
  'composer.lock',
  'Gemfile.lock',
  'poetry.lock',
  'Cargo.lock',
  'go.sum',
  // Tooling dot-files
  '.gitignore',
  '.gitattributes',
  '.npmrc',
  '.yarnrc',
  '.editorconfig',
  '.prettierrc',
  '.prettierignore',
  '.eslintignore',
  '.dockerignore',
  // OS metadata files
  'Thumbs.db',
  '.DS_Store',
  // Project meta documents
  'LICENSE',
  'LICENSE.md',
  'LICENSE.txt',
  'CHANGELOG.md',
  'CHANGELOG',
  'CONTRIBUTING.md',
  'CODE_OF_CONDUCT.md',
  'SECURITY.md',
  // Environment files
  '.env',
  '.env.local',
  '.env.development',
  '.env.production',
  '.env.test',
  '.env.example',
]);
/**
 * Decide whether a path is excluded by the hardcoded ignore rules
 * (DEFAULT_IGNORE_LIST, IGNORED_FILES, IGNORED_EXTENSIONS and a few
 * generated-file name patterns). Ignore files on disk are NOT consulted here.
 *
 * @param filePath - Path using either `/` or `\` separators
 * @returns true when the path must be skipped
 */
export const shouldIgnorePath = (filePath: string): boolean => {
  const normalizedPath = filePath.replace(/\\/g, '/');
  const parts = normalizedPath.split('/');
  const fileName = parts[parts.length - 1];
  const fileNameLower = fileName.toLowerCase();

  // Any path segment (directory or the file name itself) in the ignore list excludes the path
  if (parts.some(part => DEFAULT_IGNORE_LIST.has(part))) {
    return true;
  }

  // Exact filename matches (as given, then lowercased)
  if (IGNORED_FILES.has(fileName) || IGNORED_FILES.has(fileNameLower)) {
    return true;
  }

  // Extension checks run against the lowercased name
  const lastDotIndex = fileNameLower.lastIndexOf('.');
  if (lastDotIndex !== -1) {
    const ext = fileNameLower.substring(lastDotIndex);
    if (IGNORED_EXTENSIONS.has(ext)) return true;

    // Handle compound extensions like .min.js, .bundle.js
    const secondLastDot = fileNameLower.lastIndexOf('.', lastDotIndex - 1);
    if (secondLastDot !== -1) {
      const compoundExt = fileNameLower.substring(secondLastDot);
      if (IGNORED_EXTENSIONS.has(compoundExt)) return true;
    }
  }

  // Dot files are deliberately NOT blanket-ignored: many are important configs.
  // Only the explicit lists above exclude them.

  // Ignore files that look like generated/bundled code
  return (
    fileNameLower.includes('.bundle.') ||
    fileNameLower.includes('.chunk.') ||
    fileNameLower.includes('.generated.') ||
    fileNameLower.endsWith('.d.ts') // TypeScript declaration files
  );
}

/** Check if a directory name is in the hardcoded ignore list */
export const isHardcodedIgnoredDirectory = (name: string): boolean => {
  return DEFAULT_IGNORE_LIST.has(name);
};

/** Options controlling which ignore files loadIgnoreRules / createIgnoreFilter read. */
export interface IgnoreOptions {
  /** Skip .gitignore parsing, only read .gitnexusignore. Defaults to GITNEXUS_NO_GITIGNORE env var. */
  noGitignore?: boolean;
}

/**
 * Load .gitignore and .gitnexusignore rules from the repo root.
 * Returns an `ignore` instance with all patterns, or null if no files found.
 *
 * A missing file (ENOENT) is skipped silently; any other read error is reported
 * with console.warn and the file is skipped.
 *
 * @param repoPath - Directory that contains the ignore files
 * @param options - See IgnoreOptions
 */
export const loadIgnoreRules = async (
  repoPath: string,
  options?: IgnoreOptions
): Promise<Ignore | null> => {
  const ig = ignore();
  let hasRules = false;

  // Allow users to bypass .gitignore parsing (e.g. when .gitignore accidentally excludes source files).
  // Any non-empty value of the env var enables the bypass — including "0" and "false".
  const skipGitignore = options?.noGitignore ?? !!process.env.GITNEXUS_NO_GITIGNORE;
  const filenames = skipGitignore
    ? ['.gitnexusignore']
    : ['.gitignore', '.gitnexusignore'];

  for (const filename of filenames) {
    try {
      const content = await fs.readFile(nodePath.join(repoPath, filename), 'utf-8');
      ig.add(content);
      hasRules = true;
    } catch (err: unknown) {
      const code = (err as NodeJS.ErrnoException).code;
      if (code !== 'ENOENT') {
        console.warn(` Warning: could not read ${filename}: ${(err as Error).message}`);
      }
    }
  }

  return hasRules ? ig : null;
};

/**
 * Create a glob-compatible ignore filter combining:
 * - .gitignore / .gitnexusignore patterns (via `ignore` package)
 * - Hardcoded DEFAULT_IGNORE_LIST, IGNORED_EXTENSIONS, IGNORED_FILES
 *
 * Returns an IgnoreLike object for glob's `ignore` option,
 * enabling directory-level pruning during traversal.
 */
export const createIgnoreFilter = async (repoPath: string, options?: IgnoreOptions) => {
  const ig = await loadIgnoreRules(repoPath, options);

  return {
    ignored(p: Path): boolean {
      // path-scurry's Path.relative() returns POSIX paths on all platforms,
      // which is what the `ignore` package expects. No explicit normalization needed.
      const rel = p.relative();
      if (!rel) return false;
      // Check .gitignore / .gitnexusignore patterns
      if (ig && ig.ignores(rel)) return true;
      // Fall back to hardcoded rules
      return shouldIgnorePath(rel);
    },
    childrenIgnored(p: Path): boolean {
      // Fast path: check directory name against hardcoded list.
      // Note: dot-directories (.git, .vscode, etc.) are primarily excluded by
      // glob's `dot: false` option in filesystem-walker.ts. This check is
      // defense-in-depth — do not remove `dot: false` assuming this covers it.
      if (DEFAULT_IGNORE_LIST.has(p.name)) return true;
      // Check against .gitignore / .gitnexusignore patterns.
      // Test both bare path and path with trailing slash to handle
      // bare-name patterns (e.g. `local`) and dir-only patterns (e.g. `local/`).
      if (ig) {
        const rel = p.relative();
        if (rel && (ig.ignores(rel) || ig.ignores(rel + '/'))) return true;
      }
      return false;
    },
  };
};

================================================
FILE: gitnexus/src/config/supported-languages.ts
================================================
export enum SupportedLanguages {
  JavaScript = 'javascript',
  TypeScript = 'typescript',
  Python = 'python',
  Java = 'java',
  C = 'c',
  CPlusPlus = 'cpp',
  CSharp = 'csharp',
  Go = 'go',
  Ruby = 'ruby',
  Rust = 'rust',
  PHP = 'php',
  Kotlin = 'kotlin',
  Swift = 'swift',
}

================================================
FILE: gitnexus/src/core/augmentation/engine.ts
================================================
/**
 * Augmentation Engine
 *
 * Lightweight, fast-path enrichment of search patterns with knowledge graph context.
 * Designed to be called from platform hooks (Claude Code PreToolUse, Cursor beforeShellExecution)
 * when an agent runs grep/glob/search.
 *
 * Performance target: <500ms cold start, <200ms warm.
 *
 * Design decisions:
 * - Uses only BM25 search (no semantic/embedding) for speed
 * - Clusters used internally for ranking, NEVER in output
 * - Output is pure relationships: callers, callees, process participation
 * - Graceful failure: any error → return empty string
 */

import path from 'path';
import { listRegisteredRepos } from '../../storage/repo-manager.js';

/**
 * Find the best matching repo for a given working directory.
 * Matches by checking if cwd is within the repo's path.
*/
async function findRepoForCwd(cwd: string): Promise<{
  name: string;
  storagePath: string;
  lbugPath: string;
} | null> {
  try {
    const entries = await listRegisteredRepos({ validate: true });
    const isWindows = process.platform === 'win32';
    const sep = path.sep;

    // Absolute form of a path; lowercased on Windows (drive letters can differ: D: vs d:)
    const normalize = (p: string): string => {
      const resolved = path.resolve(p);
      return isWindows ? resolved.toLowerCase() : resolved;
    };

    // Directory prefix used for containment tests. A filesystem root already ends
    // with the separator; appending another one ("//") would never match anything.
    const asPrefix = (dir: string): string => (dir.endsWith(sep) ? dir : dir + sep);

    const normalizedCwd = normalize(cwd);

    // Find the LONGEST matching repo path (most specific match wins)
    let bestMatch: typeof entries[0] | null = null;
    let bestLen = 0;

    for (const entry of entries) {
      const normalizedRepo = normalize(entry.path);

      // A repo matches when cwd is the repo, cwd is inside the repo, OR the repo is inside cwd.
      // The prefix test must end at a path separator boundary to avoid false positives
      // (e.g. /projects/gitnexusv2 should NOT match /projects/gitnexus)
      const matched =
        normalizedCwd === normalizedRepo ||
        normalizedCwd.startsWith(asPrefix(normalizedRepo)) ||
        normalizedRepo.startsWith(asPrefix(normalizedCwd));

      if (matched && normalizedRepo.length > bestLen) {
        bestMatch = entry;
        bestLen = normalizedRepo.length;
      }
    }

    if (!bestMatch) return null;

    return {
      name: bestMatch.name,
      storagePath: bestMatch.storagePath,
      lbugPath: path.join(bestMatch.storagePath, 'lbug'),
    };
  } catch {
    // Registry lookup failed — treated the same as "no repo found"
    return null;
  }
}

/**
 * Augment a search pattern with knowledge graph context.
 *
 * 1. BM25 search for the pattern
 * 2. For top matches, fetch callers/callees/processes
 * 3. Rank by internal cluster cohesion (not exposed)
 * 4. Format as structured text block
 *
 * Returns empty string on any error (graceful failure).
*/
export async function augment(pattern: string, cwd?: string): Promise<string> {
  if (!pattern || pattern.length < 3) return '';

  // First whitespace-delimited word of the pattern, used to narrow symbols by name.
  // Trim first: with leading whitespace split() yields '' as its first element, and
  // `CONTAINS ''` would match every symbol in the file.
  const firstToken = pattern.trim().split(/\s+/)[0];
  if (!firstToken) return '';
  const escapedToken = firstToken.replace(/'/g, "''");

  const workDir = cwd || process.cwd();

  try {
    const repo = await findRepoForCwd(workDir);
    if (!repo) return '';

    // Lazy-load lbug adapter (skip unnecessary init)
    const { initLbug, executeQuery, isLbugReady } = await import('../../mcp/core/lbug-adapter.js');
    const { searchFTSFromLbug } = await import('../search/bm25-index.js');

    const repoId = repo.name.toLowerCase();

    // Init LadybugDB if not already
    if (!isLbugReady(repoId)) {
      await initLbug(repoId, repo.lbugPath);
    }

    // Step 1: BM25 search (fast, no embeddings)
    const bm25Results = await searchFTSFromLbug(pattern, 10, repoId);

    if (bm25Results.length === 0) return '';

    // Step 2: Map BM25 file results to symbols
    const symbolMatches: Array<{
      nodeId: string;
      name: string;
      type: string;
      filePath: string;
      score: number;
    }> = [];

    for (const result of bm25Results.slice(0, 5)) {
      const escaped = result.filePath.replace(/'/g, "''");
      try {
        const symbols = await executeQuery(repoId, `
          MATCH (n) WHERE n.filePath = '${escaped}'
          AND n.name CONTAINS '${escapedToken}'
          RETURN n.id AS id, n.name AS name, labels(n)[0] AS type, n.filePath AS filePath
          LIMIT 3
        `);
        // Rows are read by column name first, then by position
        for (const sym of symbols) {
          symbolMatches.push({
            nodeId: sym.id || sym[0],
            name: sym.name || sym[1],
            type: sym.type || sym[2],
            filePath: sym.filePath || sym[3],
            score: result.score,
          });
        }
      } catch { /* skip */ }
    }

    if (symbolMatches.length === 0) return '';

    // Step 3: Batch-fetch callers/callees/processes/cohesion for top matches
    // Uses batched WHERE n.id IN [...] queries instead of per-symbol queries
    const uniqueSymbols = symbolMatches.slice(0, 5).filter((sym, i, arr) =>
      arr.findIndex(s => s.nodeId === sym.nodeId) === i
    );

    if (uniqueSymbols.length === 0) return '';

    const idList = uniqueSymbols.map(s => `'${s.nodeId.replace(/'/g, "''")}'`).join(', ');

    // Batch fetch callers
    const callersMap = new Map<string, string[]>();
    try {
      const rows = await executeQuery(repoId, `
        MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(n)
        WHERE n.id IN [${idList}]
        RETURN n.id AS targetId, caller.name AS name
        LIMIT 15
      `);
      for (const r of rows) {
        const tid = r.targetId || r[0];
        const name = r.name || r[1];
        if (tid && name) {
          if (!callersMap.has(tid)) callersMap.set(tid, []);
          callersMap.get(tid)!.push(name);
        }
      }
    } catch { /* skip */ }

    // Batch fetch callees
    const calleesMap = new Map<string, string[]>();
    try {
      const rows = await executeQuery(repoId, `
        MATCH (n)-[:CodeRelation {type: 'CALLS'}]->(callee)
        WHERE n.id IN [${idList}]
        RETURN n.id AS sourceId, callee.name AS name
        LIMIT 15
      `);
      for (const r of rows) {
        const sid = r.sourceId || r[0];
        const name = r.name || r[1];
        if (sid && name) {
          if (!calleesMap.has(sid)) calleesMap.set(sid, []);
          calleesMap.get(sid)!.push(name);
        }
      }
    } catch { /* skip */ }

    // Batch fetch processes
    const processesMap = new Map<string, string[]>();
    try {
      const rows = await executeQuery(repoId, `
        MATCH (n)-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process)
        WHERE n.id IN [${idList}]
        RETURN n.id AS nodeId, p.heuristicLabel AS label, r.step AS step, p.stepCount AS stepCount
      `);
      for (const r of rows) {
        const nid = r.nodeId || r[0];
        const label = r.label || r[1];
        const step = r.step || r[2];
        const stepCount = r.stepCount || r[3];
        if (nid && label) {
          if (!processesMap.has(nid)) processesMap.set(nid, []);
          processesMap.get(nid)!.push(`${label} (step ${step}/${stepCount})`);
        }
      }
    } catch { /* skip */ }

    // Batch fetch cohesion
    const cohesionMap = new Map<string, number>();
    try {
      const rows = await executeQuery(repoId, `
        MATCH (n)-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community)
        WHERE n.id IN [${idList}]
        RETURN n.id AS nodeId, c.cohesion AS cohesion
      `);
      for (const r of rows) {
        const nid = r.nodeId || r[0];
        const coh = r.cohesion ?? r[1] ?? 0;
        if (nid) cohesionMap.set(nid, coh);
      }
    } catch { /* skip */ }

    // Assemble enriched results (at most 3 callers and 3 callees per symbol)
    const enriched: Array<{
      name: string;
      filePath: string;
      callers: string[];
      callees: string[];
      processes: string[];
      cohesion: number;
    }> = [];

    for (const sym of uniqueSymbols) {
      enriched.push({
        name: sym.name,
        filePath: sym.filePath,
        callers: (callersMap.get(sym.nodeId) || []).slice(0, 3),
        callees: (calleesMap.get(sym.nodeId) || []).slice(0, 3),
        processes: processesMap.get(sym.nodeId) || [],
        cohesion: cohesionMap.get(sym.nodeId) || 0,
      });
    }

    if (enriched.length === 0) return '';

    // Step 4: Rank by cohesion (internal signal) and format
    enriched.sort((a, b) => b.cohesion - a.cohesion);

    const lines: string[] = [`[GitNexus] ${enriched.length} related symbols found:`, ''];

    for (const item of enriched) {
      lines.push(`${item.name} (${item.filePath})`);

      if (item.callers.length > 0) {
        lines.push(` Called by: ${item.callers.join(', ')}`);
      }
      if (item.callees.length > 0) {
        lines.push(` Calls: ${item.callees.join(', ')}`);
      }
      if (item.processes.length > 0) {
        lines.push(` Flows: ${item.processes.join(', ')}`);
      }
      lines.push('');
    }

    return lines.join('\n').trim();
  } catch {
    // Graceful failure — never break the original tool
    return '';
  }
}

================================================
FILE: gitnexus/src/core/embeddings/embedder.ts
================================================
/**
 * Embedder Module
 *
 * Singleton factory for transformers.js embedding pipeline.
 * Handles model loading, caching, and both single and batch embedding operations.
 *
 * Uses snowflake-arctic-embed-xs by default (22M params, 384 dims, ~90MB)
 */

// Suppress ONNX Runtime native warnings (e.g.
VerifyEachNodeIsAssignedToAnEp)
// Must be set BEFORE onnxruntime-node is imported by transformers.js
// Level 3 = Error only (skips Warning/Info)
// NOTE(review): when this file is loaded as an ES module, static `import`
// declarations are evaluated before this statement runs, regardless of where it
// appears in the file. Whether that matters depends on when ONNX Runtime reads
// the variable — confirm; `session_options.logSeverityLevel` below is set as well.
if (!process.env.ORT_LOG_LEVEL) {
  process.env.ORT_LOG_LEVEL = '3';
}

import { pipeline, env, type FeatureExtractionPipeline } from '@huggingface/transformers';
import { existsSync } from 'fs';
import { execFileSync } from 'child_process';
import { join } from 'path';
import { DEFAULT_EMBEDDING_CONFIG, type EmbeddingConfig, type ModelProgress } from './types.js';

/**
 * Check whether CUDA libraries are actually available on this system.
 * ONNX Runtime's native layer crashes (uncatchable) if we attempt CUDA
 * without the required shared libraries, so we probe first.
 *
 * Checks the dynamic linker cache (ldconfig) which covers all architectures
 * and install paths, then falls back to CUDA_PATH / LD_LIBRARY_PATH env vars.
 *
 * @returns true when `libcublasLt.so.12` is found by either probe
 */
function isCudaAvailable(): boolean {
  // Primary: query the dynamic linker cache — covers all architectures,
  // distro layouts, and custom install paths registered with ldconfig
  try {
    const out = execFileSync('ldconfig', ['-p'], { timeout: 3000, encoding: 'utf-8' });
    if (out.includes('libcublasLt.so.12')) return true;
  } catch {
    // ldconfig not available (e.g. non-standard container)
  }

  // Fallback: check CUDA_PATH and LD_LIBRARY_PATH for environments where
  // ldconfig doesn't know about the CUDA install (conda, manual /opt/cuda, etc.)
  for (const envVar of ['CUDA_PATH', 'LD_LIBRARY_PATH']) {
    const val = process.env[envVar];
    if (!val) continue;

    for (const dir of val.split(':').filter(Boolean)) {
      if (existsSync(join(dir, 'lib64', 'libcublasLt.so.12')) ||
          existsSync(join(dir, 'lib', 'libcublasLt.so.12')) ||
          existsSync(join(dir, 'libcublasLt.so.12'))) return true;
    }
  }

  return false;
}

// Module-level state for singleton pattern
let embedderInstance: FeatureExtractionPipeline | null = null;
let isInitializing = false;
let initPromise: Promise<FeatureExtractionPipeline> | null = null;
let currentDevice: 'dml' | 'cuda' | 'cpu' | 'wasm' | null = null;

/**
 * Progress callback type for model loading
 */
export type ModelProgressCallback = (progress: ModelProgress) => void;

/**
 * Get the current device being used for inference
 * (null until a model has been loaded, and again after disposeEmbedder()).
 */
export const getCurrentDevice = (): 'dml' | 'cuda' | 'cpu' | 'wasm' | null => currentDevice;

/**
 * Initialize the embedding model
 * Uses singleton pattern - only loads once, subsequent calls return cached instance
 *
 * When a GPU device ('dml' or 'cuda') is requested and fails to load, CPU is
 * tried next; the error of the last device tried is propagated.
 *
 * @param onProgress - Optional callback for model download progress
 * @param config - Optional configuration override
 * @param forceDevice - Force a specific device
 * @returns Promise resolving to the embedder pipeline
 * @throws The load error of the last device attempted
 */
export const initEmbedder = async (
  onProgress?: ModelProgressCallback,
  config: Partial<EmbeddingConfig> = {},
  forceDevice?: 'dml' | 'cuda' | 'cpu' | 'wasm'
): Promise<FeatureExtractionPipeline> => {
  // Return existing instance if available
  if (embedderInstance) {
    return embedderInstance;
  }

  // If already initializing, wait for that promise
  if (isInitializing && initPromise) {
    return initPromise;
  }

  isInitializing = true;

  const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };

  // On Windows, use DirectML for GPU acceleration (via DirectX12)
  // CUDA is only available on Linux x64 with onnxruntime-node
  // Probe for CUDA first — ONNX Runtime crashes (uncatchable native error)
  // if we attempt CUDA without the required shared libraries.
  // The probe spawns a subprocess, so it only runs when the device is really
  // left to auto-detection (not forced, not explicitly configured).
  const detectGpuDevice = (): 'dml' | 'cuda' | 'cpu' =>
    process.platform === 'win32' ? 'dml' : (isCudaAvailable() ? 'cuda' : 'cpu');
  const requestedDevice = forceDevice
    || (finalConfig.device === 'auto' ? detectGpuDevice() : finalConfig.device);

  initPromise = (async () => {
    try {
      // Configure transformers.js environment
      env.allowLocalModels = false;

      const isDev = process.env.NODE_ENV === 'development';
      if (isDev) {
        console.log(`🧠 Loading embedding model: ${finalConfig.modelId}`);
      }

      const progressCallback = onProgress ? (data: any) => {
        const progress: ModelProgress = {
          status: data.status || 'progress',
          file: data.file,
          progress: data.progress,
          loaded: data.loaded,
          total: data.total,
        };
        onProgress(progress);
      } : undefined;

      // Try GPU first if auto, fall back to CPU
      // Windows: dml (DirectML/DirectX12), Linux: cuda
      const devicesToTry: Array<'dml' | 'cuda' | 'cpu' | 'wasm'> =
        (requestedDevice === 'dml' || requestedDevice === 'cuda')
          ? [requestedDevice, 'cpu']
          : [requestedDevice as 'cpu' | 'wasm'];

      for (const device of devicesToTry) {
        try {
          if (isDev && device === 'dml') {
            console.log('🔧 Trying DirectML (DirectX12) GPU backend...');
          } else if (isDev && device === 'cuda') {
            console.log('🔧 Trying CUDA GPU backend...');
          } else if (isDev && device === 'cpu') {
            console.log('🔧 Using CPU backend...');
          } else if (isDev && device === 'wasm') {
            console.log('🔧 Using WASM backend (slower)...');
          }

          embedderInstance = await (pipeline as any)(
            'feature-extraction',
            finalConfig.modelId,
            {
              device: device,
              dtype: 'fp32',
              progress_callback: progressCallback,
              session_options: { logSeverityLevel: 3 },
            }
          );
          currentDevice = device;

          if (isDev) {
            const label = device === 'dml' ? 'GPU (DirectML/DirectX12)'
              : device === 'cuda' ? 'GPU (CUDA)'
              : device.toUpperCase();
            console.log(`✅ Using ${label} backend`);
            console.log('✅ Embedding model loaded successfully');
          }

          return embedderInstance!;
        } catch (deviceError) {
          if (isDev && (device === 'cuda' || device === 'dml')) {
            const gpuType = device === 'dml' ? 'DirectML' : 'CUDA';
            console.log(`⚠️ ${gpuType} not available, falling back to CPU...`);
          }
          // Continue to next device in list
          if (device === devicesToTry[devicesToTry.length - 1]) {
            throw deviceError; // Last device failed, propagate error
          }
        }
      }

      throw new Error('No suitable device found for embedding model');
    } catch (error) {
      // Clear cached state so a later call can retry from scratch
      initPromise = null;
      embedderInstance = null;
      throw error;
    } finally {
      isInitializing = false;
    }
  })();

  return initPromise;
};

/**
 * Check if the embedder is initialized and ready
 */
export const isEmbedderReady = (): boolean => {
  return embedderInstance !== null;
};

/**
 * Get the embedder instance (throws if not initialized)
 */
export const getEmbedder = (): FeatureExtractionPipeline => {
  if (!embedderInstance) {
    throw new Error('Embedder not initialized. Call initEmbedder() first.');
  }
  return embedderInstance;
};

/**
 * Embed a single text string
 *
 * @param text - Text to embed
 * @returns Float32Array of embedding vector (384 dimensions with the default model)
 * @throws If the embedder has not been initialized
 */
export const embedText = async (text: string): Promise<Float32Array> => {
  const embedder = getEmbedder();

  const result = await embedder(text, {
    pooling: 'mean',
    normalize: true,
  });

  // Result is a Tensor, convert to Float32Array
  return new Float32Array(result.data as ArrayLike<number>);
};

/**
 * Embed multiple texts in a single batch
 * More efficient than calling embedText multiple times
 *
 * @param texts - Array of texts to embed
 * @returns Array of Float32Array embedding vectors, one per input text
 * @throws If the embedder has not been initialized (and `texts` is non-empty)
 */
export const embedBatch = async (texts: string[]): Promise<Float32Array[]> => {
  if (texts.length === 0) {
    return [];
  }

  const embedder = getEmbedder();

  // Process batch
  const result = await embedder(texts, {
    pooling: 'mean',
    normalize: true,
  });

  // Result shape is [batch_size, dimensions]
  // Need to split into individual vectors
  const data = result.data as ArrayLike<number>;

  // Derive the vector width from the output itself so a configured model with a
  // different embedding size is split correctly; fall back to the default only
  // if the flat buffer does not divide evenly across the inputs.
  const perText = data.length / texts.length;
  const dimensions = Number.isInteger(perText) && perText > 0
    ? perText
    : DEFAULT_EMBEDDING_CONFIG.dimensions;

  const embeddings: Float32Array[] = [];
  for (let i = 0; i < texts.length; i++) {
    const start = i * dimensions;
    const end = start + dimensions;
    embeddings.push(new Float32Array(Array.prototype.slice.call(data, start, end)));
  }

  return embeddings;
};

/**
 * Convert Float32Array to regular number array (for LadybugDB storage)
 */
export const embeddingToArray = (embedding: Float32Array): number[] => {
  return Array.from(embedding);
};

/**
 * Cleanup the embedder (free memory)
 * Call this when done with embeddings
 *
 * No-op when no model is loaded. Disposal errors are ignored; the module state
 * (instance, cached init promise, current device) is cleared either way.
 */
export const disposeEmbedder = async (): Promise<void> => {
  if (embedderInstance) {
    // transformers.js pipelines may have a dispose method
    try {
      if ('dispose' in embedderInstance && typeof embedderInstance.dispose === 'function') {
        await embedderInstance.dispose();
      }
    } catch {
      // Ignore disposal errors
    }
    embedderInstance = null;
    initPromise = null;
    // No model is loaded any more, so no device is in use
    currentDevice = null;
  }
};

================================================
FILE: gitnexus/src/core/embeddings/embedding-pipeline.ts
================================================
/**
 * Embedding Pipeline Module
 *
 * Orchestrates the background embedding process:
 * 1. Query embeddable nodes from LadybugDB
 * 2. Generate text representations
 * 3. Batch embed using transformers.js
 * 4. Update LadybugDB with embeddings
 * 5.
Create vector index for semantic search
 */

import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady } from './embedder.js';
import { generateBatchEmbeddingTexts, generateEmbeddingText } from './text-generator.js';
import {
  type EmbeddingProgress,
  type EmbeddingConfig,
  type EmbeddableNode,
  type SemanticSearchResult,
  type ModelProgress,
  DEFAULT_EMBEDDING_CONFIG,
  EMBEDDABLE_LABELS,
} from './types.js';

const isDev = process.env.NODE_ENV === 'development';

/**
 * Progress callback type
 */
export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;

/**
 * Query all embeddable nodes from LadybugDB
 * Uses table-specific queries (File has different schema than code elements)
 *
 * A failing query for one label is skipped (logged in development only), so the
 * result may cover only some of EMBEDDABLE_LABELS.
 *
 * @param executeQuery - Function to execute Cypher queries
 * @returns Nodes from every label whose query succeeded
 */
const queryEmbeddableNodes = async (
  executeQuery: (cypher: string) => Promise<any[]>
): Promise<EmbeddableNode[]> => {
  const allNodes: EmbeddableNode[] = [];

  // Query each embeddable table with table-specific columns
  for (const label of EMBEDDABLE_LABELS) {
    try {
      let query: string;

      if (label === 'File') {
        // File nodes don't have startLine/endLine
        query = `
          MATCH (n:File)
          RETURN n.id AS id, n.name AS name, 'File' AS label,
                 n.filePath AS filePath, n.content AS content
        `;
      } else {
        // Code elements have startLine/endLine
        query = `
          MATCH (n:${label})
          RETURN n.id AS id, n.name AS name, '${label}' AS label,
                 n.filePath AS filePath, n.content AS content,
                 n.startLine AS startLine, n.endLine AS endLine
        `;
      }

      const rows = await executeQuery(query);
      // Rows are read by column name first, then by position
      for (const row of rows) {
        allNodes.push({
          id: row.id ?? row[0],
          name: row.name ?? row[1],
          label: row.label ?? row[2],
          filePath: row.filePath ?? row[3],
          content: row.content ?? row[4] ?? '',
          startLine: row.startLine ?? row[5],
          endLine: row.endLine ?? row[6],
        });
      }
    } catch (error) {
      // Table might not exist or be empty, continue
      if (isDev) {
        console.warn(`Query for ${label} nodes failed:`, error);
      }
    }
  }

  return allNodes;
};

/**
 * Batch INSERT embeddings into separate CodeEmbedding table
 * Using a separate lightweight table avoids copy-on-write overhead
 * that occurs when UPDATEing nodes with large content fields
 *
 * @param executeWithReusedStatement - Runs one statement once per parameter set
 * @param updates - Node id / embedding pairs to insert
 */
const batchInsertEmbeddings = async (
  executeWithReusedStatement: (
    cypher: string,
    paramsList: Array<Record<string, any>>
  ) => Promise<void>,
  updates: Array<{ id: string; embedding: number[] }>
): Promise<void> => {
  // INSERT into separate embedding table - much more memory efficient!
  const cypher = `CREATE (e:CodeEmbedding {nodeId: $nodeId, embedding: $embedding})`;
  const paramsList = updates.map(u => ({ nodeId: u.id, embedding: u.embedding }));
  await executeWithReusedStatement(cypher, paramsList);
};

// Set once the VECTOR extension install/load has been attempted in this process
let vectorExtensionLoaded = false;

/**
 * Create the vector index for semantic search
 * Now indexes the separate CodeEmbedding table
 *
 * Never throws: extension and index-creation failures are swallowed
 * (index-creation failures are logged in development only).
 *
 * @param executeQuery - Function to execute Cypher queries
 */
const createVectorIndex = async (
  executeQuery: (cypher: string) => Promise<any[]>
): Promise<void> => {
  // LadybugDB v0.15+ requires explicit VECTOR extension loading (once per session)
  if (!vectorExtensionLoaded) {
    // Run the two statements independently: a failing INSTALL must not prevent
    // the LOAD from being attempted.
    for (const statement of ['INSTALL VECTOR', 'LOAD EXTENSION VECTOR']) {
      try {
        await executeQuery(statement);
      } catch {
        // Extension may already be installed/loaded — CREATE_VECTOR_INDEX will fail clearly if not
      }
    }
    vectorExtensionLoaded = true;
  }

  const cypher = `
    CALL CREATE_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx', 'embedding', metric := 'cosine')
  `;

  try {
    await executeQuery(cypher);
  } catch (error) {
    // Index might already exist
    if (isDev) {
      console.warn('Vector index creation warning:', error);
    }
  }
};

/**
 * Run the embedding pipeline
 *
 * @param executeQuery - Function to execute Cypher queries against LadybugDB
 * @param executeWithReusedStatement - Function to execute with reused prepared statement
 * @param onProgress - Callback for progress
updates
 * @param config - Optional configuration override
 * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
 */
export const runEmbeddingPipeline = async (
  executeQuery: (cypher: string) => Promise<any[]>,
  executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>,
  onProgress: EmbeddingProgressCallback,
  config: Partial<EmbeddingConfig> = {},
  skipNodeIds?: Set<string>,
): Promise<void> => {
  const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };

  try {
    // Phase 1: Load embedding model (reported as 0-20% of overall progress)
    onProgress({
      phase: 'loading-model',
      percent: 0,
      modelDownloadPercent: 0,
    });

    await initEmbedder((modelProgress: ModelProgress) => {
      const downloadPercent = modelProgress.progress ?? 0;
      onProgress({
        phase: 'loading-model',
        percent: Math.round(downloadPercent * 0.2),
        modelDownloadPercent: downloadPercent,
      });
    }, finalConfig);

    onProgress({
      phase: 'loading-model',
      percent: 20,
      modelDownloadPercent: 100,
    });

    if (isDev) {
      console.log('🔍 Querying embeddable nodes...');
    }

    // Phase 2: Query embeddable nodes
    let nodes = await queryEmbeddableNodes(executeQuery);

    // Incremental mode: filter out nodes that already have embeddings
    if (skipNodeIds && skipNodeIds.size > 0) {
      const beforeCount = nodes.length;
      nodes = nodes.filter(n => !skipNodeIds.has(n.id));
      if (isDev) {
        console.log(`📦 Incremental embeddings: ${beforeCount} total, ${skipNodeIds.size} cached, ${nodes.length} to embed`);
      }
    }

    const totalNodes = nodes.length;

    if (isDev) {
      console.log(`📊 Found ${totalNodes} embeddable nodes`);
    }

    if (totalNodes === 0) {
      onProgress({
        phase: 'ready',
        percent: 100,
        nodesProcessed: 0,
        totalNodes: 0,
      });
      return;
    }

    // Phase 3: Batch embed nodes
    // Clamp to a whole number >= 1: a batch size of 0, NaN or a negative number
    // would otherwise make totalBatches Infinity/NaN (endless loop or no work at all).
    const batchSize = Math.max(1, Math.floor(finalConfig.batchSize) || 1);
    const totalBatches = Math.ceil(totalNodes / batchSize);
    let processedNodes = 0;

    onProgress({
      phase: 'embedding',
      percent: 20,
      nodesProcessed: 0,
      totalNodes,
      currentBatch: 0,
      totalBatches,
    });

    for (let batchIndex = 0; batchIndex < totalBatches; batchIndex++) {
      const start = batchIndex * batchSize;
      const end = Math.min(start + batchSize, totalNodes);
      const batch = nodes.slice(start, end);

      // Generate texts for this batch
      const texts = generateBatchEmbeddingTexts(batch, finalConfig);

      // Embed the batch
      const embeddings = await embedBatch(texts);

      // Update LadybugDB with embeddings
      const updates = batch.map((node, i) => ({
        id: node.id,
        embedding: embeddingToArray(embeddings[i]),
      }));

      await batchInsertEmbeddings(executeWithReusedStatement, updates);

      processedNodes += batch.length;

      // Report progress (20-90% for embedding phase)
      const embeddingProgress = 20 + ((processedNodes / totalNodes) * 70);
      onProgress({
        phase: 'embedding',
        percent: Math.round(embeddingProgress),
        nodesProcessed: processedNodes,
        totalNodes,
        currentBatch: batchIndex + 1,
        totalBatches,
      });
    }

    // Phase 4: Create vector index
    onProgress({
      phase: 'indexing',
      percent: 90,
      nodesProcessed: totalNodes,
      totalNodes,
    });

    if (isDev) {
      console.log('📇 Creating vector index...');
    }

    await createVectorIndex(executeQuery);

    // Complete
    onProgress({
      phase: 'ready',
      percent: 100,
      nodesProcessed: totalNodes,
      totalNodes,
    });

    if (isDev) {
      console.log('✅ Embedding pipeline complete!');
    }
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';

    if (isDev) {
      console.error('❌ Embedding pipeline error:', error);
    }

    // Report the failure through the progress channel, then let the caller see it too
    onProgress({
      phase: 'error',
      percent: 0,
      error: errorMessage,
    });

    throw error;
  }
};

/**
 * Perform semantic search using the vector index
 *
 * Uses CodeEmbedding table and queries each node table to get metadata
 *
 * @param executeQuery - Function to execute Cypher queries
 * @param query - Search query text
 * @param k - Number of results to return (default: 10)
 * @param maxDistance - Maximum distance threshold (default: 0.5)
 * @returns Array of search results ordered by relevance
 * @throws If the embedding model has not been initialized
 */
export const semanticSearch = async (
  executeQuery: (cypher: string) => Promise<any[]>,
  query: string,
  k: number = 10,
  maxDistance: number = 0.5
): Promise<SemanticSearchResult[]> => {
  if (!isEmbedderReady()) {
    throw new Error('Embedding model not initialized. Run embedding pipeline first.');
  }

  // Embed the query
  const queryEmbedding = await embedText(query);
  const queryVec = embeddingToArray(queryEmbedding);
  const queryVecStr = `[${queryVec.join(',')}]`;

  // Query the vector index on CodeEmbedding to get nodeIds and distances.
  // The cast width follows the query vector itself (384 with the default model)
  // so a configured model with another embedding size produces a valid cast.
  const vectorQuery = `
    CALL QUERY_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx',
      CAST(${queryVecStr} AS FLOAT[${queryVec.length}]), ${k})
    YIELD node AS emb, distance
    WITH emb, distance
    WHERE distance < ${maxDistance}
    RETURN emb.nodeId AS nodeId, distance
    ORDER BY distance
  `;

  const embResults = await executeQuery(vectorQuery);

  if (embResults.length === 0) {
    return [];
  }

  // Group results by label for batched metadata queries.
  // The label is taken from the node id: the text before its first ':'.
  const byLabel = new Map<string, Array<{ nodeId: string; distance: number }>>();
  for (const embRow of embResults) {
    const nodeId = embRow.nodeId ?? embRow[0];
    const distance = embRow.distance ?? embRow[1];
    const labelEndIdx = nodeId.indexOf(':');
    const label = labelEndIdx > 0 ? nodeId.substring(0, labelEndIdx) : 'Unknown';
    if (!byLabel.has(label)) byLabel.set(label, []);
    byLabel.get(label)!.push({ nodeId, distance });
  }

  // Batch-fetch metadata per label
  const results: SemanticSearchResult[] = [];

  for (const [label, items] of byLabel) {
    const idList = items.map(i => `'${i.nodeId.replace(/'/g, "''")}'`).join(', ');
    try {
      let nodeQuery: string;
      if (label === 'File') {
        nodeQuery = `
          MATCH (n:File) WHERE n.id IN [${idList}]
          RETURN n.id AS id, n.name AS name, n.filePath AS filePath
        `;
      } else {
        nodeQuery = `
          MATCH (n:${label}) WHERE n.id IN [${idList}]
          RETURN n.id AS id, n.name AS name, n.filePath AS filePath,
                 n.startLine AS startLine, n.endLine AS endLine
        `;
      }
      const nodeRows = await executeQuery(nodeQuery);
      const rowMap = new Map<string, any>();
      for (const row of nodeRows) {
        const id = row.id ?? row[0];
        rowMap.set(id, row);
      }
      // Matches whose node row is missing are dropped
      for (const item of items) {
        const nodeRow = rowMap.get(item.nodeId);
        if (nodeRow) {
          results.push({
            nodeId: item.nodeId,
            name: nodeRow.name ?? nodeRow[1] ?? '',
            label,
            filePath: nodeRow.filePath ?? nodeRow[2] ?? '',
            distance: item.distance,
            startLine: label !== 'File' ? (nodeRow.startLine ?? nodeRow[3]) : undefined,
            endLine: label !== 'File' ? (nodeRow.endLine ?? nodeRow[4]) : undefined,
          });
        }
      }
    } catch {
      // Table might not exist, skip
    }
  }

  // Re-sort by distance since batch queries may have mixed order
  results.sort((a, b) => a.distance - b.distance);

  return results;
};

/**
 * Semantic search with graph expansion (flattened results)
 *
 * Note: With multi-table schema, graph traversal is simplified.
 * Returns semantic matches with their metadata.
 * For full graph traversal, use execute_vector_cypher tool directly.
 *
 * @param executeQuery - Function to execute Cypher queries
 * @param query - Search query text
 * @param k - Number of initial semantic matches (default: 5)
 * @param _hops - Unused (kept for API compatibility).
/**
 * Semantic search returned in the flattened "match + connected node" row shape.
 *
 * With the multi-table schema no graph expansion is performed here: every row
 * describes a semantic match and all `connected*` / `relationType` fields are
 * `null`. For real graph traversal use the execute_vector_cypher tool directly.
 *
 * @param executeQuery - Function to execute Cypher queries
 * @param query - Search query text
 * @param k - Number of semantic matches to request (default: 5)
 * @param _hops - Ignored; retained so existing callers keep compiling
 * @returns One row per semantic match, in the order produced by `semanticSearch`
 */
export const semanticSearchWithContext = async (
  executeQuery: (cypher: string) => Promise<any[]>,
  query: string,
  k: number = 5,
  _hops: number = 1
): Promise<any[]> => {
  // The distance cut-off is fixed at 0.5 for this entry point
  const matches = await semanticSearch(executeQuery, query, k, 0.5);

  const rows: any[] = [];
  for (const match of matches) {
    rows.push({
      matchId: match.nodeId,
      matchName: match.name,
      matchLabel: match.label,
      matchPath: match.filePath,
      distance: match.distance,
      // No traversal is done, so there is never a connected node
      connectedId: null,
      connectedName: null,
      connectedLabel: null,
      relationType: null,
    });
  }
  return rows;
};
/**
 * Extract the filename (last `/`-separated segment) from a file path.
 * Falls back to the whole path when the last segment is empty (e.g. `dir/`).
 */
const getFileName = (filePath: string): string => {
  const parts = filePath.split('/');
  return parts[parts.length - 1] || filePath;
};

/**
 * Extract the directory portion (everything before the last `/`) of a file path.
 * Returns an empty string for a bare filename.
 */
const getDirectory = (filePath: string): string => {
  const parts = filePath.split('/');
  parts.pop();
  return parts.join('/') || '';
};

/**
 * Truncate content to at most `maxLength` characters plus a trailing `...`.
 *
 * Prefers to cut at the last space when that space lies in the final 20% of
 * the window, so words are not chopped unnecessarily.
 */
const truncateContent = (content: string, maxLength: number): string => {
  if (content.length <= maxLength) {
    return content;
  }

  let truncated = content.slice(0, maxLength);

  // slice() counts UTF-16 code units; if the cut landed between the two halves
  // of a surrogate pair, drop the dangling high surrogate instead of emitting
  // an unpaired (invalid) code unit.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    truncated = truncated.slice(0, -1);
  }

  // Find last space before maxLength to avoid cutting words
  const lastSpace = truncated.lastIndexOf(' ');
  if (lastSpace > maxLength * 0.8) {
    return truncated.slice(0, lastSpace) + '...';
  }

  return truncated + '...';
};

/**
 * Clean code content for embedding.
 * Removes excessive whitespace while preserving structure.
 */
const cleanContent = (content: string): string => {
  return content
    // Normalize line endings (CRLF and lone CR)
    .replace(/\r\n?/g, '\n')
    // Trim trailing whitespace on each line FIRST, so whitespace-only lines
    // become truly empty and take part in the blank-line collapse below
    .split('\n')
    .map(line => line.trimEnd())
    .join('\n')
    // Collapse runs of blank lines down to a single blank line
    .replace(/\n{3,}/g, '\n\n')
    .trim();
};

/**
 * Generate embedding text for a symbol-like node (Function, Class, Method,
 * Interface): a `<kind>: <name>` header, the file name, the directory when
 * there is one, then a blank line and a cleaned, truncated code snippet.
 */
const generateSymbolText = (
  kind: string,
  node: EmbeddableNode,
  maxSnippetLength: number
): string => {
  const parts: string[] = [
    `${kind}: ${node.name}`,
    `File: ${getFileName(node.filePath)}`,
  ];

  const dir = getDirectory(node.filePath);
  if (dir) {
    parts.push(`Directory: ${dir}`);
  }

  if (node.content) {
    const snippet = truncateContent(cleanContent(node.content), maxSnippetLength);
    parts.push('', snippet);
  }

  return parts.join('\n');
};

/**
 * Generate embedding text for a File node.
 * Uses the file name, the full path and the first characters of the content.
 */
const generateFileText = (
  node: EmbeddableNode,
  maxSnippetLength: number
): string => {
  const parts: string[] = [
    `File: ${node.name}`,
    `Path: ${node.filePath}`,
  ];

  if (node.content) {
    // For files, use a shorter snippet (capped at 300) since they can be very long
    const snippet = truncateContent(
      cleanContent(node.content),
      Math.min(maxSnippetLength, 300)
    );
    parts.push('', snippet);
  }

  return parts.join('\n');
};

/**
 * Generate embedding text for any embeddable node.
 * Dispatches on the node label; labels without a dedicated format fall back to
 * a two-line `<label>: <name>` / `Path: <filePath>` summary with no snippet.
 *
 * @param node - The node to generate text for
 * @param config - Optional configuration; only `maxSnippetLength` is read
 * @returns Text suitable for embedding
 */
export const generateEmbeddingText = (
  node: EmbeddableNode,
  config: Partial<EmbeddingConfig> = {}
): string => {
  const maxSnippetLength = config.maxSnippetLength ?? DEFAULT_EMBEDDING_CONFIG.maxSnippetLength;

  switch (node.label) {
    case 'Function':
    case 'Class':
    case 'Method':
    case 'Interface':
      // These four share one layout; only the header keyword differs
      return generateSymbolText(node.label, node, maxSnippetLength);
    case 'File':
      return generateFileText(node, maxSnippetLength);
    default:
      // Fallback for any other embeddable type
      return `${node.label}: ${node.name}\nPath: ${node.filePath}`;
  }
};

/**
 * Generate embedding texts for a batch of nodes.
 *
 * @param nodes - Array of nodes to generate text for
 * @param config - Optional configuration
 * @returns Array of texts in the same order as input nodes
 */
export const generateBatchEmbeddingTexts = (
  nodes: EmbeddableNode[],
  config: Partial<EmbeddingConfig> = {}
): string[] => {
  return nodes.map(node => generateEmbeddingText(node, config));
};
/**
 * Node labels that should be embedded for semantic search.
 * These are code elements that benefit from semantic matching.
 * Declared `as const` so the tuple doubles as the source of the
 * `EmbeddableLabel` union below.
 */
export const EMBEDDABLE_LABELS = [
  'Function',
  'Class',
  'Method',
  'Interface',
  'File',
] as const;

/** Union of the string literals listed in `EMBEDDABLE_LABELS`. */
export type EmbeddableLabel = typeof EMBEDDABLE_LABELS[number];

/**
 * Check if a label should be embedded.
 * Acts as a type guard narrowing `label` to `EmbeddableLabel`.
 */
export const isEmbeddableLabel = (label: string): label is EmbeddableLabel =>
  EMBEDDABLE_LABELS.includes(label as EmbeddableLabel);

/**
 * Embedding pipeline phases
 */
export type EmbeddingPhase =
  | 'idle'
  | 'loading-model'
  | 'embedding'
  | 'indexing'
  | 'ready'
  | 'error';

/**
 * Progress information for the embedding pipeline.
 * Only `phase` and `percent` are always present; the remaining fields are
 * filled in when they apply to the current phase.
 */
export interface EmbeddingProgress {
  phase: EmbeddingPhase;
  // NOTE(review): presumably a 0-100 value — confirm against the pipeline
  percent: number;
  modelDownloadPercent?: number;
  nodesProcessed?: number;
  totalNodes?: number;
  currentBatch?: number;
  totalBatches?: number;
  /** Error message; set when `phase` is 'error' */
  error?: string;
}

/**
 * Configuration for the embedding pipeline
 */
export interface EmbeddingConfig {
  /** Model identifier for transformers.js */
  modelId: string;
  /** Number of nodes to embed in each batch */
  batchSize: number;
  /** Embedding vector dimensions */
  dimensions: number;
  /** Device to use for inference: 'auto' tries GPU first (DirectML on Windows, CUDA on Linux), falls back to CPU */
  device: 'auto' | 'dml' | 'cuda' | 'cpu' | 'wasm';
  /** Maximum characters of code snippet to include */
  maxSnippetLength: number;
}

/**
 * Default embedding configuration.
 * Uses the snowflake-arctic-embed-xs model (384-dimensional vectors), batches
 * of 16 nodes and snippets of up to 500 characters.
 * `device: 'auto'` selects the device as described on `EmbeddingConfig.device`;
 * callers can pin 'cpu' or 'wasm' explicitly instead.
 */
export const DEFAULT_EMBEDDING_CONFIG: EmbeddingConfig = {
  modelId: 'Snowflake/snowflake-arctic-embed-xs',
  batchSize: 16,
  dimensions: 384,
  device: 'auto',
  maxSnippetLength: 500,
};

/**
 * Result from semantic search.
 * `startLine` / `endLine` are optional because File results carry no line range.
 */
export interface SemanticSearchResult {
  nodeId: string;
  name: string;
  label: string;
  filePath: string;
  /** Vector distance to the query; smaller means more similar */
  distance: number;
  startLine?: number;
  endLine?: number;
}
/**
 * Create an empty in-memory knowledge graph.
 *
 * Nodes and relationships are stored in insertion-ordered maps keyed by id.
 * Adding an id that already exists is a no-op (first write wins).
 *
 * @returns A `KnowledgeGraph` backed by closure-private maps
 */
export const createKnowledgeGraph = (): KnowledgeGraph => {
  const nodeMap = new Map<string, GraphNode>();
  const relationshipMap = new Map<string, GraphRelationship>();

  /** Insert a node unless its id is already present. */
  const addNode = (node: GraphNode) => {
    if (!nodeMap.has(node.id)) {
      nodeMap.set(node.id, node);
    }
  };

  /** Insert a relationship unless its id is already present. */
  const addRelationship = (relationship: GraphRelationship) => {
    if (!relationshipMap.has(relationship.id)) {
      relationshipMap.set(relationship.id, relationship);
    }
  };

  /**
   * Drop every relationship that has an endpoint in `nodeIds`.
   * One pass over the relationship map regardless of how many ids are given.
   */
  const removeRelationshipsTouching = (nodeIds: ReadonlySet<string>): void => {
    // Deleting the current entry while iterating a Map is well-defined
    for (const [relId, rel] of relationshipMap) {
      if (nodeIds.has(rel.sourceId) || nodeIds.has(rel.targetId)) {
        relationshipMap.delete(relId);
      }
    }
  };

  /**
   * Remove a single node and all relationships involving it.
   *
   * @returns `true` when the node existed, `false` otherwise
   */
  const removeNode = (nodeId: string): boolean => {
    if (!nodeMap.delete(nodeId)) return false;
    removeRelationshipsTouching(new Set([nodeId]));
    return true;
  };

  /**
   * Remove all nodes (and their relationships) belonging to a file.
   *
   * The doomed node ids are collected first and the relationships are swept
   * once, instead of rescanning every relationship for each removed node.
   *
   * @returns Number of nodes removed
   */
  const removeNodesByFile = (filePath: string): number => {
    const removedIds = new Set<string>();
    for (const [nodeId, node] of nodeMap) {
      if (node.properties?.filePath === filePath) {
        nodeMap.delete(nodeId);
        removedIds.add(nodeId);
      }
    }
    if (removedIds.size > 0) {
      removeRelationshipsTouching(removedIds);
    }
    return removedIds.size;
  };

  return {
    // Array getters allocate a fresh copy on every access
    get nodes() {
      return Array.from(nodeMap.values());
    },

    get relationships() {
      return Array.from(relationshipMap.values());
    },

    iterNodes: () => nodeMap.values(),
    iterRelationships: () => relationshipMap.values(),
    forEachNode(fn: (node: GraphNode) => void) {
      nodeMap.forEach(fn);
    },
    forEachRelationship(fn: (rel: GraphRelationship) => void) {
      relationshipMap.forEach(fn);
    },
    getNode: (id: string) => nodeMap.get(id),

    // O(1) count getters - avoid creating arrays just for length
    get nodeCount() {
      return nodeMap.size;
    },

    get relationshipCount() {
      return relationshipMap.size;
    },

    addNode,
    addRelationship,
    removeNode,
    removeNodesByFile,
  };
};
/**
 * Edge kinds that can connect two graph nodes.
 * How `reason` is interpreted depends on the edge type — see
 * `GraphRelationship.reason`.
 */
export type RelationshipType =
  | 'CONTAINS'
  | 'CALLS'
  | 'INHERITS'
  | 'OVERRIDES'
  | 'IMPORTS'
  | 'USES'
  | 'DEFINES'
  | 'DECORATES'
  | 'IMPLEMENTS'
  | 'EXTENDS'
  | 'HAS_METHOD'
  | 'HAS_PROPERTY'
  | 'ACCESSES'
  | 'MEMBER_OF'
  | 'STEP_IN_PROCESS'

/**
 * A node in the knowledge graph: an id, a label naming the node kind, and the
 * property bag for that node.
 */
export interface GraphNode {
  id: string,
  label: NodeLabel,
  properties: NodeProperties,
}

/**
 * A directed, typed edge from `sourceId` to `targetId`.
 */
export interface GraphRelationship {
  id: string,
  sourceId: string,
  targetId: string,
  type: RelationshipType,
  /** Confidence score 0-1 (1.0 = certain, lower = uncertain resolution) */
  confidence: number,
  /** Semantics are edge-type-dependent: CALLS uses resolution tier, ACCESSES uses 'read'/'write', OVERRIDES uses MRO reason */
  reason: string,
  /** Step number for STEP_IN_PROCESS relationships (1-indexed) */
  step?: number,
}
/** Contract for the parsed-tree cache, keyed by file path. */
export interface ASTCache {
  get: (filePath: string) => Parser.Tree | undefined;
  set: (filePath: string, tree: Parser.Tree) => void;
  clear: () => void;
  stats: () => { size: number; maxSize: number };
}

/**
 * Build an LRU cache of parsed syntax trees.
 *
 * @param maxSize - Requested capacity (default: 50); values below 1 are raised to 1
 * @returns An `ASTCache` whose entries are released via the dispose hook below
 */
export const createASTCache = (maxSize: number = 50): ASTCache => {
  const capacity = Math.max(maxSize, 1);

  // Dispose hook handed to the LRU cache: invoked for entries the cache drops.
  // web-tree-sitter trees expose delete(); native tree-sitter trees are
  // GC-managed and have no such method, hence the optional call. The try/catch
  // keeps either runtime from crashing the cache.
  const releaseTree = (tree: Parser.Tree): void => {
    try {
      (tree as any).delete?.();
    } catch (e) {
      console.warn('Failed to delete tree from WASM memory', e);
    }
  };

  const store = new LRUCache<string, Parser.Tree>({
    max: capacity,
    dispose: releaseTree,
  });

  // undefined signals a cache miss
  const get = (filePath: string) => store.get(filePath);

  const set = (filePath: string, tree: Parser.Tree) => {
    store.set(filePath, tree);
  };

  const clear = () => {
    store.clear();
  };

  const stats = () => {
    return { size: store.size, maxSize: capacity };
  };

  return { get, set, clear, stats };
};
import type { SyntaxNode } from './utils.js'; // Stdlib methods that preserve the receiver's type identity. When TypeEnv already // strips nullable wrappers (Option<User> → User), these chain steps are no-ops // for type resolution — the current type passes through unchanged. const TYPE_PRESERVING_METHODS = new Set([ 'unwrap', 'expect', 'unwrap_or', 'unwrap_or_default', 'unwrap_or_else', // Rust Option/Result 'clone', 'to_owned', 'as_ref', 'as_mut', 'borrow', 'borrow_mut', // Rust clone/borrow 'get', // Kotlin/Java Optional.get() 'orElseThrow', // Java Optional ]); /** * Walk up the AST from a node to find the enclosing function/method. * Returns null if the call is at module/file level (top-level code). */ const findEnclosingFunction = ( node: any, filePath: string, ctx: ResolutionContext ): string | null => { let current = node.parent; while (current) { if (FUNCTION_NODE_TYPES.has(current.type)) { const { funcName, label } = extractFunctionName(current); if (funcName) { const resolved = ctx.resolve(funcName, filePath); if (resolved?.tier === 'same-file' && resolved.candidates.length > 0) { return resolved.candidates[0].nodeId; } return generateId(label, `${filePath}:${funcName}`); } } current = current.parent; } return null; }; /** * Verify constructor bindings against SymbolTable and infer receiver types. * Shared between sequential (processCalls) and worker (processCallsFromExtracted) paths. */ const verifyConstructorBindings = ( bindings: readonly ConstructorBinding[], filePath: string, ctx: ResolutionContext, graph?: KnowledgeGraph, ): Map<string, string> => { const verified = new Map<string, string>(); for (const { scope, varName, calleeName, receiverClassName } of bindings) { const tiered = ctx.resolve(calleeName, filePath); const isClass = tiered?.candidates.some(def => def.type === 'Class') ?? 
false; if (isClass) { verified.set(receiverKey(scope, varName), calleeName); } else { let callableDefs = tiered?.candidates.filter(d => d.type === 'Function' || d.type === 'Method' ); // When receiver class is known (e.g. $this->method() in PHP), narrow // candidates to methods owned by that class to avoid false disambiguation failures. if (callableDefs && callableDefs.length > 1 && receiverClassName) { if (graph) { // Worker path: use graph.getNode (fast, already in-memory) const narrowed = callableDefs.filter(d => { if (!d.ownerId) return false; const owner = graph.getNode(d.ownerId); return owner?.properties.name === receiverClassName; }); if (narrowed.length > 0) callableDefs = narrowed; } else { // Sequential path: use ctx.resolve (no graph available) const classResolved = ctx.resolve(receiverClassName, filePath); if (classResolved && classResolved.candidates.length > 0) { const classNodeIds = new Set(classResolved.candidates.map(c => c.nodeId)); const narrowed = callableDefs.filter(d => d.ownerId && classNodeIds.has(d.ownerId) ); if (narrowed.length > 0) callableDefs = narrowed; } } } if (callableDefs && callableDefs.length === 1 && callableDefs[0].returnType) { const typeName = extractReturnTypeName(callableDefs[0].returnType); if (typeName) { verified.set(receiverKey(scope, varName), typeName); } } } } return verified; }; export const processCalls = async ( graph: KnowledgeGraph, files: { path: string; content: string }[], astCache: ASTCache, ctx: ResolutionContext, onProgress?: (current: number, total: number) => void, ): Promise<ExtractedHeritage[]> => { const parser = await loadParser(); const collectedHeritage: ExtractedHeritage[] = []; const pendingWrites: { receiverTypeName: string; propertyName: string; filePath: string; srcId: string }[] = []; // Phase P cross-file: accumulate heritage across files for cross-file isSubclassOf. 
// Used as a secondary check when per-file parentMap lacks the relationship — helps // when the heritage-declaring file is processed before the call site file. // For remaining cases (reverse file order), the SymbolTable class-type fallback applies. const globalParentMap = new Map<string, string[]>(); const globalParentSeen = new Map<string, Set<string>>(); const logSkipped = isVerboseIngestionEnabled(); const skippedByLang = logSkipped ? new Map<string, number>() : null; for (let i = 0; i < files.length; i++) { const file = files[i]; onProgress?.(i + 1, files.length); if (i % 20 === 0) await yieldToEventLoop(); const language = getLanguageFromFilename(file.path); if (!language) continue; if (!isLanguageAvailable(language)) { if (skippedByLang) { skippedByLang.set(language, (skippedByLang.get(language) ?? 0) + 1); } continue; } const queryStr = LANGUAGE_QUERIES[language]; if (!queryStr) continue; await loadLanguage(language, file.path); let tree = astCache.get(file.path); if (!tree) { try { tree = parser.parse(file.content, undefined, { bufferSize: getTreeSitterBufferSize(file.content.length) }); } catch (parseError) { continue; } astCache.set(file.path, tree); } let query; let matches; try { const language = parser.getLanguage(); query = new Parser.Query(language, queryStr); matches = query.matches(tree.rootNode); } catch (queryError) { console.warn(`Query error for ${file.path}:`, queryError); continue; } const lang = getLanguageFromFilename(file.path); // Pre-pass: extract heritage from query matches to build parentMap for buildTypeEnv. // Heritage-processor runs in PARALLEL, so graph edges don't exist when buildTypeEnv runs. 
const fileParentMap = new Map<string, string[]>(); for (const match of matches) { const captureMap: Record<string, any> = {}; match.captures.forEach(c => captureMap[c.name] = c.node); if (captureMap['heritage.class'] && captureMap['heritage.extends']) { const className: string = captureMap['heritage.class'].text; const parentName: string = captureMap['heritage.extends'].text; const extendsNode = captureMap['heritage.extends']; const fieldDecl = extendsNode.parent; if (fieldDecl?.type === 'field_declaration' && fieldDecl.childForFieldName('name')) continue; let parents = fileParentMap.get(className); if (!parents) { parents = []; fileParentMap.set(className, parents); } if (!parents.includes(parentName)) parents.push(parentName); } } const parentMap: ReadonlyMap<string, readonly string[]> = fileParentMap; // Merge per-file heritage into globalParentMap for cross-file isSubclassOf lookups. // Uses a parallel Set (globalParentSeen) for O(1) deduplication instead of O(n) includes(). for (const [cls, parents] of fileParentMap) { let global = globalParentMap.get(cls); let seen = globalParentSeen.get(cls); if (!global) { global = []; globalParentMap.set(cls, global); } if (!seen) { seen = new Set(); globalParentSeen.set(cls, seen); } for (const p of parents) { if (!seen.has(p)) { seen.add(p); global.push(p); } } } const typeEnv = lang ? buildTypeEnv(tree, lang, { symbolTable: ctx.symbols, parentMap }) : null; const callRouter = callRouters[language]; const verifiedReceivers = typeEnv && typeEnv.constructorBindings.length > 0 ? 
verifyConstructorBindings(typeEnv.constructorBindings, file.path, ctx) : new Map<string, string>(); const receiverIndex = buildReceiverTypeIndex(verifiedReceivers); ctx.enableCache(file.path); matches.forEach(match => { const captureMap: Record<string, any> = {}; match.captures.forEach(c => captureMap[c.name] = c.node); // ── Write access: emit ACCESSES {reason: 'write'} for assignments to member fields ── if (captureMap['assignment'] && captureMap['assignment.receiver'] && captureMap['assignment.property']) { const receiverNode = captureMap['assignment.receiver']; const propertyName: string = captureMap['assignment.property'].text; // Resolve receiver type: simple identifier → TypeEnv lookup or class resolution let receiverTypeName: string | undefined; const receiverText = receiverNode.text; if (receiverText && typeEnv) { receiverTypeName = typeEnv.lookup(receiverText, captureMap['assignment']); } // Fall back to verified constructor bindings (mirrors CALLS resolution tier 2) if (!receiverTypeName && receiverText && receiverIndex.size > 0) { const enclosing = findEnclosingFunction(captureMap['assignment'], file.path, ctx); const funcName = enclosing ? extractFuncNameFromSourceId(enclosing) : ''; receiverTypeName = lookupReceiverType(receiverIndex, funcName, receiverText); } if (!receiverTypeName && receiverText) { const resolved = ctx.resolve(receiverText, file.path); if (resolved?.candidates.some(d => d.type === 'Class' || d.type === 'Struct' || d.type === 'Interface' || d.type === 'Enum' || d.type === 'Record' || d.type === 'Impl', )) { receiverTypeName = receiverText; } } if (receiverTypeName) { const enclosing = findEnclosingFunction(captureMap['assignment'], file.path, ctx); const srcId = enclosing || generateId('File', file.path); // Defer resolution: Ruby attr_accessor properties are registered during // this same loop, so cross-file lookups fail if the declaring file hasn't // been processed yet. Collect now, resolve after all files are done. 
pendingWrites.push({ receiverTypeName, propertyName, filePath: file.path, srcId }); } // Assignment-only capture (no @call sibling): skip the rest of this // forEach iteration — this acts as a `continue` in the match loop. if (!captureMap['call']) return; } if (!captureMap['call']) return; const nameNode = captureMap['call.name']; if (!nameNode) return; const calledName = nameNode.text; const routed = callRouter(calledName, captureMap['call']); if (routed) { switch (routed.kind) { case 'skip': case 'import': return; case 'heritage': for (const item of routed.items) { collectedHeritage.push({ filePath: file.path, className: item.enclosingClass, parentName: item.mixinName, kind: item.heritageKind, }); } return; case 'properties': { const fileId = generateId('File', file.path); const propEnclosingClassId = findEnclosingClassId(captureMap['call'], file.path); for (const item of routed.items) { const nodeId = generateId('Property', `${file.path}:${item.propName}`); graph.addNode({ id: nodeId, label: 'Property', properties: { name: item.propName, filePath: file.path, startLine: item.startLine, endLine: item.endLine, language, isExported: true, description: item.accessorType, }, }); ctx.symbols.add(file.path, item.propName, nodeId, 'Property', { ...(propEnclosingClassId ? { ownerId: propEnclosingClassId } : {}), ...(item.declaredType ? 
{ declaredType: item.declaredType } : {}), }); const relId = generateId('DEFINES', `${fileId}->${nodeId}`); graph.addRelationship({ id: relId, sourceId: fileId, targetId: nodeId, type: 'DEFINES', confidence: 1.0, reason: '', }); if (propEnclosingClassId) { graph.addRelationship({ id: generateId('HAS_PROPERTY', `${propEnclosingClassId}->${nodeId}`), sourceId: propEnclosingClassId, targetId: nodeId, type: 'HAS_PROPERTY', confidence: 1.0, reason: '', }); } } return; } case 'call': break; } } if (isBuiltInOrNoise(calledName)) return; const callNode = captureMap['call']; const callForm = inferCallForm(callNode, nameNode); const receiverName = callForm === 'member' ? extractReceiverName(nameNode) : undefined; let receiverTypeName = receiverName && typeEnv ? typeEnv.lookup(receiverName, callNode) : undefined; // Phase P: virtual dispatch override — when the declared type is a base class but // the constructor created a known subclass, prefer the more specific type. // Checks per-file parentMap first, then falls back to globalParentMap for // cross-file heritage (e.g. Dog extends Animal declared in a different file). // Reconstructs the exact scope key (funcName@startIndex\0varName) from the // enclosing function AST node for a correct, O(1) map lookup. if (receiverTypeName && receiverName && typeEnv && typeEnv.constructorTypeMap.size > 0) { // Reconstruct scope key to match constructorTypeMap's scope\0varName format let scope = ''; let p = callNode.parent; while (p) { if (FUNCTION_NODE_TYPES.has(p.type)) { const { funcName } = extractFunctionName(p); if (funcName) { scope = `${funcName}@${p.startIndex}`; break; } } p = p.parent; } const ctorType = typeEnv.constructorTypeMap.get(`${scope}\0${receiverName}`); if (ctorType && ctorType !== receiverTypeName) { // Verify subclass relationship: per-file parentMap first, then cross-file // globalParentMap, then fall back to SymbolTable class verification. 
// The SymbolTable fallback handles cross-file cases where heritage is declared // in a file not yet processed (e.g. Dog extends Animal in models/Dog.kt when // processing services/App.kt). Since constructorTypeMap only records entries // when a type annotation AND constructor are both present (val x: Base = Sub()), // confirming both are class-like types is sufficient — the original code would // not compile if Sub didn't extend Base. if (isSubclassOf(ctorType, receiverTypeName, parentMap) || isSubclassOf(ctorType, receiverTypeName, globalParentMap) || (ctx.symbols.lookupFuzzy(ctorType).some(d => d.type === 'Class' || d.type === 'Struct') && ctx.symbols.lookupFuzzy(receiverTypeName).some(d => d.type === 'Class' || d.type === 'Struct' || d.type === 'Interface'))) { receiverTypeName = ctorType; } } } // Fall back to verified constructor bindings for return type inference if (!receiverTypeName && receiverName && receiverIndex.size > 0) { const enclosingFunc = findEnclosingFunction(callNode, file.path, ctx); const funcName = enclosingFunc ? extractFuncNameFromSourceId(enclosingFunc) : ''; receiverTypeName = lookupReceiverType(receiverIndex, funcName, receiverName); } // Fall back to class-as-receiver for static method calls (e.g. UserService.find_user()). // When the receiver name is not a variable in TypeEnv but resolves to a Class/Struct/Interface // through the standard tiered resolution, use it directly as the receiver type. if (!receiverTypeName && receiverName && callForm === 'member') { const typeResolved = ctx.resolve(receiverName, file.path); if (typeResolved && typeResolved.candidates.some( d => d.type === 'Class' || d.type === 'Interface' || d.type === 'Struct' || d.type === 'Enum', )) { receiverTypeName = receiverName; } } // Hoist sourceId so it's available for ACCESSES edge emission during chain walk. 
const enclosingFuncId = findEnclosingFunction(callNode, file.path, ctx); const sourceId = enclosingFuncId || generateId('File', file.path); // Fall back to mixed chain resolution when the receiver is a complex expression // (field chain, call chain, or interleaved — e.g. user.address.city.save() or // svc.getUser().address.save()). Handles all cases with a single unified walk. if (callForm === 'member' && !receiverTypeName && !receiverName) { const receiverNode = extractReceiverNode(nameNode); if (receiverNode) { const extracted = extractMixedChain(receiverNode); if (extracted && extracted.chain.length > 0) { let currentType = extracted.baseReceiverName && typeEnv ? typeEnv.lookup(extracted.baseReceiverName, callNode) : undefined; if (!currentType && extracted.baseReceiverName && receiverIndex.size > 0) { const funcName = enclosingFuncId ? extractFuncNameFromSourceId(enclosingFuncId) : ''; currentType = lookupReceiverType(receiverIndex, funcName, extracted.baseReceiverName); } if (!currentType && extracted.baseReceiverName) { const cr = ctx.resolve(extracted.baseReceiverName, file.path); if (cr?.candidates.some(d => d.type === 'Class' || d.type === 'Interface' || d.type === 'Struct' || d.type === 'Enum', )) { currentType = extracted.baseReceiverName; } } if (currentType) { receiverTypeName = walkMixedChain( extracted.chain, currentType, file.path, ctx, makeAccessEmitter(graph, sourceId), ); } } } } // Build overload hints for languages with inferLiteralType (Java/Kotlin/C#/C++). // Only used when multiple candidates survive arity filtering — ~1-3% of calls. const langConfig = lang ? typeConfigs[lang as keyof typeof typeConfigs] : undefined; const hints: OverloadHints | undefined = langConfig?.inferLiteralType ? 
{ callNode, inferLiteralType: langConfig.inferLiteralType } : undefined;

      const resolved = resolveCallTarget({
        calledName,
        argCount: countCallArguments(callNode),
        callForm,
        receiverTypeName,
      }, file.path, ctx, hints);

      if (!resolved) return;
      const relId = generateId('CALLS', `${sourceId}:${calledName}->${resolved.nodeId}`);

      graph.addRelationship({
        id: relId,
        sourceId,
        targetId: resolved.nodeId,
        type: 'CALLS',
        confidence: resolved.confidence,
        reason: resolved.reason,
      });
    });

    ctx.clearCache();
  }

  // ── Resolve deferred write-access edges ──
  // All properties (including Ruby attr_accessor) are now registered.
  for (const pw of pendingWrites) {
    const fieldOwner = resolveFieldOwnership(pw.receiverTypeName, pw.propertyName, pw.filePath, ctx);
    if (fieldOwner) {
      graph.addRelationship({
        id: generateId('ACCESSES', `${pw.srcId}:${fieldOwner.nodeId}:write`),
        sourceId: pw.srcId,
        targetId: fieldOwner.nodeId,
        type: 'ACCESSES',
        confidence: 1.0,
        reason: 'write',
      });
    }
  }

  if (skippedByLang && skippedByLang.size > 0) {
    for (const [lang, count] of skippedByLang.entries()) {
      console.warn(
        `[ingestion] Skipped ${count} ${lang} file(s) in call processing — ${lang} parser not available.`
      );
    }
  }

  return collectedHeritage;
};

/**
 * Resolution result with confidence scoring.
 *
 * `nodeId` is the graph node the call resolves to; `confidence` and `reason`
 * are derived from the resolution tier (see `toResolveResult`); `returnType`
 * is copied from the matched definition when it has one.
 */
interface ResolveResult {
  nodeId: string;
  confidence: number;
  reason: string;
  returnType?: string;
}

/** Symbol kinds that may be the target of an ordinary (non-constructor) call. */
const CALLABLE_SYMBOL_TYPES = new Set([
  'Function',
  'Method',
  'Constructor',
  'Macro',
  'Delegate',
]);

/** Symbol kinds accepted as the target of a constructor-form call. */
const CONSTRUCTOR_TARGET_TYPES = new Set(['Constructor', 'Class', 'Struct', 'Record']);

/**
 * Narrow `candidates` to symbols that can plausibly be the target of a call.
 *
 * Kind filter:
 *  - constructor calls prefer explicit `Constructor` symbols, then type-like
 *    symbols (`CONSTRUCTOR_TARGET_TYPES`), then any callable kind;
 *  - every other call form keeps only `CALLABLE_SYMBOL_TYPES`.
 *
 * Arity filter (applied only when `argCount` is given and at least one
 * surviving candidate carries `parameterCount`): keep candidates without
 * parameter metadata, plus those where
 * `requiredParameterCount ?? parameterCount <= argCount <= parameterCount`.
 *
 * @returns a new array; empty when nothing survives the kind filter.
 */
const filterCallableCandidates = (
  candidates: readonly SymbolDefinition[],
  argCount?: number,
  callForm?: 'free' | 'member' | 'constructor',
): SymbolDefinition[] => {
  let kindFiltered: SymbolDefinition[];

  if (callForm === 'constructor') {
    const constructors = candidates.filter(c => c.type === 'Constructor');
    if (constructors.length > 0) {
      kindFiltered = constructors;
    } else {
      const types = candidates.filter(c => CONSTRUCTOR_TARGET_TYPES.has(c.type));
      kindFiltered = types.length > 0
        ? types
        : candidates.filter(c => CALLABLE_SYMBOL_TYPES.has(c.type));
    }
  } else {
    kindFiltered = candidates.filter(c => CALLABLE_SYMBOL_TYPES.has(c.type));
  }

  if (kindFiltered.length === 0) return [];
  if (argCount === undefined) return kindFiltered;

  // Without any parameter metadata the arity filter has nothing to compare against.
  const hasParameterMetadata = kindFiltered.some(candidate => candidate.parameterCount !== undefined);
  if (!hasParameterMetadata) return kindFiltered;

  return kindFiltered.filter(candidate =>
    candidate.parameterCount === undefined
    || (argCount >= (candidate.requiredParameterCount ?? candidate.parameterCount)
      && argCount <= candidate.parameterCount)
  );
};

/**
 * Convert a matched definition into a `ResolveResult`, deriving confidence
 * and the human-readable reason from the tier the match was found in.
 */
const toResolveResult = (
  definition: SymbolDefinition,
  tier: ResolutionTier,
): ResolveResult => ({
  nodeId: definition.nodeId,
  confidence: TIER_CONFIDENCE[tier],
  reason: tier === 'same-file' ? 'same-file' : tier === 'import-scoped' ? 'import-resolved' : 'global',
  returnType: definition.returnType,
});

/** Optional hints for overload disambiguation via argument literal types.
 *  Only available on the sequential path (has AST); worker path passes undefined. */
interface OverloadHints {
  callNode: SyntaxNode;
  inferLiteralType: LiteralTypeInferrer;
}

/**
 * Kotlin parameter declarations use capitalised type names (`Int`, `Long`,
 * `Boolean`, ...) while the literal-type inferrer reports the lowercase
 * primitive spelling (`int`, `long`, `boolean`). This table maps the declared
 * spelling onto the primitive one so the two can be compared with `===`.
 *
 * Only the exact names listed here are rewritten; every other type name
 * (including qualified or generic names) is left untouched.
 */
const KOTLIN_BOXED_TO_PRIMITIVE: Readonly<Record<string, string>> = {
  Int: 'int',
  Long: 'long',
  Short: 'short',
  Byte: 'byte',
  Float: 'float',
  Double: 'double',
  Boolean: 'boolean',
  Char: 'char',
};

/**
 * Map a declared parameter type name to its primitive spelling when it is one
 * of the `KOTLIN_BOXED_TO_PRIMITIVE` keys; otherwise return it unchanged.
 *
 * The own-property check matters: the table is a plain object, so a bare
 * `table[name]` lookup for names such as `constructor` or `toString` would
 * return an inherited function instead of a string.
 */
const normalizeJvmTypeName = (name: string): string => {
  if (!Object.prototype.hasOwnProperty.call(KOTLIN_BOXED_TO_PRIMITIVE, name)) return name;
  return KOTLIN_BOXED_TO_PRIMITIVE[name] ?? name;
};

/**
 * Try to disambiguate overloaded candidates using argument literal types.
 * Only invoked when filteredCandidates.length > 1 and at least one has parameterTypes.
 * Returns the single matching candidate, or null if ambiguous/inconclusive.
 *
 * @param candidates overloads that survived kind/arity/receiver filtering
 * @param hints      the call's AST node plus the language's literal-type inferrer
 */
const tryOverloadDisambiguation = (
  candidates: SymbolDefinition[],
  hints: OverloadHints,
): SymbolDefinition | null => {
  if (!candidates.some(c => c.parameterTypes)) return null;

  // Find the argument list node in the call expression.
  // Kotlin wraps value_arguments inside a call_suffix child, so we must also
  // search one level deeper when a direct match is not found.
  let argList: any = hints.callNode.childForFieldName?.('arguments')
    ?? hints.callNode.children.find((c: any) =>
      c.type === 'arguments' || c.type === 'argument_list' || c.type === 'value_arguments'
    );
  if (!argList) {
    // Kotlin: call_expression → call_suffix → value_arguments
    const callSuffix = hints.callNode.children.find((c: any) => c.type === 'call_suffix');
    if (callSuffix) {
      argList = callSuffix.children.find((c: any) => c.type === 'value_arguments');
    }
  }
  if (!argList) return null;

  const argTypes: (string | undefined)[] = [];
  for (const arg of argList.namedChildren) {
    if (arg.type === 'comment') continue;
    // Unwrap argument wrapper nodes before passing to inferLiteralType:
    // - Kotlin value_argument: has 'value' field containing the literal
    // - C# argument: has 'expression' field (handles named args like `name: "alice"`
    //   where firstNamedChild would return name_colon instead of the value)
    // - Java/others: arg IS the literal directly (no unwrapping needed)
    const valueNode = arg.childForFieldName?.('value')
      ?? arg.childForFieldName?.('expression')
      ?? (arg.type === 'argument' || arg.type === 'value_argument' ? arg.firstNamedChild ?? arg : arg);
    argTypes.push(hints.inferLiteralType(valueNode));
  }

  // If no literal types could be inferred, can't disambiguate
  if (argTypes.every(t => t === undefined)) return null;

  const matched = candidates.filter(c => {
    // Keep candidates without type info — conservative: partially-annotated codebases
    // (e.g. C++ with some missing declarations) may have mixed typed/untyped overloads.
    // If one typed and one untyped both survive, matched.length > 1 → returns null (no edge).
    if (!c.parameterTypes) return true;
    return c.parameterTypes.every((pType, i) => {
      // Positions with no argument or no inferable literal type never rule a candidate out.
      if (i >= argTypes.length || !argTypes[i]) return true;
      // Normalise Kotlin boxed type names (Int→int, Boolean→boolean, etc.) so
      // that the stored declaration type matches the inferred literal type.
      return normalizeJvmTypeName(pType) === argTypes[i];
    });
  });

  if (matched.length === 1) return matched[0];
  // Multiple survivors may share the same nodeId (e.g. TypeScript overload signatures +
  // implementation body all collide via generateId). Deduplicate by nodeId — if all
  // matched candidates resolve to the same graph node, disambiguation succeeded.
  if (matched.length > 1) {
    const uniqueIds = new Set(matched.map(c => c.nodeId));
    if (uniqueIds.size === 1) return matched[0];
  }
  return null;
};

/**
 * Resolve a function call to its target node ID using priority strategy:
 *   A. Narrow candidates by scope tier via ctx.resolve()
 *   B. Filter to callable symbol kinds (constructor-aware when callForm is set)
 *   C. Apply arity filtering when parameter metadata is available
 *   D. Apply receiver-type filtering for member calls with typed receivers
 *   E. Apply overload disambiguation via argument literal types (when available)
 *
 * If filtering still leaves multiple candidates, refuse to emit a CALLS edge.
*/
const resolveCallTarget = (
  call: Pick<ExtractedCall, 'calledName' | 'argCount' | 'callForm' | 'receiverTypeName'>,
  currentFile: string,
  ctx: ResolutionContext,
  overloadHints?: OverloadHints,
): ResolveResult | null => {
  const tiered = ctx.resolve(call.calledName, currentFile);
  if (!tiered) return null;

  // Every successful exit reports the tier of the initial name lookup.
  const tier = tiered.tier;
  const callable = filterCallableCandidates(tiered.candidates, call.argCount, call.callForm);

  // D. Receiver-type filtering. For a member call with a known receiver type, the
  //    type name is resolved through the same tiered lookup and the method
  //    candidates are narrowed to that type — first by defining file, then by
  //    ownerId. This runs even when there is a single candidate, because a lone
  //    same-file match may belong to the wrong class (e.g. super.save() must hit
  //    the parent's save, not the child's own save in the same file).
  if (call.callForm === 'member' && call.receiverTypeName) {
    // D1. Resolve the receiver type.
    const receiverType = ctx.resolve(call.receiverTypeName, currentFile);

    if (receiverType && receiverType.candidates.length > 0) {
      const receiverNodeIds = new Set(receiverType.candidates.map(d => d.nodeId));
      const receiverFiles = new Set(receiverType.candidates.map(d => d.filePath));

      // D2. Widen when the tiered lookup produced at most one candidate: the
      //     method may live in another file, so pull every symbol with this name
      //     from the symbol table and re-apply the kind/arity filter.
      const methodPool = callable.length <= 1
        ? filterCallableCandidates(ctx.symbols.lookupFuzzy(call.calledName), call.argCount, call.callForm)
        : callable;

      // D3. Prefer candidates defined in one of the receiver type's files.
      const inReceiverFile = methodPool.filter(c => receiverFiles.has(c.filePath));
      if (inReceiverFile.length === 1) {
        return toResolveResult(inReceiverFile[0], tier);
      }

      // D4. Otherwise narrow by ownerId pointing at the receiver type's node.
      const ownerPool = inReceiverFile.length > 0 ? inReceiverFile : methodPool;
      const ownedByReceiver = ownerPool.filter(c => c.ownerId && receiverNodeIds.has(c.ownerId));
      if (ownedByReceiver.length === 1) {
        return toResolveResult(ownedByReceiver[0], tier);
      }

      // Still several candidates on the receiver: last chance is overload
      // disambiguation (E) on the narrowed pool; otherwise give up rather than guess.
      if (inReceiverFile.length > 1 || ownedByReceiver.length > 1) {
        if (overloadHints) {
          const overloadPool = ownedByReceiver.length > 1 ? ownedByReceiver : inReceiverFile;
          const chosen = tryOverloadDisambiguation(overloadPool, overloadHints);
          if (chosen) return toResolveResult(chosen, tier);
        }
        return null;
      }
    }
  }

  // E. Overload disambiguation on the tier-scoped candidates: match argument
  //    literal types against parameter types (Phase P). Hints exist only on the
  //    sequential path (has AST); the worker path simply skips this step.
  if (callable.length > 1 && overloadHints) {
    const chosen = tryOverloadDisambiguation(callable, overloadHints);
    if (chosen) return toResolveResult(chosen, tier);
  }

  // Exactly one survivor is a match; zero or several means no edge.
  return callable.length === 1 ? toResolveResult(callable[0], tier) : null;
};

// ── Scope key helpers ────────────────────────────────────────────────────
// Scope keys use the format "funcName@startIndex" (produced by type-env.ts).
// Source IDs use "Label:filepath:funcName" (produced by parse-worker.ts).
// NUL (\0) is used as a composite-key separator because it cannot appear
// in source-code identifiers, preventing ambiguous concatenation.
//
// receiverKey stores the FULL scope (funcName@startIndex) to prevent
// collisions between overloaded methods with the same name in different
// classes (e.g. User.save@100 and Repo.save@200 are distinct keys).
// Lookup goes through a secondary funcName-only index (built by
// buildReceiverTypeIndex, queried by lookupReceiverType).
/**
 * Extract the function name from a scope key ("funcName@startIndex" → "funcName").
 *
 * A scope without an `@` separator (including the file-level scope "") is
 * returned unchanged. The previous `slice(0, indexOf('@'))` form turned the
 * missing-separator case into `slice(0, -1)` and silently dropped the last
 * character of the input.
 */
const extractFuncNameFromScope = (scope: string): string => {
  const at = scope.indexOf('@');
  return at >= 0 ? scope.slice(0, at) : scope;
};

/**
 * Extract the trailing function name from a sourceId
 * ("Function:filepath:funcName" → "funcName").
 * Returns "" when the id contains no colon at all.
 */
const extractFuncNameFromSourceId = (sourceId: string): string => {
  const lastColon = sourceId.lastIndexOf(':');
  return lastColon >= 0 ? sourceId.slice(lastColon + 1) : '';
};

/**
 * Build a composite key for receiver type storage.
 * Uses the full scope string (e.g. "save@100") to distinguish overloaded
 * methods with the same name in different classes.
 */
const receiverKey = (scope: string, varName: string): string => `${scope}\0${varName}`;

/**
 * Pre-built secondary index for O(1) receiver type lookups.
 * Built once per file from the verified receiver map, keyed by funcName → varName.
 *
 * An entry is `resolved` when every binding seen for that (funcName, varName)
 * pair agrees on one type, and `ambiguous` as soon as two bindings disagree.
 */
type ReceiverTypeEntry =
  | { readonly kind: 'resolved'; readonly value: string }
  | { readonly kind: 'ambiguous' };
type ReceiverTypeIndex = Map<string, Map<string, ReceiverTypeEntry>>;

/**
 * Build a two-level secondary index from the verified receiver map.
 * The verified map is keyed by `scope\0varName` where scope is either
 * "funcName@startIndex" (inside a function) or "" (file level).
 * Index structure: Map<funcName, Map<varName, ReceiverTypeEntry>>
 *
 * Keys without a NUL separator, with an empty variable name, or with a
 * non-empty scope that lacks `@` are skipped as malformed.
 */
const buildReceiverTypeIndex = (map: Map<string, string>): ReceiverTypeIndex => {
  const index: ReceiverTypeIndex = new Map();

  for (const [key, typeName] of map) {
    const nul = key.indexOf('\0');
    if (nul < 0) continue;

    const scope = key.slice(0, nul);
    const varName = key.slice(nul + 1);
    if (!varName) continue;
    if (scope !== '' && !scope.includes('@')) continue;

    // File-level bindings live under the empty function name.
    const funcName = scope === '' ? '' : scope.slice(0, scope.indexOf('@'));

    let varMap = index.get(funcName);
    if (!varMap) {
      varMap = new Map();
      index.set(funcName, varMap);
    }

    const existing = varMap.get(varName);
    if (existing === undefined) {
      varMap.set(varName, { kind: 'resolved', value: typeName });
    } else if (existing.kind === 'resolved' && existing.value !== typeName) {
      // Same function name + variable name bound to different types
      // (e.g. two same-named functions in one file): refuse to pick one.
      varMap.set(varName, { kind: 'ambiguous' });
    }
  }

  return index;
};

/**
 * O(1) receiver type lookup using the pre-built secondary index.
 * Returns the unique type name if unambiguous. Falls back to file-level scope
 * (funcName "") when the function-level entry is missing or ambiguous.
 */
const lookupReceiverType = (
  index: ReceiverTypeIndex,
  funcName: string,
  varName: string,
): string | undefined => {
  const scoped = index.get(funcName)?.get(varName);
  if (scoped?.kind === 'resolved') return scoped.value;

  // Missing or ambiguous in the function scope: try the file-level bucket.
  // When funcName is already "" this re-reads the same entry, which by now is
  // known not to be resolved, so the result is undefined.
  const fileLevel = index.get('')?.get(varName);
  return fileLevel?.kind === 'resolved' ? fileLevel.value : undefined;
};

interface FieldResolution {
  typeName: string;       // resolved declared type (continues chain threading)
  fieldNodeId: string;    // nodeId of the Property symbol (for ACCESSES edge target)
}

/**
 * Resolve the type that results from accessing `receiverName.fieldName`.
 * Requires declaredType on the Property node (needed for chain walking continuation).
 *
 * @returns the field's declared type (nullable wrappers stripped) together
 *          with the Property node id, or undefined when the owner type, the
 *          field, or its declared type cannot be found.
 */
const resolveFieldAccessType = (
  receiverName: string,
  fieldName: string,
  filePath: string,
  ctx: ResolutionContext,
): FieldResolution | undefined => {
  const fieldDef = resolveFieldOwnership(receiverName, fieldName, filePath, ctx);
  if (!fieldDef?.declaredType) return undefined;

  // Use stripNullable (not extractReturnTypeName) — field types like List<User>
  // should be preserved as-is, not unwrapped to User. Only strip nullable wrappers.
  return {
    typeName: stripNullable(fieldDef.declaredType),
    fieldNodeId: fieldDef.nodeId,
  };
};

/**
 * Resolve a field's Property node given a receiver type name and field name.
 * Does NOT require declaredType — used by write-access tracking where only the
 * fieldNodeId is needed (no chain continuation).
 *
 * The receiver name is resolved through `ctx.resolve`; the first candidate of
 * a type-like kind (Class/Struct/Interface/Enum/Record/Impl) is taken as owner.
 */
const resolveFieldOwnership = (
  receiverName: string,
  fieldName: string,
  filePath: string,
  ctx: ResolutionContext,
): { nodeId: string; declaredType?: string } | undefined => {
  const typeResolved = ctx.resolve(receiverName, filePath);
  if (!typeResolved) return undefined;

  const classDef = typeResolved.candidates.find(
    d => d.type === 'Class' || d.type === 'Struct' || d.type === 'Interface'
      || d.type === 'Enum' || d.type === 'Record' || d.type === 'Impl',
  );
  if (!classDef) return undefined;

  return ctx.symbols.lookupFieldByOwner(classDef.nodeId, fieldName) ?? undefined;
};

/**
 * Create a deduplicated ACCESSES edge emitter for a single source node.
 * Each (sourceId, fieldNodeId) pair is emitted at most once per emitter.
 */
const makeAccessEmitter = (
  graph: KnowledgeGraph,
  sourceId: string,
): OnFieldResolved => {
  // sourceId is fixed for this emitter, so the field id alone identifies the pair.
  const emittedFields = new Set<string>();

  return (fieldNodeId: string): void => {
    if (emittedFields.has(fieldNodeId)) return;
    emittedFields.add(fieldNodeId);

    graph.addRelationship({
      id: generateId('ACCESSES', `${sourceId}:${fieldNodeId}:read`),
      sourceId,
      targetId: fieldNodeId,
      type: 'ACCESSES',
      confidence: 1.0,
      reason: 'read',
    });
  };
};

/**
 * Walk a pre-built mixed chain of field/call steps, threading the current type
 * through each step and returning the final resolved type.
 *
 * Returns `undefined` if any step cannot be resolved (chain is broken).
 * The caller is responsible for seeding `startType` from its own context
 * (TypeEnv, constructor bindings, or static-class fallback).
*/
const walkMixedChain = (
  chain: MixedChainStep[],
  startType: string,
  filePath: string,
  ctx: ResolutionContext,
  onFieldResolved?: OnFieldResolved,
): string | undefined => {
  let currentType: string | undefined = startType;

  for (const step of chain) {
    // An empty type name ends the walk and is returned as-is.
    if (!currentType) break;

    // Both step kinds try field resolution first. For 'call' steps this covers
    // Ruby/Python, where property access is syntactically identical to a method
    // call: a known property with a declaredType wins over method resolution.
    const viaField = resolveFieldAccessType(currentType, step.name, filePath, ctx);
    if (viaField) {
      onFieldResolved?.(viaField.fieldNodeId);
      currentType = viaField.typeName;
      continue;
    }

    // A plain field step has no other way to resolve: the chain is broken.
    if (step.kind === 'field') return undefined;

    const target = resolveCallTarget(
      { calledName: step.name, callForm: 'member', receiverTypeName: currentType },
      filePath,
      ctx,
    );
    if (!target) {
      // Stdlib passthrough: unwrap(), clone(), etc. preserve the receiver type
      if (TYPE_PRESERVING_METHODS.has(step.name)) continue;
      return undefined;
    }

    // The chain can only continue through a method with a usable return type.
    if (!target.returnType) return undefined;
    const returnTypeName = extractReturnTypeName(target.returnType);
    if (!returnTypeName) return undefined;

    currentType = returnTypeName;
  }

  return currentType;
};

/** Callback invoked with the Property node id each time a chain step resolves to a field. */
type OnFieldResolved = (fieldNodeId: string) => void;

/**
 * Fast path: resolve pre-extracted call sites from workers.
 * No AST parsing — workers already extracted calledName + sourceId.
*/
export const processCallsFromExtracted = async (
  graph: KnowledgeGraph,
  extractedCalls: ExtractedCall[],
  ctx: ResolutionContext,
  onProgress?: (current: number, total: number) => void,
  constructorBindings?: FileConstructorBindings[],
) => {
  // Scope-aware receiver types, one index per file: filePath → funcName → varName → entry.
  // The scope dimension prevents collisions when two functions in the same file
  // have same-named locals pointing to different constructor types.
  const fileReceiverTypes = new Map<string, ReceiverTypeIndex>();
  if (constructorBindings) {
    for (const { filePath, bindings } of constructorBindings) {
      const verified = verifyConstructorBindings(bindings, filePath, ctx, graph);
      if (verified.size > 0) {
        fileReceiverTypes.set(filePath, buildReceiverTypeIndex(verified));
      }
    }
  }

  // Group calls by file so the resolution cache can be enabled once per file.
  const byFile = new Map<string, ExtractedCall[]>();
  for (const call of extractedCalls) {
    let list = byFile.get(call.filePath);
    if (!list) {
      list = [];
      byFile.set(call.filePath, list);
    }
    list.push(call);
  }

  const totalFiles = byFile.size;
  let filesProcessed = 0;

  for (const [filePath, calls] of byFile) {
    filesProcessed++;
    // Report progress and yield to the event loop every 100 files.
    if (filesProcessed % 100 === 0) {
      onProgress?.(filesProcessed, totalFiles);
      await yieldToEventLoop();
    }

    ctx.enableCache(filePath);
    const receiverMap = fileReceiverTypes.get(filePath);

    for (const call of calls) {
      let effectiveCall = call;

      // Step 1: resolve receiver type from constructor bindings
      if (!call.receiverTypeName && call.receiverName && receiverMap) {
        const callFuncName = extractFuncNameFromSourceId(call.sourceId);
        const resolvedType = lookupReceiverType(receiverMap, callFuncName, call.receiverName);
        if (resolvedType) {
          effectiveCall = { ...call, receiverTypeName: resolvedType };
        }
      }

      // Step 1b: class-as-receiver for static method calls (e.g. UserService.find_user())
      if (!effectiveCall.receiverTypeName && effectiveCall.receiverName && effectiveCall.callForm === 'member') {
        const typeResolved = ctx.resolve(effectiveCall.receiverName, effectiveCall.filePath);
        if (typeResolved && typeResolved.candidates.some(
          d => d.type === 'Class' || d.type === 'Interface' || d.type === 'Struct' || d.type === 'Enum',
        )) {
          effectiveCall = { ...effectiveCall, receiverTypeName: effectiveCall.receiverName };
        }
      }

      // Step 1c: mixed chain resolution (field, call, or interleaved — e.g. svc.getUser().address.save()).
      // Runs whenever receiverMixedChain is present. Steps 1/1b may have resolved the base receiver
      // type already; that type is used as the chain's starting point.
      if (effectiveCall.receiverMixedChain?.length) {
        // Use the already-resolved base type (from Steps 1/1b) or look it up now.
        let currentType: string | undefined = effectiveCall.receiverTypeName;
        if (!currentType && effectiveCall.receiverName && receiverMap) {
          const callFuncName = extractFuncNameFromSourceId(effectiveCall.sourceId);
          currentType = lookupReceiverType(receiverMap, callFuncName, effectiveCall.receiverName);
        }
        if (!currentType && effectiveCall.receiverName) {
          const typeResolved = ctx.resolve(effectiveCall.receiverName, effectiveCall.filePath);
          if (typeResolved?.candidates.some(d =>
            d.type === 'Class' || d.type === 'Interface' || d.type === 'Struct' || d.type === 'Enum',
          )) {
            currentType = effectiveCall.receiverName;
          }
        }

        const walkedType = currentType
          ? walkMixedChain(
              effectiveCall.receiverMixedChain,
              currentType,
              effectiveCall.filePath,
              ctx,
              makeAccessEmitter(graph, effectiveCall.sourceId),
            )
          : undefined;

        // The method is invoked on the END of the chain, never on its base. When
        // the walk fails the receiver type is therefore unknown: clear it instead
        // of leaving the base type in place, which would make resolveCallTarget
        // filter candidates against the wrong class (a.b.save() matched on a's type).
        effectiveCall = { ...effectiveCall, receiverTypeName: walkedType || undefined };
      }

      const resolved = resolveCallTarget(effectiveCall, effectiveCall.filePath, ctx);
      if (!resolved) continue;

      const relId = generateId('CALLS', `${effectiveCall.sourceId}:${effectiveCall.calledName}->${resolved.nodeId}`);
      graph.addRelationship({
        id: relId,
        sourceId: effectiveCall.sourceId,
        targetId: resolved.nodeId,
        type: 'CALLS',
        confidence: resolved.confidence,
        reason: resolved.reason,
      });
    }

    ctx.clearCache();
  }

  onProgress?.(totalFiles, totalFiles);
};

/**
 * Resolve pre-extracted field write assignments to ACCESSES {reason: 'write'} edges.
 * Accepts optional constructorBindings for return-type-aware receiver inference,
 * mirroring processCallsFromExtracted's verified binding lookup.
 *
 * The receiver type is taken, in order, from the assignment itself, from the
 * verified constructor bindings of its file, and finally from treating the
 * receiver text as a type name. Assignments whose receiver type or field
 * cannot be resolved are skipped without emitting an edge.
 */
export const processAssignmentsFromExtracted = (
  graph: KnowledgeGraph,
  assignments: ExtractedAssignment[],
  ctx: ResolutionContext,
  constructorBindings?: FileConstructorBindings[],
): void => {
  // Build per-file receiver type indexes from verified constructor bindings
  const fileReceiverTypes = new Map<string, ReceiverTypeIndex>();
  if (constructorBindings) {
    for (const { filePath, bindings } of constructorBindings) {
      const verified = verifyConstructorBindings(bindings, filePath, ctx, graph);
      if (verified.size > 0) {
        fileReceiverTypes.set(filePath, buildReceiverTypeIndex(verified));
      }
    }
  }

  for (const asn of assignments) {
    // Tier 1: receiver type already attached to the extracted assignment
    let receiverTypeName = asn.receiverTypeName;

    // Tier 2: verified constructor bindings (return-type inference)
    if (!receiverTypeName && fileReceiverTypes.size > 0) {
      const receiverMap = fileReceiverTypes.get(asn.filePath);
      if (receiverMap) {
        const funcName = extractFuncNameFromSourceId(asn.sourceId);
        receiverTypeName = lookupReceiverType(receiverMap, funcName, asn.receiverText);
      }
    }

    // Tier 3: static class-as-receiver fallback
    if (!receiverTypeName) {
      const resolved = ctx.resolve(asn.receiverText, asn.filePath);
      if (resolved?.candidates.some(d =>
        d.type === 'Class' || d.type === 'Struct' || d.type === 'Interface'
        || d.type === 'Enum' || d.type === 'Record' || d.type === 'Impl',
      )) {
        receiverTypeName = asn.receiverText;
      }
    }

    if (!receiverTypeName) continue;
    const fieldOwner = resolveFieldOwnership(receiverTypeName, asn.propertyName, asn.filePath, ctx);
    if (!fieldOwner) continue;

    graph.addRelationship({
      id: generateId('ACCESSES', `${asn.sourceId}:${fieldOwner.nodeId}:write`),
      sourceId: asn.sourceId,
      targetId: fieldOwner.nodeId,
      type: 'ACCESSES',
      confidence: 1.0,
      reason: 'write',
    });
  }
};

/**
 * Resolve pre-extracted Laravel routes to CALLS edges from route files to controller methods.
 *
 * Routes without a controller or method name, routes whose controller cannot
 * be resolved, and routes whose controller is ambiguous at the global tier are
 * skipped. When the method is not found in the controller's own file, an edge
 * is still emitted to a generated Method id at 0.8× the tier confidence.
 */
export const processRoutesFromExtracted = async (
  graph: KnowledgeGraph,
  extractedRoutes: ExtractedRoute[],
  ctx: ResolutionContext,
  onProgress?: (current: number, total: number) => void,
) => {
  for (let i = 0; i < extractedRoutes.length; i++) {
    const route = extractedRoutes[i];
    // Report progress and yield to the event loop every 50 routes.
    if (i % 50 === 0) {
      onProgress?.(i, extractedRoutes.length);
      await yieldToEventLoop();
    }

    if (!route.controllerName || !route.methodName) continue;

    const controllerResolved = ctx.resolve(route.controllerName, route.filePath);
    if (!controllerResolved || controllerResolved.candidates.length === 0) continue;
    if (controllerResolved.tier === 'global' && controllerResolved.candidates.length > 1) continue;

    const controllerDef = controllerResolved.candidates[0];
    const confidence = TIER_CONFIDENCE[controllerResolved.tier];

    // Look the method name up from the controller's file; only a same-file hit counts.
    const methodResolved = ctx.resolve(route.methodName, controllerDef.filePath);
    const methodId = methodResolved?.tier === 'same-file' ? methodResolved.candidates[0]?.nodeId : undefined;
    const sourceId = generateId('File', route.filePath);

    if (!methodId) {
      // Method not found in the controller file: point at the id such a method would have.
      const guessedId = generateId('Method', `${controllerDef.filePath}:${route.methodName}`);
      const relId = generateId('CALLS', `${sourceId}:route->${guessedId}`);
      graph.addRelationship({
        id: relId,
        sourceId,
        targetId: guessedId,
        type: 'CALLS',
        confidence: confidence * 0.8,
        reason: 'laravel-route',
      });
      continue;
    }

    const relId = generateId('CALLS', `${sourceId}:route->${methodId}`);
    graph.addRelationship({
      id: relId,
      sourceId,
      targetId: methodId,
      type: 'CALLS',
      confidence,
      reason: 'laravel-route',
    });
  }

  onProgress?.(extractedRoutes.length, extractedRoutes.length);
};

================================================
FILE: gitnexus/src/core/ingestion/call-routing.ts
================================================
/**
 * Shared Ruby call routing logic.
 *
 * Ruby expresses imports, heritage (mixins), and property definitions as
 * method calls rather than syntax-level constructs. This module provides a
 * routing function used by the CLI call-processor, CLI parse-worker, and
 * the web call-processor so that the classification logic lives in one place.
 *
 * NOTE: This file is intentionally duplicated in gitnexus-web/ because the
 * two packages have separate build targets (Node native vs WASM/browser).
 * Keep both copies in sync until a shared package is introduced.
 */

import { SupportedLanguages } from '../../config/supported-languages.js';

// ── Call routing dispatch table ─────────────────────────────────────────────

/** null = this call was not routed; fall through to default call handling */
export type CallRoutingResult = RubyCallRouting | null;

export type CallRouter = (
  calledName: string,
  callNode: any,
) => CallRoutingResult;

/** No-op router: returns null for every call (passthrough to normal processing) */
const noRouting: CallRouter = () => null;

/** Per-language call routing.
noRouting = no special routing (normal call processing) */
export const callRouters = {
  [SupportedLanguages.JavaScript]: noRouting,
  [SupportedLanguages.TypeScript]: noRouting,
  [SupportedLanguages.Python]: noRouting,
  [SupportedLanguages.Java]: noRouting,
  [SupportedLanguages.Kotlin]: noRouting,
  [SupportedLanguages.Go]: noRouting,
  [SupportedLanguages.Rust]: noRouting,
  [SupportedLanguages.CSharp]: noRouting,
  [SupportedLanguages.PHP]: noRouting,
  [SupportedLanguages.Swift]: noRouting,
  [SupportedLanguages.CPlusPlus]: noRouting,
  [SupportedLanguages.C]: noRouting,
  [SupportedLanguages.Ruby]: routeRubyCall,
} satisfies Record<SupportedLanguages, CallRouter>;

// ── Result types ────────────────────────────────────────────────────────────

/** Semantic classification of a Ruby call node, tagged by `kind`. */
export type RubyCallRouting =
  | { kind: 'import'; importPath: string; isRelative: boolean }
  | { kind: 'heritage'; items: RubyHeritageItem[] }
  | { kind: 'properties'; items: RubyPropertyItem[] }
  | { kind: 'call' }
  | { kind: 'skip' };

/** One mixin relationship declared via include / extend / prepend. */
export interface RubyHeritageItem {
  enclosingClass: string;
  mixinName: string;
  heritageKind: 'include' | 'extend' | 'prepend';
}

export type RubyAccessorType = 'attr_accessor' | 'attr_reader' | 'attr_writer';

/** One property declared via attr_accessor / attr_reader / attr_writer. */
export interface RubyPropertyItem {
  propName: string;
  accessorType: RubyAccessorType;
  startLine: number;
  endLine: number;
  /** YARD @return [Type] annotation preceding the attr_accessor call */
  declaredType?: string;
}

// ── Pre-allocated singletons for common return values ────────────────────────
const CALL_RESULT: RubyCallRouting = { kind: 'call' };
const SKIP_RESULT: RubyCallRouting = { kind: 'skip' };

/** Max depth for parent-walking loops to prevent pathological AST traversals */
const MAX_PARENT_DEPTH = 50;

// ── Routing function ────────────────────────────────────────────────────────

/**
 * Classify a Ruby call node and extract its semantic payload.
 *
 * - `require` / `require_relative` → `import` (or `skip` when no usable string
 *   argument is found)
 * - `include` / `extend` / `prepend` → `heritage` (or `skip` when there is no
 *   enclosing class/module or no constant argument)
 * - `attr_accessor` / `attr_reader` / `attr_writer` → `properties` (or `skip`
 *   when no symbol argument is found)
 * - anything else → `call`
 *
 * @param calledName - The method name (e.g. 'require', 'include', 'attr_accessor')
 * @param callNode - The tree-sitter `call` AST node
 * @returns A discriminated union describing the call's semantic role
 */
export function routeRubyCall(calledName: string, callNode: any): RubyCallRouting {
  switch (calledName) {
    // ── require / require_relative → import ───────────────────────────────
    case 'require':
    case 'require_relative': {
      const args = callNode.childForFieldName?.('arguments');
      const stringNode = args?.children?.find((c: any) => c.type === 'string');
      const contentNode = stringNode?.children?.find((c: any) => c.type === 'string_content');
      if (!contentNode) return SKIP_RESULT;

      const rawPath: string = contentNode.text;
      // Validate: reject empty paths, control chars (incl. NUL), excessively long paths
      const isUnusable = !rawPath || rawPath.length > 1024 || /[\x00-\x1f]/.test(rawPath);
      if (isUnusable) return SKIP_RESULT;

      const isRelative = calledName === 'require_relative';
      // require_relative paths are made explicitly relative when they are not already.
      const importPath = isRelative && !rawPath.startsWith('.') ? './' + rawPath : rawPath;
      return { kind: 'import', importPath, isRelative };
    }

    // ── include / extend / prepend → heritage (mixin) ─────────────────────
    case 'include':
    case 'extend':
    case 'prepend': {
      // Find the nearest named class/module ancestor, looking at most
      // MAX_PARENT_DEPTH levels up.
      let enclosingClass: string | null = null;
      for (
        let ancestor = callNode.parent, depth = 1;
        ancestor && depth <= MAX_PARENT_DEPTH;
        ancestor = ancestor.parent, depth++
      ) {
        if (ancestor.type !== 'class' && ancestor.type !== 'module') continue;
        const nameNode = ancestor.childForFieldName?.('name');
        if (nameNode) {
          enclosingClass = nameNode.text;
          break;
        }
      }
      if (!enclosingClass) return SKIP_RESULT;

      const heritageKind = calledName as 'include' | 'extend' | 'prepend';
      const mixins: RubyHeritageItem[] = [];
      const args = callNode.childForFieldName?.('arguments');
      for (const arg of (args?.children ?? [])) {
        const isConstantRef = arg.type === 'constant' || arg.type === 'scope_resolution';
        if (isConstantRef) {
          mixins.push({ enclosingClass, mixinName: arg.text, heritageKind });
        }
      }

      if (mixins.length === 0) return SKIP_RESULT;
      return { kind: 'heritage', items: mixins };
    }

    // ── attr_accessor / attr_reader / attr_writer → property definitions ──
    case 'attr_accessor':
    case 'attr_reader':
    case 'attr_writer': {
      // Extract YARD @return [Type] from preceding comment (e.g. `# @return [Address]`).
      // Walk backwards over siblings: stop at the first comment carrying an
      // @return tag, or at the first named non-comment sibling.
      let yardType: string | undefined;
      for (let prev = callNode.previousSibling; prev; prev = prev.previousSibling) {
        if (prev.type === 'comment') {
          const tag = /@return\s+\[([^\]]+)\]/.exec(prev.text);
          if (!tag) continue;
          // Keep only the leading capitalised identifier: "User" → "User",
          // "Array<User>" → "Array". Anything else leaves the type unset.
          const leading = tag[1].trim().match(/^([A-Z]\w*)/);
          if (leading) yardType = leading[1];
          break;
        }
        if (prev.isNamed) break;
      }

      const accessorType = calledName as RubyAccessorType;
      const properties: RubyPropertyItem[] = [];
      const args = callNode.childForFieldName?.('arguments');
      for (const arg of (args?.children ?? [])) {
        if (arg.type !== 'simple_symbol') continue;
        const symbolText: string = arg.text;
        properties.push({
          propName: symbolText.startsWith(':') ? symbolText.slice(1) : symbolText,
          accessorType,
          startLine: arg.startPosition.row,
          endLine: arg.endPosition.row,
          ...(yardType ? { declaredType: yardType } : {}),
        });
      }

      if (properties.length === 0) return SKIP_RESULT;
      return { kind: 'properties', items: properties };
    }

    // ── Everything else → regular call ────────────────────────────────────
    default:
      return CALL_RESULT;
  }
}

================================================
FILE: gitnexus/src/core/ingestion/cluster-enricher.ts
================================================
/**
 * Cluster Enricher
 *
 * LLM-based enrichment for community clusters.
 * Generates semantic names, keywords, and descriptions using an LLM.
*/

import { CommunityNode } from './community-processor.js';

// ============================================================================
// TYPES
// ============================================================================

export interface ClusterEnrichment {
  name: string;
  keywords: string[];
  description: string;
}

export interface EnrichmentResult {
  enrichments: Map<string, ClusterEnrichment>;
  tokensUsed: number;
}

export interface LLMClient {
  generate: (prompt: string) => Promise<string>;
}

export interface ClusterMemberInfo {
  name: string;
  filePath: string;
  type: string; // 'Function' | 'Class' | 'Method' | 'Interface'
}

// ============================================================================
// SHARED HELPERS
// ============================================================================

/** Enrichment used whenever the LLM output is missing or unusable. */
const heuristicEnrichment = (label: string): ClusterEnrichment => ({
  name: label,
  keywords: [],
  description: '',
});

/**
 * Coerce one untrusted value parsed from LLM JSON into a well-typed enrichment.
 * A missing, blank or non-string name falls back to `fallbackLabel`;
 * non-string keywords are dropped; a non-string description becomes ''.
 */
const toEnrichment = (raw: unknown, fallbackLabel: string): ClusterEnrichment => {
  const record: Record<string, unknown> =
    typeof raw === 'object' && raw !== null ? (raw as Record<string, unknown>) : {};
  const { name, keywords, description } = record;
  return {
    name: typeof name === 'string' && name.trim() !== '' ? name : fallbackLabel,
    keywords: Array.isArray(keywords)
      ? keywords.filter((k): k is string => typeof k === 'string')
      : [],
    description: typeof description === 'string' ? description : '',
  };
};

/** Rough token estimate (~4 characters per token), rounded up to a whole token. */
const estimateTokens = (prompt: string, response: string): number =>
  Math.ceil((prompt.length + response.length) / 4);

// ============================================================================
// PROMPT TEMPLATE
// ============================================================================

/**
 * Build the single-cluster prompt. Only the first 20 members are listed;
 * the remainder is summarised as "(+N more)".
 */
const buildEnrichmentPrompt = (
  members: ClusterMemberInfo[],
  heuristicLabel: string
): string => {
  // Limit to first 20 members to control token usage
  const limitedMembers = members.slice(0, 20);
  const memberList = limitedMembers
    .map(m => `${m.name} (${m.type})`)
    .join(', ');
  const overflowNote = members.length > 20 ? ` (+${members.length - 20} more)` : '';

  // NOTE(review): the template is kept on one line exactly as it appears in
  // this dump; confirm whether the upstream template contains line breaks.
  return `Analyze this code cluster and provide a semantic name and short description. Heuristic: "${heuristicLabel}" Members: ${memberList}${overflowNote} Reply with JSON only: {"name": "2-4 word semantic name", "description": "One sentence describing purpose"}`;
};

// ============================================================================
// PARSE LLM RESPONSE
// ============================================================================

/**
 * Parse a single-cluster LLM reply.
 * Never throws: any reply without usable JSON yields the heuristic enrichment.
 */
const parseEnrichmentResponse = (
  response: string,
  fallbackLabel: string
): ClusterEnrichment => {
  try {
    // Extract JSON from response (handles markdown code blocks)
    const jsonMatch = response.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
      throw new Error('No JSON found in response');
    }
    return toEnrichment(JSON.parse(jsonMatch[0]), fallbackLabel);
  } catch {
    // Fallback if parsing fails
    return heuristicEnrichment(fallbackLabel);
  }
};

// ============================================================================
// MAIN ENRICHMENT FUNCTION
// ============================================================================

/**
 * Enrich clusters with LLM-generated names, keywords, and descriptions.
 *
 * One LLM call is made per community that has members. The single-cluster
 * prompt only asks for a name and a description, so `keywords` stays empty
 * unless the model volunteers a `keywords` array. Communities without members,
 * and communities whose call or parse fails, receive the heuristic label.
 *
 * @param communities - Community nodes to enrich
 * @param memberMap - Map of communityId -> member info
 * @param llmClient - LLM client for generation
 * @param onProgress - Progress callback, invoked before each community is processed
 * @returns One enrichment per community plus a rough, whole-number token estimate
 */
export const enrichClusters = async (
  communities: CommunityNode[],
  memberMap: Map<string, ClusterMemberInfo[]>,
  llmClient: LLMClient,
  onProgress?: (current: number, total: number) => void
): Promise<EnrichmentResult> => {
  const enrichments = new Map<string, ClusterEnrichment>();
  let tokensUsed = 0;

  for (let i = 0; i < communities.length; i++) {
    const community = communities[i];
    const members = memberMap.get(community.id) ?? [];
    onProgress?.(i + 1, communities.length);

    if (members.length === 0) {
      // No members, use heuristic
      enrichments.set(community.id, heuristicEnrichment(community.heuristicLabel));
      continue;
    }

    try {
      const prompt = buildEnrichmentPrompt(members, community.heuristicLabel);
      const response = await llmClient.generate(prompt);
      tokensUsed += estimateTokens(prompt, response);
      enrichments.set(community.id, parseEnrichmentResponse(response, community.heuristicLabel));
    } catch (error) {
      // On error, fallback to heuristic
      console.warn(`Failed to enrich cluster ${community.id}:`, error);
      enrichments.set(community.id, heuristicEnrichment(community.heuristicLabel));
    }
  }

  return { enrichments, tokensUsed };
};

// ============================================================================
// BATCH ENRICHMENT (more efficient)
// ============================================================================

/**
 * Enrich multiple clusters in a single LLM call (batch mode)
 * More efficient for token usage but requires larger context window
 *
 * Reply items are matched to communities by `id`. Items whose id is not part
 * of the batch just sent are ignored, and items without a usable name keep the
 * community's heuristic label. Every community not covered by a reply is
 * filled with its heuristic label at the end.
 *
 * @param communities - Community nodes to enrich
 * @param memberMap - Map of communityId -> member info (first 15 members are sent)
 * @param llmClient - LLM client for generation
 * @param batchSize - Communities per LLM call; values below 1 (or NaN) are treated as 1
 * @param onProgress - Progress callback, invoked before each batch is sent
 */
export const enrichClustersBatch = async (
  communities: CommunityNode[],
  memberMap: Map<string, ClusterMemberInfo[]>,
  llmClient: LLMClient,
  batchSize: number = 5,
  onProgress?: (current: number, total: number) => void
): Promise<EnrichmentResult> => {
  const enrichments = new Map<string, ClusterEnrichment>();
  let tokensUsed = 0;

  // A step of 0, a negative step or NaN would never advance the loop below.
  const step = batchSize >= 1 ? Math.floor(batchSize) : 1;

  // Process in batches
  for (let i = 0; i < communities.length; i += step) {
    // Report progress
    onProgress?.(Math.min(i + step, communities.length), communities.length);

    const batch = communities.slice(i, i + step);
    const labelsById = new Map(batch.map(c => [c.id, c.heuristicLabel] as const));

    const batchPrompt = batch.map((community, idx) => {
      const members = memberMap.get(community.id) ?? [];
      const limitedMembers = members.slice(0, 15);
      const memberList = limitedMembers
        .map(m => `${m.name} (${m.type})`)
        .join(', ');
      return `Cluster ${idx + 1} (id: ${community.id}): Heuristic: "${community.heuristicLabel}" Members: ${memberList}`;
    }).join('\n\n');

    const prompt = `Analyze these code clusters and generate semantic names, keywords, and descriptions. ${batchPrompt} Output JSON array: [ {"id": "comm_X", "name": "...", "keywords": [...], "description": "..."}, ... ]`;

    try {
      const response = await llmClient.generate(prompt);
      tokensUsed += estimateTokens(prompt, response);

      // Parse batch response
      const jsonMatch = response.match(/\[[\s\S]*\]/);
      if (jsonMatch) {
        const parsed: unknown = JSON.parse(jsonMatch[0]);
        if (Array.isArray(parsed)) {
          for (const item of parsed) {
            const id = (item as { id?: unknown } | null)?.id;
            if (typeof id !== 'string') continue;
            // Accept only ids that were actually sent in this batch.
            const fallbackLabel = labelsById.get(id);
            if (fallbackLabel === undefined) continue;
            enrichments.set(id, toEnrichment(item, fallbackLabel));
          }
        }
      }
    } catch (error) {
      console.warn('Batch enrichment failed, falling back to heuristics:', error);
      // Fallback for this batch
      for (const community of batch) {
        enrichments.set(community.id, heuristicEnrichment(community.heuristicLabel));
      }
    }
  }

  // Fill in any missing communities
  for (const community of communities) {
    if (!enrichments.has(community.id)) {
      enrichments.set(community.id, heuristicEnrichment(community.heuristicLabel));
    }
  }

  return { enrichments, tokensUsed };
};

================================================
FILE: gitnexus/src/core/ingestion/community-processor.ts
================================================
/**
 * Community Detection Processor
 *
 * Uses the Leiden algorithm (via graphology-communities-leiden) to detect
 * communities/clusters in the code graph based on CALLS relationships.
 *
 * Communities represent groups of code that work together frequently,
 * helping agents navigate the codebase by functional area rather than file structure.
 */

// NOTE: The Leiden algorithm source is vendored from graphology's repo
// (src/communities-leiden) because it was never published to npm.
// We use createRequire to load the CommonJS vendored files in ESM context.
import Graph from 'graphology';
import { createRequire } from 'node:module';
import { fileURLToPath } from 'node:url';
import { dirname, resolve } from 'node:path';
import { KnowledgeGraph, NodeLabel } from '../graph/types.js';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Navigate to package root (works from both src/ and dist/)
const leidenPath = resolve(__dirname, '..', '..', '..', 'vendor', 'leiden', 'index.cjs');
const _require = createRequire(import.meta.url);
const leiden = _require(leidenPath);

// ============================================================================
// TYPES
// ============================================================================

export interface CommunityNode {
  id: string;
  label: string;
  heuristicLabel: string;
  cohesion: number;
  symbolCount: number;
}

export interface CommunityMembership {
  nodeId: string;
  communityId: string;
}

export interface CommunityDetectionResult {
  communities: CommunityNode[];
  memberships: CommunityMembership[];
  stats: {
    totalCommunities: number;
    modularity: number;
    nodesProcessed: number;
  };
}

// ============================================================================
// COMMUNITY COLORS (for visualization)
// ============================================================================

export const COMMUNITY_COLORS = [
  '#ef4444', // red
  '#f97316', // orange
  '#eab308', // yellow
  '#22c55e', // green
  '#06b6d4', // cyan
  '#3b82f6', // blue
  '#8b5cf6', // violet
  '#d946ef', // fuchsia
  '#ec4899', // pink
  '#f43f5e', // rose
  '#14b8a6', // teal
  '#84cc16', // lime
];

/**
 * Pick a palette colour for a community, cycling once the palette is exhausted.
 *
 * @param communityIndex - Integer index of the community; negative values wrap around
 */
export const getCommunityColor = (communityIndex: number): string => {
  const paletteSize = COMMUNITY_COLORS.length;
  // `%` keeps the sign of the dividend, so normalise to stay inside the palette.
  return COMMUNITY_COLORS[((communityIndex % paletteSize) + paletteSize) % paletteSize];
};

// ============================================================================
// MAIN PROCESSOR
// ============================================================================

/**
 * Detect communities in the knowledge graph using Leiden algorithm
 *
 * This runs AFTER all relationships (CALLS, IMPORTS, etc.) have been built.
 * It uses primarily CALLS edges to cluster code that works together.
 *
 * @param knowledgeGraph - Graph whose symbol nodes and relationships are clustered
 * @param onProgress - Optional callback receiving a status message and a 0-100 progress value
 * @returns Community nodes (singleton communities are omitted by `createCommunityNodes`),
 *          one membership entry per clustered node, and summary statistics
 */
export const processCommunities = async (
  knowledgeGraph: KnowledgeGraph,
  onProgress?: (message: string, progress: number) => void
): Promise<CommunityDetectionResult> => {
  onProgress?.('Building graph for community detection...', 0);

  // Pre-check total symbol count to determine large-graph mode before building
  let symbolCount = 0;
  knowledgeGraph.forEachNode(node => {
    if (node.label === 'Function' || node.label === 'Class' || node.label === 'Method' || node.label === 'Interface') {
      symbolCount++;
    }
  });
  const isLarge = symbolCount > 10_000;

  const graph = buildGraphologyGraph(knowledgeGraph, isLarge);

  if (graph.order === 0) {
    return {
      communities: [],
      memberships: [],
      stats: { totalCommunities: 0, modularity: 0, nodesProcessed: 0 }
    };
  }

  const nodeCount = graph.order;
  const edgeCount = graph.size;
  onProgress?.(`Running Leiden on ${nodeCount} nodes, ${edgeCount} edges${isLarge ? ` (filtered from ${symbolCount} symbols)` : ''}...`, 30);

  // Large graphs: higher resolution + capped iterations.
  // NOTE(review): an earlier comment cited "Python leidenalg default of 2"
  // iterations, but the cap passed below is 3 — confirm which is intended.
  // Timeout: abort after 60s for pathological graph structures.
  // NOTE(review): the Leiden call is evaluated before the timer is scheduled;
  // if `leiden.detailed` is synchronous the race can never time out — confirm.
  const LEIDEN_TIMEOUT_MS = 60_000;
  let details: any;
  let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
  try {
    details = await Promise.race([
      Promise.resolve((leiden as any).detailed(graph, {
        resolution: isLarge ? 2.0 : 1.0,
        maxIterations: isLarge ? 3 : 0,
      })),
      new Promise((_, reject) => {
        timeoutHandle = setTimeout(() => reject(new Error('Leiden timeout')), LEIDEN_TIMEOUT_MS);
      }),
    ]);
  } catch (e: unknown) {
    if (e instanceof Error && e.message === 'Leiden timeout') {
      onProgress?.('Community detection timed out, using fallback...', 60);
      // Fallback: assign all nodes to community 0
      const communities: Record<string, number> = {};
      graph.forEachNode((node: string) => { communities[node] = 0; });
      details = { communities, count: 1, modularity: 0 };
    } else {
      throw e;
    }
  } finally {
    // A pending timer keeps the Node.js event loop alive for the full 60s
    // even after detection has finished, so always cancel it.
    if (timeoutHandle !== undefined) clearTimeout(timeoutHandle);
  }

  onProgress?.(`Found ${details.count} communities...`, 60);

  // Step 3: Create community nodes with heuristic labels
  const communityNodes = createCommunityNodes(
    details.communities as Record<string, number>,
    details.count,
    graph,
    knowledgeGraph
  );

  onProgress?.('Creating membership edges...', 80);

  // Step 4: Create membership mappings
  const memberships: CommunityMembership[] = [];
  Object.entries(details.communities).forEach(([nodeId, communityNum]) => {
    memberships.push({
      nodeId,
      communityId: `comm_${communityNum}`,
    });
  });

  onProgress?.('Community detection complete!', 100);

  return {
    communities: communityNodes,
    memberships,
    stats: {
      totalCommunities: details.count,
      modularity: details.modularity,
      nodesProcessed: graph.order,
    }
  };
};

// ============================================================================
// HELPER: Build graphology graph from knowledge graph
// ============================================================================

/**
 * Build a graphology graph containing only symbol nodes and clustering edges.
 * For large graphs (>10K symbols), filter out low-confidence fuzzy-global edges
 * and degree-1 nodes that add noise and massively increase Leiden runtime.
*/
const MIN_CONFIDENCE_LARGE = 0.5;

const buildGraphologyGraph = (knowledgeGraph: KnowledgeGraph, isLarge: boolean): any => {
  const graph = new (Graph as any)({ type: 'undirected', allowSelfLoops: false });
  const symbolLabels = new Set<NodeLabel>(['Function', 'Class', 'Method', 'Interface']);
  const edgeTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);

  // Pass 1: count, per endpoint, the qualifying relationships that touch it.
  // Presence in this map doubles as the "is connected" marker.
  const degreeOf = new Map<string, number>();
  knowledgeGraph.forEachRelationship(rel => {
    const isSelfLoop = rel.sourceId === rel.targetId;
    if (isSelfLoop || !edgeTypes.has(rel.type)) return;
    if (isLarge && rel.confidence < MIN_CONFIDENCE_LARGE) return;
    for (const endpoint of [rel.sourceId, rel.targetId]) {
      degreeOf.set(endpoint, (degreeOf.get(endpoint) ?? 0) + 1);
    }
  });

  // Pass 2: add connected symbol nodes. In large-graph mode nodes touched by
  // fewer than two relationships are left out to save Leiden iteration time.
  knowledgeGraph.forEachNode(node => {
    const degree = degreeOf.get(node.id);
    if (degree === undefined || !symbolLabels.has(node.label)) return;
    if (isLarge && degree < 2) return;
    graph.addNode(node.id, {
      name: node.properties.name,
      filePath: node.properties.filePath,
      type: node.label,
    });
  });

  // Pass 3: add one undirected edge per connected pair of retained nodes.
  knowledgeGraph.forEachRelationship(rel => {
    const { sourceId, targetId } = rel;
    if (!edgeTypes.has(rel.type)) return;
    if (isLarge && rel.confidence < MIN_CONFIDENCE_LARGE) return;
    if (sourceId === targetId) return;
    if (!graph.hasNode(sourceId) || !graph.hasNode(targetId)) return;
    if (graph.hasEdge(sourceId, targetId)) return;
    graph.addEdge(sourceId, targetId);
  });

  return graph;
};

// ============================================================================
// HELPER: Create community nodes with heuristic labels
// ============================================================================

/**
 * Turn the node → community assignment into Community nodes.
 * Communities with fewer than two members are dropped; the result is ordered
 * by member count, largest first.
 */
const createCommunityNodes = (
  communities: Record<string, number>,
  communityCount: number,
  graph: any,
  knowledgeGraph: KnowledgeGraph
): CommunityNode[] => {
  // Bucket node IDs by the community number they were assigned.
  const membersByCommunity = new Map<number, string[]>();
  for (const [nodeId, commNum] of Object.entries(communities)) {
    const bucket = membersByCommunity.get(commNum);
    if (bucket) {
      bucket.push(nodeId);
    } else {
      membersByCommunity.set(commNum, [nodeId]);
    }
  }

  // File path per node ID, used for folder-based labelling.
  const nodePathMap = new Map<string, string>();
  for (const node of knowledgeGraph.iterNodes()) {
    const { filePath } = node.properties;
    if (filePath) nodePathMap.set(node.id, filePath);
  }

  const communityNodes: CommunityNode[] = [];
  for (const [commNum, memberIds] of membersByCommunity) {
    // A single-member community is just an isolated node.
    if (memberIds.length < 2) continue;

    const heuristicLabel = generateHeuristicLabel(memberIds, nodePathMap, graph, commNum);
    communityNodes.push({
      id: `comm_${commNum}`,
      label: heuristicLabel,
      heuristicLabel,
      cohesion: calculateCohesion(memberIds, graph),
      symbolCount: memberIds.length,
    });
  }

  // Largest communities first.
  return communityNodes.sort((a, b) => b.symbolCount - a.symbolCount);
};

// ============================================================================
// HELPER: Generate heuristic label from folder patterns
// ============================================================================

/**
 * Derive a readable label for a community.
 * Preference order: the most frequent non-generic parent folder of the members,
 * then a shared name prefix (more than two names, prefix longer than two
 * characters), then `Cluster_<commNum>`.
 */
const generateHeuristicLabel = (
  memberIds: string[],
  nodePathMap: Map<string, string>,
  graph: any,
  commNum: number
): string => {
  const genericFolders = new Set(['src', 'lib', 'core', 'utils', 'common', 'shared', 'helpers']);
  const capitalize = (text: string): string => text.charAt(0).toUpperCase() + text.slice(1);

  // Tally each member's immediate parent directory, ignoring generic names.
  const folderCounts = new Map<string, number>();
  for (const nodeId of memberIds) {
    const segments = (nodePathMap.get(nodeId) || '').split('/').filter(Boolean);
    if (segments.length < 2) continue;
    const folder = segments[segments.length - 2];
    if (genericFolders.has(folder.toLowerCase())) continue;
    folderCounts.set(folder, (folderCounts.get(folder) || 0) + 1);
  }

  // The first folder to reach the highest count wins ties.
  let bestFolder = '';
  let bestCount = 0;
  for (const [folder, count] of folderCounts) {
    if (count > bestCount) {
      bestCount = count;
      bestFolder = folder;
    }
  }
  if (bestFolder) return capitalize(bestFolder);

  // No usable folder: look for a prefix shared by the member names.
  const names: string[] = memberIds
    .map(nodeId => graph.getNodeAttribute(nodeId, 'name'))
    .filter(Boolean);
  if (names.length > 2) {
    const sharedPrefix = findCommonPrefix(names);
    if (sharedPrefix.length > 2) return capitalize(sharedPrefix);
  }

  // Last resort: generic name made unique by the community number.
  return `Cluster_${commNum}`;
};

/**
 * Longest prefix shared by every string in the list ('' for an empty list).
 * After sorting, only the first and last entries need to be compared.
 */
const findCommonPrefix = (strings: string[]): string => {
  if (strings.length === 0) return '';

  const ordered = [...strings].sort();
  const lowest = ordered[0];
  const highest = ordered[ordered.length - 1];

  let matched = 0;
  for (; matched < lowest.length; matched++) {
    if (lowest[matched] !== highest[matched]) break;
  }
  return lowest.substring(0, matched);
};

// ============================================================================
// HELPER: Calculate community cohesion
// ============================================================================

/**
 * Cohesion estimate in [0, 1]: of all neighbour links seen from the sampled
 * members, the fraction that lead to another member of the same community.
 * Only the first 50 members are inspected; communities of at most one member,
 * or with no links at all, score 1.
 */
const calculateCohesion = (memberIds: string[], graph: any): number => {
  if (memberIds.length <= 1) return 1.0;

  const SAMPLE_SIZE = 50;
  const members = new Set(memberIds);
  const sampled = memberIds.length > SAMPLE_SIZE ? memberIds.slice(0, SAMPLE_SIZE) : memberIds;

  let linksSeen = 0;
  let linksInside = 0;
  sampled.forEach(nodeId => {
    if (!graph.hasNode(nodeId)) return;
    graph.forEachNeighbor(nodeId, (neighbor: string) => {
      linksSeen += 1;
      if (members.has(neighbor)) linksInside += 1;
    });
  });

  return linksSeen === 0 ? 1.0 : Math.min(1.0, linksInside / linksSeen);
};

================================================
FILE: gitnexus/src/core/ingestion/constants.ts
================================================
/**
 * Default minimum buffer size for tree-sitter parsing (512 KB).
 * tree-sitter requires bufferSize >= file size in bytes.
 */
export const TREE_SITTER_BUFFER_SIZE = 512 * 1024;

/**
 * Maximum buffer size cap (32 MB) to prevent OOM on huge files.
 * Also used as the file-size skip threshold — files larger than this are not parsed.
 */
export const TREE_SITTER_MAX_BUFFER = 32 * 1024 * 1024;

/**
 * Compute adaptive buffer size for tree-sitter parsing.
 * Uses 2× file size, clamped between 512 KB and 32 MB.
 * Previous 256 KB fixed limit silently skipped files > ~200 KB (e.g., imgui.h at 411 KB).
*/
export const getTreeSitterBufferSize = (contentLength: number): number =>
  Math.min(Math.max(contentLength * 2, TREE_SITTER_BUFFER_SIZE), TREE_SITTER_MAX_BUFFER);

================================================
FILE: gitnexus/src/core/ingestion/entry-point-scoring.ts
================================================
/**
 * Entry Point Scoring
 *
 * Calculates entry point scores for process detection based on:
 * 1. Call ratio (existing algorithm - callees / (callers + 1))
 * 2. Export status (exported functions get higher priority)
 * 3. Name patterns (functions matching entry point patterns like handle*, on*, *Controller)
 * 4. Framework detection (path-based detection for Next.js, Express, Django, etc.)
 *
 * This module is language-agnostic - language-specific patterns are defined per language.
 */

import { detectFrameworkFromPath } from './framework-detection.js';
import { SupportedLanguages } from '../../config/supported-languages.js';

// ============================================================================
// NAME PATTERNS - per language (languages without an entry use the universal set)
// ============================================================================

/**
 * Common entry point naming patterns by language
 * These patterns indicate functions that are likely feature entry points
 */
const ENTRY_POINT_PATTERNS: Record<string, RegExp[]> = {
  // Universal patterns (apply to all languages)
  '*': [
    /^(main|init|bootstrap|start|run|setup|configure)$/i,
    /^handle[A-Z]/,     // handleLogin, handleSubmit
    /^on[A-Z]/,         // onClick, onSubmit
    /Handler$/,         // RequestHandler
    /Controller$/,      // UserController
    /^process[A-Z]/,    // processPayment
    /^execute[A-Z]/,    // executeQuery
    /^perform[A-Z]/,    // performAction
    /^dispatch[A-Z]/,   // dispatchEvent
    /^trigger[A-Z]/,    // triggerAction
    /^fire[A-Z]/,       // fireEvent
    /^emit[A-Z]/,       // emitEvent
  ],

  // JavaScript/TypeScript
  [SupportedLanguages.JavaScript]: [
    /^use[A-Z]/,        // React hooks (useEffect, etc.)
  ],
  [SupportedLanguages.TypeScript]: [
    /^use[A-Z]/,        // React hooks
  ],

  // Python
  [SupportedLanguages.Python]: [
    /^app$/,                               // Flask/FastAPI app
    /^(get|post|put|delete|patch)_/i,      // REST conventions
    /^api_/,                               // API functions
    /^view_/,                              // Django views
  ],

  // Java
  [SupportedLanguages.Java]: [
    /^do[A-Z]/,         // doGet, doPost (Servlets)
    /^create[A-Z]/,     // Factory patterns
    /^build[A-Z]/,      // Builder patterns
    /Service$/,         // UserService
  ],

  // C#
  [SupportedLanguages.CSharp]: [
    /^(Get|Post|Put|Delete|Patch)/,   // ASP.NET action methods
    /Action$/,                        // MVC actions
    /^On[A-Z]/,                       // Event handlers / Blazor lifecycle
    /Async$/,                         // Async entry points
    /^Configure$/,                    // Startup.Configure
    /^ConfigureServices$/,            // Startup.ConfigureServices
    /^Handle$/,                       // MediatR / generic handler
    /^Execute$/,                      // Command pattern
    /^Invoke$/,                       // Middleware Invoke
    /^Map[A-Z]/,                      // Minimal API MapGet, MapPost
    /Service$/,                       // Service classes
    /^Seed/,                          // Database seeding
  ],

  // Go
  [SupportedLanguages.Go]: [
    /Handler$/,         // http.Handler pattern
    /^Serve/,           // ServeHTTP
    /^New[A-Z]/,        // Constructor pattern (returns new instance)
    /^Make[A-Z]/,       // Make functions
  ],

  // Rust
  [SupportedLanguages.Rust]: [
    /^(get|post|put|delete)_handler$/i,
    /^handle_/,         // handle_request
    /^new$/,            // Constructor pattern
    /^run$/,            // run entry point
    /^spawn/,           // Async spawn
  ],

  // C - explicit main() boost plus common C entry point conventions
  [SupportedLanguages.C]: [
    /^main$/,           // THE entry point
    /^init_/,           // init_server, init_client
    /_init$/,           // module_init, server_init
    /^start_/,          // start_server
    /_start$/,          // thread_start
    /^run_/,            // run_loop
    /_run$/,            // event_run
    /^stop_/,           // stop_server
    /_stop$/,           // service_stop
    /^open_/,           // open_connection
    /_open$/,           // file_open
    /^close_/,          // close_connection
    /_close$/,          // socket_close
    /^create_/,         // create_session
    /_create$/,         // object_create
    /^destroy_/,        // destroy_session
    /_destroy$/,        // object_destroy
    /^handle_/,         // handle_request
    /_handler$/,        // signal_handler
    /_callback$/,       // event_callback
    /^cmd_/,            // tmux: cmd_new_window, cmd_attach_session
    /^server_/,         // server_start, server_loop
    /^client_/,         // client_connect
    /^session_/,        // session_create
    /^window_/,         // window_resize (tmux)
    /^key_/,            // key_press
    /^input_/,          // input_parse
    /^output_/,         // output_write
    /^notify_/,         // notify_client
    /^control_/,        // control_start
  ],

  // C++ - same as C plus OOP/template patterns
  [SupportedLanguages.CPlusPlus]: [
    /^main$/,           // THE entry point
    /^init_/,
    /_init$/,
    /^Create[A-Z]/,     // Factory patterns
    /^create_/,
    /^Run$/,            // Run methods
    /^run$/,
    /^Start$/,          // Start methods
    /^start$/,
    /^handle_/,
    /_handler$/,
    /_callback$/,
    /^OnEvent/,         // Event callbacks
    /^on_/,
    /::Run$/,           // Class::Run
    /::Start$/,         // Class::Start
    /::Init$/,          // Class::Init
    /::Execute$/,       // Class::Execute
  ],

  // Swift / iOS
  [SupportedLanguages.Swift]: [
    /^viewDidLoad$/,                    // UIKit lifecycle
    /^viewWillAppear$/,                 // UIKit lifecycle
    /^viewDidAppear$/,                  // UIKit lifecycle
    /^viewWillDisappear$/,              // UIKit lifecycle
    /^viewDidDisappear$/,               // UIKit lifecycle
    /^application\(/,                   // AppDelegate methods
    /^scene\(/,                         // SceneDelegate methods
    /^body$/,                           // SwiftUI View.body
    /Coordinator$/,                     // Coordinator pattern
    /^sceneDidBecomeActive$/,           // SceneDelegate lifecycle
    /^sceneWillResignActive$/,          // SceneDelegate lifecycle
    /^didFinishLaunchingWithOptions$/,  // AppDelegate
    /ViewController$/,                  // ViewController classes
    /^configure[A-Z]/,                  // Configuration methods
    /^setup[A-Z]/,                      // Setup methods
    /^makeBody$/,                       // SwiftUI ViewModifier
  ],

  // PHP / Laravel
  [SupportedLanguages.PHP]: [
    /Controller$/,                                       // UserController (class name convention)
    /^handle$/,                                          // Job::handle(), Listener::handle()
    /^execute$/,                                         // Command::execute()
    /^boot$/,                                            // ServiceProvider::boot()
    /^register$/,                                        // ServiceProvider::register()
    /^__invoke$/,                                        // Invokable controllers/actions
    /^(index|show|store|update|destroy|create|edit)$/,   // RESTful resource methods
    /^(get|post|put|delete|patch)[A-Z]/,                 // Explicit HTTP method actions
    /^run$/,                                             // Command/Job run()
    /^fire$/,                                            // Event fire()
    /^dispatch$/,                                        // Dispatchable jobs
    /Service$/,                                          // UserService (Service layer)
    /Repository$/,                                       // UserRepository (Repository pattern)
    /^find$/,                                            // Repository::find()
    /^findAll$/,                                         // Repository::findAll()
    /^save$/,                                            // Repository::save()
    /^delete$/,                                          // Repository::delete()
  ],

  // Ruby
  [SupportedLanguages.Ruby]: [
    /^call$/,           // Service objects (MyService.call)
    /^perform$/,        // Background jobs (Sidekiq, ActiveJob)
    /^execute$/,        // Command pattern
  ],
};

/** Pre-computed merged patterns (universal + language-specific) to avoid per-call array allocation. */
const MERGED_ENTRY_POINT_PATTERNS: Record<string, RegExp[]> = {};
const UNIVERSAL_PATTERNS = ENTRY_POINT_PATTERNS['*'] || [];
for (const [lang, patterns] of Object.entries(ENTRY_POINT_PATTERNS)) {
  if (lang === '*') continue;
  MERGED_ENTRY_POINT_PATTERNS[lang] = [...UNIVERSAL_PATTERNS, ...patterns];
}

// ============================================================================
// UTILITY PATTERNS - Functions that should be penalized
// ============================================================================

/**
 * Patterns that indicate utility/helper functions (NOT entry points)
 * These get penalized in scoring
 */
const UTILITY_PATTERNS: RegExp[] = [
  /^(get|set|is|has|can|should|will|did)[A-Z]/,    // Accessors/predicates
  /^_/,                                            // Private by convention
  /^(format|parse|validate|convert|transform)/i,   // Transformation utilities
  /^(log|debug|error|warn|info)$/i,                // Logging
  /^(to|from)[A-Z]/,                               // Conversions
  /^(encode|decode)/i,                             // Encoding utilities
  /^(serialize|deserialize)/i,                     // Serialization
  /^(clone|copy|deep)/i,                           // Cloning utilities
  /^(merge|extend|assign)/i,                       // Object utilities
  /^(filter|map|reduce|sort|find)/i,               // Collection utilities (standalone)
  /Helper$/,
  /Util$/,
  /Utils$/,
  /^utils?$/i,
  /^helpers?$/i,
];

// ============================================================================
// TYPES
// ============================================================================

export interface EntryPointScoreResult {
  score: number;
  reasons: string[];
}

// ============================================================================
// MAIN SCORING FUNCTION
// ============================================================================

/**
 * Calculate an entry point score for a function/method
 *
 * Higher scores indicate better entry point candidates.
 * Score = baseScore × exportMultiplier × nameMultiplier × frameworkMultiplier
 *
 * A function with no outgoing calls always scores 0.
 *
 * @param name - Function/method name
 * @param language - Programming language
 * @param isExported - Whether the function is exported/public
 * @param callerCount - Number of functions that call this function
 * @param calleeCount - Number of functions this function calls
 * @param filePath - Optional file path used for framework detection; '' disables it
 * @returns Score and array of reasons explaining the score
 */
export function calculateEntryPointScore(
  name: string,
  language: SupportedLanguages,
  isExported: boolean,
  callerCount: number,
  calleeCount: number,
  filePath: string = '' // Optional for backwards compatibility
): EntryPointScoreResult {
  const reasons: string[] = [];

  // Must have outgoing calls to be an entry point (we need to trace forward)
  if (calleeCount === 0) {
    return { score: 0, reasons: ['no-outgoing-calls'] };
  }

  // Base score: call ratio (existing algorithm)
  // High ratio = calls many, called by few = likely entry point
  const baseScore = calleeCount / (callerCount + 1);
  reasons.push(`base:${baseScore.toFixed(2)}`);

  // Export bonus: exported/public functions are more likely entry points
  const exportMultiplier = isExported ? 2.0 : 1.0;
  if (isExported) {
    reasons.push('exported');
  }

  // Name pattern scoring
  let nameMultiplier = 1.0;

  // Check negative patterns first (utilities get penalized)
  // NOTE(review): because utility patterns win, entry patterns they also match
  // (e.g. PHP `find` / `findAll`, or `getX` for PHP HTTP actions) can never
  // earn the bonus — confirm this precedence is intended.
  if (UTILITY_PATTERNS.some(p => p.test(name))) {
    nameMultiplier = 0.3; // Significant penalty
    reasons.push('utility-pattern');
  } else {
    // Check positive patterns
    const allPatterns = MERGED_ENTRY_POINT_PATTERNS[language] || UNIVERSAL_PATTERNS;
    if (allPatterns.some(p => p.test(name))) {
      nameMultiplier = 1.5; // Bonus for matching entry point pattern
      reasons.push('entry-pattern');
    }
  }

  // Framework detection bonus (Phase 2)
  let frameworkMultiplier = 1.0;
  if (filePath) {
    const frameworkHint = detectFrameworkFromPath(filePath);
    if (frameworkHint) {
      frameworkMultiplier = frameworkHint.entryPointMultiplier;
      reasons.push(`framework:${frameworkHint.reason}`);
    }
  }

  // Calculate final score
  const finalScore = baseScore * exportMultiplier * nameMultiplier * frameworkMultiplier;

  return {
    score: finalScore,
    reasons,
  };
}

// ============================================================================
// HELPER FUNCTIONS
// ============================================================================

/**
 * Lower-case a path, convert backslashes to '/', and make sure it starts with
 * '/'. The leading slash lets directory probes such as '/tests/' also match a
 * top-level directory of a relative path ('tests/foo.ts').
 */
const toMatchablePath = (filePath: string): string => {
  const normalized = filePath.toLowerCase().replace(/\\/g, '/');
  return normalized.startsWith('/') ? normalized : `/${normalized}`;
};

/** Substrings that mark a path as test code (matched against the normalized path). */
const TEST_PATH_FRAGMENTS: readonly string[] = [
  // JavaScript/TypeScript test patterns
  '.test.', '.spec.', '__tests__/', '__mocks__/',
  // Generic test folders (also covers Rust's tests/ directory)
  '/test/', '/tests/', '/testing/',
  // Python test patterns
  '/test_',
  // Java test patterns
  '/src/test/',
  // Swift/iOS test patterns
  'uitests/',
  // C# test patterns
  '.tests/', '.test/', '.integrationtests/', '.unittests/', '/testproject/',
  // PHP/Laravel test patterns
  '/tests/feature/', '/tests/unit/',
  // Ruby test patterns
  '/spec/', '/test/fixtures/',
];

/** File-name endings that mark a path as test code. */
const TEST_PATH_SUFFIXES: readonly string[] = [
  '_test.py',                   // Python
  '_test.go',                   // Go
  'tests.swift', 'test.swift',  // Swift/iOS
  'tests.cs', 'test.cs',        // C#
  'test.php', 'spec.php',       // PHP/Laravel
  '_spec.rb', '_test.rb',       // Ruby
];

/**
 * Check if a file path is a test file (should be excluded from entry points)
 * Covers common test file patterns across all supported languages
 *
 * Matching is case-insensitive and separator-agnostic, and works for absolute
 * as well as repository-relative paths.
 */
export function isTestFile(filePath: string): boolean {
  const p = toMatchablePath(filePath);
  return (
    TEST_PATH_FRAGMENTS.some(fragment => p.includes(fragment)) ||
    TEST_PATH_SUFFIXES.some(suffix => p.endsWith(suffix))
  );
}

/** Directory fragments that mark a path as utility/helper code. */
const UTILITY_PATH_FRAGMENTS: readonly string[] = [
  '/utils/', '/util/', '/helpers/', '/helper/', '/common/', '/shared/', '/lib/',
];

/** File-name endings that mark a path as utility/helper code. */
const UTILITY_PATH_SUFFIXES: readonly string[] = [
  '/utils.ts', '/utils.js', '/helpers.ts', '/helpers.js', '_utils.py', '_helpers.py',
];

/**
 * Check if a file path is likely a utility/helper file
 * These might still have entry points but should be lower priority
 *
 * Uses the same path normalization as `isTestFile`, so top-level directories
 * of relative paths ('utils/x.ts') are recognised too.
 */
export function isUtilityFile(filePath: string): boolean {
  const p = toMatchablePath(filePath);
  return (
    UTILITY_PATH_FRAGMENTS.some(fragment => p.includes(fragment)) ||
    UTILITY_PATH_SUFFIXES.some(suffix => p.endsWith(suffix))
  );
}

================================================
FILE: gitnexus/src/core/ingestion/export-detection.ts
================================================
/**
 * Export Detection
 *
 * Determines whether a symbol (function, class, etc.) is exported/public
 * in its language. This is a pure function — safe for use in worker threads.
 *
 * Shared between parse-worker.ts (worker pool) and parsing-processor.ts (sequential fallback).
 */

import { findSiblingChild, SyntaxNode } from './utils.js';
import { SupportedLanguages } from '../../config/supported-languages.js';

/** Handler type: given a node and symbol name, return true if the symbol is exported/public.
*/
type ExportChecker = (node: SyntaxNode, name: string) => boolean;

// ============================================================================
// Per-language export checkers
// ============================================================================

/** True when any non-null direct child of `parent` satisfies `matches`. */
const hasDirectChild = (
  parent: SyntaxNode,
  matches: (child: SyntaxNode) => boolean,
): boolean => {
  for (let idx = 0; idx < parent.childCount; idx++) {
    const child = parent.child(idx);
    if (child && matches(child)) return true;
  }
  return false;
};

/**
 * JS/TS: scan the node and each ancestor for an export_statement or
 * export_specifier, or a lexical_declaration directly under an export_statement.
 */
const tsExportChecker: ExportChecker = (node, _name) => {
  for (let cursor: SyntaxNode | null = node; cursor; cursor = cursor.parent) {
    switch (cursor.type) {
      case 'export_statement':
      case 'export_specifier':
        return true;
      case 'lexical_declaration':
        if (cursor.parent?.type === 'export_statement') return true;
        break;
    }
    // Fallback for edge cases: the node's own text begins with 'export '
    if (cursor.text?.startsWith('export ')) return true;
  }
  return false;
};

/** Python: a name counts as public unless it begins with an underscore (convention). */
const pythonExportChecker: ExportChecker = (_node, name) => name.charAt(0) !== '_';

/**
 * Java: look for a 'public' modifier. Modifiers are siblings of the name node,
 * so at every level the children of the parent are scanned.
 */
const javaExportChecker: ExportChecker = (node, _name) => {
  for (let cursor: SyntaxNode | null = node; cursor; cursor = cursor.parent) {
    const owner = cursor.parent;
    if (!owner) continue;

    const hasPublicModifiers = hasDirectChild(
      owner,
      child => child.type === 'modifiers' && child.text?.includes('public') === true,
    );
    if (hasPublicModifiers) return true;

    const isCallable =
      owner.type === 'method_declaration' || owner.type === 'constructor_declaration';
    if (isCallable && owner.text?.trimStart().startsWith('public')) return true;
  }
  return false;
};

/** C# declaration node types for sibling modifier scanning. */
const CSHARP_DECL_TYPES = new Set([
  'method_declaration',
  'local_function_statement',
  'constructor_declaration',
  'class_declaration',
  'interface_declaration',
  'struct_declaration',
  'enum_declaration',
  'record_declaration',
  'record_struct_declaration',
  'record_class_declaration',
  'delegate_declaration',
  'property_declaration',
  'field_declaration',
  'event_declaration',
  'namespace_declaration',
  'file_scoped_namespace_declaration',
]);

/**
 * C#: modifier nodes are SIBLINGS of the name node inside the declaration.
 * Climb to the nearest declaration node and decide from its direct children only.
 */
const csharpExportChecker: ExportChecker = (node, _name) => {
  for (let cursor: SyntaxNode | null = node; cursor; cursor = cursor.parent) {
    if (!CSHARP_DECL_TYPES.has(cursor.type)) continue;
    return hasDirectChild(
      cursor,
      child => child.type === 'modifier' && child.text === 'public',
    );
  }
  return false;
};

/** Go: exported when the first character is an uppercase letter. */
const goExportChecker: ExportChecker = (_node, name) => {
  const initial = name.charAt(0);
  if (initial === '') return false;
  // Must equal its uppercase form AND differ from its lowercase form,
  // which rules out digits, underscores and other caseless characters.
  return initial.toUpperCase() === initial && initial.toLowerCase() !== initial;
};

/** Rust declaration node types for sibling visibility_modifier scanning. */
const RUST_DECL_TYPES = new Set([
  'function_item',
  'struct_item',
  'enum_item',
  'trait_item',
  'impl_item',
  'union_item',
  'type_item',
  'const_item',
  'static_item',
  'mod_item',
  'use_declaration',
  'associated_type',
  'function_signature_item',
]);

/**
 * Rust: visibility_modifier is a SIBLING of the name node within the declaration
 * node (function_item, struct_item, etc.), not a parent. Climb to the nearest
 * declaration node and decide from its direct children only.
 */
const rustExportChecker: ExportChecker = (node, _name) => {
  for (let cursor: SyntaxNode | null = node; cursor; cursor = cursor.parent) {
    if (!RUST_DECL_TYPES.has(cursor.type)) continue;
    return hasDirectChild(
      cursor,
      child => child.type === 'visibility_modifier' && child.text?.startsWith('pub') === true,
    );
  }
  return false;
};

/**
 * Kotlin: default visibility is public (unlike Java).
 * visibility_modifier sits inside modifiers, a sibling of the name node within
 * the declaration; the first explicit private/internal/protected/public wins.
 */
const kotlinExportChecker: ExportChecker = (node, _name) => {
  for (let cursor: SyntaxNode | null = node; cursor; cursor = cursor.parent) {
    const owner = cursor.parent;
    if (!owner) continue;

    const visibility = findSiblingChild(owner, 'modifiers', 'visibility_modifier');
    if (!visibility) continue;

    switch (visibility.text) {
      case 'private':
      case 'internal':
      case 'protected':
        return false;
      case 'public':
        return true;
    }
  }
  // No visibility modifier = public (Kotlin default)
  return true;
};

/**
 * C/C++: functions without 'static' storage class have external linkage by default,
 * making them globally accessible (equivalent to exported). Only functions explicitly
 * marked 'static' are file-scoped (not exported). C++ anonymous namespaces
 * (namespace { ... }) also give internal linkage.
 */
const cCppExportChecker: ExportChecker = (node, _name) => {
  for (let cursor: SyntaxNode | null = node; cursor; cursor = cursor.parent) {
    const kind = cursor.type;
    if (kind === 'function_definition' || kind === 'declaration') {
      // Inspect direct children for a 'static' storage class specifier instead
      // of reading the full function text (which can be very large).
      const isStatic = hasDirectChild(
        cursor,
        child => child.type === 'storage_class_specifier' && child.text === 'static',
      );
      if (isStatic) return false;
    } else if (kind === 'namespace_definition') {
      // A namespace_definition with no name child is anonymous = internal linkage
      if (!cursor.childForFieldName?.('name')) return false;
    }
  }
  return true; // Top-level C/C++ functions default to external linkage
};

/** PHP: check for visibility modifier or top-level scope. */
const phpExportChecker: ExportChecker = (node, _name) => {
  for (let cursor: SyntaxNode | null = node; cursor; cursor = cursor.parent) {
    switch (cursor.type) {
      case 'class_declaration':
      case 'interface_declaration':
      case 'trait_declaration':
      case 'enum_declaration':
        return true;
      case 'visibility_modifier':
        return cursor.text === 'public';
    }
  }
  // Top-level functions are globally accessible
  return true;
};

/** Swift: check for 'public' or 'open' access modifiers. */
const swiftExportChecker: ExportChecker = (node, _name) => {
  for (let cursor: SyntaxNode | null = node; cursor; cursor = cursor.parent) {
    if (cursor.type !== 'modifiers' && cursor.type !== 'visibility_modifier') continue;
    const modifierText = cursor.text || '';
    if (modifierText.includes('public') || modifierText.includes('open')) return true;
  }
  return false;
};

// ============================================================================
// Exhaustive dispatch table — `satisfies` enforces that all SupportedLanguages are covered
// ============================================================================

const exportCheckers = {
  [SupportedLanguages.JavaScript]: tsExportChecker,
  [SupportedLanguages.TypeScript]: tsExportChecker,
  [SupportedLanguages.Python]: pythonExportChecker,
  [SupportedLanguages.Java]: javaExportChecker,
  [SupportedLanguages.CSharp]: csharpExportChecker,
  [SupportedLanguages.Go]: goExportChecker,
  [SupportedLanguages.Rust]: rustExportChecker,
  [SupportedLanguages.Kotlin]: kotlinExportChecker,
  [SupportedLanguages.C]: cCppExportChecker,
  [SupportedLanguages.CPlusPlus]: cCppExportChecker,
  [SupportedLanguages.PHP]: phpExportChecker,
  [SupportedLanguages.Swift]: swiftExportChecker,
  [SupportedLanguages.Ruby]: (_node, _name) => true,
} satisfies Record<SupportedLanguages, ExportChecker>;

// ============================================================================
// Public API
// ============================================================================

/**
 * Check if a tree-sitter node is exported/public in its language.
* @param node - The tree-sitter AST node
 * @param name - The symbol name
 * @param language - The programming language
 * @returns true if the symbol is exported/public
 */
export const isNodeExported = (node: SyntaxNode, name: string, language: SupportedLanguages): boolean => {
  const checker = exportCheckers[language];
  // Defensive: a value outside the SupportedLanguages table is treated as not exported
  if (!checker) return false;
  return checker(node, name);
};


================================================
FILE: gitnexus/src/core/ingestion/filesystem-walker.ts
================================================
import fs from 'fs/promises';
import path from 'path';
import { glob } from 'glob';
import { createIgnoreFilter } from '../../config/ignore-service.js';

export interface FileEntry {
  path: string;
  content: string;
}

/** Lightweight entry — path + size from stat, no content in memory */
export interface ScannedFile {
  path: string;
  size: number;
}

/** Path-only reference (for type signatures) */
export interface FilePath {
  path: string;
}

const READ_CONCURRENCY = 32;

/** Skip files larger than 512KB — they're usually generated/vendored and crash tree-sitter */
const MAX_FILE_SIZE = 512 * 1024;

/**
 * Phase 1: Scan repository — stat files to get paths + sizes, no content loaded.
 * Memory: ~10MB for 100K files vs ~1GB+ with content.
 *
 * Files are stat'ed in batches of READ_CONCURRENCY. Files larger than
 * MAX_FILE_SIZE and files whose stat fails are left out of the result, but
 * progress is still reported for them.
 *
 * @param repoPath - Root directory to scan
 * @param onProgress - Optional callback invoked once per globbed file with
 *   (processed count, total count, file path)
 * @returns Entries with forward-slash relative paths and byte sizes
 */
export const walkRepositoryPaths = async (
  repoPath: string,
  onProgress?: (current: number, total: number, filePath: string) => void
): Promise<ScannedFile[]> => {
  const ignoreFilter = await createIgnoreFilter(repoPath);

  const filtered = await glob('**/*', {
    cwd: repoPath,
    nodir: true,
    dot: false,
    ignore: ignoreFilter,
  });

  const entries: ScannedFile[] = [];
  let processed = 0;
  let skippedLarge = 0;

  for (let start = 0; start < filtered.length; start += READ_CONCURRENCY) {
    const batch = filtered.slice(start, start + READ_CONCURRENCY);
    const results = await Promise.allSettled(
      batch.map(async relativePath => {
        const fullPath = path.join(repoPath, relativePath);
        const stat = await fs.stat(fullPath);
        if (stat.size > MAX_FILE_SIZE) {
          skippedLarge++;
          return null;
        }
        return { path: relativePath.replace(/\\/g, '/'), size: stat.size };
      })
    );

    // allSettled keeps input order, so results[i] belongs to batch[i];
    // index directly instead of searching with indexOf for every result.
    for (let i = 0; i < results.length; i++) {
      const result = results[i];
      processed++;
      if (result.status === 'fulfilled' && result.value !== null) {
        entries.push(result.value);
        onProgress?.(processed, filtered.length, result.value.path);
      } else {
        // Stat failed or file was oversized: report progress with the globbed path
        onProgress?.(processed, filtered.length, batch[i]);
      }
    }
  }

  if (skippedLarge > 0) {
    console.warn(`  Skipped ${skippedLarge} large files (>${MAX_FILE_SIZE / 1024}KB, likely generated/vendored)`);
  }

  return entries;
};

/**
 * Phase 2: Read file contents for a specific set of relative paths.
 * Returns a Map for O(1) lookup. Silently skips files that fail to read.
 *
 * @param repoPath - Root directory the relative paths are resolved against
 * @param relativePaths - Paths to read, in batches of READ_CONCURRENCY
 * @returns Map from the given relative path to its UTF-8 content
 */
export const readFileContents = async (
  repoPath: string,
  relativePaths: string[],
): Promise<Map<string, string>> => {
  const contents = new Map<string, string>();

  for (let start = 0; start < relativePaths.length; start += READ_CONCURRENCY) {
    const batch = relativePaths.slice(start, start + READ_CONCURRENCY);
    const results = await Promise.allSettled(
      batch.map(async relativePath => {
        const fullPath = path.join(repoPath, relativePath);
        const content = await fs.readFile(fullPath, 'utf-8');
        return { path: relativePath, content };
      })
    );

    for (const result of results) {
      // Rejected reads are dropped on purpose (best-effort, see doc comment)
      if (result.status === 'fulfilled') {
        contents.set(result.value.path, result.value.content);
      }
    }
  }

  return contents;
};

/**
 * Legacy API — scans and reads everything into memory.
 * Used by sequential fallback path only.
 *
 * @param repoPath - Root directory to scan
 * @param onProgress - Forwarded to walkRepositoryPaths
 * @returns One entry per scanned file whose content could be read
 */
export const walkRepository = async (
  repoPath: string,
  onProgress?: (current: number, total: number, filePath: string) => void
): Promise<FileEntry[]> => {
  const scanned = await walkRepositoryPaths(repoPath, onProgress);
  const contents = await readFileContents(repoPath, scanned.map(f => f.path));
  return scanned
    .filter(f => contents.has(f.path))
    .map(f => ({ path: f.path, content: contents.get(f.path)! }));
};


================================================
FILE: gitnexus/src/core/ingestion/framework-detection.ts
================================================
/**
 * Framework Detection
 *
 * Detects frameworks from:
 * 1) file path patterns
 * 2) AST definition text (decorators/annotations/attributes)
 * and provides entry point multipliers for process scoring.
 *
 * DESIGN: Returns null for unknown frameworks, which causes a 1.0 multiplier
 * (no bonus, no penalty) - same behavior as before this feature.
*/ // ============================================================================ // TYPES // ============================================================================ export interface FrameworkHint { framework: string; entryPointMultiplier: number; reason: string; } // ============================================================================ // PATH-BASED FRAMEWORK DETECTION // ============================================================================ /** * Detect framework from file path patterns * * This provides entry point multipliers based on well-known framework conventions. * Returns null if no framework pattern is detected (falls back to 1.0 multiplier). */ export function detectFrameworkFromPath(filePath: string): FrameworkHint | null { // Normalize path separators and ensure leading slash for consistent matching let p = filePath.toLowerCase().replace(/\\/g, '/'); if (!p.startsWith('/')) { p = '/' + p; // Add leading slash so patterns like '/app/' match 'app/...' } // ========== JAVASCRIPT / TYPESCRIPT FRAMEWORKS ========== // Next.js - Pages Router (high confidence) if (p.includes('/pages/') && !p.includes('/_') && !p.includes('/api/')) { if (p.endsWith('.tsx') || p.endsWith('.ts') || p.endsWith('.jsx') || p.endsWith('.js')) { return { framework: 'nextjs-pages', entryPointMultiplier: 3.0, reason: 'nextjs-page' }; } } // Next.js - App Router (page.tsx files) if (p.includes('/app/') && ( p.endsWith('page.tsx') || p.endsWith('page.ts') || p.endsWith('page.jsx') || p.endsWith('page.js') )) { return { framework: 'nextjs-app', entryPointMultiplier: 3.0, reason: 'nextjs-app-page' }; } // Next.js - API Routes if (p.includes('/pages/api/') || (p.includes('/app/') && p.includes('/api/') && p.endsWith('route.ts'))) { return { framework: 'nextjs-api', entryPointMultiplier: 3.0, reason: 'nextjs-api-route' }; } // Next.js - Layout files (moderate - they're entry-ish but not the main entry) if (p.includes('/app/') && (p.endsWith('layout.tsx') || 
p.endsWith('layout.ts'))) { return { framework: 'nextjs-app', entryPointMultiplier: 2.0, reason: 'nextjs-layout' }; } // Express / Node.js routes if (p.includes('/routes/') && (p.endsWith('.ts') || p.endsWith('.js'))) { return { framework: 'express', entryPointMultiplier: 2.5, reason: 'routes-folder' }; } // Generic controllers (MVC pattern) if (p.includes('/controllers/') && (p.endsWith('.ts') || p.endsWith('.js'))) { return { framework: 'mvc', entryPointMultiplier: 2.5, reason: 'controllers-folder' }; } // Generic handlers if (p.includes('/handlers/') && (p.endsWith('.ts') || p.endsWith('.js'))) { return { framework: 'handlers', entryPointMultiplier: 2.5, reason: 'handlers-folder' }; } // React components (lower priority - not all are entry points) if ((p.includes('/components/') || p.includes('/views/')) && (p.endsWith('.tsx') || p.endsWith('.jsx'))) { // Only boost if PascalCase filename (likely a component, not util) const fileName = p.split('/').pop() || ''; if (/^[A-Z]/.test(fileName)) { return { framework: 'react', entryPointMultiplier: 1.5, reason: 'react-component' }; } } // ========== PYTHON FRAMEWORKS ========== // Django views (high confidence) if (p.endsWith('views.py')) { return { framework: 'django', entryPointMultiplier: 3.0, reason: 'django-views' }; } // Django URL configs if (p.endsWith('urls.py')) { return { framework: 'django', entryPointMultiplier: 2.0, reason: 'django-urls' }; } // FastAPI / Flask routers if ((p.includes('/routers/') || p.includes('/endpoints/') || p.includes('/routes/')) && p.endsWith('.py')) { return { framework: 'fastapi', entryPointMultiplier: 2.5, reason: 'api-routers' }; } // Python API folder if (p.includes('/api/') && p.endsWith('.py') && !p.endsWith('__init__.py')) { return { framework: 'python-api', entryPointMultiplier: 2.0, reason: 'api-folder' }; } // ========== JAVA FRAMEWORKS ========== // Spring Boot controllers if ((p.includes('/controller/') || p.includes('/controllers/')) && p.endsWith('.java')) { return { 
framework: 'spring', entryPointMultiplier: 3.0, reason: 'spring-controller' }; } // Spring Boot - files ending in Controller.java if (p.endsWith('controller.java')) { return { framework: 'spring', entryPointMultiplier: 3.0, reason: 'spring-controller-file' }; } // Java service layer (often entry points for business logic) if ((p.includes('/service/') || p.includes('/services/')) && p.endsWith('.java')) { return { framework: 'java-service', entryPointMultiplier: 1.8, reason: 'java-service' }; } // ========== KOTLIN FRAMEWORKS ========== // Spring Boot Kotlin controllers if ((p.includes('/controller/') || p.includes('/controllers/')) && p.endsWith('.kt')) { return { framework: 'spring-kotlin', entryPointMultiplier: 3.0, reason: 'spring-kotlin-controller' }; } // Spring Boot - files ending in Controller.kt if (p.endsWith('controller.kt')) { return { framework: 'spring-kotlin', entryPointMultiplier: 3.0, reason: 'spring-kotlin-controller-file' }; } // Ktor routes if (p.includes('/routes/') && p.endsWith('.kt')) { return { framework: 'ktor', entryPointMultiplier: 2.5, reason: 'ktor-routes' }; } // Ktor plugins folder or Routing.kt files if (p.includes('/plugins/') && p.endsWith('.kt')) { return { framework: 'ktor', entryPointMultiplier: 2.0, reason: 'ktor-plugin' }; } if (p.endsWith('routing.kt') || p.endsWith('routes.kt')) { return { framework: 'ktor', entryPointMultiplier: 2.5, reason: 'ktor-routing-file' }; } // Android Activities, Fragments if ((p.includes('/activity/') || p.includes('/ui/')) && p.endsWith('.kt')) { return { framework: 'android-kotlin', entryPointMultiplier: 2.5, reason: 'android-ui' }; } if (p.endsWith('activity.kt') || p.endsWith('fragment.kt')) { return { framework: 'android-kotlin', entryPointMultiplier: 2.5, reason: 'android-component' }; } // Kotlin main entry point if (p.endsWith('/main.kt')) { return { framework: 'kotlin', entryPointMultiplier: 3.0, reason: 'kotlin-main' }; } // Kotlin Application entry point (common naming) if 
(p.endsWith('/application.kt')) { return { framework: 'kotlin', entryPointMultiplier: 2.5, reason: 'kotlin-application' }; } // ========== C# / .NET FRAMEWORKS ========== // ASP.NET Controllers if (p.includes('/controllers/') && p.endsWith('.cs')) { return { framework: 'aspnet', entryPointMultiplier: 3.0, reason: 'aspnet-controller' }; } // ASP.NET - files ending in Controller.cs if (p.endsWith('controller.cs')) { return { framework: 'aspnet', entryPointMultiplier: 3.0, reason: 'aspnet-controller-file' }; } // ASP.NET Services if ((p.includes('/services/') || p.includes('/service/')) && p.endsWith('.cs')) { return { framework: 'aspnet', entryPointMultiplier: 1.8, reason: 'aspnet-service' }; } // ASP.NET Middleware if (p.includes('/middleware/') && p.endsWith('.cs')) { return { framework: 'aspnet', entryPointMultiplier: 2.5, reason: 'aspnet-middleware' }; } // SignalR Hubs if (p.includes('/hubs/') && p.endsWith('.cs')) { return { framework: 'signalr', entryPointMultiplier: 2.5, reason: 'signalr-hub' }; } if (p.endsWith('hub.cs')) { return { framework: 'signalr', entryPointMultiplier: 2.5, reason: 'signalr-hub-file' }; } // Minimal API / Program.cs / Startup.cs if (p.endsWith('/program.cs') || p.endsWith('/startup.cs')) { return { framework: 'aspnet', entryPointMultiplier: 3.0, reason: 'aspnet-entry' }; } // Background services / Hosted services if ((p.includes('/backgroundservices/') || p.includes('/hostedservices/')) && p.endsWith('.cs')) { return { framework: 'aspnet', entryPointMultiplier: 2.0, reason: 'aspnet-background-service' }; } // Blazor pages if (p.includes('/pages/') && p.endsWith('.razor')) { return { framework: 'blazor', entryPointMultiplier: 2.5, reason: 'blazor-page' }; } // ========== GO FRAMEWORKS ========== // Go handlers if ((p.includes('/handlers/') || p.includes('/handler/')) && p.endsWith('.go')) { return { framework: 'go-http', entryPointMultiplier: 2.5, reason: 'go-handlers' }; } // Go routes if (p.includes('/routes/') && p.endsWith('.go')) 
{ return { framework: 'go-http', entryPointMultiplier: 2.5, reason: 'go-routes' }; } // Go controllers if (p.includes('/controllers/') && p.endsWith('.go')) { return { framework: 'go-mvc', entryPointMultiplier: 2.5, reason: 'go-controller' }; } // Go main.go files (THE entry point) if (p.endsWith('/main.go') || p.endsWith('/cmd/') && p.endsWith('.go')) { return { framework: 'go', entryPointMultiplier: 3.0, reason: 'go-main' }; } // ========== RUST FRAMEWORKS ========== // Rust handlers/routes if ((p.includes('/handlers/') || p.includes('/routes/')) && p.endsWith('.rs')) { return { framework: 'rust-web', entryPointMultiplier: 2.5, reason: 'rust-handlers' }; } // Rust main.rs (THE entry point) if (p.endsWith('/main.rs')) { return { framework: 'rust', entryPointMultiplier: 3.0, reason: 'rust-main' }; } // Rust bin folder (executables) if (p.includes('/bin/') && p.endsWith('.rs')) { return { framework: 'rust', entryPointMultiplier: 2.5, reason: 'rust-bin' }; } // ========== C / C++ ========== // C/C++ main files if (p.endsWith('/main.c') || p.endsWith('/main.cpp') || p.endsWith('/main.cc')) { return { framework: 'c-cpp', entryPointMultiplier: 3.0, reason: 'c-main' }; } // C/C++ src folder entry points (if named specifically) if ((p.includes('/src/') && (p.endsWith('/app.c') || p.endsWith('/app.cpp')))) { return { framework: 'c-cpp', entryPointMultiplier: 2.5, reason: 'c-app' }; } // ========== PHP / LARAVEL FRAMEWORKS ========== // Laravel routes (highest - these ARE the entry point definitions) if (p.includes('/routes/') && p.endsWith('.php')) { return { framework: 'laravel', entryPointMultiplier: 3.0, reason: 'laravel-routes' }; } // Laravel controllers (very high - receive HTTP requests) if ((p.includes('/http/controllers/') || p.includes('/controllers/')) && p.endsWith('.php')) { return { framework: 'laravel', entryPointMultiplier: 3.0, reason: 'laravel-controller' }; } // Laravel controller by file name convention if (p.endsWith('controller.php')) { return { 
framework: 'laravel', entryPointMultiplier: 3.0, reason: 'laravel-controller-file' }; } // Laravel console commands if ((p.includes('/console/commands/') || p.includes('/commands/')) && p.endsWith('.php')) { return { framework: 'laravel', entryPointMultiplier: 2.5, reason: 'laravel-command' }; } // Laravel jobs (queue entry points) if (p.includes('/jobs/') && p.endsWith('.php')) { return { framework: 'laravel', entryPointMultiplier: 2.5, reason: 'laravel-job' }; } // Laravel listeners (event-driven entry points) if (p.includes('/listeners/') && p.endsWith('.php')) { return { framework: 'laravel', entryPointMultiplier: 2.5, reason: 'laravel-listener' }; } // Laravel middleware if (p.includes('/http/middleware/') && p.endsWith('.php')) { return { framework: 'laravel', entryPointMultiplier: 2.5, reason: 'laravel-middleware' }; } // Laravel service providers if (p.includes('/providers/') && p.endsWith('.php')) { return { framework: 'laravel', entryPointMultiplier: 1.8, reason: 'laravel-provider' }; } // Laravel policies if (p.includes('/policies/') && p.endsWith('.php')) { return { framework: 'laravel', entryPointMultiplier: 2.0, reason: 'laravel-policy' }; } // Laravel models (important but not entry points per se) if (p.includes('/models/') && p.endsWith('.php')) { return { framework: 'laravel', entryPointMultiplier: 1.5, reason: 'laravel-model' }; } // Laravel services (Service Repository pattern) if (p.includes('/services/') && p.endsWith('.php')) { return { framework: 'laravel', entryPointMultiplier: 1.8, reason: 'laravel-service' }; } // Laravel repositories (Service Repository pattern) if (p.includes('/repositories/') && p.endsWith('.php')) { return { framework: 'laravel', entryPointMultiplier: 1.5, reason: 'laravel-repository' }; } // ========== RUBY ========== // Ruby: bin/ or exe/ (CLI entry points) if ((p.includes('/bin/') || p.includes('/exe/')) && p.endsWith('.rb')) { return { framework: 'ruby', entryPointMultiplier: 2.5, reason: 'ruby-executable' }; } // 
Ruby: Rakefile or *.rake (task definitions) if (p.endsWith('/rakefile') || p.endsWith('.rake')) { return { framework: 'ruby', entryPointMultiplier: 1.5, reason: 'ruby-rake' }; } // ========== SWIFT / iOS ========== // iOS App entry points (highest priority) if (p.endsWith('/appdelegate.swift') || p.endsWith('/scenedelegate.swift') || p.endsWith('/app.swift')) { return { framework: 'ios', entryPointMultiplier: 3.0, reason: 'ios-app-entry' }; } // SwiftUI App entry (@main) if (p.endsWith('app.swift') && p.includes('/sources/')) { return { framework: 'swiftui', entryPointMultiplier: 3.0, reason: 'swiftui-app' }; } // UIKit ViewControllers (high priority - screen entry points) if ((p.includes('/viewcontrollers/') || p.includes('/controllers/') || p.includes('/screens/')) && p.endsWith('.swift')) { return { framework: 'uikit', entryPointMultiplier: 2.5, reason: 'uikit-viewcontroller' }; } // ViewController by filename convention if (p.endsWith('viewcontroller.swift') || p.endsWith('vc.swift')) { return { framework: 'uikit', entryPointMultiplier: 2.5, reason: 'uikit-viewcontroller-file' }; } // Coordinator pattern (navigation entry points) if (p.includes('/coordinators/') && p.endsWith('.swift')) { return { framework: 'ios-coordinator', entryPointMultiplier: 2.5, reason: 'ios-coordinator' }; } // Coordinator by filename if (p.endsWith('coordinator.swift')) { return { framework: 'ios-coordinator', entryPointMultiplier: 2.5, reason: 'ios-coordinator-file' }; } // SwiftUI Views (moderate - reusable components) if ((p.includes('/views/') || p.includes('/scenes/')) && p.endsWith('.swift')) { return { framework: 'swiftui', entryPointMultiplier: 1.8, reason: 'swiftui-view' }; } // Service layer if (p.includes('/services/') && p.endsWith('.swift')) { return { framework: 'ios-service', entryPointMultiplier: 1.8, reason: 'ios-service' }; } // Router / navigation if (p.includes('/router/') && p.endsWith('.swift')) { return { framework: 'ios-router', entryPointMultiplier: 2.0, 
reason: 'ios-router' }; } // ========== GENERIC PATTERNS ========== // Any language: index files in API folders if (p.includes('/api/') && ( p.endsWith('/index.ts') || p.endsWith('/index.js') || p.endsWith('/__init__.py') )) { return { framework: 'api', entryPointMultiplier: 1.8, reason: 'api-index' }; } // No framework detected - return null for graceful fallback (1.0 multiplier) return null; } // ============================================================================ // AST-BASED FRAMEWORK DETECTION // ============================================================================ /** * Patterns that indicate framework entry points within code definitions. * These are matched against AST node text (class/method/function declaration text). */ export const FRAMEWORK_AST_PATTERNS = { // JavaScript/TypeScript decorators 'nestjs': ['@Controller', '@Get', '@Post', '@Put', '@Delete', '@Patch'], 'express': ['app.get', 'app.post', 'app.put', 'app.delete', 'router.get', 'router.post'], // Python decorators 'fastapi': ['@app.get', '@app.post', '@app.put', '@app.delete', '@router.get'], 'flask': ['@app.route', '@blueprint.route'], // Java annotations 'spring': ['@RestController', '@Controller', '@GetMapping', '@PostMapping', '@RequestMapping'], 'jaxrs': ['@Path', '@GET', '@POST', '@PUT', '@DELETE'], // C# attributes 'aspnet': ['[ApiController]', '[HttpGet]', '[HttpPost]', '[HttpPut]', '[HttpDelete]', '[Route]', '[Authorize]', '[AllowAnonymous]'], 'signalr': ['[HubMethodName]', ': Hub', ': Hub<'], 'blazor': ['@page', '[Parameter]', '@inject'], 'efcore': ['DbContext', 'DbSet<', 'OnModelCreating'], // Go patterns (function signatures) 'go-http': ['http.Handler', 'http.HandlerFunc', 'ServeHTTP'], // PHP/Laravel 'laravel': ['Route::get', 'Route::post', 'Route::put', 'Route::delete', 'Route::resource', 'Route::apiResource', '#[Route('], // Rust macros 'actix': ['#[get', '#[post', '#[put', '#[delete'], 'axum': ['Router::new'], 'rocket': ['#[get', '#[post'], // Swift/iOS 'uikit': 
['viewDidLoad', 'viewWillAppear', 'viewDidAppear', 'UIViewController'], 'swiftui': ['@main', 'WindowGroup', 'ContentView', '@StateObject', '@ObservedObject'], 'combine': ['sink', 'assign', 'Publisher', 'Subscriber'], }; import { SupportedLanguages } from '../../config/supported-languages.js'; interface AstFrameworkPatternConfig { framework: string; entryPointMultiplier: number; reason: string; patterns: string[]; } const AST_FRAMEWORK_PATTERNS_BY_LANGUAGE: Record<string, AstFrameworkPatternConfig[]> = { [SupportedLanguages.JavaScript]: [ { framework: 'nestjs', entryPointMultiplier: 3.2, reason: 'nestjs-decorator', patterns: FRAMEWORK_AST_PATTERNS.nestjs }, ], [SupportedLanguages.TypeScript]: [ { framework: 'nestjs', entryPointMultiplier: 3.2, reason: 'nestjs-decorator', patterns: FRAMEWORK_AST_PATTERNS.nestjs }, ], [SupportedLanguages.Python]: [ { framework: 'fastapi', entryPointMultiplier: 3.0, reason: 'fastapi-decorator', patterns: FRAMEWORK_AST_PATTERNS.fastapi }, { framework: 'flask', entryPointMultiplier: 2.8, reason: 'flask-decorator', patterns: FRAMEWORK_AST_PATTERNS.flask }, ], [SupportedLanguages.Java]: [ { framework: 'spring', entryPointMultiplier: 3.2, reason: 'spring-annotation', patterns: FRAMEWORK_AST_PATTERNS.spring }, { framework: 'jaxrs', entryPointMultiplier: 3.0, reason: 'jaxrs-annotation', patterns: FRAMEWORK_AST_PATTERNS.jaxrs }, ], [SupportedLanguages.Kotlin]: [ { framework: 'spring-kotlin', entryPointMultiplier: 3.2, reason: 'spring-kotlin-annotation', patterns: FRAMEWORK_AST_PATTERNS.spring }, { framework: 'jaxrs', entryPointMultiplier: 3.0, reason: 'jaxrs-annotation', patterns: FRAMEWORK_AST_PATTERNS.jaxrs }, { framework: 'ktor', entryPointMultiplier: 2.8, reason: 'ktor-routing', patterns: ['routing', 'embeddedServer', 'Application.module'] }, { framework: 'android-kotlin', entryPointMultiplier: 2.5, reason: 'android-annotation', patterns: ['@AndroidEntryPoint', 'AppCompatActivity', 'Fragment('] }, ], [SupportedLanguages.CSharp]: [ { 
framework: 'aspnet', entryPointMultiplier: 3.2, reason: 'aspnet-attribute', patterns: FRAMEWORK_AST_PATTERNS.aspnet }, { framework: 'signalr', entryPointMultiplier: 2.8, reason: 'signalr-attribute', patterns: FRAMEWORK_AST_PATTERNS.signalr }, { framework: 'blazor', entryPointMultiplier: 2.5, reason: 'blazor-attribute', patterns: FRAMEWORK_AST_PATTERNS.blazor }, { framework: 'efcore', entryPointMultiplier: 2.0, reason: 'efcore-pattern', patterns: FRAMEWORK_AST_PATTERNS.efcore }, ], [SupportedLanguages.PHP]: [ { framework: 'laravel', entryPointMultiplier: 3.0, reason: 'php-route-attribute', patterns: FRAMEWORK_AST_PATTERNS.laravel }, ], }; /** Pre-lowercased patterns for O(1) pattern matching at runtime */ const AST_PATTERNS_LOWERED: Record<string, Array<{ framework: string; entryPointMultiplier: number; reason: string; patterns: string[] }>> = Object.fromEntries( Object.entries(AST_FRAMEWORK_PATTERNS_BY_LANGUAGE).map(([lang, cfgs]) => [ lang, cfgs.map(cfg => ({ ...cfg, patterns: cfg.patterns.map(p => p.toLowerCase()) })), ]) ); /** * Detect framework entry points from AST definition text (decorators/annotations/attributes). * Returns null if no known pattern is found. * Note: callers should slice definitionText to ~300 chars since annotations appear at the start. 
/**
 * Detect a framework entry point from the text of an AST definition
 * (decorators / annotations / attributes).
 *
 * Matching is a case-insensitive substring search: the definition text is
 * lowercased once and compared against the pre-lowercased pattern table for
 * the given language. The first configured framework with any matching
 * pattern wins.
 *
 * Note: callers should slice definitionText to ~300 chars since annotations
 * appear at the start.
 *
 * @param language - Language of the file the definition came from.
 * @param definitionText - Source text of the definition.
 * @returns The framework hint for the first match, or null when either
 *          argument is empty, the language has no patterns, or nothing matches.
 */
export function detectFrameworkFromAST(
  language: SupportedLanguages,
  definitionText: string
): FrameworkHint | null {
  if (!language || !definitionText) {
    return null;
  }

  const candidates = AST_PATTERNS_LOWERED[language.toLowerCase()];
  if (!candidates || candidates.length === 0) {
    return null;
  }

  const haystack = definitionText.toLowerCase();
  const hit = candidates.find(candidate =>
    candidate.patterns.some(needle => haystack.includes(needle)),
  );
  if (!hit) {
    return null;
  }

  // Return only the hint fields; the pattern list stays internal.
  const { framework, entryPointMultiplier, reason } = hit;
  return { framework, entryPointMultiplier, reason };
}
/**
 * Heritage Processor
 *
 * Extracts class inheritance relationships:
 * - EXTENDS: Class extends another Class (TS, JS, Python, C#, C++)
 * - IMPLEMENTS: Class implements an Interface (TS, C#, Java, Kotlin, PHP)
 *
 * Languages like C# use a single `base_list` for both class and interface parents.
 * The edge type is decided by consulting the symbol table: a parent registered as
 * an Interface yields IMPLEMENTS, anything else yields EXTENDS. For unresolved
 * external symbols, the fallback heuristic is language-gated:
 * - C# / Java: apply the `I[A-Z]` naming convention (e.g. IDisposable → IMPLEMENTS)
 * - Swift: default to IMPLEMENTS (protocol conformance is more common than class inheritance)
 * - All other languages: default to EXTENDS
 */

import { KnowledgeGraph } from '../graph/types.js';
import { ASTCache } from './ast-cache.js';
import Parser from 'tree-sitter';
import { isLanguageAvailable, loadParser, loadLanguage } from '../tree-sitter/parser-loader.js';
import { LANGUAGE_QUERIES } from './tree-sitter-queries.js';
import { generateId } from '../../lib/utils.js';
import { getLanguageFromFilename, isVerboseIngestionEnabled, yieldToEventLoop } from './utils.js';
import { SupportedLanguages } from '../../config/supported-languages.js';
import { getTreeSitterBufferSize } from './constants.js';
import type { ExtractedHeritage } from './workers/parse-worker.js';
import type { ResolutionContext } from './resolution-context.js';

/** C#/Java convention: interfaces start with I followed by an uppercase letter */
const INTERFACE_NAME_RE = /^I[A-Z]/;

/**
 * Decide whether a `heritage.extends` capture is really an IMPLEMENTS edge.
 *
 * The symbol table is consulted first: when the parent resolves, the type of
 * the first candidate decides ('Interface' → IMPLEMENTS, anything else →
 * EXTENDS). When the parent does not resolve, a language-gated heuristic applies:
 * - C# / Java: names matching `I[A-Z]` are treated as interfaces
 * - Swift: always IMPLEMENTS (protocol conformance is the norm)
 * - All others: EXTENDS
 *
 * @returns The edge type plus the node label to use when a synthetic parent ID
 *          has to be generated.
 */
const resolveExtendsType = (
  parentName: string,
  currentFilePath: string,
  ctx: ResolutionContext,
  language: SupportedLanguages,
): { type: 'EXTENDS' | 'IMPLEMENTS'; idPrefix: string } => {
  const lookup = ctx.resolve(parentName, currentFilePath);
  if (lookup && lookup.candidates.length > 0) {
    if (lookup.candidates[0].type === 'Interface') {
      return { type: 'IMPLEMENTS', idPrefix: 'Interface' };
    }
    return { type: 'EXTENDS', idPrefix: 'Class' };
  }

  // Unresolved symbol — only a naming/language heuristic is left.
  switch (language) {
    case SupportedLanguages.CSharp:
    case SupportedLanguages.Java:
      if (INTERFACE_NAME_RE.test(parentName)) {
        return { type: 'IMPLEMENTS', idPrefix: 'Interface' };
      }
      break;
    case SupportedLanguages.Swift:
      // Protocol conformance is far more common than class inheritance in Swift
      return { type: 'IMPLEMENTS', idPrefix: 'Interface' };
    default:
      break;
  }
  return { type: 'EXTENDS', idPrefix: 'Class' };
};

/**
 * Resolve the node ID to use for one side of a heritage edge.
 *
 * Prefers the first candidate returned by `ctx.resolve()`. Falls back to a
 * synthetic ID built from `fallbackLabel` and `fallbackKey ?? name` when the
 * name does not resolve, or when it resolves at the 'global' tier to more than
 * one candidate (a wrong edge is worse than no edge).
 */
const resolveHeritageId = (
  name: string,
  filePath: string,
  ctx: ResolutionContext,
  fallbackLabel: string,
  fallbackKey?: string,
): string => {
  const syntheticId = () => generateId(fallbackLabel, fallbackKey ?? name);

  const lookup = ctx.resolve(name, filePath);
  if (!lookup || lookup.candidates.length === 0) {
    return syntheticId();
  }

  const ambiguousGlobal = lookup.tier === 'global' && lookup.candidates.length > 1;
  return ambiguousGlobal ? syntheticId() : lookup.candidates[0].nodeId;
};

/**
 * Add one heritage relationship to the graph with confidence 1.0.
 * The edge ID is derived from the edge type and `source->target`, with an
 * optional suffix appended to the key.
 */
const addHeritageEdge = (
  graph: KnowledgeGraph,
  type: 'EXTENDS' | 'IMPLEMENTS',
  sourceId: string,
  targetId: string,
  reason: string,
  idSuffix = '',
): void => {
  graph.addRelationship({
    id: generateId(type, `${sourceId}->${targetId}${idSuffix}`),
    sourceId,
    targetId,
    type,
    confidence: 1.0,
    reason,
  });
};

/**
 * Slow path: run the per-language tree-sitter query over each file and emit
 * EXTENDS / IMPLEMENTS edges for every heritage capture.
 *
 * Trees are taken from `astCache` when present; otherwise the file is parsed
 * and the new tree is stored in the cache. Files whose language is unknown,
 * unavailable, or has no query string are skipped; parse and query failures
 * skip the file as well.
 *
 * @param graph - Graph receiving the relationships.
 * @param files - Files (path + content) to scan.
 * @param astCache - Shared AST cache, read and populated here.
 * @param ctx - Symbol resolution context.
 * @param onProgress - Called once per file with (1-based index, total).
 */
export const processHeritage = async (
  graph: KnowledgeGraph,
  files: { path: string; content: string }[],
  astCache: ASTCache,
  ctx: ResolutionContext,
  onProgress?: (current: number, total: number) => void,
) => {
  const parser = await loadParser();
  const skippedByLang = isVerboseIngestionEnabled() ? new Map<string, number>() : null;

  for (let fileIdx = 0; fileIdx < files.length; fileIdx += 1) {
    const file = files[fileIdx];
    onProgress?.(fileIdx + 1, files.length);
    if (fileIdx % 20 === 0) await yieldToEventLoop();

    // 1. Check language support
    const language = getLanguageFromFilename(file.path);
    if (!language) continue;
    if (!isLanguageAvailable(language)) {
      if (skippedByLang) {
        skippedByLang.set(language, (skippedByLang.get(language) ?? 0) + 1);
      }
      continue;
    }

    const queryStr = LANGUAGE_QUERIES[language];
    if (!queryStr) continue;

    // 2. Load the language into the shared parser before parsing/querying
    await loadLanguage(language, file.path);

    // 3. Get AST (cache first, re-parse on miss)
    let tree = astCache.get(file.path);
    if (!tree) {
      try {
        // Buffer size is derived from the content length
        tree = parser.parse(file.content, undefined, {
          bufferSize: getTreeSitterBufferSize(file.content.length),
        });
      } catch {
        // Skip files that can't be parsed
        continue;
      }
      // Cache re-parsed tree for potential future use
      astCache.set(file.path, tree);
    }

    let matches;
    try {
      const grammar = parser.getLanguage();
      matches = new Parser.Query(grammar, queryStr).matches(tree.rootNode);
    } catch (queryError) {
      console.warn(`Heritage query error for ${file.path}:`, queryError);
      continue;
    }

    // 4. Process heritage matches
    for (const match of matches) {
      const captures: Record<string, any> = {};
      for (const capture of match.captures) {
        captures[capture.name] = capture.node;
      }

      const classNode = captures['heritage.class'];
      const extendsNode = captures['heritage.extends'];
      const implementsNode = captures['heritage.implements'];
      const traitNode = captures['heritage.trait'];

      // EXTENDS or IMPLEMENTS: resolved via the symbol table for languages where
      // the tree-sitter query can't distinguish classes from interfaces (C#, Java)
      if (classNode && extendsNode) {
        // Go struct embedding: skip named fields (only anonymous fields are embedded)
        const owner = extendsNode.parent;
        if (owner?.type === 'field_declaration' && owner.childForFieldName('name')) {
          continue; // Named field, not struct embedding — nothing else to do for this match
        }

        const className = classNode.text;
        const parentClassName = extendsNode.text;
        const { type: relType, idPrefix } = resolveExtendsType(parentClassName, file.path, ctx, language);
        const childId = resolveHeritageId(className, file.path, ctx, 'Class', `${file.path}:${className}`);
        const parentId = resolveHeritageId(parentClassName, file.path, ctx, idPrefix);

        if (childId && parentId && childId !== parentId) {
          addHeritageEdge(graph, relType, childId, parentId, '');
        }
      }

      // IMPLEMENTS: explicit `heritage.implements` capture
      if (classNode && implementsNode) {
        const className = classNode.text;
        const interfaceName = implementsNode.text;
        const classId = resolveHeritageId(className, file.path, ctx, 'Class', `${file.path}:${className}`);
        const interfaceId = resolveHeritageId(interfaceName, file.path, ctx, 'Interface');

        if (classId && interfaceId) {
          addHeritageEdge(graph, 'IMPLEMENTS', classId, interfaceId, '');
        }
      }

      // IMPLEMENTS (Rust): impl Trait for Struct
      if (traitNode && classNode) {
        const structName = classNode.text;
        const traitName = traitNode.text;
        const structId = resolveHeritageId(structName, file.path, ctx, 'Struct', `${file.path}:${structName}`);
        const traitId = resolveHeritageId(traitName, file.path, ctx, 'Trait');

        if (structId && traitId) {
          addHeritageEdge(graph, 'IMPLEMENTS', structId, traitId, 'trait-impl');
        }
      }
    }

    // Tree is now owned by the LRU cache — no manual delete needed
  }

  if (skippedByLang && skippedByLang.size > 0) {
    for (const [lang, count] of skippedByLang.entries()) {
      console.warn(
        `[ingestion] Skipped ${count} ${lang} file(s) in heritage processing — ${lang} parser not available.`
      );
    }
  }
};

/**
 * Fast path: resolve pre-extracted heritage from workers.
 * No AST parsing — workers already extracted className + parentName + kind.
 *
 * - 'extends': edge type decided by `resolveExtendsType`; self-edges are dropped.
 * - 'implements': always an IMPLEMENTS edge to an Interface.
 * - 'trait-impl' / 'include' / 'extend' / 'prepend': IMPLEMENTS edge to a Trait,
 *   with the kind recorded as the reason and appended to the edge ID key.
 *
 * @param onProgress - Called every 500 entries with (index, total) and once at
 *                     the end with (total, total).
 */
export const processHeritageFromExtracted = async (
  graph: KnowledgeGraph,
  extractedHeritage: ExtractedHeritage[],
  ctx: ResolutionContext,
  onProgress?: (current: number, total: number) => void,
) => {
  const total = extractedHeritage.length;

  for (let pos = 0; pos < extractedHeritage.length; pos += 1) {
    if (pos % 500 === 0) {
      onProgress?.(pos, total);
      await yieldToEventLoop();
    }

    const entry = extractedHeritage[pos];
    const { className, parentName, filePath } = entry;
    const ownerKey = `${filePath}:${className}`;

    switch (entry.kind) {
      case 'extends': {
        const fileLanguage = getLanguageFromFilename(filePath);
        if (!fileLanguage) continue;

        const { type: relType, idPrefix } = resolveExtendsType(parentName, filePath, ctx, fileLanguage);
        const childId = resolveHeritageId(className, filePath, ctx, 'Class', ownerKey);
        const parentId = resolveHeritageId(parentName, filePath, ctx, idPrefix);

        if (childId && parentId && childId !== parentId) {
          addHeritageEdge(graph, relType, childId, parentId, '');
        }
        break;
      }

      case 'implements': {
        const classId = resolveHeritageId(className, filePath, ctx, 'Class', ownerKey);
        const interfaceId = resolveHeritageId(parentName, filePath, ctx, 'Interface');

        if (classId && interfaceId) {
          addHeritageEdge(graph, 'IMPLEMENTS', classId, interfaceId, '');
        }
        break;
      }

      case 'trait-impl':
      case 'include':
      case 'extend':
      case 'prepend': {
        const structId = resolveHeritageId(className, filePath, ctx, 'Struct', ownerKey);
        const traitId = resolveHeritageId(parentName, filePath, ctx, 'Trait');

        if (structId && traitId) {
          // The kind is part of the ID key, so different kinds between the
          // same pair produce distinct edges.
          addHeritageEdge(graph, 'IMPLEMENTS', structId, traitId, entry.kind, `:${entry.kind}`);
        }
        break;
      }

      default:
        break;
    }
  }

  onProgress?.(total, total);
};
from './language-config.js'; import { buildSuffixIndex, resolveImportPath, appendKotlinWildcard, KOTLIN_EXTENSIONS, resolveJvmWildcard, resolveJvmMemberImport, resolveGoPackageDir, resolveGoPackage, resolveCSharpImport, resolveCSharpNamespaceDir, resolvePhpImport, resolveRustImport, resolveRubyImport, resolvePythonImport, } from './resolvers/index.js'; import { callRouters } from './call-routing.js'; import type { ResolutionContext } from './resolution-context.js'; import type { SuffixIndex, TsconfigPaths, GoModuleConfig, CSharpProjectConfig, ComposerConfig } from './resolvers/index.js'; // Re-export resolver types for consumers export type { SuffixIndex, TsconfigPaths, GoModuleConfig, CSharpProjectConfig, ComposerConfig } from './resolvers/index.js'; const isDev = process.env.NODE_ENV === 'development'; // Type: Map<FilePath, Set<ResolvedFilePath>> // Stores all files that a given file imports from export type ImportMap = Map<string, Set<string>>; // Type: Map<FilePath, Set<PackageDirSuffix>> // Stores Go package directory suffixes imported by a file (e.g., "/internal/auth/"). // Avoids expanding every Go package import into N individual ImportMap edges. export type PackageMap = Map<string, Set<string>>; // Type: Map<ImportingFilePath, Map<LocalName, {sourcePath, exportedName}>> // Tracks which specific names a file imports from which sources (TS/Python only). // Used to tighten Tier 2a resolution: `import { User } from './models'` // means only `User` (not `Repo`) is visible from models.ts via this import. // Stores both the resolved source path and the original exported name so that // aliased imports (`import { User as U }`) can resolve U → User in the source file. export interface NamedImportBinding { sourcePath: string; exportedName: string } export type NamedImportMap = Map<string, Map<string, NamedImportBinding>>; /** * Check if a file path is directly inside a package directory identified by its suffix. 
/**
 * Check if a file path is directly inside a package directory identified by its suffix.
 * Used by the symbol resolver for Go and C# directory-level import matching.
 *
 * "Directly inside" means nothing but the file name follows the suffix: files in
 * nested sub-directories do not match. Backslashes are normalised to forward
 * slashes before comparing.
 *
 * The match is anchored on the LAST occurrence of the suffix, so a path that
 * repeats the suffix (e.g. `vendor/internal/auth/internal/auth/x.go` against
 * `/internal/auth/`) is still recognised as living in that directory.
 *
 * @param filePath - File path, with or without a leading slash.
 * @param dirSuffix - Directory suffix such as "/internal/auth/".
 * @returns true when the file sits directly in a directory ending with the suffix;
 *          always false for an empty suffix.
 */
export function isFileInPackageDir(filePath: string, dirSuffix: string): boolean {
  // An empty suffix identifies no directory; never treat it as a match.
  if (dirSuffix.length === 0) return false;

  // Prepend '/' so paths like "internal/auth/service.go" match suffix "/internal/auth/"
  const normalized = '/' + filePath.replace(/\\/g, '/');

  // If even the last occurrence is followed by another '/', every earlier one is too.
  const suffixStart = normalized.lastIndexOf(dirSuffix);
  if (suffixStart === -1) return false;

  return !normalized.includes('/', suffixStart + dirSuffix.length);
}

/** Pre-built lookup structures for import resolution. Build once, reuse across chunks. */
export interface ImportResolutionContext {
  /** Set view of `allFileList` for membership checks. */
  allFilePaths: Set<string>;
  /** File paths exactly as supplied by the caller. */
  allFileList: string[];
  /** `allFileList` with backslashes replaced by forward slashes (same order). */
  normalizedFileList: string[];
  /** Suffix index built from the normalized and original lists. */
  suffixIndex: SuffixIndex;
  /** Cache passed to the import path resolver; starts empty. */
  resolveCache: Map<string, string | null>;
}

/**
 * Build the shared lookup structures used by import resolution.
 *
 * @param allPaths - Every file path that imports may resolve to.
 * @returns A fresh context with an empty resolve cache.
 */
export function buildImportResolutionContext(allPaths: string[]): ImportResolutionContext {
  const allFileList = allPaths;
  const normalizedFileList = allFileList.map(p => p.replace(/\\/g, '/'));
  const allFilePaths = new Set(allFileList);
  const suffixIndex = buildSuffixIndex(normalizedFileList, allFileList);
  return { allFilePaths, allFileList, normalizedFileList, suffixIndex, resolveCache: new Map() };
}

// Config loaders extracted to ./language-config.ts (Phase 2 refactor)
// Resolver functions are in ./resolvers/ — imported above

// ============================================================================
// SHARED LANGUAGE DISPATCH
// ============================================================================

/** Bundled language-specific configs loaded once per ingestion run. */
interface LanguageConfigs {
  tsconfigPaths: TsconfigPaths | null;
  goModule: GoModuleConfig | null;
  composerConfig: ComposerConfig | null;
  swiftPackageConfig: SwiftPackageConfig | null;
  csharpConfigs: CSharpProjectConfig[];
}
/** Context for import path resolution (file lists, indexes, cache). */
interface ResolveCtx {
  allFilePaths: Set<string>;
  allFileList: string[];
  normalizedFileList: string[];
  index: SuffixIndex;
  resolveCache: Map<string, string | null>;
}

/**
 * Result of resolving an import via language-specific dispatch.
 * - 'files': resolved to one or more files → add to ImportMap
 * - 'package': resolved to a directory → add graph edges + store dirSuffix in PackageMap
 * - null: no resolution (external dependency, etc.)
 */
type ImportResult =
  | { kind: 'files'; files: string[] }
  | { kind: 'package'; files: string[]; dirSuffix: string }
  | null;

/**
 * Shared language dispatch for import resolution.
 * Used by both processImports and processImportsFromExtracted.
 *
 * Language-specific strategies are tried first. Some of them are terminal
 * (C# with project configs, PHP, Swift with a package config, Ruby, grouped
 * Rust imports, relative Python imports) and return null when they find
 * nothing; the others fall through to the standard single-file resolver.
 *
 * @param filePath - Path of the importing file.
 * @param rawImportPath - Import specifier as written, already stripped of quotes.
 * @param language - Language of the importing file.
 * @param configs - Per-run language configs (tsconfig paths, go.mod, ...).
 * @param ctx - File lists, suffix index and resolve cache.
 * @returns The resolved files/package, or null when nothing matched.
 */
function resolveLanguageImport(
  filePath: string,
  rawImportPath: string,
  language: SupportedLanguages,
  configs: LanguageConfigs,
  ctx: ResolveCtx,
): ImportResult {
  const { allFilePaths, allFileList, normalizedFileList, index, resolveCache } = ctx;
  const { tsconfigPaths, goModule, composerConfig, swiftPackageConfig, csharpConfigs } = configs;

  const asFiles = (files: string[]): ImportResult => ({ kind: 'files', files });
  const asSingleFile = (file: string | null | undefined): ImportResult =>
    file ? asFiles([file]) : null;

  // JVM languages (Java + Kotlin): handle wildcards and member imports
  const isKotlin = language === SupportedLanguages.Kotlin;
  if (isKotlin || language === SupportedLanguages.Java) {
    const primaryExts = isKotlin ? KOTLIN_EXTENSIONS : ['.java'];

    if (rawImportPath.endsWith('.*')) {
      const primaryHits = resolveJvmWildcard(rawImportPath, normalizedFileList, allFileList, primaryExts, index);
      if (primaryHits.length > 0) return asFiles(primaryHits);

      // Kotlin wildcard that found no Kotlin sources: retry against Java sources
      if (isKotlin) {
        const javaHits = resolveJvmWildcard(rawImportPath, normalizedFileList, allFileList, ['.java'], index);
        if (javaHits.length > 0) return asFiles(javaHits);
      }
      // Fall through to standard resolution
    } else {
      const member =
        resolveJvmMemberImport(rawImportPath, normalizedFileList, allFileList, primaryExts, index) ||
        (isKotlin
          ? resolveJvmMemberImport(rawImportPath, normalizedFileList, allFileList, ['.java'], index)
          : null);
      if (member) return asFiles([member]);
      // Fall through to standard resolution
    }
  }

  // Go: handle package-level imports
  if (language === SupportedLanguages.Go && goModule && rawImportPath.startsWith(goModule.modulePath)) {
    const goDirSuffix = resolveGoPackageDir(rawImportPath, goModule);
    if (goDirSuffix) {
      const goFiles = resolveGoPackage(rawImportPath, goModule, normalizedFileList, allFileList);
      if (goFiles.length > 0) {
        return { kind: 'package', files: goFiles, dirSuffix: goDirSuffix };
      }
    }
    // Fall through if no files found (package might be external)
  }

  // C#: handle namespace-based imports (using directives)
  if (language === SupportedLanguages.CSharp && csharpConfigs.length > 0) {
    const namespaceFiles = resolveCSharpImport(rawImportPath, csharpConfigs, normalizedFileList, allFileList, index);
    if (namespaceFiles.length === 0) return null;

    // Several files: prefer a directory-level result when the namespace maps to a directory
    if (namespaceFiles.length > 1) {
      const csDirSuffix = resolveCSharpNamespaceDir(rawImportPath, csharpConfigs);
      if (csDirSuffix) {
        return { kind: 'package', files: namespaceFiles, dirSuffix: csDirSuffix };
      }
    }
    return asFiles(namespaceFiles);
  }

  // PHP: handle namespace-based imports (use statements)
  if (language === SupportedLanguages.PHP) {
    return asSingleFile(
      resolvePhpImport(rawImportPath, composerConfig, allFilePaths, normalizedFileList, allFileList, index),
    );
  }

  // Swift: handle module imports
  if (language === SupportedLanguages.Swift && swiftPackageConfig) {
    const targetDir = swiftPackageConfig.targets.get(rawImportPath);
    if (targetDir) {
      const dirPrefix = targetDir + '/';
      const swiftFiles: string[] = [];
      normalizedFileList.forEach((normalizedPath, i) => {
        if (normalizedPath.startsWith(dirPrefix) && normalizedPath.endsWith('.swift')) {
          swiftFiles.push(allFileList[i]);
        }
      });
      if (swiftFiles.length > 0) return asFiles(swiftFiles);
    }
    return null; // External framework (Foundation, UIKit, etc.)
  }

  // Python: relative imports (PEP 328) + proximity-based bare imports
  // Falls through to standard suffix resolution when proximity finds no match.
  if (language === SupportedLanguages.Python) {
    const pyFile = resolvePythonImport(filePath, rawImportPath, allFilePaths);
    if (pyFile) return asFiles([pyFile]);
    if (rawImportPath.startsWith('.')) return null; // relative but unresolved — don't suffix-match
  }

  // Ruby: require / require_relative
  if (language === SupportedLanguages.Ruby) {
    return asSingleFile(resolveRubyImport(rawImportPath, normalizedFileList, allFileList, index));
  }

  // Rust: expand top-level grouped imports: use {crate::a, crate::b}
  if (language === SupportedLanguages.Rust && rawImportPath.startsWith('{') && rawImportPath.endsWith('}')) {
    const members = rawImportPath
      .slice(1, -1)
      .split(',')
      .map(member => member.trim())
      .filter(Boolean);

    const rustFiles: string[] = [];
    for (const member of members) {
      const target = resolveRustImport(filePath, member, allFilePaths);
      if (target) rustFiles.push(target);
    }
    return rustFiles.length > 0 ? asFiles(rustFiles) : null;
  }

  // Standard single-file resolution
  return asSingleFile(
    resolveImportPath(
      filePath,
      rawImportPath,
      allFilePaths,
      allFileList,
      normalizedFileList,
      resolveCache,
      language,
      tsconfigPaths,
      index,
    ),
  );
}

/**
 * Apply an ImportResult: emit graph edges and update ImportMap/PackageMap.
 *
 * - 'package' result with a PackageMap: one graph-only edge per file, and the
 *   directory suffix is recorded for `filePath` (no ImportMap expansion).
 * - Otherwise: one `addImportEdge` call per resolved file. The ImportMap itself
 *   is not touched here; updating it is left to the `addImportEdge` callback.
 * - If namedBindings and namedImportMap are provided and the import resolved
 *   to exactly one file, each binding is recorded in the NamedImportMap for
 *   precise Tier 2a resolution.
 *
 * A null result is a no-op.
 */
function applyImportResult(
  result: ImportResult,
  filePath: string,
  importMap: ImportMap,
  packageMap: PackageMap | undefined,
  addImportEdge: (from: string, to: string) => void,
  addImportGraphEdge: (from: string, to: string) => void,
  namedBindings?: { local: string; exported: string }[],
  namedImportMap?: NamedImportMap,
): void {
  if (!result) return;

  if (result.kind === 'package' && packageMap) {
    // Store directory suffix in PackageMap (skip ImportMap expansion)
    result.files.forEach(target => addImportGraphEdge(filePath, target));

    let suffixes = packageMap.get(filePath);
    if (!suffixes) {
      suffixes = new Set();
      packageMap.set(filePath, suffixes);
    }
    suffixes.add(result.dirSuffix);
    return;
  }

  // 'files' kind, or 'package' without PackageMap — use ImportMap directly
  const targets = result.files;
  targets.forEach(target => addImportEdge(filePath, target));

  // Named bindings are only meaningful when the import maps to a single file
  if (!namedBindings || !namedImportMap || targets.length !== 1) return;

  const [sourcePath] = targets;
  let bindingsForFile = namedImportMap.get(filePath);
  if (!bindingsForFile) {
    bindingsForFile = new Map();
    namedImportMap.set(filePath, bindingsForFile);
  }
  for (const { local, exported } of namedBindings) {
    bindingsForFile.set(local, { sourcePath, exportedName: exported });
  }
}

// ============================================================================
// MAIN IMPORT PROCESSOR
// ============================================================================
/**
 * Slow path: parse (or fetch from cache) each file, run the per-language
 * tree-sitter query, and turn every import capture into IMPORTS graph edges
 * plus ImportMap / PackageMap / NamedImportMap entries on `ctx`.
 *
 * Call captures are also passed to the language's call router so that
 * call-style imports (Ruby `require`, etc.) are resolved the same way.
 *
 * @param graph - Graph receiving the IMPORTS relationships.
 * @param files - Files (path + content) in the current chunk.
 * @param astCache - Shared AST cache; trees parsed here are stored in it and
 *                   remain owned by it, so they are never deleted manually.
 * @param ctx - Resolution context whose import maps are populated.
 * @param onProgress - Called once per file with (1-based index, total).
 * @param repoRoot - Root used to load language configs; '' when omitted.
 * @param allPaths - Full repository file list for cross-chunk resolution;
 *                   defaults to the paths of `files`.
 */
export const processImports = async (
  graph: KnowledgeGraph,
  files: { path: string; content: string }[],
  astCache: ASTCache,
  ctx: ResolutionContext,
  onProgress?: (current: number, total: number) => void,
  repoRoot?: string,
  allPaths?: string[],
) => {
  const importMap = ctx.importMap;
  const packageMap = ctx.packageMap;
  const namedImportMap = ctx.namedImportMap;

  // Use allPaths (full repo) when available for cross-chunk resolution, else fall back
  // to chunk files. The shared builder produces the path set, the normalized list,
  // the suffix index and an empty resolve cache in one place.
  const { allFilePaths, allFileList, normalizedFileList, suffixIndex: index, resolveCache } =
    buildImportResolutionContext(allPaths ?? files.map(f => f.path));

  const parser = await loadParser();
  const logSkipped = isVerboseIngestionEnabled();
  const skippedByLang = logSkipped ? new Map<string, number>() : null;

  // Track import statistics
  let totalImportsFound = 0;
  let totalImportsResolved = 0;

  // Load language-specific configs once before the file loop
  const effectiveRoot = repoRoot || '';
  const configs: LanguageConfigs = {
    tsconfigPaths: await loadTsconfigPaths(effectiveRoot),
    goModule: await loadGoModulePath(effectiveRoot),
    composerConfig: await loadComposerConfig(effectiveRoot),
    swiftPackageConfig: await loadSwiftPackageConfig(effectiveRoot),
    csharpConfigs: await loadCSharpProjectConfig(effectiveRoot),
  };
  const resolveCtx: ResolveCtx = { allFilePaths, allFileList, normalizedFileList, index, resolveCache };

  // Helper: add an IMPORTS edge to the graph only (no ImportMap update)
  const addImportGraphEdge = (filePath: string, resolvedPath: string) => {
    const sourceId = generateId('File', filePath);
    const targetId = generateId('File', resolvedPath);
    const relId = generateId('IMPORTS', `${filePath}->${resolvedPath}`);
    totalImportsResolved++;
    graph.addRelationship({
      id: relId,
      sourceId,
      targetId,
      type: 'IMPORTS',
      confidence: 1.0,
      reason: '',
    });
  };

  // Helper: add an IMPORTS edge + update import map
  const addImportEdge = (filePath: string, resolvedPath: string) => {
    addImportGraphEdge(filePath, resolvedPath);
    if (!importMap.has(filePath)) {
      importMap.set(filePath, new Set());
    }
    importMap.get(filePath)!.add(resolvedPath);
  };

  for (let i = 0; i < files.length; i++) {
    const file = files[i];
    onProgress?.(i + 1, files.length);
    if (i % 20 === 0) await yieldToEventLoop();

    // 1. Check language support first
    const language = getLanguageFromFilename(file.path);
    if (!language) continue;
    if (!isLanguageAvailable(language)) {
      if (skippedByLang) {
        skippedByLang.set(language, (skippedByLang.get(language) ?? 0) + 1);
      }
      continue;
    }

    const queryStr = LANGUAGE_QUERIES[language];
    if (!queryStr) continue;

    // 2. ALWAYS load the language before querying (parser is stateful)
    await loadLanguage(language, file.path);

    // 3. Get AST (Try Cache First)
    let tree = astCache.get(file.path);
    if (!tree) {
      try {
        tree = parser.parse(file.content, undefined, {
          bufferSize: getTreeSitterBufferSize(file.content.length),
        });
      } catch (parseError) {
        // Skip files that can't be parsed
        continue;
      }
      // Cache re-parsed tree so call/heritage phases get hits
      astCache.set(file.path, tree);
    }

    let query;
    let matches;
    try {
      const lang = parser.getLanguage();
      query = new Parser.Query(lang, queryStr);
      matches = query.matches(tree.rootNode);
    } catch (queryError: unknown) {
      if (isDev) {
        const detail =
          queryError instanceof Error && queryError.message ? queryError.message : queryError;
        console.group(`🔴 Query Error: ${file.path}`);
        console.log('Language:', language);
        console.log('Query (first 200 chars):', queryStr.substring(0, 200) + '...');
        console.log('Error:', detail);
        console.log('File content (first 300 chars):', file.content.substring(0, 300));
        console.log('AST root type:', tree.rootNode?.type);
        console.log('AST has errors:', tree.rootNode?.hasError);
        console.groupEnd();
      }
      // Do NOT delete the tree here: it is already stored in astCache (either it
      // came from there or was just added above), so disposing it manually would
      // leave a dead tree in the cache for the later call/heritage phases.
      continue;
    }

    matches.forEach(match => {
      const captureMap: Record<string, any> = {};
      match.captures.forEach(c => {
        captureMap[c.name] = c.node;
      });

      if (captureMap['import']) {
        const sourceNode = captureMap['import.source'];
        if (!sourceNode) {
          if (isDev) {
            console.log(`⚠️ Import captured but no source node in ${file.path}`);
          }
          return;
        }

        // Clean path (remove quotes and angle brackets for C/C++ includes)
        const rawImportPath = language === SupportedLanguages.Kotlin
          ? appendKotlinWildcard(sourceNode.text.replace(/['"<>]/g, ''), captureMap['import'])
          : sourceNode.text.replace(/['"<>]/g, '');
        totalImportsFound++;

        const result = resolveLanguageImport(file.path, rawImportPath, language, configs, resolveCtx);
        // Named bindings are only extracted when a NamedImportMap exists to receive them
        const bindings = namedImportMap ? extractNamedBindings(captureMap['import'], language) : undefined;
        applyImportResult(result, file.path, importMap, packageMap, addImportEdge, addImportGraphEdge, bindings, namedImportMap);
      }

      // ---- Language-specific call-as-import routing (Ruby require, etc.) ----
      if (captureMap['call']) {
        const callNameNode = captureMap['call.name'];
        if (callNameNode) {
          const callRouter = callRouters[language];
          const routed = callRouter(callNameNode.text, captureMap['call']);
          if (routed && routed.kind === 'import') {
            totalImportsFound++;
            const result = resolveLanguageImport(file.path, routed.importPath, language, configs, resolveCtx);
            applyImportResult(result, file.path, importMap, packageMap, addImportEdge, addImportGraphEdge);
          }
        }
      }
    });

    // Tree is now owned by the LRU cache — no manual delete needed
  }

  if (skippedByLang && skippedByLang.size > 0) {
    for (const [lang, count] of skippedByLang.entries()) {
      console.warn(
        `[ingestion] Skipped ${count} ${lang} file(s) in import processing — ${lang} parser not available.`
      );
    }
  }

  if (isDev) {
    console.log(`📊 Import processing complete: ${totalImportsResolved}/${totalImportsFound} imports resolved to graph edges`);
  }
};

// ============================================================================
// FAST PATH: Resolve pre-extracted imports (no parsing needed)
/**
 * Fast path: resolve imports that were already extracted, so no parsing is
 * needed here.
 *
 * Each extracted import is resolved with `resolveLanguageImport` and the
 * result is handed to `applyImportResult`, which records it through the
 * `addImportEdge` / `addImportGraphEdge` callbacks defined below.
 *
 * @param graph            Graph that receives the IMPORTS relationships.
 * @param files            Files used to build the resolution context when
 *                         `prebuiltCtx` is not supplied.
 * @param extractedImports Pre-extracted imports to resolve.
 * @param ctx              Supplies `importMap`, `packageMap` and `namedImportMap`.
 * @param onProgress       Optional callback, reported in files (not imports).
 * @param repoRoot         Root used to load language configs; defaults to ''.
 * @param prebuiltCtx      Optional already-built import resolution context.
 */
export const processImportsFromExtracted = async (
  graph: KnowledgeGraph,
  files: { path: string }[],
  extractedImports: ExtractedImport[],
  ctx: ResolutionContext,
  onProgress?: (current: number, total: number) => void,
  repoRoot?: string,
  prebuiltCtx?: ImportResolutionContext,
) => {
  const importMap = ctx.importMap;
  const packageMap = ctx.packageMap;
  const namedImportMap = ctx.namedImportMap;
  const importCtx = prebuiltCtx ?? buildImportResolutionContext(files.map(f => f.path));
  const { allFilePaths, allFileList, normalizedFileList, suffixIndex: index, resolveCache } = importCtx;

  let totalImportsFound = 0;
  let totalImportsResolved = 0;

  const effectiveRoot = repoRoot || '';

  // The five loaders read unrelated files, so run them concurrently instead
  // of paying for each filesystem round-trip in sequence.
  const [tsconfigPaths, goModule, composerConfig, swiftPackageConfig, csharpConfigs] =
    await Promise.all([
      loadTsconfigPaths(effectiveRoot),
      loadGoModulePath(effectiveRoot),
      loadComposerConfig(effectiveRoot),
      loadSwiftPackageConfig(effectiveRoot),
      loadCSharpProjectConfig(effectiveRoot),
    ]);
  const configs: LanguageConfigs = {
    tsconfigPaths,
    goModule,
    composerConfig,
    swiftPackageConfig,
    csharpConfigs,
  };

  const resolveCtx: ResolveCtx = { allFilePaths, allFileList, normalizedFileList, index, resolveCache };

  // Helper: add an IMPORTS edge to the graph only (no ImportMap update)
  const addImportGraphEdge = (filePath: string, resolvedPath: string) => {
    const sourceId = generateId('File', filePath);
    const targetId = generateId('File', resolvedPath);
    const relId = generateId('IMPORTS', `${filePath}->${resolvedPath}`);

    totalImportsResolved++;

    graph.addRelationship({
      id: relId,
      sourceId,
      targetId,
      type: 'IMPORTS',
      confidence: 1.0,
      reason: '',
    });
  };

  // Helper: add the graph edge AND record the target in the ImportMap
  const addImportEdge = (filePath: string, resolvedPath: string) => {
    addImportGraphEdge(filePath, resolvedPath);

    if (!importMap.has(filePath)) {
      importMap.set(filePath, new Set());
    }
    importMap.get(filePath)!.add(resolvedPath);
  };

  // Group by file for progress reporting (users see file count, not import count)
  const importsByFile = new Map<string, ExtractedImport[]>();
  for (const imp of extractedImports) {
    let list = importsByFile.get(imp.filePath);
    if (!list) {
      list = [];
      importsByFile.set(imp.filePath, list);
    }
    list.push(imp);
  }

  const totalFiles = importsByFile.size;
  let filesProcessed = 0;

  for (const [filePath, fileImports] of importsByFile) {
    filesProcessed++;
    // Report and yield every 100 files so long runs stay responsive
    if (filesProcessed % 100 === 0) {
      onProgress?.(filesProcessed, totalFiles);
      await yieldToEventLoop();
    }

    for (const imp of fileImports) {
      totalImportsFound++;

      const result = resolveLanguageImport(filePath, imp.rawImportPath, imp.language, configs, resolveCtx);
      applyImportResult(result, filePath, importMap, packageMap, addImportEdge, addImportGraphEdge, imp.namedBindings, namedImportMap);
    }
  }

  onProgress?.(totalFiles, totalFiles);

  if (isDev) {
    console.log(`📊 Import processing (fast path): ${totalImportsResolved}/${totalImportsFound} imports resolved to graph edges`);
  }
};
/**
 * Turn JSONC text into strict JSON: removes `//` and block comments and
 * trailing commas before `}` / `]`.
 *
 * String literals are copied verbatim, so `//` or slash-star sequences inside
 * values (URLs, glob patterns such as "@/*") are left untouched.
 */
function stripJsonComments(text: string): string {
  const out: string[] = [];
  const n = text.length;
  let i = 0;

  while (i < n) {
    const ch = text[i];
    const next = text[i + 1];

    if (ch === '"') {
      // Copy the whole string literal as one chunk, skipping escaped characters
      let j = i + 1;
      while (j < n && text[j] !== '"') {
        j += text[j] === '\\' ? 2 : 1;
      }
      out.push(text.slice(i, Math.min(j + 1, n)));
      i = j + 1;
    } else if (ch === '/' && next === '/') {
      // Line comment: drop up to (not including) the newline
      while (i < n && text[i] !== '\n') i++;
    } else if (ch === '/' && next === '*') {
      // Block comment: drop through the closing marker (or to EOF if unterminated)
      const end = text.indexOf('*/', i + 2);
      i = end === -1 ? n : end + 2;
    } else {
      if (ch === '}' || ch === ']') {
        // Remove a trailing comma that directly precedes this closer
        let k = out.length - 1;
        while (k >= 0 && out[k].trim() === '') k--;
        if (k >= 0 && out[k] === ',') out.splice(k, 1);
      }
      out.push(ch);
      i++;
    }
  }

  return out.join('');
}

/**
 * Parse tsconfig.json to extract path aliases.
 * Tries tsconfig.json, tsconfig.app.json, tsconfig.base.json in order and
 * returns the first one that yields at least one alias.
 *
 * @param repoRoot Directory the candidate files are looked up in.
 * @returns Alias map plus baseUrl, or null when no candidate provides aliases.
 */
export async function loadTsconfigPaths(repoRoot: string): Promise<TsconfigPaths | null> {
  const candidates = ['tsconfig.json', 'tsconfig.app.json', 'tsconfig.base.json'];

  for (const filename of candidates) {
    try {
      const raw = await fs.readFile(path.join(repoRoot, filename), 'utf-8');
      // tsconfig files are JSONC: comments and trailing commas must go before JSON.parse
      const tsconfig = JSON.parse(stripJsonComments(raw));
      const compilerOptions = tsconfig.compilerOptions;
      if (!compilerOptions?.paths) continue;

      const baseUrl = compilerOptions.baseUrl || '.';
      const aliases = new Map<string, string>();

      for (const [pattern, targets] of Object.entries(compilerOptions.paths)) {
        if (!Array.isArray(targets) || targets.length === 0) continue;
        // Only the first target of each pattern is used
        const target = targets[0] as string;

        // Convert glob patterns: "@/*" -> "@/", "src/*" -> "src/"
        const aliasPrefix = pattern.endsWith('/*') ? pattern.slice(0, -1) : pattern;
        const targetPrefix = target.endsWith('/*') ? target.slice(0, -1) : target;
        aliases.set(aliasPrefix, targetPrefix);
      }

      if (aliases.size > 0) {
        if (isDev) {
          console.log(`📦 Loaded ${aliases.size} path aliases from ${filename}`);
        }
        return { aliases, baseUrl };
      }
    } catch {
      // File doesn't exist or isn't valid JSON - try next
    }
  }

  return null;
}

/**
 * Parse go.mod to extract module path.
 *
 * @returns The path from the first `module <path>` line, or null when go.mod
 *          is missing/unreadable or has no such line.
 */
export async function loadGoModulePath(repoRoot: string): Promise<GoModuleConfig | null> {
  try {
    const content = await fs.readFile(path.join(repoRoot, 'go.mod'), 'utf-8');
    const match = content.match(/^module\s+(\S+)/m);
    if (match) {
      if (isDev) {
        console.log(`📦 Loaded Go module path: ${match[1]}`);
      }
      return { modulePath: match[1] };
    }
  } catch {
    // No go.mod
  }
  return null;
}

/**
 * Build the namespace -> directory map from a parsed composer.json.
 * `autoload-dev` entries override `autoload` entries with the same prefix.
 * A prefix mapped to an array of directories contributes its first directory;
 * values that are not strings are ignored.
 */
function buildPsr4Map(composer: any): Map<string, string> {
  const merged: Record<string, unknown> = {
    ...(composer.autoload?.['psr-4'] ?? {}),
    ...(composer['autoload-dev']?.['psr-4'] ?? {}),
  };

  const psr4 = new Map<string, string>();
  for (const [ns, dirs] of Object.entries(merged)) {
    const dir = Array.isArray(dirs) ? dirs[0] : dirs;
    if (typeof dir !== 'string') continue;
    const nsNorm = ns.replace(/\\+$/, '');
    const dirNorm = dir.replace(/\\/g, '/').replace(/\/+$/, '');
    psr4.set(nsNorm, dirNorm);
  }
  return psr4;
}

/**
 * Parse composer.json to extract PSR-4 autoload mappings (including autoload-dev).
 *
 * @returns The normalized mappings (trailing namespace backslashes and trailing
 *          directory slashes removed), or null when composer.json cannot be
 *          read or parsed.
 */
export async function loadComposerConfig(repoRoot: string): Promise<ComposerConfig | null> {
  try {
    const raw = await fs.readFile(path.join(repoRoot, 'composer.json'), 'utf-8');
    const psr4 = buildPsr4Map(JSON.parse(raw));

    if (isDev) {
      console.log(`📦 Loaded ${psr4.size} PSR-4 mappings from composer.json`);
    }
    return { psr4 };
  } catch {
    return null;
  }
}
/**
 * Find .csproj files under the repo and derive a root namespace for each.
 *
 * Directories are visited breadth-first starting at `repoRoot`, at most 5
 * levels deep and at most 100 directories in total; `node_modules`, `.git`,
 * `bin` and `obj` are never descended into. The namespace comes from the
 * `<RootNamespace>` element when present, otherwise from the .csproj file
 * name without its extension.
 *
 * @returns One config per readable .csproj, with `projectDir` relative to
 *          `repoRoot` using forward slashes. Unreadable directories and files
 *          are skipped.
 */
export async function loadCSharpProjectConfig(repoRoot: string): Promise<CSharpProjectConfig[]> {
  const MAX_DEPTH = 5;
  const MAX_DIRS = 100;
  const skippedDirNames = new Set(['node_modules', '.git', 'bin', 'obj']);

  const found: CSharpProjectConfig[] = [];
  // Append-only list walked with a cursor; the cursor doubles as the visited-dir count
  const pending: { dir: string; depth: number }[] = [{ dir: repoRoot, depth: 0 }];

  for (let cursor = 0; cursor < pending.length && cursor < MAX_DIRS; cursor++) {
    const { dir, depth } = pending[cursor];
    try {
      const entries = await fs.readdir(dir, { withFileTypes: true });
      for (const entry of entries) {
        if (entry.isDirectory()) {
          if (depth < MAX_DEPTH && !skippedDirNames.has(entry.name)) {
            pending.push({ dir: path.join(dir, entry.name), depth: depth + 1 });
          }
          continue;
        }
        if (!entry.isFile() || !entry.name.endsWith('.csproj')) continue;

        try {
          const content = await fs.readFile(path.join(dir, entry.name), 'utf-8');
          const nsMatch = content.match(/<RootNamespace>\s*([^<]+)\s*<\/RootNamespace>/);
          let rootNamespace: string;
          if (nsMatch) {
            rootNamespace = nsMatch[1].trim();
          } else {
            rootNamespace = entry.name.replace(/\.csproj$/, '');
          }
          const projectDir = path.relative(repoRoot, dir).replace(/\\/g, '/');
          found.push({ rootNamespace, projectDir });
          if (isDev) {
            console.log(`📦 Loaded C# project: ${entry.name} (namespace: ${rootNamespace}, dir: ${projectDir})`);
          }
        } catch {
          // Can't read .csproj
        }
      }
    } catch {
      // Can't read directory
    }
  }

  return found;
}

/**
 * Build a Swift target map by listing the sub-directories of `Sources`,
 * `Package/Sources` and `src` (in that order) under `repoRoot`.
 *
 * Swift imports are module-name based (e.g., `import SiuperModel`), so every
 * sub-directory name is treated as a target name mapped to
 * `<sourceDir>/<name>`. A name found under a later source directory replaces
 * the earlier entry.
 *
 * @returns The target map, or null when no sub-directory was found.
 */
export async function loadSwiftPackageConfig(repoRoot: string): Promise<SwiftPackageConfig | null> {
  const targets = new Map<string, string>();

  for (const sourceDir of ['Sources', 'Package/Sources', 'src']) {
    let entries;
    try {
      entries = await fs.readdir(path.join(repoRoot, sourceDir), { withFileTypes: true });
    } catch {
      // Directory doesn't exist
      continue;
    }
    entries
      .filter(entry => entry.isDirectory())
      .forEach(entry => targets.set(entry.name, sourceDir + '/' + entry.name));
  }

  if (targets.size === 0) return null;

  if (isDev) {
    console.log(`📦 Loaded ${targets.size} Swift package targets`);
  }
  return { targets };
}
/** MRO information computed for one class that has at least one parent. */
export interface MROEntry {
  /** Graph node id of the class. */
  classId: string;
  /** Name of the class node. */
  className: string;
  /** Language of the class node; selects the resolution rule. */
  language: SupportedLanguages;
  mro: string[];                       // linearized parent names
  /** Method-name collisions found among the ancestors, with their resolution. */
  ambiguities: MethodAmbiguity[];
}

/** One method name that is defined by two or more ancestors of a class. */
export interface MethodAmbiguity {
  /** The colliding method name. */
  methodName: string;
  /** Every ancestor definition of the method that was collected. */
  definedIn: Array<{ classId: string; className: string; methodId: string }>;
  resolvedTo: string | null;           // winning methodId or null if truly ambiguous
  /** Human-readable explanation of how (or why not) the collision was resolved. */
  reason: string;
}

/** Aggregate result of an MRO computation. */
export interface MROResult {
  /** One entry per processed class. */
  entries: MROEntry[];
  /** Number of OVERRIDES edges emitted. */
  overrideEdges: number;
  /** Number of collisions left unresolved (`resolvedTo === null`). */
  ambiguityCount: number;
}
/**
 * Index the graph's inheritance and method-ownership relationships.
 *
 * @returns
 *  - `parentMap`: childId → parentIds, in the order the EXTENDS/IMPLEMENTS
 *    relationships are visited
 *  - `methodMap`: classId → methodIds (from HAS_METHOD relationships)
 *  - `parentEdgeType`: childId → (parentId → relationship type of that link)
 */
function buildAdjacency(graph: KnowledgeGraph) {
  const parentMap = new Map<string, string[]>();
  const methodMap = new Map<string, string[]>();
  const parentEdgeType = new Map<string, Map<string, 'EXTENDS' | 'IMPLEMENTS'>>();

  // Append `value` to the list stored under `key`, creating the list on first use
  const appendTo = (index: Map<string, string[]>, key: string, value: string): void => {
    const bucket = index.get(key);
    if (bucket) {
      bucket.push(value);
    } else {
      index.set(key, [value]);
    }
  };

  graph.forEachRelationship((rel) => {
    switch (rel.type) {
      case 'EXTENDS':
      case 'IMPLEMENTS': {
        appendTo(parentMap, rel.sourceId, rel.targetId);
        const linkKinds = parentEdgeType.get(rel.sourceId) ?? new Map<string, 'EXTENDS' | 'IMPLEMENTS'>();
        linkKinds.set(rel.targetId, rel.type);
        parentEdgeType.set(rel.sourceId, linkKinds);
        break;
      }
      case 'HAS_METHOD':
        appendTo(methodMap, rel.sourceId, rel.targetId);
        break;
    }
  });

  return { parentMap, methodMap, parentEdgeType };
}

/**
 * List every ancestor of `classId` in breadth-first order.
 *
 * Direct parents come first (in `parentMap` order), then their parents, and
 * so on. Each ancestor appears once, at its first visit. The starting class
 * is not seeded into the result.
 */
function gatherAncestors(
  classId: string,
  parentMap: Map<string, string[]>,
): string[] {
  const seen = new Set<string>();
  const linear: string[] = [];
  const frontier: string[] = [...(parentMap.get(classId) ?? [])];

  // Walk the growing frontier with a cursor instead of dequeuing
  for (let cursor = 0; cursor < frontier.length; cursor++) {
    const current = frontier[cursor];
    if (seen.has(current)) continue;
    seen.add(current);
    linear.push(current);

    for (const grandparent of parentMap.get(current) ?? []) {
      if (!seen.has(grandparent)) frontier.push(grandparent);
    }
  }

  return linear;
}
/**
 * C3 linearization of the ancestors of `classId`.
 *
 * @param classId    Class to linearize.
 * @param parentMap  childId → ordered direct parent ids.
 * @param cache      Memo of finished results (including failures as null);
 *                   read and written by this function.
 * @param inProgress Classes currently on the recursion stack, used to detect
 *                   cycles. Callers normally omit it.
 * @returns Ancestor ids in C3 order (the class itself excluded), or null when
 *          the hierarchy is cyclic or has no consistent linearization.
 */
function c3Linearize(
  classId: string,
  parentMap: Map<string, string[]>,
  cache: Map<string, string[] | null>,
  inProgress?: Set<string>,
): string[] | null {
  if (cache.has(classId)) return cache.get(classId)!;

  const active = inProgress ?? new Set<string>();
  if (active.has(classId)) {
    // Re-entered a class that is still being computed → cyclic hierarchy
    cache.set(classId, null);
    return null;
  }
  active.add(classId);

  // Leave the recursion stack, memoize, and hand back the outcome
  const settle = (outcome: string[] | null): string[] | null => {
    active.delete(classId);
    cache.set(classId, outcome);
    return outcome;
  };

  const directParents = parentMap.get(classId);
  if (!directParents || directParents.length === 0) return settle([]);

  // One sequence per parent ([parent, ...its linearization]) plus the parent list itself
  const sequences: string[][] = [];
  for (const parentId of directParents) {
    const parentOrder = c3Linearize(parentId, parentMap, cache, active);
    if (parentOrder === null) return settle(null);
    sequences.push([parentId, ...parentOrder]);
  }
  sequences.push([...directParents]);

  const merged: string[] = [];
  while (sequences.some(seq => seq.length > 0)) {
    // A valid head is the first element of some sequence that occurs in no sequence's tail
    const source = sequences.find(
      seq => seq.length > 0 && !sequences.some(other => other.includes(seq[0], 1)),
    );
    if (!source) return settle(null); // Inconsistent hierarchy

    const head = source[0];
    merged.push(head);
    for (const seq of sequences) {
      if (seq[0] === head) seq.shift();
    }
  }

  return settle(merged);
}
/** One ancestor definition of a method. */
type MethodDef = { classId: string; className: string; methodId: string };
/** Outcome of resolving a collision: the winning method id (or null) and why. */
type Resolution = { resolvedTo: string | null; reason: string };

/**
 * Pick the definition owned by the earliest class in `mroOrder`.
 * When none of the defining classes appears in `mroOrder`, the first entry of
 * `defs` is used and the reason is marked as a fallback.
 */
function resolveByMroOrder(
  methodName: string,
  defs: MethodDef[],
  mroOrder: string[],
  reasonPrefix: string,
): Resolution {
  // First definition seen for each class
  const firstDefByClass = new Map<string, MethodDef>();
  for (const def of defs) {
    if (!firstDefByClass.has(def.classId)) firstDefByClass.set(def.classId, def);
  }

  for (const ancestorId of mroOrder) {
    const winner = firstDefByClass.get(ancestorId);
    if (winner) {
      return {
        resolvedTo: winner.methodId,
        reason: `${reasonPrefix}: ${winner.className}::${methodName}`,
      };
    }
  }

  return { resolvedTo: defs[0].methodId, reason: `${reasonPrefix} fallback: first definition` };
}

/**
 * Resolution rule used for C#, Java and Kotlin.
 *
 * A definition counts as an interface definition only when its class is
 * mapped to 'IMPLEMENTS' in `parentEdgeTypes`; everything else counts as a
 * class definition. The first class definition wins; otherwise a single
 * interface definition wins, and two or more are reported as ambiguous.
 */
function resolveCsharpJava(
  methodName: string,
  defs: MethodDef[],
  parentEdgeTypes: Map<string, 'EXTENDS' | 'IMPLEMENTS'> | undefined,
): Resolution {
  const cameViaInterface = (def: MethodDef): boolean =>
    parentEdgeTypes?.get(def.classId) === 'IMPLEMENTS';

  const interfaceDefs = defs.filter(cameViaInterface);
  const classWinner = defs.find(def => !cameViaInterface(def));

  if (classWinner) {
    return {
      resolvedTo: classWinner.methodId,
      reason: `class method wins: ${classWinner.className}::${methodName}`,
    };
  }

  switch (interfaceDefs.length) {
    case 0:
      return { resolvedTo: null, reason: 'no resolution found' };
    case 1:
      return {
        resolvedTo: interfaceDefs[0].methodId,
        reason: `single interface default: ${interfaceDefs[0].className}::${methodName}`,
      };
    default:
      return {
        resolvedTo: null,
        reason: `ambiguous: ${methodName} defined in multiple interfaces: ${interfaceDefs.map(d => d.className).join(', ')}`,
      };
  }
}
/**
 * Compute method resolution order information for every class in the graph
 * that has at least one parent and a known language.
 *
 * For each such class this linearizes its ancestors (C3 for Python, falling
 * back to breadth-first order when C3 fails; breadth-first for every other
 * language), groups the ancestors' methods by name, and resolves every name
 * defined by two or more ancestors with the language-specific rule. A
 * resolved collision adds an OVERRIDES relationship from the class to the
 * winning method; an unresolved one is counted in `ambiguityCount`.
 * Collisions on a name the class defines itself are skipped.
 *
 * @param graph Graph to read from; OVERRIDES relationships are added to it.
 * @returns One entry per processed class plus edge and ambiguity counts.
 */
export function computeMRO(graph: KnowledgeGraph): MROResult {
  const { parentMap, methodMap, parentEdgeType } = buildAdjacency(graph);
  const c3Cache = new Map<string, string[] | null>();

  const entries: MROEntry[] = [];
  let overrideEdges = 0;
  let ambiguityCount = 0;

  // Process every class that has at least one parent
  for (const [classId, directParents] of parentMap) {
    if (directParents.length === 0) continue;

    const classNode = graph.getNode(classId);
    if (!classNode) continue;

    const language = classNode.properties.language;
    if (!language) continue;
    const className = classNode.properties.name;

    // Compute linearized MRO depending on language
    let mroOrder: string[];
    if (language === SupportedLanguages.Python) {
      const c3Result = c3Linearize(classId, parentMap, c3Cache);
      mroOrder = c3Result ?? gatherAncestors(classId, parentMap);
    } else {
      mroOrder = gatherAncestors(classId, parentMap);
    }

    // Get the parent names for the MRO entry
    const mroNames: string[] = mroOrder
      .map(id => graph.getNode(id)?.properties.name)
      .filter((n): n is string => n !== undefined);

    // Collect methods from all ancestors, grouped by method name
    const methodsByName = new Map<string, MethodDef[]>();
    for (const ancestorId of mroOrder) {
      const ancestorNode = graph.getNode(ancestorId);
      if (!ancestorNode) continue;

      const methods = methodMap.get(ancestorId) ?? [];
      for (const methodId of methods) {
        const methodNode = graph.getNode(methodId);
        if (!methodNode) continue;
        // Properties don't participate in method resolution order
        if (methodNode.label === 'Property') continue;

        const methodName = methodNode.properties.name;
        let defs = methodsByName.get(methodName);
        if (!defs) {
          defs = [];
          methodsByName.set(methodName, defs);
        }
        // Avoid duplicates (same method seen via multiple paths)
        if (!defs.some(d => d.methodId === methodId)) {
          defs.push({
            classId: ancestorId,
            className: ancestorNode.properties.name,
            methodId,
          });
        }
      }
    }

    // Detect collisions: methods defined in 2+ different ancestors
    const ambiguities: MethodAmbiguity[] = [];

    // Compute transitive edge types once per class (only needed for C#/Java)
    const needsEdgeTypes =
      language === SupportedLanguages.CSharp ||
      language === SupportedLanguages.Java ||
      language === SupportedLanguages.Kotlin;
    const classEdgeTypes = needsEdgeTypes
      ? buildTransitiveEdgeTypes(classId, parentMap, parentEdgeType)
      : undefined;

    // Names of the class's own methods, built on the first collision and then
    // reused, instead of re-reading every own method node per colliding name.
    let ownMethodNames: Set<string | undefined> | undefined;
    const definesOwn = (methodName: string): boolean => {
      if (!ownMethodNames) {
        ownMethodNames = new Set(
          (methodMap.get(classId) ?? []).map(mid => graph.getNode(mid)?.properties.name),
        );
      }
      return ownMethodNames.has(methodName);
    };

    for (const [methodName, defs] of methodsByName) {
      if (defs.length < 2) continue;

      // Own method shadows inherited — no ambiguity
      if (definesOwn(methodName)) continue;

      let resolution: Resolution;

      switch (language) {
        case SupportedLanguages.CPlusPlus:
          resolution = resolveByMroOrder(methodName, defs, mroOrder, 'C++ leftmost base');
          break;
        case SupportedLanguages.CSharp:
        case SupportedLanguages.Java:
        case SupportedLanguages.Kotlin:
          resolution = resolveCsharpJava(methodName, defs, classEdgeTypes);
          break;
        case SupportedLanguages.Python:
          resolution = resolveByMroOrder(methodName, defs, mroOrder, 'Python C3 MRO');
          break;
        case SupportedLanguages.Rust:
          resolution = {
            resolvedTo: null,
            reason: `Rust requires qualified syntax: <Type as Trait>::${methodName}()`,
          };
          break;
        default:
          resolution = resolveByMroOrder(methodName, defs, mroOrder, 'first definition');
          break;
      }

      const ambiguity: MethodAmbiguity = {
        methodName,
        definedIn: defs,
        resolvedTo: resolution.resolvedTo,
        reason: resolution.reason,
      };
      ambiguities.push(ambiguity);

      if (resolution.resolvedTo === null) {
        ambiguityCount++;
      }

      // Emit OVERRIDES edge if resolution found
      if (resolution.resolvedTo !== null) {
        graph.addRelationship({
          id: generateId('OVERRIDES', `${classId}->${resolution.resolvedTo}`),
          sourceId: classId,
          targetId: resolution.resolvedTo,
          type: 'OVERRIDES',
          confidence: 1.0,
          reason: resolution.reason,
        });
        overrideEdges++;
      }
    }

    entries.push({
      classId,
      className,
      language,
      mro: mroNames,
      ambiguities,
    });
  }

  return { entries, overrideEdges, ambiguityCount };
}
/**
 * Classify every ancestor of `classId` as reached via EXTENDS or IMPLEMENTS.
 *
 * Direct parents take the type of their own link (EXTENDS when the link type
 * is unknown). Every further ancestor inherits the type of the ancestor it
 * was first reached from in breadth-first order, so the first path to reach
 * an ancestor decides its classification.
 *
 * @returns ancestorId → edge type; empty when `classId` has no recorded
 *          parent edge types.
 */
function buildTransitiveEdgeTypes(
  classId: string,
  parentMap: Map<string, string[]>,
  parentEdgeType: Map<string, Map<string, 'EXTENDS' | 'IMPLEMENTS'>>,
): Map<string, 'EXTENDS' | 'IMPLEMENTS'> {
  const classified = new Map<string, 'EXTENDS' | 'IMPLEMENTS'>();

  const directKinds = parentEdgeType.get(classId);
  if (!directKinds) return classified;

  const frontier: Array<[string, 'EXTENDS' | 'IMPLEMENTS']> = [];
  // Record an ancestor the first time it is reached and schedule its parents
  const reach = (ancestorId: string, kind: 'EXTENDS' | 'IMPLEMENTS'): void => {
    if (classified.has(ancestorId)) return;
    classified.set(ancestorId, kind);
    frontier.push([ancestorId, kind]);
  };

  for (const parentId of parentMap.get(classId) ?? []) {
    reach(parentId, directKinds.get(parentId) ?? 'EXTENDS');
  }

  for (let cursor = 0; cursor < frontier.length; cursor++) {
    const [ancestorId, inheritedKind] = frontier[cursor];
    for (const grandparentId of parentMap.get(ancestorId) ?? []) {
      reach(grandparentId, inheritedKind);
    }
  }

  return classified;
}
/**
 * Follow a named-binding re-export chain through `namedImportMap` until a
 * file that actually defines the symbol is found.
 *
 * Starting at (`currentFilePath`, `name`), each hop looks up the binding for
 * the current name in the current file and checks whether the binding's
 * source file defines the exported name. If not, the walk continues from that
 * source file with the exported name.
 *
 * @param allDefs Complete `symbolTable.lookupFuzzy(name)` result; reused on
 *                the first hop when the binding is not aliased.
 * @returns The definitions located in the resolving source file, or null when
 *          a binding is missing, the chain loops, or 5 hops are exhausted.
 */
export function walkBindingChain(
  name: string,
  currentFilePath: string,
  symbolTable: SymbolTable,
  namedImportMap: NamedImportMap,
  allDefs: SymbolDefinition[],
): SymbolDefinition[] | null {
  const MAX_HOPS = 5;
  const seen = new Set<string>();
  let file = currentFilePath;
  let symbol = name;
  let hop = 0;

  while (hop < MAX_HOPS) {
    const binding = namedImportMap.get(file)?.get(symbol);
    if (!binding) return null;

    const { sourcePath, exportedName } = binding;
    const visitKey = `${sourcePath}:${exportedName}`;
    if (seen.has(visitKey)) return null; // circular
    seen.add(visitKey);

    // The caller's pre-computed lookup is only valid for the original, un-aliased name
    const canReuseCallerDefs = hop === 0 && exportedName === symbol;
    const candidates = canReuseCallerDefs ? allDefs : symbolTable.lookupFuzzy(exportedName);
    const definedInSource = candidates.filter(def => def.filePath === sourcePath);
    if (definedInSource.length > 0) return definedInSource;

    // No definition in source file → follow re-export chain
    file = sourcePath;
    symbol = exportedName;
    hop++;
  }

  return null;
}
/**
 * Extract named bindings from an import AST node.
 * Returns undefined if the import is not a named import (e.g., import * or default)
 * or if the language has no named-binding extractor.
 *
 * TS: import { User, Repo as R } from './models'
 *   → [{local:'User', exported:'User'}, {local:'R', exported:'Repo'}]
 *
 * Python: from models import User, Repo as R
 *   → [{local:'User', exported:'User'}, {local:'R', exported:'Repo'}]
 */
export function extractNamedBindings(
  importNode: any,
  language: SupportedLanguages,
): { local: string; exported: string }[] | undefined {
  switch (language) {
    case SupportedLanguages.TypeScript:
    case SupportedLanguages.JavaScript:
      return extractTsNamedBindings(importNode);
    case SupportedLanguages.Python:
      return extractPythonNamedBindings(importNode);
    case SupportedLanguages.Kotlin:
      return extractKotlinNamedBindings(importNode);
    case SupportedLanguages.Rust:
      return extractRustNamedBindings(importNode);
    case SupportedLanguages.PHP:
      return extractPhpNamedBindings(importNode);
    case SupportedLanguages.CSharp:
      return extractCsharpNamedBindings(importNode);
    case SupportedLanguages.Java:
      return extractJavaNamedBindings(importNode);
    default:
      return undefined;
  }
}

/**
 * Collect bindings from the `specifierType` children of `container`.
 *
 * A specifier with one identifier child binds that name to itself; with two
 * (`Foo as Bar`) the first is the exported name and the second the local
 * name. Specifiers with any other identifier count are ignored.
 */
function collectSpecifierBindings(
  container: any,
  specifierType: string,
): { local: string; exported: string }[] {
  const bindings: { local: string; exported: string }[] = [];

  for (let i = 0; i < container.namedChildCount; i++) {
    const specifier = container.namedChild(i);
    if (specifier?.type !== specifierType) continue;

    const identifiers: string[] = [];
    for (let j = 0; j < specifier.namedChildCount; j++) {
      const child = specifier.namedChild(j);
      if (child?.type === 'identifier') identifiers.push(child.text);
    }

    if (identifiers.length === 1) {
      bindings.push({ local: identifiers[0], exported: identifiers[0] });
    } else if (identifiers.length === 2) {
      // { Foo as Bar } → exported='Foo', local='Bar'
      bindings.push({ local: identifiers[1], exported: identifiers[0] });
    }
  }

  return bindings;
}

/**
 * Extract named bindings from a TS/JS import or re-export node.
 *
 * Import:    import_statement > import_clause > named_imports > import_specifier*
 * Re-export: export_statement > export_clause > export_specifier*
 *   e.g. `export { Repo as Repository } from './models'` — a file importing
 *   `Repository` gets `Repo` from the source.
 *
 * @returns The bindings, or undefined for default/namespace/side-effect
 *          imports and when no binding was found.
 */
export function extractTsNamedBindings(importNode: any): { local: string; exported: string }[] | undefined {
  const importClause = findChild(importNode, 'import_clause');
  if (importClause) {
    const namedImports = findChild(importClause, 'named_imports');
    if (!namedImports) return undefined; // default import, namespace import, or side-effect

    const bindings = collectSpecifierBindings(namedImports, 'import_specifier');
    return bindings.length > 0 ? bindings : undefined;
  }

  const exportClause = findChild(importNode, 'export_clause');
  if (exportClause) {
    const bindings = collectSpecifierBindings(exportClause, 'export_specifier');
    return bindings.length > 0 ? bindings : undefined;
  }

  return undefined;
}
/**
 * Extract named bindings from a Python `from x import ...` node.
 *
 * `from x import User`      → {local:'User', exported:'User'}
 * `from x import Repo as R` → {local:'R', exported:'Repo'}
 *
 * The dotted_name that starts at the same offset as the `module_name` field
 * is the source module and is skipped.
 *
 * @returns The bindings, or undefined for any other node type or when no
 *          binding was found.
 */
export function extractPythonNamedBindings(importNode: any): { local: string; exported: string }[] | undefined {
  // Only from import_from_statement, not plain import_statement
  if (importNode.type !== 'import_from_statement') return undefined;

  const moduleNode = importNode.childForFieldName?.('module_name');
  const collected: { local: string; exported: string }[] = [];

  for (let idx = 0; idx < importNode.namedChildCount; idx++) {
    const part = importNode.namedChild(idx);
    if (!part) continue;

    switch (part.type) {
      case 'dotted_name': {
        const isSourceModule = moduleNode && part.startIndex === moduleNode.startIndex;
        if (isSourceModule) break;
        const importedName = part.text;
        if (importedName) collected.push({ local: importedName, exported: importedName });
        break;
      }
      case 'aliased_import': {
        const original = findChild(part, 'dotted_name');
        const alias = findChild(part, 'identifier');
        if (original && alias) {
          collected.push({ local: alias.text, exported: original.text });
        }
        break;
      }
    }
  }

  return collected.length === 0 ? undefined : collected;
}

/**
 * Extract the single named binding of a Kotlin `import_header`.
 *
 * `import com.example.User as U` → {local:'U', exported:'User'}
 * `import com.example.User`      → {local:'User', exported:'User'}
 *
 * Non-aliased wildcard imports are skipped, and so are non-aliased imports
 * whose last segment does not start with an uppercase character: those are
 * member/function imports (e.g. `import util.OneArg.writeAudit`), and several
 * of them with the same function name would collide in NamedImportMap and
 * break arity-based disambiguation.
 */
export function extractKotlinNamedBindings(importNode: any): { local: string; exported: string }[] | undefined {
  if (importNode.type !== 'import_header') return undefined;

  const pathNode = findChild(importNode, 'identifier');
  if (!pathNode) return undefined;

  const qualified = pathNode.text;
  const exportedName = qualified.includes('.') ? qualified.split('.').pop()! : qualified;

  const aliasNode = findChild(importNode, 'import_alias');
  if (aliasNode) {
    const aliasName = findChild(aliasNode, 'simple_identifier');
    return aliasName ? [{ local: aliasName.text, exported: exportedName }] : undefined;
  }

  const isWildcard = qualified.endsWith('.*') || qualified.endsWith('*');
  if (isWildcard) return undefined;

  const firstChar = exportedName[0];
  const looksLikeMember = Boolean(firstChar) && firstChar === firstChar.toLowerCase();
  if (looksLikeMember) return undefined;

  return [{ local: exportedName, exported: exportedName }];
}
/**
 * Extract named bindings from a Rust `use_declaration`.
 *
 * @returns Every binding found anywhere below the node, or undefined for
 *          other node types and when nothing was found.
 */
export function extractRustNamedBindings(importNode: any): { local: string; exported: string }[] | undefined {
  // use_declaration may contain use_as_clause at any depth
  if (importNode.type !== 'use_declaration') return undefined;

  const found: { local: string; exported: string }[] = [];
  collectRustBindings(importNode, found);
  return found.length === 0 ? undefined : found;
}

/**
 * Recursively append the bindings found under `node` to `bindings`.
 *
 * - `use_as_clause`: first name is the exported symbol, second the local alias
 *   (a scoped path contributes its `name` field).
 * - `identifier` directly inside a `use_list`: binds to itself.
 * - `scoped_identifier` directly inside a `scoped_use_list`: a path prefix
 *   such as `crate::models` — not a symbol, ignored.
 * - any other `scoped_identifier` without nested use constructs: its `name`
 *   field binds to itself (`use crate::models::User;`).
 * - everything else: recurse into the named children.
 */
function collectRustBindings(node: any, bindings: { local: string; exported: string }[]): void {
  const kids: any[] = [];
  for (let i = 0; i < node.namedChildCount; i++) {
    const kid = node.namedChild(i);
    if (kid) kids.push(kid);
  }

  switch (node.type) {
    case 'use_as_clause': {
      const names: string[] = [];
      for (const kid of kids) {
        if (kid.type === 'identifier') {
          names.push(kid.text);
        } else if (kid.type === 'scoped_identifier') {
          // For scoped_identifier, extract the last segment
          const lastSegment = kid.childForFieldName?.('name');
          if (lastSegment) names.push(lastSegment.text);
        }
      }
      if (names.length === 2) {
        const [exported, local] = names;
        bindings.push({ local, exported });
      }
      return;
    }

    case 'identifier':
      if (node.parent?.type === 'use_list') {
        bindings.push({ local: node.text, exported: node.text });
        return;
      }
      break;

    case 'scoped_identifier': {
      // path prefix — the use_list sibling handles the actual symbols
      if (node.parent?.type === 'scoped_use_list') return;

      const nestedTypes = ['use_list', 'use_as_clause', 'scoped_use_list'];
      const wrapsNestedUse = kids.some(kid => nestedTypes.includes(kid.type));
      if (!wrapsNestedUse) {
        const leaf = node.childForFieldName?.('name');
        if (leaf) bindings.push({ local: leaf.text, exported: leaf.text });
        return;
      }
      break;
    }
  }

  // Recurse into children
  for (const kid of kids) collectRustBindings(kid, bindings);
}
extractPhpNamedBindings(importNode: any): { local: string; exported: string }[] | undefined { // namespace_use_declaration > namespace_use_clause* (flat) // namespace_use_declaration > namespace_use_group > namespace_use_clause* (grouped) if (importNode.type !== 'namespace_use_declaration') return undefined; const bindings: { local: string; exported: string }[] = []; // Collect all clauses — from direct children AND from namespace_use_group const clauses: any[] = []; for (let i = 0; i < importNode.namedChildCount; i++) { const child = importNode.namedChild(i); if (child?.type === 'namespace_use_clause') { clauses.push(child); } else if (child?.type === 'namespace_use_group') { for (let j = 0; j < child.namedChildCount; j++) { const groupChild = child.namedChild(j); if (groupChild?.type === 'namespace_use_clause') clauses.push(groupChild); } } } for (const clause of clauses) { // Flat imports: qualified_name + name (alias) let qualifiedName: any = null; const names: any[] = []; for (let j = 0; j < clause.namedChildCount; j++) { const child = clause.namedChild(j); if (child?.type === 'qualified_name') qualifiedName = child; else if (child?.type === 'name') names.push(child); } if (qualifiedName && names.length > 0) { // Flat aliased import: use App\Models\Repo as R; const fullText = qualifiedName.text; const exportedName = fullText.includes('\\') ? fullText.split('\\').pop()! : fullText; bindings.push({ local: names[0].text, exported: exportedName }); } else if (qualifiedName && names.length === 0) { // Flat non-aliased import: use App\Models\User; const fullText = qualifiedName.text; const lastSegment = fullText.includes('\\') ? fullText.split('\\').pop()! 
: fullText; bindings.push({ local: lastSegment, exported: lastSegment }); } else if (!qualifiedName && names.length >= 2) { // Grouped aliased import: {Repo as R} — first name = exported, second = alias bindings.push({ local: names[1].text, exported: names[0].text }); } else if (!qualifiedName && names.length === 1) { // Grouped non-aliased import: {User} in use App\Models\{User, Repo as R} bindings.push({ local: names[0].text, exported: names[0].text }); } } return bindings.length > 0 ? bindings : undefined; } export function extractCsharpNamedBindings(importNode: any): { local: string; exported: string }[] | undefined { // using_directive with identifier (alias) + qualified_name (target) if (importNode.type !== 'using_directive') return undefined; let aliasIdent: any = null; let qualifiedName: any = null; for (let i = 0; i < importNode.namedChildCount; i++) { const child = importNode.namedChild(i); if (child?.type === 'identifier' && !aliasIdent) aliasIdent = child; else if (child?.type === 'qualified_name') qualifiedName = child; } if (!aliasIdent || !qualifiedName) return undefined; const fullText = qualifiedName.text; const exportedName = fullText.includes('.') ? fullText.split('.').pop()! 
: fullText; return [{ local: aliasIdent.text, exported: exportedName }]; } export function extractJavaNamedBindings(importNode: any): { local: string; exported: string }[] | undefined { // import_declaration > scoped_identifier "com.example.models.User" // Wildcard imports (.*) don't produce named bindings if (importNode.type !== 'import_declaration') return undefined; // Check for asterisk (wildcard import) — skip those for (let i = 0; i < importNode.childCount; i++) { const child = importNode.child(i); if (child?.type === 'asterisk') return undefined; } const scopedId = findChild(importNode, 'scoped_identifier'); if (!scopedId) return undefined; const fullText = scopedId.text; const lastDot = fullText.lastIndexOf('.'); if (lastDot === -1) return undefined; const className = fullText.slice(lastDot + 1); // Skip lowercase names — those are package imports, not class imports if (className[0] && className[0] === className[0].toLowerCase()) return undefined; return [{ local: className, exported: className }]; } function findChild(node: any, type: string): any { for (let i = 0; i < node.namedChildCount; i++) { const child = node.namedChild(i); if (child?.type === type) return child; } return null; } ================================================ FILE: gitnexus/src/core/ingestion/parsing-processor.ts ================================================ import { KnowledgeGraph, GraphNode, GraphRelationship, type NodeLabel } from '../graph/types.js'; import Parser from 'tree-sitter'; import { loadParser, loadLanguage, isLanguageAvailable } from '../tree-sitter/parser-loader.js'; import { LANGUAGE_QUERIES } from './tree-sitter-queries.js'; import { generateId } from '../../lib/utils.js'; import { SymbolTable } from './symbol-table.js'; import { ASTCache } from './ast-cache.js'; import { getLanguageFromFilename, yieldToEventLoop, getDefinitionNodeFromCaptures, findEnclosingClassId, extractMethodSignature } from './utils.js'; import { extractPropertyDeclaredType } from 
'./type-extractors/shared.js';
import { isNodeExported } from './export-detection.js';
import { detectFrameworkFromAST } from './framework-detection.js';
import { typeConfigs } from './type-extractors/index.js';
import { SupportedLanguages } from '../../config/supported-languages.js';
import { WorkerPool } from './workers/worker-pool.js';
import type { ParseWorkerResult, ParseWorkerInput, ExtractedImport, ExtractedCall, ExtractedAssignment, ExtractedHeritage, ExtractedRoute, FileConstructorBindings } from './workers/parse-worker.js';
import { getTreeSitterBufferSize, TREE_SITTER_MAX_BUFFER } from './constants.js';

/** Progress callback: (files handled so far, total files, current file path or status text). */
export type FileProgressCallback = (current: number, total: number, filePath: string) => void;

/** Records extracted by parse workers, aggregated across all worker results. */
export interface WorkerExtractedData {
  imports: ExtractedImport[];
  calls: ExtractedCall[];
  assignments: ExtractedAssignment[];
  heritage: ExtractedHeritage[];
  routes: ExtractedRoute[];
  constructorBindings: FileConstructorBindings[];
}

// isNodeExported imported from ./export-detection.js (shared module)
// Re-export for backward compatibility with any external consumers
export { isNodeExported } from './export-detection.js';

// ============================================================================
// Worker-based parallel parsing
// ============================================================================

/**
 * Parses `files` through the worker pool and merges every worker result into
 * `graph` and `symbolTable`.
 *
 * @param graph - receives the nodes and relationships produced by the workers.
 * @param files - path/content pairs; files without a recognised language are dropped.
 * @param symbolTable - receives one entry per symbol reported by the workers.
 * @param astCache - not used on this path; kept so both parsing strategies share a signature.
 * @param workerPool - pool used to dispatch the parse jobs.
 * @param onFileProgress - optional progress sink; `total` is `files.length`.
 * @returns The imports, calls, assignments, heritage, routes and constructor
 *   bindings extracted by the workers, concatenated in result order.
 */
const processParsingWithWorkers = async (
  graph: KnowledgeGraph,
  files: { path: string; content: string }[],
  symbolTable: SymbolTable,
  astCache: ASTCache,
  workerPool: WorkerPool,
  onFileProgress?: FileProgressCallback,
): Promise<WorkerExtractedData> => {
  // Filter to parseable files only
  const parseableFiles: ParseWorkerInput[] = [];
  for (const file of files) {
    const lang = getLanguageFromFilename(file.path);
    if (lang) parseableFiles.push({ path: file.path, content: file.content });
  }

  if (parseableFiles.length === 0) return { imports: [], calls: [], assignments: [], heritage: [], routes: [], constructorBindings: [] };

  const total = files.length;

  // Dispatch to worker pool — pool handles splitting into chunks and sub-batching
  const chunkResults = await workerPool.dispatch<ParseWorkerInput, ParseWorkerResult>(
    parseableFiles,
    (filesProcessed) => {
      onFileProgress?.(Math.min(filesProcessed, total), total, 'Parsing...');
    },
  );

  // Element-wise append. `target.push(...items)` passes every element as a
  // call argument and can throw RangeError once a worker result holds more
  // entries than the engine's argument limit, which would discard the worker
  // output and force the sequential fallback.
  const appendAll = <T>(target: T[], items: readonly T[]): void => {
    for (const item of items) target.push(item);
  };

  // Merge results from all workers into graph and symbol table
  const allImports: ExtractedImport[] = [];
  const allCalls: ExtractedCall[] = [];
  const allAssignments: ExtractedAssignment[] = [];
  const allHeritage: ExtractedHeritage[] = [];
  const allRoutes: ExtractedRoute[] = [];
  const allConstructorBindings: FileConstructorBindings[] = [];

  for (const result of chunkResults) {
    for (const node of result.nodes) {
      graph.addNode({
        id: node.id,
        label: node.label as any,
        properties: node.properties,
      });
    }

    for (const rel of result.relationships) {
      graph.addRelationship(rel);
    }

    for (const sym of result.symbols) {
      symbolTable.add(sym.filePath, sym.name, sym.nodeId, sym.type, {
        parameterCount: sym.parameterCount,
        requiredParameterCount: sym.requiredParameterCount,
        parameterTypes: sym.parameterTypes,
        returnType: sym.returnType,
        declaredType: sym.declaredType,
        ownerId: sym.ownerId,
      });
    }

    appendAll(allImports, result.imports);
    appendAll(allCalls, result.calls);
    appendAll(allAssignments, result.assignments);
    appendAll(allHeritage, result.heritage);
    appendAll(allRoutes, result.routes);
    appendAll(allConstructorBindings, result.constructorBindings);
  }

  // Merge and log skipped languages from workers
  const skippedLanguages = new Map<string, number>();
  for (const result of chunkResults) {
    for (const [lang, count] of Object.entries(result.skippedLanguages)) {
      skippedLanguages.set(lang, (skippedLanguages.get(lang) || 0) + count);
    }
  }
  if (skippedLanguages.size > 0) {
    const summary = Array.from(skippedLanguages.entries())
      .map(([lang, count]) => `${lang}: ${count}`)
      .join(', ');
    console.warn(` Skipped unsupported languages: ${summary}`);
  }

  // Final progress
  onFileProgress?.(total, total, 'done');

  return {
    imports: allImports,
    calls: allCalls,
    assignments: allAssignments,
    heritage: allHeritage,
    routes: allRoutes,
    constructorBindings: allConstructorBindings,
  };
};

// ============================================================================
// Sequential fallback (original implementation)
// ============================================================================

/**
 * Parses `files` one at a time on the calling thread.
 *
 * For every definition matched by the language query this adds a graph node,
 * a symbol-table entry, a File→symbol DEFINES edge and, when an enclosing
 * class is found, a HAS_METHOD / HAS_PROPERTY edge. Parsed trees are stored in
 * `astCache`. Import and call captures are ignored here.
 *
 * @param graph - receives nodes and relationships.
 * @param files - path/content pairs to parse.
 * @param symbolTable - receives one entry per definition.
 * @param astCache - receives the parsed tree of every successfully parsed file.
 * @param onFileProgress - optional per-file progress sink.
 */
const processParsingSequential = async (
  graph: KnowledgeGraph,
  files: { path: string; content: string }[],
  symbolTable: SymbolTable,
  astCache: ASTCache,
  onFileProgress?: FileProgressCallback
) => {
  // Capture name → node label, in priority order (first capture present wins).
  // 'definition.function' is handled separately because of the C/C++ rule below.
  const labelsByCapture: ReadonlyArray<readonly [string, NodeLabel]> = [
    ['definition.class', 'Class'],
    ['definition.interface', 'Interface'],
    ['definition.method', 'Method'],
    ['definition.struct', 'Struct'],
    ['definition.enum', 'Enum'],
    ['definition.namespace', 'Namespace'],
    ['definition.module', 'Module'],
    ['definition.trait', 'Trait'],
    ['definition.impl', 'Impl'],
    ['definition.type', 'TypeAlias'],
    ['definition.const', 'Const'],
    ['definition.static', 'Static'],
    ['definition.typedef', 'Typedef'],
    ['definition.macro', 'Macro'],
    ['definition.union', 'Union'],
    ['definition.property', 'Property'],
    ['definition.record', 'Record'],
    ['definition.delegate', 'Delegate'],
    ['definition.annotation', 'Annotation'],
    ['definition.constructor', 'Constructor'],
    ['definition.template', 'Template'],
  ];

  const parser = await loadParser();
  const total = files.length;
  const skippedLanguages = new Map<string, number>();

  for (let i = 0; i < files.length; i++) {
    const file = files[i];

    onFileProgress?.(i + 1, total, file.path);

    if (i % 20 === 0) await yieldToEventLoop();

    const language = getLanguageFromFilename(file.path);
    if (!language) continue;

    // Skip unsupported languages (e.g. Swift when tree-sitter-swift not installed)
    if (!isLanguageAvailable(language)) {
      skippedLanguages.set(language, (skippedLanguages.get(language) || 0) + 1);
      continue;
    }

    // Skip files larger than the max tree-sitter buffer (32 MB)
    if (file.content.length > TREE_SITTER_MAX_BUFFER) continue;

    try {
      await loadLanguage(language, file.path);
    } catch {
      continue; // parser unavailable — safety net
    }

    let tree;
    try {
      tree = parser.parse(file.content, undefined, { bufferSize: getTreeSitterBufferSize(file.content.length) });
    } catch (parseError) {
      console.warn(`Skipping unparseable file: ${file.path}`);
      continue;
    }

    astCache.set(file.path, tree);

    const queryString = LANGUAGE_QUERIES[language];
    if (!queryString) continue;

    let matches;
    try {
      const grammar = parser.getLanguage();
      const query = new Parser.Query(grammar, queryString);
      matches = query.matches(tree.rootNode);
    } catch (queryError) {
      console.warn(`Query error for ${file.path}:`, queryError);
      continue;
    }

    for (const match of matches) {
      const captureMap: Record<string, any> = {};
      for (const capture of match.captures) {
        captureMap[capture.name] = capture.node;
      }

      // Imports and calls are resolved by other processors
      if (captureMap['import'] || captureMap['call']) continue;

      const nameNode = captureMap['name'];
      // Synthesize name for constructors without explicit @name capture (e.g. Swift init)
      if (!nameNode && !captureMap['definition.constructor']) continue;
      const nodeName = nameNode ? nameNode.text : 'init';

      let nodeLabel: NodeLabel = 'CodeElement';

      if (captureMap['definition.function']) {
        // C/C++: @definition.function is broad and also matches inline class methods (inside
        // a class/struct body). Those are already captured by @definition.method, so skip
        // the duplicate Function entry to prevent double-indexing in globalIndex.
        if (language === SupportedLanguages.CPlusPlus || language === SupportedLanguages.C) {
          let ancestor = captureMap['definition.function']?.parent;
          while (ancestor && ancestor.type !== 'class_specifier' && ancestor.type !== 'struct_specifier') {
            ancestor = ancestor.parent;
          }
          if (ancestor) continue; // inside a class body — handled by @definition.method
        }
        nodeLabel = 'Function';
      } else {
        const hit = labelsByCapture.find(([capture]) => captureMap[capture]);
        if (hit) nodeLabel = hit[1];
      }

      const definitionNode = getDefinitionNodeFromCaptures(captureMap);
      // Fall back to the name node's row, then to 0, when no definition node was captured
      const startLine = definitionNode
        ? definitionNode.startPosition.row
        : (nameNode ? nameNode.startPosition.row : 0);
      const endLine = definitionNode ? definitionNode.endPosition.row : startLine;
      const nodeId = generateId(nodeLabel, `${file.path}:${nodeName}`);

      // Only the first 300 characters of the definition text are inspected
      const frameworkHint = definitionNode
        ? detectFrameworkFromAST(language, (definitionNode.text || '').slice(0, 300))
        : null;

      // Extract method signature for Function/Method/Constructor nodes
      const methodSig = (nodeLabel === 'Function' || nodeLabel === 'Method' || nodeLabel === 'Constructor')
        ? extractMethodSignature(definitionNode)
        : undefined;

      // Language-specific return type fallback (e.g. Ruby YARD @return [Type])
      // Also upgrades uninformative AST types like PHP `array` with PHPDoc `@return User[]`
      if (methodSig && (!methodSig.returnType || methodSig.returnType === 'array' || methodSig.returnType === 'iterable') && definitionNode) {
        const tc = typeConfigs[language as keyof typeof typeConfigs];
        if (tc?.extractReturnType) {
          const docReturn = tc.extractReturnType(definitionNode);
          if (docReturn) methodSig.returnType = docReturn;
        }
      }

      const node: GraphNode = {
        id: nodeId,
        label: nodeLabel as any,
        properties: {
          name: nodeName,
          filePath: file.path,
          startLine,
          endLine,
          language: language,
          isExported: isNodeExported(nameNode || definitionNode, nodeName, language),
          ...(frameworkHint ? {
            astFrameworkMultiplier: frameworkHint.entryPointMultiplier,
            astFrameworkReason: frameworkHint.reason,
          } : {}),
          ...(methodSig ? {
            parameterCount: methodSig.parameterCount,
            ...(methodSig.requiredParameterCount !== undefined ? { requiredParameterCount: methodSig.requiredParameterCount } : {}),
            ...(methodSig.parameterTypes ? { parameterTypes: methodSig.parameterTypes } : {}),
            returnType: methodSig.returnType,
          } : {}),
        },
      };

      graph.addNode(node);

      // Compute enclosing class for Method/Constructor/Property/Function — used for both ownerId and HAS_METHOD
      // Function is included because Kotlin/Rust/Python capture class methods as Function nodes
      const needsOwner = nodeLabel === 'Method' || nodeLabel === 'Constructor' || nodeLabel === 'Property' || nodeLabel === 'Function';
      const enclosingClassId = needsOwner ? findEnclosingClassId(nameNode || definitionNode, file.path) : null;

      // Extract declared type for Property nodes (field/property type annotations)
      const declaredType = (nodeLabel === 'Property' && definitionNode)
        ? extractPropertyDeclaredType(definitionNode)
        : undefined;

      symbolTable.add(file.path, nodeName, nodeId, nodeLabel, {
        parameterCount: methodSig?.parameterCount,
        requiredParameterCount: methodSig?.requiredParameterCount,
        parameterTypes: methodSig?.parameterTypes,
        returnType: methodSig?.returnType,
        declaredType,
        ownerId: enclosingClassId ?? undefined,
      });

      const fileId = generateId('File', file.path);
      const relId = generateId('DEFINES', `${fileId}->${nodeId}`);

      const relationship: GraphRelationship = {
        id: relId,
        sourceId: fileId,
        targetId: nodeId,
        type: 'DEFINES',
        confidence: 1.0,
        reason: '',
      };

      graph.addRelationship(relationship);

      // ── HAS_METHOD / HAS_PROPERTY: link member to enclosing class ──
      if (enclosingClassId) {
        const memberEdgeType = nodeLabel === 'Property' ? 'HAS_PROPERTY' : 'HAS_METHOD';
        graph.addRelationship({
          id: generateId(memberEdgeType, `${enclosingClassId}->${nodeId}`),
          sourceId: enclosingClassId,
          targetId: nodeId,
          type: memberEdgeType,
          confidence: 1.0,
          reason: '',
        });
      }
    }
  }

  if (skippedLanguages.size > 0) {
    const summary = Array.from(skippedLanguages.entries())
      .map(([lang, count]) => `${lang}: ${count}`)
      .join(', ');
    console.warn(` Skipped unsupported languages: ${summary}`);
  }
};

// ============================================================================
// Public API
// ============================================================================

/**
 * Parses `files` into `graph` and `symbolTable`, preferring the worker pool.
 *
 * @param workerPool - when provided, parsing is attempted through the pool
 *   first; any error thrown there is logged and the sequential parser runs
 *   instead.
 * @returns The worker-extracted records when the worker path succeeded, or
 *   `null` when the sequential parser ran (it returns no pre-extracted data).
 */
export const processParsing = async (
  graph: KnowledgeGraph,
  files: { path: string; content: string }[],
  symbolTable: SymbolTable,
  astCache: ASTCache,
  onFileProgress?: FileProgressCallback,
  workerPool?: WorkerPool,
): Promise<WorkerExtractedData | null> => {
  if (workerPool) {
    try {
      return await processParsingWithWorkers(graph, files, symbolTable, astCache, workerPool, onFileProgress);
    } catch (err) {
      console.warn('Worker pool parsing failed, falling back to sequential:', err instanceof Error ? err.message : err);
    }
  }

  // Fallback: sequential parsing (no pre-extracted data)
  await processParsingSequential(graph, files, symbolTable, astCache, onFileProgress);
  return null;
};

================================================
FILE: gitnexus/src/core/ingestion/pipeline.ts
================================================
import { createKnowledgeGraph } from '../graph/graph.js';
import { processStructure } from './structure-processor.js';
import { processParsing } from './parsing-processor.js';
import { processImports, processImportsFromExtracted, buildImportResolutionContext } from './import-processor.js';
import { processCalls, processCallsFromExtracted, processAssignmentsFromExtracted, processRoutesFromExtracted } from './call-processor.js';
import { processHeritage, processHeritageFromExtracted } from './heritage-processor.js';
import { computeMRO } from './mro-processor.js';
import { processCommunities } from './community-processor.js';
import { processProcesses } from './process-processor.js';
import { createResolutionContext } from './resolution-context.js';
import { createASTCache } from './ast-cache.js';
import { PipelineProgress, PipelineResult } from '../../types/pipeline.js';
import { walkRepositoryPaths, readFileContents } from './filesystem-walker.js';
import { getLanguageFromFilename } from './utils.js';
import { isLanguageAvailable } from '../tree-sitter/parser-loader.js';
import { createWorkerPool, WorkerPool } from './workers/worker-pool.js';
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath, pathToFileURL } from 'node:url';

const isDev = process.env.NODE_ENV === 'development';

/** Max bytes of source content to load per parse chunk. Each chunk's source +
 * parsed ASTs + extracted records + worker serialization overhead all live in
 * memory simultaneously, so this must be conservative. 20MB source ≈ 200-400MB
 * peak working memory per chunk after parse expansion.
*/
const CHUNK_BYTE_BUDGET = 20 * 1024 * 1024; // 20MB

/** Max AST trees to keep in LRU cache */
const AST_CACHE_CAP = 50;

export interface PipelineOptions {
  /** Skip MRO, community detection, and process extraction for faster test runs. */
  skipGraphPhases?: boolean;
}

/**
 * Runs the full ingestion pipeline over a repository on disk.
 *
 * Phases, in order: path scan → structure → chunked read/parse with
 * import/call/heritage/route resolution → (unless `skipGraphPhases`) method
 * resolution order, community detection and process detection.
 *
 * @param repoPath - root directory of the repository to ingest.
 * @param onProgress - receives a progress event at each phase transition and
 *   while files are being scanned, parsed and resolved.
 * @param options - see {@link PipelineOptions}.
 * @returns The populated graph together with the scanned file count and, when
 *   the graph phases ran, the community and process detection results.
 * @throws Rethrows any error raised by a phase, after clearing the AST cache
 *   and the resolution context.
 */
export const runPipelineFromRepo = async (
  repoPath: string,
  onProgress: (progress: PipelineProgress) => void,
  options?: PipelineOptions,
): Promise<PipelineResult> => {
  const graph = createKnowledgeGraph();
  const ctx = createResolutionContext();
  const symbolTable = ctx.symbols;
  let astCache = createASTCache(AST_CACHE_CAP);

  const cleanup = () => {
    astCache.clear();
    ctx.clear();
  };

  try {
    // ── Phase 1: Scan paths only (no content read) ─────────────────────
    onProgress({
      phase: 'extracting',
      percent: 0,
      message: 'Scanning repository...',
    });

    const scannedFiles = await walkRepositoryPaths(repoPath, (current, total, filePath) => {
      const scanProgress = Math.round((current / total) * 15);
      onProgress({
        phase: 'extracting',
        percent: scanProgress,
        message: 'Scanning repository...',
        detail: filePath,
        stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
      });
    });

    const totalFiles = scannedFiles.length;

    onProgress({
      phase: 'extracting',
      percent: 15,
      message: 'Repository scanned successfully',
      stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
    });

    // ── Phase 2: Structure (paths only — no content needed) ────────────
    onProgress({
      phase: 'structure',
      percent: 15,
      message: 'Analyzing project structure...',
      stats: { filesProcessed: 0, totalFiles, nodesCreated: graph.nodeCount },
    });

    const allPaths = scannedFiles.map(f => f.path);
    processStructure(graph, allPaths);

    onProgress({
      phase: 'structure',
      percent: 20,
      message: 'Project structure analyzed',
      stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
    });

    // ── Phase 3+4: Chunked read + parse ────────────────────────────────
    // Group parseable files into byte-budget chunks so only ~20MB of source
    // is in memory at a time. Each chunk is: read → parse → extract → free.
    const parseableScanned = scannedFiles.filter(f => {
      const lang = getLanguageFromFilename(f.path);
      return lang && isLanguageAvailable(lang);
    });

    // Warn about files skipped due to unavailable parsers
    const skippedByLang = new Map<string, number>();
    for (const f of scannedFiles) {
      const lang = getLanguageFromFilename(f.path);
      if (lang && !isLanguageAvailable(lang)) {
        skippedByLang.set(lang, (skippedByLang.get(lang) || 0) + 1);
      }
    }
    for (const [lang, count] of skippedByLang) {
      console.warn(`Skipping ${count} ${lang} file(s) — ${lang} parser not available (native binding may not have built). Try: npm rebuild tree-sitter-${lang}`);
    }

    const totalParseable = parseableScanned.length;

    if (totalParseable === 0) {
      onProgress({
        phase: 'parsing',
        percent: 82,
        message: 'No parseable files found — skipping parsing phase',
        stats: { filesProcessed: 0, totalFiles: 0, nodesCreated: graph.nodeCount },
      });
    }

    // Build byte-budget chunks. A file larger than the budget still gets a
    // chunk of its own because the budget is only checked for non-empty chunks.
    const chunks: string[][] = [];
    let currentChunk: string[] = [];
    let currentBytes = 0;
    for (const file of parseableScanned) {
      if (currentChunk.length > 0 && currentBytes + file.size > CHUNK_BYTE_BUDGET) {
        chunks.push(currentChunk);
        currentChunk = [];
        currentBytes = 0;
      }
      currentChunk.push(file.path);
      currentBytes += file.size;
    }
    if (currentChunk.length > 0) chunks.push(currentChunk);

    const numChunks = chunks.length;

    if (isDev) {
      const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
      console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${CHUNK_BYTE_BUDGET / (1024 * 1024)}MB budget`);
    }

    onProgress({
      phase: 'parsing',
      percent: 20,
      message: `Parsing ${totalParseable} files in ${numChunks} chunk${numChunks !== 1 ? 's' : ''}...`,
      stats: { filesProcessed: 0, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
    });

    let filesParsedSoFar = 0;

    // AST cache sized for one chunk (sequential fallback uses it for import/call/heritage)
    const maxChunkFiles = chunks.reduce((max, c) => Math.max(max, c.length), 0);
    astCache = createASTCache(maxChunkFiles);

    // Build import resolution context once — suffix index, file lists, resolve cache.
    // Reused across all chunks to avoid rebuilding O(files × path_depth) structures.
    const importCtx = buildImportResolutionContext(allPaths);
    const allPathObjects = allPaths.map(p => ({ path: p }));

    // Single-pass: parse + resolve imports/calls/heritage per chunk.
    // Calls/heritage use the symbol table built so far (symbols from earlier chunks
    // are already registered). This trades ~5% cross-chunk resolution accuracy for
    // 200-400MB less memory — critical for Linux-kernel-scale repos.
    const sequentialChunkPaths: string[][] = [];

    // Don't spawn workers for tiny repos — overhead exceeds benefit
    const MIN_FILES_FOR_WORKERS = 15;
    const MIN_BYTES_FOR_WORKERS = 512 * 1024;
    const totalBytes = parseableScanned.reduce((s, f) => s + f.size, 0);

    // Create worker pool once, reuse across chunks.
    // The pool is created immediately before the try/finally below so that no
    // statement can throw between spawning the workers and the guarded block
    // that terminates them.
    let workerPool: WorkerPool | undefined;
    if (totalParseable >= MIN_FILES_FOR_WORKERS || totalBytes >= MIN_BYTES_FOR_WORKERS) {
      try {
        let workerUrl = new URL('./workers/parse-worker.js', import.meta.url);
        // When running under vitest, import.meta.url points to src/ where no .js exists.
        // Fall back to the compiled dist/ worker so the pool can spawn real worker threads.
        const thisDir = fileURLToPath(new URL('.', import.meta.url));
        if (!fs.existsSync(fileURLToPath(workerUrl))) {
          const distWorker = path.resolve(thisDir, '..', '..', '..', 'dist', 'core', 'ingestion', 'workers', 'parse-worker.js');
          if (fs.existsSync(distWorker)) {
            workerUrl = pathToFileURL(distWorker) as URL;
          }
        }
        workerPool = createWorkerPool(workerUrl);
      } catch (err) {
        if (isDev) console.warn('Worker pool creation failed, using sequential fallback:', (err as Error).message);
      }
    }

    try {
      for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
        const chunkPaths = chunks[chunkIdx];

        // Read content for this chunk only
        const chunkContents = await readFileContents(repoPath, chunkPaths);
        const chunkFiles = chunkPaths
          .filter(p => chunkContents.has(p))
          .map(p => ({ path: p, content: chunkContents.get(p)! }));

        // Parse this chunk (workers or sequential fallback)
        const chunkWorkerData = await processParsing(
          graph, chunkFiles, symbolTable, astCache,
          (current, _total, filePath) => {
            const globalCurrent = filesParsedSoFar + current;
            // Parsing occupies the 20%–82% band of overall progress
            const parsingProgress = 20 + ((globalCurrent / totalParseable) * 62);
            onProgress({
              phase: 'parsing',
              percent: Math.round(parsingProgress),
              message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
              detail: filePath,
              stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
            });
          },
          workerPool,
        );

        const chunkBasePercent = 20 + ((filesParsedSoFar / totalParseable) * 62);

        // Shared progress reporter for the per-chunk resolution steps
        const reportResolving = (what: string, detail: string): void => {
          onProgress({
            phase: 'parsing',
            percent: Math.round(chunkBasePercent),
            message: `Resolving ${what} (chunk ${chunkIdx + 1}/${numChunks})...`,
            detail,
            stats: { filesProcessed: filesParsedSoFar, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
          });
        };

        if (chunkWorkerData) {
          // Imports
          await processImportsFromExtracted(
            graph, allPathObjects, chunkWorkerData.imports, ctx,
            (current, total) => reportResolving('imports', `${current}/${total} files`),
            repoPath, importCtx,
          );

          // Calls + Heritage + Routes — resolve in parallel (no shared mutable state between them)
          // This is safe because each writes disjoint relationship types into idempotent id-keyed Maps,
          // and the single-threaded event loop prevents races between synchronous addRelationship calls.
          await Promise.all([
            processCallsFromExtracted(
              graph,
              chunkWorkerData.calls,
              ctx,
              (current, total) => reportResolving('calls', `${current}/${total} files`),
              chunkWorkerData.constructorBindings,
            ),
            processHeritageFromExtracted(
              graph,
              chunkWorkerData.heritage,
              ctx,
              (current, total) => reportResolving('heritage', `${current}/${total} records`),
            ),
            processRoutesFromExtracted(
              graph,
              chunkWorkerData.routes ?? [],
              ctx,
              (current, total) => reportResolving('routes', `${current}/${total} routes`),
            ),
          ]);

          // Process field write assignments (synchronous, runs after calls resolve)
          if (chunkWorkerData.assignments?.length) {
            processAssignmentsFromExtracted(graph, chunkWorkerData.assignments, ctx, chunkWorkerData.constructorBindings);
          }
        } else {
          // Sequential parse: imports are resolved now from the cached ASTs;
          // calls/heritage are deferred to the re-read pass after the loop.
          await processImports(graph, chunkFiles, astCache, ctx, undefined, repoPath, allPaths);
          sequentialChunkPaths.push(chunkPaths);
        }

        filesParsedSoFar += chunkFiles.length;

        // Clear AST cache between chunks to free memory
        astCache.clear();
        // chunkContents + chunkFiles + chunkWorkerData go out of scope → GC reclaims
      }
    } finally {
      await workerPool?.terminate();
    }

    // Sequential fallback chunks: re-read source for call/heritage resolution
    for (const chunkPaths of sequentialChunkPaths) {
      const chunkContents = await readFileContents(repoPath, chunkPaths);
      const chunkFiles = chunkPaths
        .filter(p => chunkContents.has(p))
        .map(p => ({ path: p, content: chunkContents.get(p)! }));
      astCache = createASTCache(chunkFiles.length);
      const rubyHeritage = await processCalls(graph, chunkFiles, astCache, ctx);
      await processHeritage(graph, chunkFiles, astCache, ctx);
      if (rubyHeritage.length > 0) {
        await processHeritageFromExtracted(graph, rubyHeritage, ctx);
      }
      astCache.clear();
    }

    // Log resolution cache stats
    if (isDev) {
      const rcStats = ctx.getStats();
      const total = rcStats.cacheHits + rcStats.cacheMisses;
      const hitRate = total > 0 ? ((rcStats.cacheHits / total) * 100).toFixed(1) : '0';
      console.log(`🔍 Resolution cache: ${rcStats.cacheHits} hits, ${rcStats.cacheMisses} misses (${hitRate}% hit rate)`);
    }

    // Free import resolution context — suffix index + resolve cache no longer needed
    // (allPathObjects and importCtx hold ~94MB+ for large repos)
    allPathObjects.length = 0;
    importCtx.resolveCache.clear();
    (importCtx as any).suffixIndex = null;
    (importCtx as any).normalizedFileList = null;

    let communityResult: Awaited<ReturnType<typeof processCommunities>> | undefined;
    let processResult: Awaited<ReturnType<typeof processProcesses>> | undefined;

    if (!options?.skipGraphPhases) {
      // ── Phase 4.5: Method Resolution Order ──────────────────────────────
      onProgress({
        phase: 'parsing',
        percent: 81,
        message: 'Computing method resolution order...',
        stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
      });

      const mroResult = computeMRO(graph);
      if (isDev && mroResult.entries.length > 0) {
        console.log(`🔀 MRO: ${mroResult.entries.length} classes analyzed, ${mroResult.ambiguityCount} ambiguities found, ${mroResult.overrideEdges} OVERRIDES edges`);
      }

      // ── Phase 5: Communities ───────────────────────────────────────────
      onProgress({
        phase: 'communities',
        percent: 82,
        message: 'Detecting code communities...',
        stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
      });

      communityResult = await processCommunities(graph, (message, progress) => {
        const communityProgress = 82 + (progress * 0.10);
        onProgress({
          phase: 'communities',
          percent: Math.round(communityProgress),
          message,
          stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
        });
      });

      if (isDev) {
        console.log(`🏘️ Community detection: ${communityResult.stats.totalCommunities} communities found (modularity: ${communityResult.stats.modularity.toFixed(3)})`);
      }

      communityResult.communities.forEach(comm => {
        graph.addNode({
          id: comm.id,
          label: 'Community' as const,
          properties: {
            name: comm.label,
            filePath: '',
            heuristicLabel: comm.heuristicLabel,
            cohesion: comm.cohesion,
            symbolCount: comm.symbolCount,
          }
        });
      });

      communityResult.memberships.forEach(membership => {
        graph.addRelationship({
          id: `${membership.nodeId}_member_of_${membership.communityId}`,
          type: 'MEMBER_OF',
          sourceId: membership.nodeId,
          targetId: membership.communityId,
          confidence: 1.0,
          reason: 'leiden-algorithm',
        });
      });

      // ── Phase 6: Processes ─────────────────────────────────────────────
      onProgress({
        phase: 'processes',
        percent: 94,
        message: 'Detecting execution flows...',
        stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
      });

      // Scale the process cap with the number of non-File nodes: one per ten, clamped to [20, 300]
      let symbolCount = 0;
      graph.forEachNode(n => { if (n.label !== 'File') symbolCount++; });
      const dynamicMaxProcesses = Math.max(20, Math.min(300, Math.round(symbolCount / 10)));

      processResult = await processProcesses(
        graph,
        communityResult.memberships,
        (message, progress) => {
          const processProgress = 94 + (progress * 0.05);
          onProgress({
            phase: 'processes',
            percent: Math.round(processProgress),
            message,
            stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
          });
        },
        { maxProcesses: dynamicMaxProcesses, minSteps: 3 }
      );

      if (isDev) {
        console.log(`🔄 Process detection: ${processResult.stats.totalProcesses} processes found (${processResult.stats.crossCommunityCount} cross-community)`);
      }

      processResult.processes.forEach(proc => {
        graph.addNode({
          id: proc.id,
          label: 'Process' as const,
          properties: {
            name: proc.label,
            filePath: '',
            heuristicLabel: proc.heuristicLabel,
            processType: proc.processType,
            stepCount: proc.stepCount,
            communities: proc.communities,
            entryPointId: proc.entryPointId,
            terminalId: proc.terminalId,
          }
        });
      });

      processResult.steps.forEach(step => {
        graph.addRelationship({
          id: `${step.nodeId}_step_${step.step}_${step.processId}`,
          type: 'STEP_IN_PROCESS',
          sourceId: step.nodeId,
          targetId: step.processId,
          confidence: 1.0,
          reason: 'trace-detection',
          step: step.step,
        });
      });
    }

    onProgress({
      phase: 'complete',
      percent: 100,
      message: communityResult && processResult
        ? `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes detected.`
        : 'Graph complete! (graph phases skipped)',
      stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
    });

    astCache.clear();

    return { graph, repoPath, totalFileCount: totalFiles, communityResult, processResult };
  } catch (error) {
    cleanup();
    throw error;
  }
};

================================================
FILE: gitnexus/src/core/ingestion/process-processor.ts
================================================
/**
 * Process Detection Processor
 *
 * Detects execution flows (Processes) in the code graph by:
 * 1. Finding entry points (functions with no internal callers)
 * 2. Tracing forward via CALLS edges (BFS)
 * 3. Grouping and deduplicating similar paths
 * 4. Labeling with heuristic names
 *
 * Processes help agents understand how features work through the codebase.
*/

import { KnowledgeGraph, GraphNode, GraphRelationship, NodeLabel } from '../graph/types.js';
import { CommunityMembership } from './community-processor.js';
import { calculateEntryPointScore, isTestFile } from './entry-point-scoring.js';
import { SupportedLanguages } from '../../config/supported-languages.js';

// Gates the console diagnostics printed while ranking entry-point candidates.
const isDev = process.env.NODE_ENV === 'development';

// ============================================================================
// CONFIGURATION
// ============================================================================

/**
 * Tunables for process detection. Callers may pass a partial object; any
 * missing field falls back to DEFAULT_CONFIG below.
 */
export interface ProcessDetectionConfig {
  maxTraceDepth: number; // Maximum steps to trace (default: 10)
  maxBranching: number; // Max branches to follow per node (default: 4)
  maxProcesses: number; // Maximum processes to detect (default: 75)
  minSteps: number; // Minimum steps for a valid process (default: 3)
}

/** Defaults merged underneath the caller-supplied config. */
const DEFAULT_CONFIG: ProcessDetectionConfig = {
  maxTraceDepth: 10,
  maxBranching: 4,
  maxProcesses: 75,
  minSteps: 3, // 3+ steps = genuine multi-hop flow (2-step is just "A calls B")
};

// ============================================================================
// TYPES
// ============================================================================

/** One detected execution flow, from an entry point to a terminal node. */
export interface ProcessNode {
  id: string; // "proc_<index>_<sanitized entry name>", e.g. "proc_0_handlelogin"
  label: string; // "HandleLogin → CreateSession"
  heuristicLabel: string; // Set to the same text as `label` when the process is created
  processType: 'intra_community' | 'cross_community'; // cross = trace touches more than one community
  stepCount: number; // Number of nodes in `trace`
  communities: string[]; // Community IDs touched
  entryPointId: string; // First node ID of the trace
  terminalId: string; // Last node ID of the trace
  trace: string[]; // Ordered array of node IDs
}

/** Membership of one node in one process (becomes a STEP_IN_PROCESS edge). */
export interface ProcessStep {
  nodeId: string;
  processId: string;
  step: number; // 1-indexed position in trace
}

/** Everything process detection produces, plus summary statistics. */
export interface ProcessDetectionResult {
  processes: ProcessNode[];
  steps: ProcessStep[];
  stats: {
    totalProcesses: number;
    crossCommunityCount: number;
    avgStepCount: number; // Mean trace length, rounded to one decimal place
    entryPointsFound: number;
  };
}

// ============================================================================
// MAIN PROCESSOR
//
// ============================================================================

/**
 * Detect processes (execution flows) in the knowledge graph.
 *
 * This runs AFTER community detection, using CALLS edges to trace flows.
 *
 * Stages: rank entry points → trace forward from each → drop traces that are
 * contained in longer ones → keep one trace per entry/terminal pair → keep the
 * longest `maxProcesses` traces → materialise process nodes and their steps.
 *
 * @param knowledgeGraph - Graph whose nodes and CALLS relationships are traced.
 * @param memberships - Node → community assignments, used to classify a
 *   process as intra- or cross-community.
 * @param onProgress - Optional progress sink; `progress` runs from 0 to 100.
 * @param config - Overrides merged on top of DEFAULT_CONFIG.
 * @returns The detected processes, one step record per (process, node), and stats.
 */
export const processProcesses = async (
  knowledgeGraph: KnowledgeGraph,
  memberships: CommunityMembership[],
  onProgress?: (message: string, progress: number) => void,
  config: Partial<ProcessDetectionConfig> = {}
): Promise<ProcessDetectionResult> => {
  const cfg = { ...DEFAULT_CONFIG, ...config };

  onProgress?.('Finding entry points...', 0);

  // Build lookup maps
  const membershipMap = new Map<string, string>();
  memberships.forEach(m => membershipMap.set(m.nodeId, m.communityId));

  const callsEdges = buildCallsGraph(knowledgeGraph);
  const reverseCallsEdges = buildReverseCallsGraph(knowledgeGraph);
  const nodeMap = new Map<string, GraphNode>();
  for (const n of knowledgeGraph.iterNodes()) nodeMap.set(n.id, n);

  // Step 1: Find entry points (functions that call others but have few callers)
  const entryPoints = findEntryPoints(knowledgeGraph, reverseCallsEdges, callsEdges);

  // Reported exactly once (the previous version emitted this event twice in a row).
  onProgress?.(`Found ${entryPoints.length} entry points, tracing flows...`, 20);

  // Step 2: Trace processes from each entry point.
  // Tracing stops early once twice the final budget has been collected.
  const allTraces: string[][] = [];

  for (let i = 0; i < entryPoints.length && allTraces.length < cfg.maxProcesses * 2; i++) {
    const entryId = entryPoints[i];
    const traces = traceFromEntryPoint(entryId, callsEdges, cfg);

    // Filter out traces that are too short
    traces.filter(t => t.length >= cfg.minSteps).forEach(t => allTraces.push(t));

    if (i % 10 === 0) {
      onProgress?.(`Tracing entry point ${i + 1}/${entryPoints.length}...`, 20 + (i / entryPoints.length) * 40);
    }
  }

  onProgress?.(`Found ${allTraces.length} traces, deduplicating...`, 60);

  // Step 3: Deduplicate similar traces (subset removal)
  const uniqueTraces = deduplicateTraces(allTraces);

  // Step 3b: Deduplicate by entry+terminal pair (keep longest path per pair)
  const endpointDeduped = deduplicateByEndpoints(uniqueTraces);

  onProgress?.(`Deduped ${uniqueTraces.length} → ${endpointDeduped.length} unique endpoint pairs`, 70);

  // Step 4: Limit to max processes (prioritize longer traces)
  const limitedTraces = endpointDeduped
    .sort((a, b) => b.length - a.length)
    .slice(0, cfg.maxProcesses);

  onProgress?.(`Creating ${limitedTraces.length} process nodes...`, 80);

  // Step 5: Create process nodes
  const processes: ProcessNode[] = [];
  const steps: ProcessStep[] = [];

  limitedTraces.forEach((trace, idx) => {
    const entryPointId = trace[0];
    const terminalId = trace[trace.length - 1];

    // Get communities touched
    const communitiesSet = new Set<string>();
    trace.forEach(nodeId => {
      const comm = membershipMap.get(nodeId);
      if (comm) communitiesSet.add(comm);
    });
    const communities = Array.from(communitiesSet);

    // Determine process type
    const processType: 'intra_community' | 'cross_community' =
      communities.length > 1 ? 'cross_community' : 'intra_community';

    // Generate label
    const entryNode = nodeMap.get(entryPointId);
    const terminalNode = nodeMap.get(terminalId);
    const entryName = entryNode?.properties.name || 'Unknown';
    const terminalName = terminalNode?.properties.name || 'Unknown';
    const heuristicLabel = `${capitalize(entryName)} → ${capitalize(terminalName)}`;

    // The index keeps IDs unique even when several processes share an entry name.
    const processId = `proc_${idx}_${sanitizeId(entryName)}`;

    processes.push({
      id: processId,
      label: heuristicLabel,
      heuristicLabel,
      processType,
      stepCount: trace.length,
      communities,
      entryPointId,
      terminalId,
      trace,
    });

    // Create step relationships
    trace.forEach((nodeId, stepIdx) => {
      steps.push({
        nodeId,
        processId,
        step: stepIdx + 1, // 1-indexed
      });
    });
  });

  onProgress?.('Process detection complete!', 100);

  // Calculate stats
  const crossCommunityCount = processes.filter(p => p.processType === 'cross_community').length;
  const avgStepCount = processes.length > 0
    ? processes.reduce((sum, p) => sum + p.stepCount, 0) / processes.length
    : 0;

  return {
    processes,
    steps,
    stats: {
      totalProcesses: processes.length,
      crossCommunityCount,
      avgStepCount: Math.round(avgStepCount * 10) / 10,
      entryPointsFound: entryPoints.length,
    },
  };
};

// ============================================================================
// HELPER: Build CALLS adjacency list
// ============================================================================

type AdjacencyList = Map<string, string[]>;

/**
 * Minimum edge confidence for process tracing.
 * Filters out ambiguous fuzzy-global matches (0.3) that cause
 * traces to jump across unrelated code areas.
 */
const MIN_TRACE_CONFIDENCE = 0.5;

/**
 * Shared builder for both adjacency lists: collects every CALLS relationship
 * whose confidence is at least MIN_TRACE_CONFIDENCE, keyed by source
 * ('forward') or by target ('reverse').
 */
const buildCallsAdjacency = (
  graph: KnowledgeGraph,
  direction: 'forward' | 'reverse'
): AdjacencyList => {
  const adj: AdjacencyList = new Map();
  for (const rel of graph.iterRelationships()) {
    if (rel.type === 'CALLS' && rel.confidence >= MIN_TRACE_CONFIDENCE) {
      const from = direction === 'forward' ? rel.sourceId : rel.targetId;
      const to = direction === 'forward' ? rel.targetId : rel.sourceId;
      const bucket = adj.get(from);
      if (bucket) {
        bucket.push(to);
      } else {
        adj.set(from, [to]);
      }
    }
  }
  return adj;
};

/** Caller → callees adjacency list over sufficiently confident CALLS edges. */
const buildCallsGraph = (graph: KnowledgeGraph): AdjacencyList =>
  buildCallsAdjacency(graph, 'forward');

/** Callee → callers adjacency list over sufficiently confident CALLS edges. */
const buildReverseCallsGraph = (graph: KnowledgeGraph): AdjacencyList =>
  buildCallsAdjacency(graph, 'reverse');

/**
 * Find functions/methods that are good entry points for tracing.
 *
 * Entry points are scored based on:
 * 1. Call ratio (calls many, called by few)
 * 2. Export status (exported/public functions rank higher)
 * 3. Name patterns (handle*, on*, *Controller, etc.)
 *
 * Test files are excluded entirely.
/**
 * @param graph - Graph whose Function/Method nodes are considered.
 * @param reverseCallsEdges - Callee → callers adjacency (gives the caller count).
 * @param callsEdges - Caller → callees adjacency (gives the callee count).
 * @returns IDs of at most 200 candidates with a positive score, best first.
 */
const findEntryPoints = (
  graph: KnowledgeGraph,
  reverseCallsEdges: AdjacencyList,
  callsEdges: AdjacencyList
): string[] => {
  const symbolTypes = new Set<NodeLabel>(['Function', 'Method']);
  const entryPointCandidates: {
    id: string;
    score: number;
    reasons: string[];
  }[] = [];

  for (const node of graph.iterNodes()) {
    if (!symbolTypes.has(node.label)) continue;

    const filePath = node.properties.filePath || '';

    // Skip test files entirely
    if (isTestFile(filePath)) continue;

    const callers = reverseCallsEdges.get(node.id) || [];
    const callees = callsEdges.get(node.id) || [];

    // Must have at least 1 outgoing call to trace forward
    if (callees.length === 0) continue;

    // Calculate entry point score using new scoring system
    const { score: baseScore, reasons } = calculateEntryPointScore(
      node.properties.name,
      node.properties.language ?? SupportedLanguages.JavaScript,
      node.properties.isExported ?? false,
      callers.length,
      callees.length,
      filePath // Pass filePath for framework detection
    );

    // A multiplier above 1 recorded on the node boosts the score and is
    // appended to the reasons list.
    let score = baseScore;
    const astFrameworkMultiplier = node.properties.astFrameworkMultiplier ?? 1.0;
    if (astFrameworkMultiplier > 1.0) {
      score *= astFrameworkMultiplier;
      reasons.push(`framework-ast:${node.properties.astFrameworkReason || 'decorator'}`);
    }

    if (score > 0) {
      entryPointCandidates.push({ id: node.id, score, reasons });
    }
  }

  // Sort by score descending and return top candidates
  const sorted = entryPointCandidates.sort((a, b) => b.score - a.score);

  // DEBUG: Log top candidates with new scoring details
  if (sorted.length > 0 && isDev) {
    console.log(`[Process] Top 10 entry point candidates (new scoring):`);
    sorted.slice(0, 10).forEach((c, i) => {
      const node = graph.getNode(c.id);
      const exported = node?.properties.isExported ? '✓' : '✗';
      const shortPath = node?.properties.filePath?.split('/').slice(-2).join('/') || '';
      console.log(` ${i+1}. ${node?.properties.name} [exported:${exported}] (${shortPath})`);
      console.log(` score: ${c.score.toFixed(2)} = [${c.reasons.join(' × ')}]`);
    });
  }

  return sorted
    .slice(0, 200) // Limit to prevent explosion
    .map(c => c.id);
};

// ============================================================================
// HELPER: Trace from entry point (BFS)
// ============================================================================

/**
 * Trace forward from an entry point using BFS.
 * Returns all distinct paths up to maxDepth.
 *
 * A path is emitted when it reaches a node with no outgoing calls, when it is
 * `maxTraceDepth` nodes long, or when every followed callee would revisit a
 * node already on the path. Paths shorter than `minSteps` are discarded, at
 * most `maxBranching` callees are followed per node, and the search stops once
 * `maxBranching * 3` paths have been collected.
 *
 * @param entryId - Node ID the trace starts from.
 * @param callsEdges - Caller → callees adjacency list.
 * @param config - Depth, branching and minimum-length limits.
 * @returns Collected paths in breadth-first discovery order.
 */
const traceFromEntryPoint = (
  entryId: string,
  callsEdges: AdjacencyList,
  config: ProcessDetectionConfig
): string[][] => {
  const traces: string[][] = [];
  const maxTraces = config.maxBranching * 3;

  // BFS with path tracking
  // Each queue item: [currentNodeId, pathSoFar]
  const queue: [string, string[]][] = [[entryId, [entryId]]];

  // Consume the queue through a cursor: Array.prototype.shift() re-indexes the
  // remaining elements on every call, which makes a wide search quadratic.
  let head = 0;

  while (head < queue.length && traces.length < maxTraces) {
    const [currentId, path] = queue[head++];

    // Get outgoing calls
    const callees = callsEdges.get(currentId) || [];

    if (callees.length === 0) {
      // Terminal node - this is a complete trace
      if (path.length >= config.minSteps) {
        traces.push([...path]);
      }
    } else if (path.length >= config.maxTraceDepth) {
      // Max depth reached - save what we have
      if (path.length >= config.minSteps) {
        traces.push([...path]);
      }
    } else {
      // Continue tracing - limit branching
      const limitedCallees = callees.slice(0, config.maxBranching);
      let addedBranch = false;

      for (const calleeId of limitedCallees) {
        // Avoid cycles
        if (!path.includes(calleeId)) {
          queue.push([calleeId, [...path, calleeId]]);
          addedBranch = true;
        }
      }

      // If all branches were cycles, save current path as terminal
      if (!addedBranch && path.length >= config.minSteps) {
        traces.push([...path]);
      }
    }
  }

  return traces;
};

// ============================================================================
// HELPER: Deduplicate traces
// ============================================================================

/**
 * Merge traces that are subsets of other traces.
/**
 * Keep longer traces, remove redundant shorter ones.
 *
 * A trace is redundant when its node IDs appear, in order and contiguously,
 * inside a trace that has already been kept. The comparison is done element by
 * element: matching on joined strings would wrongly treat `[a, foo]` as part of
 * `[a, fooBar]`, because "a->foo" is a substring of "a->fooBar".
 *
 * @param traces - Candidate traces; the input array is not mutated.
 * @returns The surviving traces, longest first.
 */
const deduplicateTraces = (traces: string[][]): string[][] => {
  if (traces.length === 0) return [];

  /** True when `needle` occurs as a contiguous run of elements inside `haystack`. */
  const containsRun = (haystack: string[], needle: string[]): boolean => {
    const lastStart = haystack.length - needle.length;
    for (let start = 0; start <= lastStart; start++) {
      let offset = 0;
      while (offset < needle.length && haystack[start + offset] === needle[offset]) {
        offset++;
      }
      if (offset === needle.length) return true;
    }
    return false;
  };

  // Sort by length descending so every possible container is seen before its parts
  const sorted = [...traces].sort((a, b) => b.length - a.length);

  const unique: string[][] = [];

  for (const trace of sorted) {
    // Check if this trace is a subset of any already-added trace
    const isSubset = unique.some(existing => containsRun(existing, trace));

    if (!isSubset) {
      unique.push(trace);
    }
  }

  return unique;
};

// ============================================================================
// HELPER: Deduplicate by entry+terminal endpoints
// ============================================================================

/**
 * Keep only the longest trace per unique entry→terminal pair.
 * Multiple paths between the same two endpoints are redundant for agents.
 *
 * @param traces - Candidate traces; the input array is not mutated.
 * @returns One trace per (first node, last node) pair, longest first.
 */
const deduplicateByEndpoints = (traces: string[][]): string[][] => {
  if (traces.length === 0) return [];

  const byEndpoints = new Map<string, string[]>();

  // Sort longest first so the first seen per key is the longest
  const sorted = [...traces].sort((a, b) => b.length - a.length);

  for (const trace of sorted) {
    const key = `${trace[0]}::${trace[trace.length - 1]}`;
    if (!byEndpoints.has(key)) {
      byEndpoints.set(key, trace);
    }
  }

  return Array.from(byEndpoints.values());
};

// ============================================================================
// HELPER: String utilities
// ============================================================================

/** Upper-case the first character; empty input is returned unchanged. */
const capitalize = (s: string): string => {
  if (!s) return s;
  return s.charAt(0).toUpperCase() + s.slice(1);
};

/** Replace every non-alphanumeric character with '_', keep the first 20 characters, lower-case the result. */
const sanitizeId = (s: string): string => {
  return s.replace(/[^a-zA-Z0-9]/g, '_').substring(0, 20).toLowerCase();
};

================================================
FILE: gitnexus/src/core/ingestion/resolution-context.ts
================================================
/**
 *
 Resolution Context
 *
 * Single implementation of tiered name resolution. Replaces the duplicated
 * tier-selection logic previously split between symbol-resolver.ts and
 * call-processor.ts.
 *
 * Resolution tiers (highest confidence first):
 * 1. Same file (lookupExactAll — authoritative, returns every overload)
 * 2a-named. Named binding chain (walkBindingChain via NamedImportMap)
 * 2a. Import-scoped (lookupFuzzy filtered by ImportMap)
 * 2b. Package-scoped (lookupFuzzy filtered by PackageMap)
 * 3. Global (all candidates — consumers must check candidate count)
 *
 * Tiers 2a-named, 2a and 2b are all reported to callers as 'import-scoped'.
 */

import type { SymbolTable, SymbolDefinition } from './symbol-table.js';
import { createSymbolTable } from './symbol-table.js';
import type { NamedImportBinding } from './import-processor.js';
import { isFileInPackageDir } from './import-processor.js';
import { walkBindingChain } from './named-binding-extraction.js';

/** Resolution tier for tracking, logging, and test assertions. */
export type ResolutionTier = 'same-file' | 'import-scoped' | 'global';

/** Tier-selected candidates with metadata. */
export interface TieredCandidates {
  readonly candidates: readonly SymbolDefinition[];
  readonly tier: ResolutionTier;
}

/** Confidence scores per resolution tier. */
export const TIER_CONFIDENCE: Record<ResolutionTier, number> = {
  'same-file': 0.95,
  'import-scoped': 0.9,
  'global': 0.5,
};

// --- Map types ---
// ImportMap and PackageMap are looked up by the importing file's path; the
// sets hold imported file paths and package directory suffixes respectively.
// NOTE(review): NamedImportMap is only handed to walkBindingChain here — its
// key structure is presumably importing file → local name; confirm there.
export type ImportMap = Map<string, Set<string>>;
export type PackageMap = Map<string, Set<string>>;
export type NamedImportMap = Map<string, Map<string, NamedImportBinding>>;

export interface ResolutionContext {
  /**
   * The only resolution API. Returns all candidates at the winning tier.
   *
   * Tier 3 ('global') returns ALL candidates regardless of count —
   * consumers must check candidates.length and refuse ambiguous matches.
   */
  resolve(name: string, fromFile: string): TieredCandidates | null;

  // --- Data access (for pipeline wiring, not resolution) ---

  /** Symbol table — used by parsing-processor to populate symbols. */
  readonly symbols: SymbolTable;

  /** Raw maps — used by import-processor to populate import data. */
  readonly importMap: ImportMap;
  readonly packageMap: PackageMap;
  readonly namedImportMap: NamedImportMap;

  // --- Per-file cache lifecycle ---
  // enableCache(file) starts caching results for that one file;
  // clearCache() stops caching and empties the cache.
  enableCache(filePath: string): void;
  clearCache(): void;

  // --- Operational ---
  getStats(): { fileCount: number; globalSymbolCount: number; cacheHits: number; cacheMisses: number };
  clear(): void;
}

/**
 * Create a fresh, empty resolution context: a new symbol table, empty import
 * maps, and a disabled per-file result cache.
 */
export const createResolutionContext = (): ResolutionContext => {
  const symbols = createSymbolTable();
  const importMap: ImportMap = new Map();
  const packageMap: PackageMap = new Map();
  const namedImportMap: NamedImportMap = new Map();

  // Per-file cache state
  // The cache is only consulted while `cacheFile` equals the file being resolved.
  let cacheFile: string | null = null;
  let cache: Map<string, TieredCandidates | null> | null = null;
  let cacheHits = 0;
  let cacheMisses = 0;

  // --- Core resolution (single implementation of tier logic) ---

  // Walks the tiers in order and returns at the first tier that yields candidates.
  const resolveUncached = (name: string, fromFile: string): TieredCandidates | null => {
    // Tier 1: Same file — authoritative match (returns all overloads)
    const localDefs = symbols.lookupExactAll(fromFile, name);
    if (localDefs.length > 0) {
      return { candidates: localDefs, tier: 'same-file' };
    }

    // Get all global definitions for subsequent tiers
    const allDefs = symbols.lookupFuzzy(name);

    // Tier 2a-named: Check named bindings BEFORE empty-allDefs early return
    // because aliased imports mean lookupFuzzy('U') returns empty but we
    // can resolve via the exported name.
    const chainResult = walkBindingChain(name, fromFile, symbols, namedImportMap, allDefs);
    if (chainResult && chainResult.length > 0) {
      return { candidates: chainResult, tier: 'import-scoped' };
    }

    // Nothing anywhere carries this name and no binding chain resolved it.
    if (allDefs.length === 0) return null;

    // Tier 2a: Import-scoped — definition in a file imported by fromFile
    const importedFiles = importMap.get(fromFile);
    if (importedFiles) {
      const importedDefs = allDefs.filter(def => importedFiles.has(def.filePath));
      if (importedDefs.length > 0) {
        return { candidates: importedDefs, tier: 'import-scoped' };
      }
    }

    // Tier 2b: Package-scoped — definition in a package dir imported by fromFile
    const importedPackages = packageMap.get(fromFile);
    if (importedPackages) {
      const packageDefs = allDefs.filter(def => {
        for (const dirSuffix of importedPackages) {
          if (isFileInPackageDir(def.filePath, dirSuffix)) return true;
        }
        return false;
      });
      if (packageDefs.length > 0) {
        return { candidates: packageDefs, tier: 'import-scoped' };
      }
    }

    // Tier 3: Global — pass all candidates through.
    // Consumers must check candidate count and refuse ambiguous matches.
    return { candidates: allDefs, tier: 'global' };
  };

  // Cached front end for resolveUncached; null results are cached as well.
  const resolve = (name: string, fromFile: string): TieredCandidates | null => {
    // Check cache (only when enabled AND fromFile matches cached file)
    if (cache && cacheFile === fromFile) {
      if (cache.has(name)) {
        cacheHits++;
        // has() was checked above, so a stored null is a real cached "not found".
        return cache.get(name)!;
      }
      cacheMisses++;
    }

    const result = resolveUncached(name, fromFile);

    // Store in cache if active and file matches
    if (cache && cacheFile === fromFile) {
      cache.set(name, result);
    }

    return result;
  };

  // --- Cache lifecycle ---

  // Switch the cache to `filePath`, dropping entries left from any previous file.
  const enableCache = (filePath: string): void => {
    cacheFile = filePath;
    if (!cache) cache = new Map();
    else cache.clear();
  };

  // With cacheFile null, resolve() bypasses the cache until enableCache is called again.
  const clearCache = (): void => {
    cacheFile = null;
    // Reuse the Map instance — just clear entries to reduce GC pressure at scale.
    cache?.clear();
  };

  // Symbol-table statistics plus this context's cache counters.
  const getStats = () => ({
    ...symbols.getStats(),
    cacheHits,
    cacheMisses,
  });

  // Full reset: symbols, all import maps, the cache and its counters.
  const clear = (): void => {
    symbols.clear();
    importMap.clear();
    packageMap.clear();
    namedImportMap.clear();
    clearCache();
    cacheHits = 0;
    cacheMisses = 0;
  };

  return {
    resolve,
    symbols,
    importMap,
    packageMap,
    namedImportMap,
    enableCache,
    clearCache,
    getStats,
    clear,
  };
};

================================================
FILE: gitnexus/src/core/ingestion/resolvers/csharp.ts
================================================
/**
 * C# namespace import resolution.
 * Handles using-directive resolution via .csproj root namespace stripping.
 */

import type { SuffixIndex } from './utils.js';
import { suffixResolve } from './utils.js';

/** C# project config parsed from .csproj files */
export interface CSharpProjectConfig {
  /** Root namespace from <RootNamespace> or assembly name (default: project directory name) */
  rootNamespace: string;
  /** Directory containing the .csproj file */
  projectDir: string;
}

/**
 * Resolve a C# using-directive import path to matching .cs files.
 * Tries single-file match first, then directory match for namespace imports.
 *
 * For each project whose root namespace is a prefix of the import, the
 * remaining namespace segments are mapped under the project directory. The
 * first project that produces any match wins. If no project matches, a plain
 * suffix match on the whole namespace path is attempted.
 *
 * @param importPath - Dotted namespace or type name from the using directive.
 * @param csharpConfigs - Known projects (root namespace + directory).
 * @param normalizedFileList - File paths that are matched against.
 * @param allFileList - File paths that are returned; read at the same index as
 *   `normalizedFileList`.
 * @param index - Optional suffix index; when absent only the linear scan and
 *   the final suffix fallback run.
 * @returns Matching file paths, or an empty array.
 */
export function resolveCSharpImport(
  importPath: string,
  csharpConfigs: CSharpProjectConfig[],
  normalizedFileList: string[],
  allFileList: string[],
  index?: SuffixIndex,
): string[] {
  const namespacePath = importPath.replace(/\./g, '/');
  const results: string[] = [];

  for (const config of csharpConfigs) {
    const nsPath = config.rootNamespace.replace(/\./g, '/');
    let relative: string;
    if (namespacePath.startsWith(nsPath + '/')) {
      relative = namespacePath.slice(nsPath.length + 1);
    } else if (namespacePath === nsPath) {
      // The import IS the root namespace — resolve to all .cs files in project root
      relative = '';
    } else {
      // This project's root namespace does not cover the import.
      continue;
    }

    // Directory (or file stem) the namespace maps to, relative to the repository.
    const dirPrefix = config.projectDir
      ? (relative ? config.projectDir + '/' + relative : config.projectDir)
      : relative;

    // 1. Try as single file: relative.cs (e.g., "Models/DlqMessage.cs")
    if (relative) {
      const candidate = dirPrefix + '.cs';
      if (index) {
        const result = index.get(candidate) || index.getInsensitive(candidate);
        if (result) return [result];
      }
      // Also try suffix match
      const suffixResult = index?.get(relative + '.cs') || index?.getInsensitive(relative + '.cs');
      if (suffixResult) return [suffixResult];
    }

    // 2. Try as directory: all .cs files directly inside (namespace import)
    if (index) {
      const dirFiles = index.getFilesInDir(dirPrefix, '.cs');
      for (const f of dirFiles) {
        const normalized = f.replace(/\\/g, '/');
        // Check it's a direct child by finding the dirPrefix and ensuring no deeper slashes
        const prefixIdx = normalized.indexOf(dirPrefix + '/');
        if (prefixIdx < 0) continue;
        const afterDir = normalized.substring(prefixIdx + dirPrefix.length + 1);
        if (!afterDir.includes('/')) {
          results.push(f);
        }
      }
      if (results.length > 0) return results;
    }

    // 3. Linear scan fallback for directory matching
    if (results.length === 0) {
      const dirTrail = dirPrefix + '/';
      for (let i = 0; i < normalizedFileList.length; i++) {
        const normalized = normalizedFileList[i];
        if (!normalized.endsWith('.cs')) continue;
        const prefixIdx = normalized.indexOf(dirTrail);
        if (prefixIdx < 0) continue;
        const afterDir = normalized.substring(prefixIdx + dirTrail.length);
        if (!afterDir.includes('/')) {
          results.push(allFileList[i]);
        }
      }
      if (results.length > 0) return results;
    }
  }

  // Fallback: suffix matching without namespace stripping (single file)
  const pathParts = namespacePath.split('/').filter(Boolean);
  const fallback = suffixResolve(pathParts, normalizedFileList, allFileList, index);
  return fallback ? [fallback] : [];
}

/**
 * Compute the directory suffix for a C# namespace import (for PackageMap).
 * Returns a suffix like "/ProjectDir/Models/" or null if no config matches.
/**
 * @param importPath - Dotted namespace from the using directive.
 * @param csharpConfigs - Known projects (root namespace + directory).
 * @returns Suffix for the first project whose root namespace covers the
 *   import and that maps to a non-empty directory; otherwise null.
 */
export function resolveCSharpNamespaceDir(
  importPath: string,
  csharpConfigs: CSharpProjectConfig[],
): string | null {
  const namespacePath = importPath.replace(/\./g, '/');

  for (const config of csharpConfigs) {
    const nsPath = config.rootNamespace.replace(/\./g, '/');
    let relative: string;
    if (namespacePath.startsWith(nsPath + '/')) {
      relative = namespacePath.slice(nsPath.length + 1);
    } else if (namespacePath === nsPath) {
      relative = '';
    } else {
      continue;
    }

    const dirPrefix = config.projectDir
      ? (relative ? config.projectDir + '/' + relative : config.projectDir)
      : relative;

    // Root namespace of a project located at the repository root: no directory to report.
    if (!dirPrefix) continue;

    return '/' + dirPrefix + '/';
  }

  return null;
}

================================================
FILE: gitnexus/src/core/ingestion/resolvers/go.ts
================================================
/**
 * Go package import resolution.
 * Handles Go module path-based package imports.
 */

/** Go module config parsed from go.mod */
export interface GoModuleConfig {
  /** Module path (e.g., "github.com/user/repo") */
  modulePath: string;
}

/**
 * Extract the package directory suffix from a Go import path.
 * Returns the suffix string (e.g., "/internal/auth/") or null if invalid.
 *
 * The import must lie strictly below the module path, i.e. start with
 * `modulePath + '/'`. A bare prefix test would accept a sibling module such as
 * "github.com/user/repo2/pkg" for module "github.com/user/repo" and produce a
 * malformed "//pkg/" suffix.
 *
 * @param importPath - Import path as written in the Go source.
 * @param goModule - Module configuration providing the module path.
 * @returns "/<package path>/", or null when the import is outside the module
 *   or names the module root itself.
 */
export function resolveGoPackageDir(
  importPath: string,
  goModule: GoModuleConfig,
): string | null {
  const modulePrefix = goModule.modulePath + '/';
  if (!importPath.startsWith(modulePrefix)) return null;

  const relativePkg = importPath.slice(modulePrefix.length);
  if (!relativePkg) return null;

  return '/' + relativePkg + '/';
}

/**
 * Resolve a Go internal package import to all .go files in the package directory.
 * Returns an array of file paths.
/**
 * @param importPath - Import path as written in the Go source.
 * @param goModule - Module configuration providing the module path.
 * @param normalizedFileList - File paths that are matched against.
 * @param allFileList - File paths that are returned; read at the same index as
 *   `normalizedFileList`.
 * @returns Non-test .go files located directly in the package directory; empty
 *   when the import is outside the module or names the module root.
 */
export function resolveGoPackage(
  importPath: string,
  goModule: GoModuleConfig,
  normalizedFileList: string[],
  allFileList: string[],
): string[] {
  // Require a '/' right after the module path so that a sibling module
  // ("…/repo2/pkg" vs module "…/repo") is not mistaken for an internal package.
  const modulePrefix = goModule.modulePath + '/';
  if (!importPath.startsWith(modulePrefix)) return [];

  // Strip module path to get relative package path
  const relativePkg = importPath.slice(modulePrefix.length); // e.g., "internal/auth"
  if (!relativePkg) return [];

  const pkgSuffix = '/' + relativePkg + '/';
  const matches: string[] = [];

  for (let i = 0; i < normalizedFileList.length; i++) {
    // Prepend '/' so paths like "internal/auth/service.go" match suffix "/internal/auth/"
    const normalized = '/' + normalizedFileList[i];
    // File must be directly in the package directory (not a subdirectory)
    if (normalized.includes(pkgSuffix) && normalized.endsWith('.go') && !normalized.endsWith('_test.go')) {
      const afterPkg = normalized.substring(normalized.indexOf(pkgSuffix) + pkgSuffix.length);
      if (!afterPkg.includes('/')) {
        matches.push(allFileList[i]);
      }
    }
  }

  return matches;
}

================================================
FILE: gitnexus/src/core/ingestion/resolvers/index.ts
================================================
/**
 * Language-specific import resolvers.
 * Extracted from import-processor.ts for maintainability.
/**
 * This module only re-exports; the implementations live in the sibling files.
 */

export { EXTENSIONS, tryResolveWithExtensions, buildSuffixIndex, suffixResolve } from './utils.js';
export type { SuffixIndex } from './utils.js';
export { KOTLIN_EXTENSIONS, appendKotlinWildcard, resolveJvmWildcard, resolveJvmMemberImport } from './jvm.js';
export { resolveGoPackageDir, resolveGoPackage } from './go.js';
export type { GoModuleConfig } from './go.js';
export { resolveCSharpImport, resolveCSharpNamespaceDir } from './csharp.js';
export type { CSharpProjectConfig } from './csharp.js';
export { resolvePhpImport } from './php.js';
export type { ComposerConfig } from './php.js';
export { resolveRustImport, tryRustModulePath } from './rust.js';
export { resolveRubyImport } from './ruby.js';
export { resolvePythonImport } from './python.js';
export { resolveImportPath, RESOLVE_CACHE_CAP } from './standard.js';
export type { TsconfigPaths } from './standard.js';

================================================
FILE: gitnexus/src/core/ingestion/resolvers/jvm.ts
================================================
/**
 * JVM import resolution (Java + Kotlin).
 * Handles wildcard imports, member/static imports, and Kotlin-specific patterns.
 */

import type { SuffixIndex } from './utils.js';

/** Kotlin file extensions for JVM resolver reuse */
export const KOTLIN_EXTENSIONS: readonly string[] = ['.kt', '.kts'];

/**
 * Return the import path with a trailing ".*" when the import node has a
 * direct child of type `wildcard_import`; otherwise return it untouched.
 * A path that already ends in ".*" is never suffixed twice. The input string
 * is not modified.
 *
 * @param importPath - Import path text extracted from the node.
 * @param importNode - Syntax node; only `childCount` and `child(i)?.type` are read.
 */
export const appendKotlinWildcard = (importPath: string, importNode: any): string => {
  // Scan the direct children, stopping at the first wildcard marker.
  let sawWildcard = false;
  for (let idx = 0; !sawWildcard && idx < importNode.childCount; idx++) {
    sawWildcard = importNode.child(idx)?.type === 'wildcard_import';
  }

  if (!sawWildcard || importPath.endsWith('.*')) {
    return importPath;
  }
  return `${importPath}.*`;
};

/**
 * Resolve a JVM wildcard import (com.example.*) to all matching files.
 * Works for both Java (.java) and Kotlin (.kt, .kts).
/**
 * A file matches when the directory that directly contains it ends with the
 * package path. This also covers packages rooted at the repository root
 * ("com/example/util/Foo.java") and paths in which the package segment occurs
 * more than once.
 *
 * @param importPath - Wildcard import ending in ".*", e.g. "com.example.util.*".
 * @param normalizedFileList - File paths that are matched against (fallback scan).
 * @param allFileList - File paths that are returned; read at the same index as
 *   `normalizedFileList`.
 * @param extensions - File extensions to accept, e.g. ['.java'].
 * @param index - Optional suffix index; when given, candidates come from
 *   `index.getFilesInDir` instead of the linear scan.
 * @returns Files located directly in the package directory (no subdirectories).
 */
export function resolveJvmWildcard(
  importPath: string,
  normalizedFileList: string[],
  allFileList: string[],
  extensions: readonly string[],
  index?: SuffixIndex,
): string[] {
  // "com.example.util.*" -> "com/example/util"
  const packagePath = importPath.slice(0, -2).replace(/\./g, '/');
  const packageSuffix = '/' + packagePath + '/';

  // True when `filePath` (forward slashes) sits directly inside the package directory.
  const isDirectChild = (filePath: string): boolean => {
    const lastSlash = filePath.lastIndexOf('/');
    if (lastSlash < 0) return false;
    // Prepend '/' so a package directory at the very start of the path still matches.
    return ('/' + filePath.slice(0, lastSlash + 1)).endsWith(packageSuffix);
  };

  if (index) {
    // Index candidates may use backslashes; normalize only for the comparison
    // and return the path exactly as the index supplied it.
    return extensions
      .flatMap(ext => index.getFilesInDir(packagePath, ext))
      .filter(f => isDirectChild(f.replace(/\\/g, '/')));
  }

  // Fallback: linear scan
  const matches: string[] = [];
  for (let i = 0; i < normalizedFileList.length; i++) {
    const normalized = normalizedFileList[i];
    if (extensions.some(ext => normalized.endsWith(ext)) && isDirectChild(normalized)) {
      matches.push(allFileList[i]);
    }
  }
  return matches;
}

/**
 * Try to resolve a JVM member/static import by stripping the member name.
/**
 * Java: "com.example.Constants.VALUE" -> resolve "com.example.Constants"
 * Kotlin: "com.example.Constants.VALUE" -> resolve "com.example.Constants"
 *
 * @param importPath - Dotted import path; needs at least three segments.
 * @param normalizedFileList - File paths that are matched against (no-index scan).
 * @param allFileList - File paths that are returned; read at the same index as
 *   `normalizedFileList`.
 * @param extensions - File extensions to try, in order.
 * @param index - Optional suffix index; when given, the linear scan is skipped.
 * @returns The file declaring the owning class, or null.
 */
export function resolveJvmMemberImport(
  importPath: string,
  normalizedFileList: string[],
  allFileList: string[],
  extensions: readonly string[],
  index?: SuffixIndex,
): string | null {
  // Member imports: com.example.Constants.VALUE or com.example.Constants.*
  const segments = importPath.split('.');
  if (segments.length < 3) return null;

  // The last segment is a member name if it starts with lowercase, is ALL_CAPS, or is a wildcard
  const lastSeg = segments[segments.length - 1];
  const isMember = lastSeg === '*' || /^[a-z]/.test(lastSeg) || /^[A-Z_]+$/.test(lastSeg);
  if (!isMember) return null;

  const classPath = segments.slice(0, -1).join('/');

  for (const ext of extensions) {
    const classSuffix = classPath + ext;

    if (index) {
      const result = index.get(classSuffix) || index.getInsensitive(classSuffix);
      if (result) return result;
      continue;
    }

    // No index: scan for a path ending in "/<classPath><ext>", exact case first,
    // then case-insensitively. The lower-cased suffix is computed once per extension.
    const fullSuffix = '/' + classSuffix;
    const fullSuffixLower = fullSuffix.toLowerCase();
    for (let i = 0; i < normalizedFileList.length; i++) {
      const candidate = normalizedFileList[i];
      if (candidate.endsWith(fullSuffix) || candidate.toLowerCase().endsWith(fullSuffixLower)) {
        return allFileList[i];
      }
    }
  }

  return null;
}

================================================
FILE: gitnexus/src/core/ingestion/resolvers/php.ts
================================================
/**
 * PHP PSR-4 import resolution.
 * Handles use-statement resolution via composer.json autoload mappings.
 */

import type { SuffixIndex } from './utils.js';
import { suffixResolve } from './utils.js';

/** PHP Composer PSR-4 autoload config */
export interface ComposerConfig {
  /** Map of namespace prefix -> directory (e.g., "App\\" -> "app/") */
  psr4: Map<string, string>;
}

/**
 * Resolve a PHP use-statement import path using PSR-4 mappings.
 * e.g.
 * "App\Http\Controllers\UserController" -> "app/Http/Controllers/UserController.php"
 *
 * Namespace prefixes and directories are accepted with or without trailing
 * separators ("App\\" and "App", "app/" and "app" are equivalent). An empty
 * directory maps to the repository root, and an empty namespace prefix acts as
 * the PSR-4 fallback that matches any class. Prefixes are tried longest first.
 * A mapping only wins if the resulting file exists in `allFiles` or is found
 * case-insensitively through `index`; otherwise plain suffix matching is used.
 *
 * @param importPath - Class name from the use statement (backslash-separated).
 * @param composerConfig - PSR-4 mappings, or null when no composer.json was found.
 * @param allFiles - Set of known file paths.
 * @param normalizedFileList - File paths that are matched against (suffix fallback).
 * @param allFileList - File paths that are returned by the suffix fallback.
 * @param index - Optional suffix index.
 * @returns The resolved file path, or null.
 */
export function resolvePhpImport(
  importPath: string,
  composerConfig: ComposerConfig | null,
  allFiles: Set<string>,
  normalizedFileList: string[],
  allFileList: string[],
  index?: SuffixIndex,
): string | null {
  // Normalize: replace backslashes with forward slashes
  const normalized = importPath.replace(/\\/g, '/');

  // Try PSR-4 resolution if composer.json was found
  if (composerConfig) {
    // A fully-qualified name may carry a leading separator ("\App\Foo").
    const className = normalized.replace(/^\/+/, '');

    // Strip trailing separators so "App\\" -> "app/" and "App" -> "app" behave
    // the same, then sort namespaces by length descending (longest match wins).
    const mappings = [...composerConfig.psr4.entries()]
      .map(([nsPrefix, dirPrefix]): [string, string] => [
        nsPrefix.replace(/\\/g, '/').replace(/\/+$/, ''),
        dirPrefix.replace(/\\/g, '/').replace(/\/+$/, ''),
      ])
      .sort((a, b) => b[0].length - a[0].length);

    for (const [nsPrefix, dirPrefix] of mappings) {
      let remainder: string;
      if (nsPrefix === '') {
        // Empty prefix: fallback directory for every namespace.
        remainder = className;
      } else if (className === nsPrefix) {
        remainder = '';
      } else if (className.startsWith(nsPrefix + '/')) {
        remainder = className.slice(nsPrefix.length + 1);
      } else {
        continue;
      }

      // Join only the non-empty parts so an empty directory or remainder never
      // yields a leading or doubled slash.
      const filePath = [dirPrefix, remainder].filter(Boolean).join('/') + '.php';
      if (allFiles.has(filePath)) return filePath;
      if (index) {
        const result = index.getInsensitive(filePath);
        if (result) return result;
      }
    }
  }

  // Fallback: suffix matching (works without composer.json)
  const pathParts = normalized.split('/').filter(Boolean);
  return suffixResolve(pathParts, normalizedFileList, allFileList, index);
}

================================================
FILE: gitnexus/src/core/ingestion/resolvers/python.ts
================================================
/**
 * Python import resolution — PEP 328 relative imports and proximity-based bare imports.
 * Import system spec: PEP 302 (original), PEP 451 (current).
 */

import { tryResolveWithExtensions } from './utils.js';

/**
 * Resolve a Python import to a file path.
 *
 * 1. Relative (PEP 328): `.module`, `..module` — 1 dot = current package, each extra dot goes up one level.
 * 2. Proximity bare import: static heuristic — checks the importer's own directory first.
 *    Approximates the common case where co-located files find each other without an installed package.
*    Single-segment only — multi-segment (e.g. `os.path`) falls through to suffixResolve.
 *    Checks package (__init__.py) before module (.py), matching CPython's finder order (PEP 451 §4).
 *    `pkg/__init__.py` and `pkg.py` can both exist in the same directory; in that case the
 *    package is returned, as CPython would import it.
 *    Note: namespace packages (PEP 420, directory without __init__.py) are not handled.
 *
 * Backslashes in `currentFile` are normalized to '/' before any path arithmetic, for both
 * relative and bare imports.
 *
 * Returns null to let the caller fall through to suffixResolve.
 */
export function resolvePythonImport(
  currentFile: string,
  importPath: string,
  allFiles: Set<string>,
): string | null {
  // Directory segments of the importing file. Windows separators are normalized once here so
  // the relative branch does not treat a backslash path as a single segment.
  const importerDirParts = currentFile.replace(/\\/g, '/').split('/').slice(0, -1);

  // Relative import — PEP 328 (https://peps.python.org/pep-0328/)
  if (importPath.startsWith('.')) {
    const dotMatch = importPath.match(/^(\.+)(.*)/);
    if (!dotMatch) return null;

    const dotCount = dotMatch[1].length;
    const modulePart = dotMatch[2];
    const dirParts = [...importerDirParts];

    // PEP 328: more dots than directory levels → beyond top-level package → invalid
    if (dotCount - 1 > dirParts.length) return null;

    // One dot is the current package; every additional dot climbs one directory.
    for (let i = 1; i < dotCount; i++) dirParts.pop();

    if (modulePart) {
      dirParts.push(...modulePart.replace(/\./g, '/').split('/'));
    }
    return tryResolveWithExtensions(dirParts.join('/'), allFiles);
  }

  // Proximity bare import — single-segment only; package before module (PEP 451 §4)
  const pathLike = importPath.replace(/\./g, '/');
  if (pathLike.includes('/')) return null;

  // A file at the repository root has no directory to search next to.
  const importerDir = importerDirParts.join('/');
  if (!importerDir) return null;

  const packageInit = `${importerDir}/${pathLike}/__init__.py`;
  if (allFiles.has(packageInit)) return packageInit;

  const moduleFile = `${importerDir}/${pathLike}.py`;
  if (allFiles.has(moduleFile)) return moduleFile;

  return null;
}

================================================
FILE: gitnexus/src/core/ingestion/resolvers/ruby.ts
================================================
/**
 * Ruby require/require_relative import resolution.
* Handles path resolution for Ruby's require and require_relative calls.
 */

import type { SuffixIndex } from './utils.js';
import { suffixResolve } from './utils.js';

/**
 * Map a Ruby `require` / `require_relative` argument onto a repository file.
 *
 * The caller is expected to have rewritten require_relative paths to start with './';
 * that prefix is dropped here. Everything else (gem-style paths such as 'json' or
 * 'net/http') is handed to suffix matching as a list of path segments.
 *
 * @returns the file chosen by suffixResolve, or null when nothing matches.
 */
export function resolveRubyImport(
  importPath: string,
  normalizedFileList: string[],
  allFileList: string[],
  index?: SuffixIndex,
): string | null {
  const withoutDotSlash = importPath.startsWith('./') ? importPath.slice(2) : importPath;
  const segments = withoutDotSlash.split('/').filter((segment) => segment.length > 0);
  return suffixResolve(segments, normalizedFileList, allFileList, index);
}

================================================
FILE: gitnexus/src/core/ingestion/resolvers/rust.ts
================================================
/**
 * Rust module import resolution.
 * Handles crate::, super::, self:: prefix paths and :: separators.
 */

/**
 * Resolve Rust use-path to a file.
 * Handles crate::, super::, self:: prefixes and :: path separators.
// */
export function resolveRustImport(
  currentFile: string,
  importPath: string,
  allFiles: Set<string>,
): string | null {
  // crate:: is anchored at the crate root: try the standard `src/` layout first,
  // then the repository root for non-standard layouts.
  if (importPath.startsWith('crate::')) {
    const rustPath = importPath.slice('crate::'.length).replace(/::/g, '/');
    return tryRustModulePath('src/' + rustPath, allFiles) ?? tryRustModulePath(rustPath, allFiles);
  }

  // super:: = parent of the current module. Each additional chained `super::`
  // (e.g. `super::super::foo`) climbs one more module level.
  if (importPath.startsWith('super::')) {
    let rest = importPath;
    let levels = 0;
    while (rest.startsWith('super::')) {
      rest = rest.slice('super::'.length);
      levels++;
    }

    const candidates: string[] = [];

    // Module-aware guess: climb from the directory that holds the current module's children.
    const moduleDir = rustModuleDir(currentFile);
    if (levels <= moduleDir.length) {
      const ancestorDir = moduleDir.slice(0, moduleDir.length - levels);
      candidates.push([...ancestorDir, rest.replace(/::/g, '/')].join('/'));
    }

    // Legacy guess (kept for backward compatibility): parent of the file's directory,
    // stripping a single `super::` only.
    const legacyDir = currentFile.split('/').slice(0, -1);
    legacyDir.pop();
    const legacyPath = importPath.slice('super::'.length).replace(/::/g, '/');
    candidates.push([...legacyDir, legacyPath].join('/'));

    return firstRustModule(candidates, allFiles, currentFile);
  }

  // self:: = the current module.
  if (importPath.startsWith('self::')) {
    const rustPath = importPath.slice('self::'.length).replace(/::/g, '/');
    return firstRustModule(
      [
        // Module-aware guess: children of `foo.rs` live under `foo/`.
        [...rustModuleDir(currentFile), rustPath].join('/'),
        // Legacy guess: the file's own directory (correct for mod.rs / crate roots).
        [...currentFile.split('/').slice(0, -1), rustPath].join('/'),
      ],
      allFiles,
      currentFile,
    );
  }

  // Bare path without prefix (e.g., from a use in a nested module):
  // convert :: to / and probe it as a repository-relative module path.
  if (importPath.includes('::')) {
    return tryRustModulePath(importPath.replace(/::/g, '/'), allFiles);
  }

  return null;
}

/**
 * Directory segments under which the child modules of `currentFile`'s module live.
 * `mod.rs`, `lib.rs` and `main.rs` own their containing directory; any other `foo.rs`
 * owns the sibling directory `foo/`.
 */
function rustModuleDir(currentFile: string): string[] {
  const segments = currentFile.split('/');
  const fileName = segments.pop() ?? '';
  if (fileName === 'mod.rs' || fileName === 'lib.rs' || fileName === 'main.rs') {
    return segments;
  }
  return [...segments, fileName.replace(/\.rs$/, '')];
}

/**
 * Probe candidate module paths in order and return the first resolved file.
 * A hit on a file other than `currentFile` is preferred; a hit on `currentFile` itself is
 * only returned when no candidate resolves elsewhere. This keeps earlier results for crate
 * roots that are not named lib.rs/main.rs (e.g. files under src/bin/).
 */
function firstRustModule(
  candidates: string[],
  allFiles: Set<string>,
  currentFile: string,
): string | null {
  let selfHit: string | null = null;
  // Set iteration keeps insertion order and skips duplicate candidates.
  for (const candidate of new Set(candidates)) {
    const hit = tryRustModulePath(candidate, allFiles);
    if (!hit) continue;
    if (hit !== currentFile) return hit;
    selfHit = hit;
  }
  return selfHit;
}

/**
 * Try to resolve a Rust module path to a file.
 * Tries: path.rs, path/mod.rs, and with the last segment stripped
 * (last segment might be a symbol name, not a module).
*/
export function tryRustModulePath(modulePath: string, allFiles: Set<string>): string | null {
  // Files that can define the module rooted at `base`:
  //   base.rs, base/mod.rs, or base/lib.rs (crate root).
  const moduleFileSuffixes = ['.rs', '/mod.rs', '/lib.rs'];
  const probe = (base: string): string | null => {
    for (const suffix of moduleFileSuffixes) {
      const candidate = base + suffix;
      if (allFiles.has(candidate)) return candidate;
    }
    return null;
  };

  const direct = probe(modulePath);
  if (direct !== null) return direct;

  // The last segment might be a symbol (function, struct, etc.), not a module.
  // Strip it and probe the parent with the same file patterns, so a symbol that lives
  // in a crate root (e.g. `src/Foo` -> `src/lib.rs`) resolves as well.
  const lastSlash = modulePath.lastIndexOf('/');
  return lastSlash > 0 ? probe(modulePath.substring(0, lastSlash)) : null;
}

================================================
FILE: gitnexus/src/core/ingestion/resolvers/standard.ts
================================================
/**
 * Standard import path resolution.
 * Handles relative imports, path alias rewriting, and generic suffix matching.
 * Used as the fallback when language-specific resolvers don't match.
 */

import type { SuffixIndex } from './utils.js';
import { tryResolveWithExtensions, suffixResolve } from './utils.js';
import { resolveRustImport } from './rust.js';
import { SupportedLanguages } from '../../../config/supported-languages.js';

/** TypeScript path alias config parsed from tsconfig.json */
export interface TsconfigPaths {
  /** Map of alias prefix -> target prefix (e.g., "@/" -> "src/") */
  aliases: Map<string, string>;
  /** Base URL for path resolution (relative to repo root) */
  baseUrl: string;
}

/** Max entries in the resolve cache. Beyond this, entries are evicted.
 *  100K entries ≈ 15MB — covers the most common import patterns. */
export const RESOLVE_CACHE_CAP = 100_000;

/**
 * Resolve an import path to a file path in the repository.
*
 * Language-specific preprocessing runs before the generic resolution:
 * - TypeScript/JavaScript: rewrites tsconfig path aliases
 * - Rust: converts crate::/super::/self:: to relative paths
 *
 * Java wildcards and Go package imports are handled separately in processImports
 * because they resolve to multiple files.
 *
 * Every outcome (including null) is memoized in `resolveCache` under
 * `${currentFile}::${importPath}`.
 */
export const resolveImportPath = (
  currentFile: string,
  importPath: string,
  allFiles: Set<string>,
  allFileList: string[],
  normalizedFileList: string[],
  resolveCache: Map<string, string | null>,
  language: SupportedLanguages,
  tsconfigPaths: TsconfigPaths | null,
  index?: SuffixIndex,
): string | null => {
  const cacheKey = `${currentFile}::${importPath}`;
  if (resolveCache.has(cacheKey)) return resolveCache.get(cacheKey) ?? null;

  // Record the outcome under cacheKey and hand it back to the caller.
  const remember = (outcome: string | null): string | null => {
    if (resolveCache.size >= RESOLVE_CACHE_CAP) {
      // At capacity: drop the first 20% of keys in iteration order instead of clearing all.
      let toEvict = Math.floor(RESOLVE_CACHE_CAP * 0.2);
      for (const staleKey of resolveCache.keys()) {
        if (toEvict <= 0) break;
        resolveCache.delete(staleKey);
        toEvict--;
      }
    }
    resolveCache.set(cacheKey, outcome);
    return outcome;
  };

  // Shorthand for suffix matching against the repository file lists.
  const bySuffix = (segments: string[]): string | null =>
    suffixResolve(segments, normalizedFileList, allFileList, index);

  // ---- TypeScript/JavaScript: rewrite path aliases ----
  const isScriptLanguage =
    language === SupportedLanguages.TypeScript || language === SupportedLanguages.JavaScript;
  if (isScriptLanguage && tsconfigPaths && !importPath.startsWith('.')) {
    for (const [aliasPrefix, targetPrefix] of tsconfigPaths.aliases) {
      if (!importPath.startsWith(aliasPrefix)) continue;

      // Swap the alias prefix for its target, then anchor at baseUrl unless it is the repo root.
      const aliased = targetPrefix + importPath.slice(aliasPrefix.length);
      const rewritten =
        tsconfigPaths.baseUrl === '.' ? aliased : tsconfigPaths.baseUrl + '/' + aliased;

      // Direct hit from the repo root first, suffix matching second.
      const aliasHit =
        tryResolveWithExtensions(rewritten, allFiles) ||
        bySuffix(rewritten.split('/').filter(Boolean));
      if (aliasHit) return remember(aliasHit);
    }
  }

  // ---- Rust: convert module path syntax to file paths ----
  if (language === SupportedLanguages.Rust) {
    const groupStart = importPath.indexOf('::{');

    if (groupStart === -1 && importPath.startsWith('{') && importPath.endsWith('}')) {
      // Top-level grouped imports: use {crate::a, crate::b}
      // Only one string can be returned here, so the first member that resolves wins.
      // Callers that need ALL edges must intercept before reaching this function (see the
      // Rust grouped-import blocks in processImports / processImportsBatch).
      const members = importPath
        .slice(1, -1)
        .split(',')
        .map((member) => member.trim())
        .filter(Boolean);
      for (const member of members) {
        const memberHit = resolveRustImport(currentFile, member, allFiles);
        if (memberHit) return remember(memberHit);
      }
      return remember(null);
    }

    // Grouped imports: use crate::module::{Foo, Bar, Baz}
    // Resolve the module prefix before ::{...}, not the listed symbols.
    const modulePath = groupStart === -1 ? importPath : importPath.substring(0, groupStart);
    const rustHit = resolveRustImport(currentFile, modulePath, allFiles);
    if (rustHit) return remember(rustHit);
    // Otherwise fall through to the generic resolution below.
  }

  // ---- Generic relative import resolution (./ and ../) ----
  if (importPath.startsWith('.')) {
    const segments = currentFile.split('/').slice(0, -1);
    for (const piece of importPath.split('/')) {
      if (piece === '.') continue;
      if (piece === '..') {
        segments.pop();
      } else {
        segments.push(piece);
      }
    }
    return remember(tryResolveWithExtensions(segments.join('/'), allFiles));
  }

  // ---- Generic package/absolute import resolution (suffix matching) ----
  // Java wildcards are handled in processImports, not here
  if (importPath.endsWith('.*')) return remember(null);

  // C/C++ includes use actual file paths (e.g. "animal.h") — don't convert dots to slashes
  const isCFamily = language === SupportedLanguages.C || language === SupportedLanguages.CPlusPlus;
  const keepDots = importPath.includes('/') || isCFamily;
  const pathLike = keepDots ? importPath : importPath.replace(/\./g, '/');
  return remember(bySuffix(pathLike.split('/').filter(Boolean)));
};

================================================
FILE: gitnexus/src/core/ingestion/resolvers/utils.ts
================================================
/**
 * Shared utilities for import resolution.
 * Extracted from import-processor.ts to reduce file size.
// */

/** All file extensions to try during resolution, in probing order */
export const EXTENSIONS = [
  '',
  // TypeScript/JavaScript
  '.tsx',
  '.ts',
  '.jsx',
  '.js',
  '/index.tsx',
  '/index.ts',
  '/index.jsx',
  '/index.js',
  // Python
  '.py',
  '/__init__.py',
  // Java
  '.java',
  // Kotlin
  '.kt',
  '.kts',
  // C/C++
  '.c',
  '.h',
  '.cpp',
  '.hpp',
  '.cc',
  '.cxx',
  '.hxx',
  '.hh',
  // C#
  '.cs',
  // Go
  '.go',
  // Rust
  '.rs',
  '/mod.rs',
  // PHP
  '.php',
  '.phtml',
  // Swift
  '.swift',
  // Ruby
  '.rb',
];

/**
 * Probe `basePath` against the known file set, appending each entry of EXTENSIONS in order.
 * The first candidate present in `allFiles` is returned; null when none is present.
 */
export function tryResolveWithExtensions(
  basePath: string,
  allFiles: Set<string>,
): string | null {
  const firstHit = EXTENSIONS.map((suffix) => basePath + suffix).find((candidate) =>
    allFiles.has(candidate),
  );
  return firstHit ?? null;
}

/**
 * Build a suffix index for O(1) endsWith lookups.
 * Maps every possible path suffix to its original file path.
 * e.g. for "src/com/example/Foo.java":
 *   "Foo.java" -> "src/com/example/Foo.java"
 *   "example/Foo.java" -> "src/com/example/Foo.java"
 *   "com/example/Foo.java" -> "src/com/example/Foo.java"
 *   etc.
*/
export interface SuffixIndex {
  /** Exact suffix lookup (case-sensitive) */
  get(suffix: string): string | undefined;
  /** Case-insensitive suffix lookup */
  getInsensitive(suffix: string): string | undefined;
  /** Get all files in a directory suffix */
  getFilesInDir(dirSuffix: string, extension: string): string[];
}

/**
 * Build the suffix index over the repository file list.
 *
 * @param normalizedFileList paths used to derive suffixes (split on '/')
 * @param allFileList original paths, index-aligned with `normalizedFileList`; these are the
 *   values returned by lookups
 */
export function buildSuffixIndex(normalizedFileList: string[], allFileList: string[]): SuffixIndex {
  // Map: normalized suffix -> original file path
  const exactMap = new Map<string, string>();
  // Map: lowercase suffix -> original file path
  const lowerMap = new Map<string, string>();
  // Map: directory suffix -> list of file paths in that directory
  const dirMap = new Map<string, string[]>();

  for (let i = 0; i < normalizedFileList.length; i++) {
    const normalized = normalizedFileList[i];
    const original = allFileList[i];
    const parts = normalized.split('/');

    // Index all suffixes: "a/b/c.java" -> ["c.java", "b/c.java", "a/b/c.java"]
    for (let j = parts.length - 1; j >= 0; j--) {
      const suffix = parts.slice(j).join('/');
      // Keep the first file seen for each suffix: when several files share a suffix,
      // the one that appears earliest in the file list wins.
      if (!exactMap.has(suffix)) {
        exactMap.set(suffix, original);
      }
      const lower = suffix.toLowerCase();
      if (!lowerMap.has(lower)) {
        lowerMap.set(lower, original);
      }
    }

    // Index directory membership (files without a directory component are skipped)
    const lastSlash = normalized.lastIndexOf('/');
    if (lastSlash >= 0) {
      // Build all directory suffixes
      const dirParts = parts.slice(0, -1);
      const fileName = parts[parts.length - 1];
      // Extension including the dot. For a name without any dot, lastIndexOf returns -1 and
      // substring(-1) yields the whole file name, so such files are keyed by their full name.
      const ext = fileName.substring(fileName.lastIndexOf('.'));
      for (let j = dirParts.length - 1; j >= 0; j--) {
        const dirSuffix = dirParts.slice(j).join('/');
        const key = `${dirSuffix}:${ext}`;
        let list = dirMap.get(key);
        if (!list) {
          list = [];
          dirMap.set(key, list);
        }
        list.push(original);
      }
    }
  }

  return {
    get: (suffix: string) => exactMap.get(suffix),
    getInsensitive: (suffix: string) => lowerMap.get(suffix.toLowerCase()),
    getFilesInDir: (dirSuffix: string, extension: string) => {
      return dirMap.get(`${dirSuffix}:${extension}`) || [];
    },
  };
}

/**
 * Suffix-based resolution using index. O(1) per lookup instead of O(files).
 *
 * Progressively shorter tails of `pathParts` are tried, each combined with every entry of
 * EXTENSIONS. Without an index the same candidates are matched by a linear scan of
 * `normalizedFileList`; that scan accepts a file whose whole path equals the candidate as
 * well as one that ends with '/' + candidate, so both modes agree on root-level and
 * full-path matches.
 *
 * @returns the matching entry of `allFileList` (or the index's stored path), else null
 */
export function suffixResolve(
  pathParts: string[],
  normalizedFileList: string[],
  allFileList: string[],
  index?: SuffixIndex,
): string | null {
  if (index) {
    for (let i = 0; i < pathParts.length; i++) {
      const suffix = pathParts.slice(i).join('/');
      for (const ext of EXTENSIONS) {
        const suffixWithExt = suffix + ext;
        const result = index.get(suffixWithExt) || index.getInsensitive(suffixWithExt);
        if (result) return result;
      }
    }
    return null;
  }

  // Fallback: linear scan (for backward compatibility)
  for (let i = 0; i < pathParts.length; i++) {
    const suffix = pathParts.slice(i).join('/');
    for (const ext of EXTENSIONS) {
      const suffixWithExt = suffix + ext;
      const suffixPattern = '/' + suffixWithExt;
      // Lowercase forms are loop-invariant for the scan below.
      const lowerWhole = suffixWithExt.toLowerCase();
      const lowerPattern = suffixPattern.toLowerCase();
      const matchIdx = normalizedFileList.findIndex((filePath) => {
        if (filePath === suffixWithExt || filePath.endsWith(suffixPattern)) return true;
        const lowerPath = filePath.toLowerCase();
        return lowerPath === lowerWhole || lowerPath.endsWith(lowerPattern);
      });
      if (matchIdx !== -1) {
        return allFileList[matchIdx];
      }
    }
  }
  return null;
}

================================================
FILE: gitnexus/src/core/ingestion/structure-processor.ts
================================================
import { generateId } from "../../lib/utils.js";
import { KnowledgeGraph, GraphNode, GraphRelationship } from "../graph/types.js";

/**
 * Add the folder/file hierarchy of `paths` to the graph.
 *
 * Each path is split on '/'. Every segment becomes a node (label 'Folder', or 'File' for the
 * last segment) whose id is derived from the label and the cumulative path, and each
 * non-root segment gets a CONTAINS relationship from its parent segment.
 * NOTE(review): folders shared by several paths are passed to addNode/addRelationship once
 * per path — presumably the graph de-duplicates by id; confirm against KnowledgeGraph.
 */
export const processStructure = (graph: KnowledgeGraph, paths: string[]) => {
  paths.forEach((path) => {
    const parts = path.split('/');
    let currentPath = '';
    let parentId = '';

    parts.forEach((part, index) => {
      const isFile = index === parts.length - 1;
      const label = isFile ? 'File' : 'Folder';

      currentPath = currentPath ? `${currentPath}/${part}` : part;

      const nodeId = generateId(label, currentPath);

      const node: GraphNode = {
        id: nodeId,
        label: label,
        properties: {
          name: part,
          filePath: currentPath,
        },
      };
      graph.addNode(node);

      if (parentId) {
        const relId = generateId('CONTAINS', `${parentId}->${nodeId}`);
        const relationship: GraphRelationship = {
          id: relId,
          type: 'CONTAINS',
          sourceId: parentId,
          targetId: nodeId,
          confidence: 1.0,
          reason: '',
        };
        graph.addRelationship(relationship);
      }

      parentId = nodeId;
    });
  });
};

================================================
FILE: gitnexus/src/core/ingestion/symbol-table.ts
================================================
import type { NodeLabel } from '../graph/types.js';

export interface SymbolDefinition {
  nodeId: string;
  filePath: string;
  type: NodeLabel;
  parameterCount?: number;
  /** Number of required (non-optional, non-default) parameters.
   *  Enables range-based arity filtering: argCount >= requiredParameterCount && argCount <= parameterCount. */
  requiredParameterCount?: number;
  /** Per-parameter type names for overload disambiguation (e.g. ['int', 'String']).
   *  Populated when parameter types are resolvable from AST (any typed language).
   *  Used for disambiguation in overloading languages (Java, Kotlin, C#, C++). */
  parameterTypes?: string[];
  /** Raw return type text extracted from AST (e.g. 'User', 'Promise<User>') */
  returnType?: string;
  /** Declared type for non-callable symbols — fields/properties (e.g. 'Address', 'List<User>') */
  declaredType?: string;
  /** Links Method/Constructor/Property to owning Class/Struct/Trait nodeId */
  ownerId?: string;
}

export interface SymbolTable {
  /**
   * Register a new symbol definition
   */
  add: (
    filePath: string,
    name: string,
    nodeId: string,
    type: NodeLabel,
    metadata?: { parameterCount?: number; requiredParameterCount?: number; parameterTypes?: string[]; returnType?: string; declaredType?: string; ownerId?: string }
  ) => void;

  /**
   * High Confidence: Look for a symbol specifically inside a file
   * Returns the Node ID if found
   */
  lookupExact: (filePath: string, name: string) => string | undefined;

  /**
   * High Confidence: Look for a symbol in a specific file, returning full definition.
   * Includes type information needed for heritage resolution (Class vs Interface).
   * Returns first matching definition — use lookupExactAll for overloaded methods.
   */
  lookupExactFull: (filePath: string, name: string) => SymbolDefinition | undefined;

  /**
   * High Confidence: Look for ALL symbols with this name in a specific file.
   * Returns all definitions, including overloaded methods with the same name.
   * Used by resolution-context to pass all same-file overloads to candidate filtering.
   */
  lookupExactAll: (filePath: string, name: string) => SymbolDefinition[];

  /**
   * Low Confidence: Look for a symbol anywhere in the project
   * Used when imports are missing or for framework magic
   */
  lookupFuzzy: (name: string) => SymbolDefinition[];

  /**
   * Low Confidence: Look for callable symbols (Function/Method/Constructor) by name.
   * Faster than `lookupFuzzy` + filter — backed by a lazy callable-only index.
   * Used by ReturnTypeLookup to resolve callee → return type.
   */
  lookupFuzzyCallable: (name: string) => SymbolDefinition[];

  /**
   * Look up a field/property by its owning class nodeId and field name.
   * O(1) via dedicated eagerly-populated index keyed by `ownerNodeId\0fieldName`.
   * Returns undefined when no matching property exists or the owner is ambiguous.
   */
  lookupFieldByOwner: (ownerNodeId: string, fieldName: string) => SymbolDefinition | undefined;

  /**
   * Debugging: See how many symbols are tracked
   */
  getStats: () => { fileCount: number; globalSymbolCount: number };

  /**
   * Cleanup memory
   */
  clear: () => void;
}

/**
 * Create an empty in-memory symbol table.
 *
 * All indexes share the same SymbolDefinition objects. Properties that carry an ownerId are
 * kept out of the global (fuzzy) index and are reachable through the per-file index and
 * lookupFieldByOwner only.
 */
export const createSymbolTable = (): SymbolTable => {
  // 1. File-Specific Index — stores full SymbolDefinition(s) for O(1) lookup.
  // Structure: FilePath -> (SymbolName -> SymbolDefinition[])
  // Array allows overloaded methods (same name, different signatures) to coexist.
  const fileIndex = new Map<string, Map<string, SymbolDefinition[]>>();

  // 2. Global Reverse Index (The "Backup")
  // Structure: SymbolName -> [List of Definitions]
  const globalIndex = new Map<string, SymbolDefinition[]>();

  // 3. Lazy Callable Index — populated on first lookupFuzzyCallable call.
  // Structure: SymbolName -> [Callable Definitions]
  // Only Function, Method, Constructor symbols are indexed.
  let callableIndex: Map<string, SymbolDefinition[]> | null = null;

  // 4. Eagerly-populated Field/Property Index — keyed by "ownerNodeId\0fieldName".
  // Every Property symbol that has an ownerId is indexed, with or without a declaredType.
  // A later property with the same owner and name replaces the earlier entry.
  const fieldByOwner = new Map<string, SymbolDefinition>();

  const CALLABLE_TYPES = new Set(['Function', 'Method', 'Constructor']);

  const add = (
    filePath: string,
    name: string,
    nodeId: string,
    type: NodeLabel,
    metadata?: { parameterCount?: number; requiredParameterCount?: number; parameterTypes?: string[]; returnType?: string; declaredType?: string; ownerId?: string }
  ) => {
    // Copy only the metadata fields that were actually supplied, so absent fields stay absent.
    const def: SymbolDefinition = {
      nodeId,
      filePath,
      type,
      ...(metadata?.parameterCount !== undefined ? { parameterCount: metadata.parameterCount } : {}),
      ...(metadata?.requiredParameterCount !== undefined ? { requiredParameterCount: metadata.requiredParameterCount } : {}),
      ...(metadata?.parameterTypes !== undefined ? { parameterTypes: metadata.parameterTypes } : {}),
      ...(metadata?.returnType !== undefined ? { returnType: metadata.returnType } : {}),
      ...(metadata?.declaredType !== undefined ? { declaredType: metadata.declaredType } : {}),
      ...(metadata?.ownerId !== undefined ? { ownerId: metadata.ownerId } : {}),
    };

    // A. Add to File Index (shared reference — zero additional memory)
    if (!fileIndex.has(filePath)) {
      fileIndex.set(filePath, new Map());
    }
    const fileMap = fileIndex.get(filePath)!;
    if (!fileMap.has(name)) {
      fileMap.set(name, [def]);
    } else {
      fileMap.get(name)!.push(def);
    }

    // B. Properties go to fieldByOwner index only — skip globalIndex to prevent
    //    namespace pollution for common names like 'id', 'name', 'type'.
    //    Index ALL properties (even without declaredType) so write-access tracking
    //    can resolve field ownership for dynamically-typed languages (Ruby, JS).
    if (type === 'Property' && metadata?.ownerId) {
      fieldByOwner.set(`${metadata.ownerId}\0${name}`, def);
      // Still add to fileIndex above (for lookupExact), but skip globalIndex
      return;
    }

    // C. Add to Global Index (same object reference)
    if (!globalIndex.has(name)) {
      globalIndex.set(name, []);
    }
    globalIndex.get(name)!.push(def);

    // D. Invalidate the lazy callable index only when adding callable types
    if (CALLABLE_TYPES.has(type)) {
      callableIndex = null;
    }
  };

  const lookupExact = (filePath: string, name: string): string | undefined => {
    const defs = fileIndex.get(filePath)?.get(name);
    return defs?.[0]?.nodeId;
  };

  const lookupExactFull = (filePath: string, name: string): SymbolDefinition | undefined => {
    const defs = fileIndex.get(filePath)?.get(name);
    return defs?.[0];
  };

  const lookupExactAll = (filePath: string, name: string): SymbolDefinition[] => {
    return fileIndex.get(filePath)?.get(name) ?? [];
  };

  const lookupFuzzy = (name: string): SymbolDefinition[] => {
    return globalIndex.get(name) || [];
  };

  const lookupFuzzyCallable = (name: string): SymbolDefinition[] => {
    if (!callableIndex) {
      // Build the callable index lazily on first use
      callableIndex = new Map();
      for (const [symName, defs] of globalIndex) {
        const callables = defs.filter(d => CALLABLE_TYPES.has(d.type));
        if (callables.length > 0) callableIndex.set(symName, callables);
      }
    }
    return callableIndex.get(name) ?? [];
  };

  const lookupFieldByOwner = (ownerNodeId: string, fieldName: string): SymbolDefinition | undefined => {
    return fieldByOwner.get(`${ownerNodeId}\0${fieldName}`);
  };

  // globalSymbolCount is the number of distinct names in the global index, not definitions.
  const getStats = () => ({
    fileCount: fileIndex.size,
    globalSymbolCount: globalIndex.size
  });

  const clear = () => {
    fileIndex.clear();
    globalIndex.clear();
    callableIndex = null;
    fieldByOwner.clear();
  };

  return { add, lookupExact, lookupExactFull, lookupExactAll, lookupFuzzy, lookupFuzzyCallable, lookupFieldByOwner, getStats, clear };
};

================================================
FILE: gitnexus/src/core/ingestion/tree-sitter-queries.ts
================================================
import { SupportedLanguages } from '../../config/supported-languages.js';

/*
 * Tree-sitter queries for extracting code definitions.
 *
 * Note: Different grammars (typescript vs tsx vs javascript) may have
 * slightly different node types. These queries are designed to be
 * compatible with the standard tree-sitter grammars.
*/ // TypeScript queries - works with tree-sitter-typescript export const TYPESCRIPT_QUERIES = ` (class_declaration name: (type_identifier) @name) @definition.class (interface_declaration name: (type_identifier) @name) @definition.interface (function_declaration name: (identifier) @name) @definition.function ; TypeScript overload signatures (function_signature is a separate node type from function_declaration) (function_signature name: (identifier) @name) @definition.function (method_definition name: (property_identifier) @name) @definition.method (lexical_declaration (variable_declarator name: (identifier) @name value: (arrow_function))) @definition.function (lexical_declaration (variable_declarator name: (identifier) @name value: (function_expression))) @definition.function (export_statement declaration: (lexical_declaration (variable_declarator name: (identifier) @name value: (arrow_function)))) @definition.function (export_statement declaration: (lexical_declaration (variable_declarator name: (identifier) @name value: (function_expression)))) @definition.function (import_statement source: (string) @import.source) @import ; Re-export statements: export { X } from './y' (export_statement source: (string) @import.source) @import (call_expression function: (identifier) @call.name) @call (call_expression function: (member_expression property: (property_identifier) @call.name)) @call ; Constructor calls: new Foo() (new_expression constructor: (identifier) @call.name) @call ; Class properties — public_field_definition covers most TS class fields (public_field_definition name: (property_identifier) @name) @definition.property ; Private class fields: #address: Address (public_field_definition name: (private_property_identifier) @name) @definition.property ; Constructor parameter properties: constructor(public address: Address) (required_parameter (accessibility_modifier) pattern: (identifier) @name) @definition.property ; Heritage queries - class extends 
(class_declaration name: (type_identifier) @heritage.class (class_heritage (extends_clause value: (identifier) @heritage.extends))) @heritage ; Heritage queries - class implements interface (class_declaration name: (type_identifier) @heritage.class (class_heritage (implements_clause (type_identifier) @heritage.implements))) @heritage.impl ; Write access: obj.field = value (assignment_expression left: (member_expression object: (_) @assignment.receiver property: (property_identifier) @assignment.property) right: (_)) @assignment ; Write access: obj.field += value (compound assignment) (augmented_assignment_expression left: (member_expression object: (_) @assignment.receiver property: (property_identifier) @assignment.property) right: (_)) @assignment `; // JavaScript queries - works with tree-sitter-javascript export const JAVASCRIPT_QUERIES = ` (class_declaration name: (identifier) @name) @definition.class (function_declaration name: (identifier) @name) @definition.function (method_definition name: (property_identifier) @name) @definition.method (lexical_declaration (variable_declarator name: (identifier) @name value: (arrow_function))) @definition.function (lexical_declaration (variable_declarator name: (identifier) @name value: (function_expression))) @definition.function (export_statement declaration: (lexical_declaration (variable_declarator name: (identifier) @name value: (arrow_function)))) @definition.function (export_statement declaration: (lexical_declaration (variable_declarator name: (identifier) @name value: (function_expression)))) @definition.function (import_statement source: (string) @import.source) @import ; Re-export statements: export { X } from './y' (export_statement source: (string) @import.source) @import (call_expression function: (identifier) @call.name) @call (call_expression function: (member_expression property: (property_identifier) @call.name)) @call ; Constructor calls: new Foo() (new_expression constructor: (identifier) @call.name) 
@call ; Class fields — field_definition captures JS class fields (class User { address = ... }) (field_definition property: (property_identifier) @name) @definition.property ; Heritage queries - class extends (JavaScript uses different AST than TypeScript) ; In tree-sitter-javascript, class_heritage directly contains the parent identifier (class_declaration name: (identifier) @heritage.class (class_heritage (identifier) @heritage.extends)) @heritage ; Write access: obj.field = value (assignment_expression left: (member_expression object: (_) @assignment.receiver property: (property_identifier) @assignment.property) right: (_)) @assignment ; Write access: obj.field += value (compound assignment) (augmented_assignment_expression left: (member_expression object: (_) @assignment.receiver property: (property_identifier) @assignment.property) right: (_)) @assignment `; // Python queries - works with tree-sitter-python export const PYTHON_QUERIES = ` (class_definition name: (identifier) @name) @definition.class (function_definition name: (identifier) @name) @definition.function (import_statement name: (dotted_name) @import.source) @import (import_from_statement module_name: (dotted_name) @import.source) @import (import_from_statement module_name: (relative_import) @import.source) @import (call function: (identifier) @call.name) @call (call function: (attribute attribute: (identifier) @call.name)) @call ; Class attribute type annotations — PEP 526: address: Address or address: Address = Address() ; Both bare annotations (address: Address) and annotated assignments (name: str = "test") ; are parsed as (assignment left: ... type: ...) in tree-sitter-python. 
(expression_statement (assignment left: (identifier) @name type: (type)) @definition.property) ; Heritage queries - Python class inheritance (class_definition name: (identifier) @heritage.class superclasses: (argument_list (identifier) @heritage.extends)) @heritage ; Write access: obj.field = value (assignment left: (attribute object: (_) @assignment.receiver attribute: (identifier) @assignment.property) right: (_)) @assignment ; Write access: obj.field += value (compound assignment) (augmented_assignment left: (attribute object: (_) @assignment.receiver attribute: (identifier) @assignment.property) right: (_)) @assignment `; // Java queries - works with tree-sitter-java export const JAVA_QUERIES = ` ; Classes, Interfaces, Enums, Annotations (class_declaration name: (identifier) @name) @definition.class (interface_declaration name: (identifier) @name) @definition.interface (enum_declaration name: (identifier) @name) @definition.enum (annotation_type_declaration name: (identifier) @name) @definition.annotation ; Methods & Constructors (method_declaration name: (identifier) @name) @definition.method (constructor_declaration name: (identifier) @name) @definition.constructor ; Fields — typed field declarations inside class bodies (field_declaration declarator: (variable_declarator name: (identifier) @name)) @definition.property ; Imports - capture any import declaration child as source (import_declaration (_) @import.source) @import ; Calls (method_invocation name: (identifier) @call.name) @call (method_invocation object: (_) name: (identifier) @call.name) @call ; Constructor calls: new Foo() (object_creation_expression type: (type_identifier) @call.name) @call ; Heritage - extends class (class_declaration name: (identifier) @heritage.class (superclass (type_identifier) @heritage.extends)) @heritage ; Heritage - implements interfaces (class_declaration name: (identifier) @heritage.class (super_interfaces (type_list (type_identifier) @heritage.implements))) 
@heritage.impl

; Write access: obj.field = value
(assignment_expression left: (field_access object: (_) @assignment.receiver field: (identifier) @assignment.property) right: (_)) @assignment
`;

// C queries - works with tree-sitter-c
/**
 * Tree-sitter query source for C: function definitions and prototypes
 * (including pointer-returning forms), structs/unions/enums/typedefs,
 * macros, includes and calls.
 */
export const C_QUERIES = `
; Functions (direct declarator)
(function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition.function
(declaration declarator: (function_declarator declarator: (identifier) @name)) @definition.function

; Functions returning pointers (pointer_declarator wraps function_declarator)
(function_definition declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name))) @definition.function
(declaration declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name))) @definition.function

; Functions returning double pointers (nested pointer_declarator)
(function_definition declarator: (pointer_declarator declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name)))) @definition.function

; Structs, Unions, Enums, Typedefs
(struct_specifier name: (type_identifier) @name) @definition.struct
(union_specifier name: (type_identifier) @name) @definition.union
(enum_specifier name: (type_identifier) @name) @definition.enum
(type_definition declarator: (type_identifier) @name) @definition.typedef

; Macros
(preproc_function_def name: (identifier) @name) @definition.macro
(preproc_def name: (identifier) @name) @definition.macro

; Includes
(preproc_include path: (_) @import.source) @import

; Calls
(call_expression function: (identifier) @call.name) @call
(call_expression function: (field_expression field: (field_identifier) @call.name)) @call
`;

// Go queries - works with tree-sitter-go
/**
 * Tree-sitter query source for Go: functions, methods, struct/interface types,
 * imports, struct fields, struct embedding, calls and field writes.
 */
export const GO_QUERIES = `
; Functions & Methods
(function_declaration name: (identifier) @name) @definition.function
(method_declaration name: (field_identifier) @name) @definition.method

; Types
(type_declaration (type_spec name: (type_identifier) @name type: (struct_type))) @definition.struct
(type_declaration (type_spec name: (type_identifier) @name type: (interface_type))) @definition.interface

; Imports
(import_declaration (import_spec path: (interpreted_string_literal) @import.source)) @import
(import_declaration (import_spec_list (import_spec path: (interpreted_string_literal) @import.source))) @import

; Struct fields — named field declarations inside struct types
(field_declaration_list (field_declaration name: (field_identifier) @name) @definition.property)

; Struct embedding (anonymous fields = inheritance)
; The negated field (!name) restricts the match to embedded fields. A named field
; such as "Name string" also has a type_identifier type and must not be reported
; as a parent type.
(type_declaration (type_spec name: (type_identifier) @heritage.class type: (struct_type (field_declaration_list (field_declaration !name type: (type_identifier) @heritage.extends))))) @definition.struct

; Calls
(call_expression function: (identifier) @call.name) @call
(call_expression function: (selector_expression field: (field_identifier) @call.name)) @call

; Struct literal construction: User{Name: "Alice"}
(composite_literal type: (type_identifier) @call.name) @call

; Write access: obj.field = value
(assignment_statement left: (expression_list (selector_expression operand: (_) @assignment.receiver field: (field_identifier) @assignment.property)) right: (_)) @assignment

; Write access: obj.field++ / obj.field--
(inc_statement (selector_expression operand: (_) @assignment.receiver field: (field_identifier) @assignment.property)) @assignment
(dec_statement (selector_expression operand: (_) @assignment.receiver field: (field_identifier) @assignment.property)) @assignment
`;

// C++ queries - works with tree-sitter-cpp
/**
 * Tree-sitter query source for C++: classes/structs/namespaces/enums, typedefs,
 * unions, macros, free functions and qualified methods (direct, pointer- and
 * reference-returning), destructors, data members, inline class methods,
 * templates, includes, calls, base-class heritage and field writes.
 */
export const CPP_QUERIES = `
; Classes, Structs, Namespaces
(class_specifier name: (type_identifier) @name) @definition.class
(struct_specifier name: (type_identifier) @name) @definition.struct
(namespace_definition name: (namespace_identifier) @name) @definition.namespace
(enum_specifier name: (type_identifier) @name) @definition.enum

; Typedefs and unions (common in C-style headers and mixed C/C++ code)
(type_definition declarator: (type_identifier) @name) @definition.typedef
(union_specifier name: (type_identifier) @name) @definition.union

; Macros
(preproc_function_def name: (identifier) @name) @definition.macro
(preproc_def name: (identifier) @name) @definition.macro

; Functions & Methods (direct declarator)
(function_definition declarator: (function_declarator declarator: (identifier) @name)) @definition.function
(function_definition declarator: (function_declarator declarator: (qualified_identifier name: (identifier) @name))) @definition.method

; Functions/methods returning pointers (pointer_declarator wraps function_declarator)
(function_definition declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name))) @definition.function
(function_definition declarator: (pointer_declarator declarator: (function_declarator declarator: (qualified_identifier name: (identifier) @name)))) @definition.method

; Functions/methods returning double pointers (nested pointer_declarator)
(function_definition declarator: (pointer_declarator declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name)))) @definition.function
(function_definition declarator: (pointer_declarator declarator: (pointer_declarator declarator: (function_declarator declarator: (qualified_identifier name: (identifier) @name))))) @definition.method

; Functions/methods returning references (reference_declarator wraps function_declarator)
(function_definition declarator: (reference_declarator (function_declarator declarator: (identifier) @name))) @definition.function
(function_definition declarator: (reference_declarator (function_declarator declarator: (qualified_identifier name: (identifier) @name)))) @definition.method

; Destructors (destructor_name is distinct from identifier in tree-sitter-cpp)
(function_definition declarator: (function_declarator declarator: (qualified_identifier name: (destructor_name) @name))) @definition.method

; Function declarations / prototypes (common in headers)
(declaration declarator: (function_declarator declarator: (identifier) @name)) @definition.function
(declaration declarator: (pointer_declarator declarator: (function_declarator declarator: (identifier) @name))) @definition.function

; Class/struct data member fields (Address address; int count;)
; Uses field_identifier to exclude method declarations (which use function_declarator)
(field_declaration declarator: (field_identifier) @name) @definition.property

; Pointer member fields (Address* address;)
(field_declaration declarator: (pointer_declarator declarator: (field_identifier) @name)) @definition.property

; Reference member fields (Address& address;)
(field_declaration declarator: (reference_declarator (field_identifier) @name)) @definition.property

; Inline class method declarations (inside class body, no body: void Foo();)
(field_declaration declarator: (function_declarator declarator: (identifier) @name)) @definition.method

; Inline class method definitions (inside class body, with body: void Foo() { ... })
(field_declaration_list (function_definition declarator: (function_declarator declarator: [(field_identifier) (identifier) (operator_name) (destructor_name)] @name)) @definition.method)

; Inline class methods returning a pointer type (User* lookup(int id) { ... })
(field_declaration_list (function_definition declarator: (pointer_declarator declarator: (function_declarator declarator: [(field_identifier) (identifier) (operator_name)] @name))) @definition.method)

; Inline class methods returning a reference type (User& lookup(int id) { ... })
(field_declaration_list (function_definition declarator: (reference_declarator (function_declarator declarator: [(field_identifier) (identifier) (operator_name)] @name))) @definition.method)

; Templates
(template_declaration (class_specifier name: (type_identifier) @name)) @definition.template
(template_declaration (function_definition declarator: (function_declarator declarator: (identifier) @name))) @definition.template

; Includes
(preproc_include path: (_) @import.source) @import

; Calls
(call_expression function: (identifier) @call.name) @call
(call_expression function: (field_expression field: (field_identifier) @call.name)) @call
(call_expression function: (qualified_identifier name: (identifier) @call.name)) @call
(call_expression function: (template_function name: (identifier) @call.name)) @call

; Constructor calls: new User()
(new_expression type: (type_identifier) @call.name) @call

; Heritage
(class_specifier name: (type_identifier) @heritage.class (base_class_clause (type_identifier) @heritage.extends)) @heritage
(class_specifier name: (type_identifier) @heritage.class (base_class_clause (access_specifier) (type_identifier) @heritage.extends)) @heritage

; Write access: obj.field = value
(assignment_expression left: (field_expression argument: (_) @assignment.receiver field: (field_identifier) @assignment.property) right: (_)) @assignment
`;

// C# queries - works with tree-sitter-c-sharp
/**
 * Tree-sitter query source for C#: type and namespace definitions, methods,
 * constructors (including primary constructors), properties, using directives,
 * calls, base-list heritage and member writes.
 */
export const CSHARP_QUERIES = `
; Types
(class_declaration name: (identifier) @name) @definition.class
(interface_declaration name: (identifier) @name) @definition.interface
(struct_declaration name: (identifier) @name) @definition.struct
(enum_declaration name: (identifier) @name) @definition.enum
(record_declaration name: (identifier) @name) @definition.record
(delegate_declaration name: (identifier) @name) @definition.delegate

; Namespaces (block form and C# 10+ file-scoped form)
(namespace_declaration name: (identifier) @name) @definition.namespace
(namespace_declaration name: (qualified_name) @name) @definition.namespace
(file_scoped_namespace_declaration name: (identifier) @name) @definition.namespace
(file_scoped_namespace_declaration name: (qualified_name) @name) @definition.namespace

; Methods & Properties
(method_declaration name: (identifier) @name) @definition.method
(local_function_statement name: (identifier) @name) @definition.function
(constructor_declaration name: (identifier) @name) @definition.constructor
(property_declaration name: (identifier) @name) @definition.property

; Primary constructors (C# 12): class User(string name, int age) { }
(class_declaration name: (identifier) @name (parameter_list) @definition.constructor)
(record_declaration name: (identifier) @name (parameter_list) @definition.constructor)

; Using
(using_directive (qualified_name) @import.source) @import
(using_directive (identifier) @import.source) @import

; Calls
(invocation_expression function: (identifier) @call.name) @call
(invocation_expression function: (member_access_expression name: (identifier) @call.name)) @call

; Null-conditional method calls: user?.Save()
; Parses as: invocation_expression → conditional_access_expression → member_binding_expression → identifier
(invocation_expression function: (conditional_access_expression (member_binding_expression (identifier) @call.name))) @call

; Constructor calls: new Foo() and new Foo { Props }
(object_creation_expression type: (identifier) @call.name) @call

; Target-typed new (C# 9): User u = new("x", 5)
(variable_declaration type: (identifier) @call.name (variable_declarator (implicit_object_creation_expression) @call))

; Heritage
(class_declaration name: (identifier) @heritage.class (base_list (identifier) @heritage.extends)) @heritage
(class_declaration name: (identifier) @heritage.class (base_list (generic_name (identifier) @heritage.extends))) @heritage

; Write access: obj.field = value
(assignment_expression left: (member_access_expression expression: (_) @assignment.receiver name: (identifier) @assignment.property) right: (_)) @assignment
`;

// Rust queries - works with tree-sitter-rust
/**
 * Tree-sitter query source for Rust: items (fn/struct/enum/trait/impl/mod),
 * type aliases, consts, statics, macros, use declarations, calls, struct
 * literals, struct fields, trait-impl heritage and field writes.
 * Inherent impls are separated from trait impls with the negated field `!trait`.
 */
export const RUST_QUERIES = `
; Functions & Items
(function_item name: (identifier) @name) @definition.function
(struct_item name: (type_identifier) @name) @definition.struct
(enum_item name: (type_identifier) @name) @definition.enum
(trait_item name: (type_identifier) @name) @definition.trait
(impl_item type: (type_identifier) @name !trait) @definition.impl
(impl_item type: (generic_type type: (type_identifier) @name) !trait) @definition.impl
(mod_item name: (identifier) @name) @definition.module

; Type aliases, const, static, macros
(type_item name: (type_identifier) @name) @definition.type
(const_item name: (identifier) @name) @definition.const
(static_item name: (identifier) @name) @definition.static
(macro_definition name: (identifier) @name) @definition.macro

; Use statements
(use_declaration argument: (_) @import.source) @import

; Calls
(call_expression function: (identifier) @call.name) @call
(call_expression function: (field_expression field: (field_identifier) @call.name)) @call
(call_expression function: (scoped_identifier name: (identifier) @call.name)) @call
(call_expression function: (generic_function function: (identifier) @call.name)) @call

; Struct literal construction: User { name: value }
(struct_expression name: (type_identifier) @call.name) @call

; Struct fields — named field declarations inside struct bodies
(field_declaration_list (field_declaration name: (field_identifier) @name) @definition.property)

; Heritage (trait implementation) — all combinations of concrete/generic trait × concrete/generic type
(impl_item trait: (type_identifier) @heritage.trait type: (type_identifier) @heritage.class) @heritage
(impl_item trait: (generic_type type: (type_identifier) @heritage.trait) type: (type_identifier) @heritage.class) @heritage
(impl_item trait: (type_identifier) @heritage.trait type: (generic_type type:
(type_identifier) @heritage.class)) @heritage
(impl_item trait: (generic_type type: (type_identifier) @heritage.trait) type: (generic_type type: (type_identifier) @heritage.class)) @heritage

; Write access: obj.field = value
(assignment_expression left: (field_expression value: (_) @assignment.receiver field: (field_identifier) @assignment.property) right: (_)) @assignment

; Write access: obj.field += value (compound assignment)
(compound_assignment_expr left: (field_expression value: (_) @assignment.receiver field: (field_identifier) @assignment.property) right: (_)) @assignment
`;

// PHP queries - works with tree-sitter-php (php_only grammar)
/**
 * Tree-sitter query source for PHP: namespaces, classes, interfaces, traits,
 * enums, functions, methods, properties (including promoted constructor
 * parameters), use-imports, calls, extends/implements/trait heritage and
 * instance/static property writes.
 */
export const PHP_QUERIES = `
; ── Namespace ────────────────────────────────────────────────────────────────
(namespace_definition name: (namespace_name) @name) @definition.namespace

; ── Classes ──────────────────────────────────────────────────────────────────
(class_declaration name: (name) @name) @definition.class

; ── Interfaces ───────────────────────────────────────────────────────────────
(interface_declaration name: (name) @name) @definition.interface

; ── Traits ───────────────────────────────────────────────────────────────────
(trait_declaration name: (name) @name) @definition.trait

; ── Enums (PHP 8.1) ──────────────────────────────────────────────────────────
(enum_declaration name: (name) @name) @definition.enum

; ── Top-level functions ───────────────────────────────────────────────────────
(function_definition name: (name) @name) @definition.function

; ── Methods (including constructors) ─────────────────────────────────────────
(method_declaration name: (name) @name) @definition.method

; ── Class properties (including Eloquent $fillable, $casts, etc.) ────────────
(property_declaration (property_element (variable_name (name) @name))) @definition.property

; Constructor property promotion (PHP 8.0+: public Address $address in __construct)
(method_declaration parameters: (formal_parameters (property_promotion_parameter name: (variable_name (name) @name)))) @definition.property

; ── Imports: use statements ──────────────────────────────────────────────────
; Simple: use App\\Models\\User;
(namespace_use_declaration (namespace_use_clause (qualified_name) @import.source)) @import

; ── Function/method calls ────────────────────────────────────────────────────
; Regular function call: foo()
(function_call_expression function: (name) @call.name) @call

; Method call: $obj->method()
(member_call_expression name: (name) @call.name) @call

; Nullsafe method call: $obj?->method()
(nullsafe_member_call_expression name: (name) @call.name) @call

; Static call: Foo::bar() (php_only uses scoped_call_expression)
(scoped_call_expression name: (name) @call.name) @call

; Constructor call: new User()
(object_creation_expression (name) @call.name) @call

; ── Heritage: extends ────────────────────────────────────────────────────────
(class_declaration name: (name) @heritage.class (base_clause [(name) (qualified_name)] @heritage.extends)) @heritage

; ── Heritage: implements ─────────────────────────────────────────────────────
(class_declaration name: (name) @heritage.class (class_interface_clause [(name) (qualified_name)] @heritage.implements)) @heritage.impl

; ── Heritage: use trait (must capture enclosing class name) ──────────────────
(class_declaration name: (name) @heritage.class body: (declaration_list (use_declaration [(name) (qualified_name)] @heritage.trait))) @heritage

; Write access: $obj->field = value
(assignment_expression left: (member_access_expression object: (_) @assignment.receiver name: (name) @assignment.property) right: (_)) @assignment

; Write access: ClassName::$field = value (static property)
(assignment_expression left: (scoped_property_access_expression scope: (_) @assignment.receiver name: (variable_name (name) @assignment.property)) right: (_)) @assignment
`;

// Ruby queries - works with tree-sitter-ruby
// NOTE: Ruby uses `call` for require, include, extend, prepend, attr_* etc.
// These are all captured as @call and routed in JS post-processing:
//   - require/require_relative → import extraction
//   - include/extend/prepend → heritage (mixin) extraction
//   - attr_accessor/attr_reader/attr_writer → property definition extraction
//   - everything else → regular call extraction
/**
 * Tree-sitter query source for Ruby: modules, classes, instance and singleton
 * methods, calls (with and without parentheses), superclass heritage and
 * setter-style writes.
 */
export const RUBY_QUERIES = `
; ── Modules ──────────────────────────────────────────────────────────────────
(module name: (constant) @name) @definition.module

; ── Classes ──────────────────────────────────────────────────────────────────
(class name: (constant) @name) @definition.class

; ── Instance methods ─────────────────────────────────────────────────────────
(method name: (identifier) @name) @definition.method

; ── Singleton (class-level) methods ──────────────────────────────────────────
(singleton_method name: (identifier) @name) @definition.method

; ── All calls (require, include, attr_*, and regular calls routed in JS) ─────
(call method: (identifier) @call.name) @call

; ── Bare calls without parens (identifiers at statement level are method calls) ─
; NOTE: This may over-capture variable reads as calls (e.g. 'result' at
; statement level). Ruby's grammar makes bare identifiers ambiguous — they
; could be local variables or zero-arity method calls. Post-processing via
; isBuiltInOrNoise and symbol resolution filtering suppresses most false
; positives, but a variable name that coincidentally matches a method name
; elsewhere may produce a false CALLS edge.
(body_statement (identifier) @call.name @call)

; ── Heritage: class < SuperClass ─────────────────────────────────────────────
(class name: (constant) @heritage.class superclass: (superclass (constant) @heritage.extends)) @heritage

; Write access: obj.field = value (Ruby setter — syntactically a method call to field=)
(assignment left: (call receiver: (_) @assignment.receiver method: (identifier) @assignment.property) right: (_)) @assignment

; Write access: obj.field += value (compound assignment — operator_assignment node, not assignment)
(operator_assignment left: (call receiver: (_) @assignment.receiver method: (identifier) @assignment.property) right: (_)) @assignment
`;

// Kotlin queries - works with tree-sitter-kotlin (fwcd/tree-sitter-kotlin)
// Based on official tags.scm; functions use simple_identifier, classes use type_identifier
/**
 * Tree-sitter query source for Kotlin: interfaces, classes, objects, companion
 * objects, functions, properties (including val/var constructor parameters),
 * enum entries, type aliases, imports, calls, delegation-specifier heritage
 * and navigation-suffix writes.
 */
export const KOTLIN_QUERIES = `
; ── Interfaces ─────────────────────────────────────────────────────────────
; tree-sitter-kotlin (fwcd) has no interface_declaration node type.
; Interfaces are class_declaration nodes with an anonymous "interface" keyword child.
(class_declaration "interface" (type_identifier) @name) @definition.interface

; ── Classes (regular, data, sealed, enum) ────────────────────────────────
; All have the anonymous "class" keyword child. enum class has both
; "enum" and "class" children — the "class" child still matches.
(class_declaration "class" (type_identifier) @name) @definition.class

; ── Object declarations (Kotlin singletons) ──────────────────────────────
(object_declaration (type_identifier) @name) @definition.class

; ── Companion objects (named only) ───────────────────────────────────────
(companion_object (type_identifier) @name) @definition.class

; ── Functions (top-level, member, extension) ──────────────────────────────
(function_declaration (simple_identifier) @name) @definition.function

; ── Properties ───────────────────────────────────────────────────────────
(property_declaration (variable_declaration (simple_identifier) @name)) @definition.property

; Primary constructor val/var parameters (data class, value class, regular class)
; binding_pattern_kind contains "val" or "var" — without it, the param is not a property
(class_parameter (binding_pattern_kind) (simple_identifier) @name) @definition.property

; ── Enum entries ─────────────────────────────────────────────────────────
(enum_entry (simple_identifier) @name) @definition.enum

; ── Type aliases ─────────────────────────────────────────────────────────
(type_alias (type_identifier) @name) @definition.type

; ── Imports ──────────────────────────────────────────────────────────────
(import_header (identifier) @import.source) @import

; ── Function calls (direct) ──────────────────────────────────────────────
(call_expression (simple_identifier) @call.name) @call

; ── Method calls (via navigation: obj.method()) ──────────────────────────
(call_expression (navigation_expression (navigation_suffix (simple_identifier) @call.name))) @call

; ── Constructor invocations ──────────────────────────────────────────────
(constructor_invocation (user_type (type_identifier) @call.name)) @call

; ── Infix function calls (e.g., a to b, x until y) ──────────────────────
(infix_expression (simple_identifier) @call.name) @call

; ── Heritage: extends / implements via delegation_specifier ──────────────
; Interface implementation (bare user_type): class Foo : Bar
(class_declaration (type_identifier) @heritage.class (delegation_specifier (user_type (type_identifier) @heritage.extends))) @heritage

; Class extension (constructor_invocation): class Foo : Bar()
(class_declaration (type_identifier) @heritage.class (delegation_specifier (constructor_invocation (user_type (type_identifier) @heritage.extends)))) @heritage

; Write access: obj.field = value
(assignment (directly_assignable_expression (_) @assignment.receiver (navigation_suffix (simple_identifier) @assignment.property)) (_)) @assignment
`;

// Swift queries - works with tree-sitter-swift
/**
 * Tree-sitter query source for Swift: classes, structs, enums, extensions,
 * actors, protocols, type aliases, functions, initializers, properties,
 * imports, calls, inheritance/conformance heritage and property writes.
 */
export const SWIFT_QUERIES = `
; Classes
(class_declaration "class" name: (type_identifier) @name) @definition.class

; Structs
(class_declaration "struct" name: (type_identifier) @name) @definition.struct

; Enums
(class_declaration "enum" name: (type_identifier) @name) @definition.enum

; Extensions (mapped to class — no dedicated label in schema)
(class_declaration "extension" name: (user_type (type_identifier) @name)) @definition.class

; Actors
(class_declaration "actor" name: (type_identifier) @name) @definition.class

; Protocols (mapped to interface)
(protocol_declaration name: (type_identifier) @name) @definition.interface

; Type aliases
(typealias_declaration name: (type_identifier) @name) @definition.type

; Functions (top-level and methods)
(function_declaration name: (simple_identifier) @name) @definition.function

; Protocol method declarations
(protocol_function_declaration name: (simple_identifier) @name) @definition.method

; Initializers
(init_declaration) @definition.constructor

; Properties (stored and computed)
(property_declaration (pattern (simple_identifier) @name)) @definition.property

; Imports
(import_declaration (identifier (simple_identifier) @import.source)) @import

; Calls - direct function calls
(call_expression (simple_identifier) @call.name) @call

; Calls - member/navigation calls (obj.method())
(call_expression
(navigation_expression (navigation_suffix (simple_identifier) @call.name))) @call

; Heritage - class/struct/enum inheritance and protocol conformance
(class_declaration name: (type_identifier) @heritage.class (inheritance_specifier inherits_from: (user_type (type_identifier) @heritage.extends))) @heritage

; Heritage - protocol inheritance
(protocol_declaration name: (type_identifier) @heritage.class (inheritance_specifier inherits_from: (user_type (type_identifier) @heritage.extends))) @heritage

; Heritage - extension protocol conformance (e.g. extension Foo: SomeProtocol)
; Extensions wrap the name in user_type unlike class/struct/enum declarations
(class_declaration "extension" name: (user_type (type_identifier) @heritage.class) (inheritance_specifier inherits_from: (user_type (type_identifier) @heritage.extends))) @heritage

; Write access: obj.field = value
(assignment (directly_assignable_expression (_) @assignment.receiver (navigation_suffix (simple_identifier) @assignment.property)) (_)) @assignment
`;

/**
 * Lookup table from each `SupportedLanguages` member to the tree-sitter query
 * source string defined for that language in this module. Typed as a full
 * `Record`, so the compiler reports any language that has no entry.
 */
export const LANGUAGE_QUERIES: Record<SupportedLanguages, string> = {
  [SupportedLanguages.TypeScript]: TYPESCRIPT_QUERIES,
  [SupportedLanguages.JavaScript]: JAVASCRIPT_QUERIES,
  [SupportedLanguages.Python]: PYTHON_QUERIES,
  [SupportedLanguages.Java]: JAVA_QUERIES,
  [SupportedLanguages.C]: C_QUERIES,
  [SupportedLanguages.Go]: GO_QUERIES,
  [SupportedLanguages.CPlusPlus]: CPP_QUERIES,
  [SupportedLanguages.CSharp]: CSHARP_QUERIES,
  [SupportedLanguages.Ruby]: RUBY_QUERIES,
  [SupportedLanguages.Rust]: RUST_QUERIES,
  [SupportedLanguages.PHP]: PHP_QUERIES,
  [SupportedLanguages.Kotlin]: KOTLIN_QUERIES,
  [SupportedLanguages.Swift]: SWIFT_QUERIES,
};


================================================
FILE: gitnexus/src/core/ingestion/type-env.ts
================================================
import type { SyntaxNode } from './utils.js';
import { FUNCTION_NODE_TYPES, extractFunctionName, CLASS_CONTAINER_TYPES, CALL_EXPRESSION_TYPES, isBuiltInOrNoise } from './utils.js';
import {
SupportedLanguages } from '../../config/supported-languages.js';
import { typeConfigs, TYPED_PARAMETER_TYPES } from './type-extractors/index.js';
import type { ClassNameLookup, ReturnTypeLookup, ForLoopExtractorContext, PendingAssignment } from './type-extractors/types.js';
import { extractSimpleTypeName, extractVarName, stripNullable, extractReturnTypeName } from './type-extractors/shared.js';
import type { SymbolTable } from './symbol-table.js';

/**
 * Per-file scoped type environment: maps (scope, variableName) → typeName.
 * Scope-aware: variables inside functions are keyed by function name,
 * file-level variables use the '' (empty string) scope.
 *
 * Design constraints:
 * - Explicit-only: Tier 0 uses type annotations; Tier 1 infers from constructors
 * - Tier 2: single-pass assignment chain propagation in source order — resolves
 *   `const b = a` when `a` already has a type from Tier 0/1
 * - Scope-aware: function-local variables don't collide across functions
 * - Conservative: complex/generic types extract the base name only
 * - Per-file: built once, used for receiver resolution, then discarded
 */
export type TypeEnv = Map<string, Map<string, string>>;

/** File-level scope key */
const FILE_SCOPE = '';

/**
 * Returns the first direct child of `node` whose type is `type_identifier`,
 * or `null` when there is none. Serves as the fallback for grammars that do
 * not expose the class name through a `name` field (e.g. Kotlin).
 */
const findTypeIdentifierChild = (node: SyntaxNode): SyntaxNode | null => {
  let index = 0;
  while (index < node.childCount) {
    const candidate = node.child(index);
    if (candidate?.type === 'type_identifier') return candidate;
    index += 1;
  }
  return null;
};

/**
 * Per-file type environment with receiver resolution.
 * Built once per file via `buildTypeEnv`, used for receiver-type filtering,
 * then discarded. Encapsulates scope-aware type lookup and self/this/super
 * AST resolution behind a single `.lookup()` method.
 */
export interface TypeEnvironment {
  /** Look up a variable's resolved type, with self/this/super AST resolution.
*/
  lookup(varName: string, callNode: SyntaxNode): string | undefined;
  /** Unverified cross-file constructor bindings for SymbolTable verification. */
  readonly constructorBindings: readonly ConstructorBinding[];
  /** Raw per-scope type bindings — for testing and debugging. */
  readonly env: TypeEnv;
  /** Maps `scope\0varName` → constructor type for virtual dispatch override.
   *  Populated when a variable has BOTH a declared base type AND a more specific
   *  constructor type (e.g., `Animal a = new Dog()` → key maps to 'Dog'). */
  readonly constructorTypeMap: ReadonlyMap<string, string>;
}

/**
 * Position-indexed pattern binding: active only within a specific AST range.
 * Used for smart-cast narrowing in mutually exclusive branches (e.g., Kotlin when arms).
 */
interface PatternOverride {
  rangeStart: number;
  rangeEnd: number;
  typeName: string;
}

/** scope → varName → overrides (checked in order, first range match wins) */
type PatternOverrides = Map<string, Map<string, PatternOverride[]>>;

/** AST node types that represent mutually exclusive branch containers for pattern bindings.
 *  Includes both multi-arm pattern-match branches AND if-statement bodies for null-check narrowing. */
const NARROWING_BRANCH_TYPES = new Set([
  'when_entry',              // Kotlin when
  'switch_block_label',      // Java switch (enhanced)
  'if_statement',            // TS/JS, Java, C/C++
  'if_expression',           // Kotlin (if is an expression)
  'statement_block',         // TS/JS: { ... } body of if
  'control_structure_body',  // Kotlin: body of if
]);

/**
 * Climbs the ancestors of `node` and returns the nearest one whose type is in
 * `NARROWING_BRANCH_TYPES`. The climb gives up (returns `undefined`) as soon as
 * an ancestor in `FUNCTION_NODE_TYPES` is reached, or when the root is passed.
 */
const findNarrowingBranchScope = (node: SyntaxNode): SyntaxNode | undefined => {
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    if (NARROWING_BRANCH_TYPES.has(ancestor.type)) return ancestor;
    // A function boundary ends the search: no branch container was found inside it.
    if (FUNCTION_NODE_TYPES.has(ancestor.type)) return undefined;
  }
  return undefined;
};

/** Bare nullable keywords that fastStripNullable must reject.
*/
const FAST_NULLABLE_KEYWORDS = new Set(['null', 'undefined', 'void', 'None', 'nil']);

/**
 * Cheap nullable stripping for the common case.
 * A name listed in `FAST_NULLABLE_KEYWORDS` yields `undefined`. A name without
 * any '|' (union) or '?' (nullable suffix) marker is returned untouched; only
 * names carrying one of those markers are handed to `stripNullable`.
 */
const fastStripNullable = (typeName: string): string | undefined => {
  if (FAST_NULLABLE_KEYWORDS.has(typeName)) {
    return undefined;
  }
  const hasNullableMarker = typeName.includes('|') || typeName.includes('?');
  return hasNullableMarker ? stripNullable(typeName) : typeName;
};

/**
 * Core lookup shared between TypeEnvironment and the legacy export.
 *
 * Resolution order:
 *  1. `self` / `this` / `$this` → name of the enclosing class (AST walk).
 *  2. `super` / `base` / `parent` → parent of the enclosing class (AST walk).
 *  3. Position-indexed pattern overrides for the enclosing scope; the first
 *     override whose range contains the call's start index wins.
 *  4. The enclosing function scope in `env`.
 *  5. The file-level scope in `env`.
 *
 * @param env per-scope variable → type bindings
 * @param varName receiver/variable name to resolve
 * @param callNode AST node of the call; used for scope and position
 * @param patternOverrides optional per-branch narrowing table
 * @returns the resolved type name, or `undefined` when nothing matches
 */
const lookupInEnv = (
  env: TypeEnv,
  varName: string,
  callNode: SyntaxNode,
  patternOverrides?: PatternOverrides,
): string | undefined => {
  switch (varName) {
    case 'self':
    case 'this':
    case '$this':
      return findEnclosingClassName(callNode);
    case 'super':
    case 'base':
    case 'parent':
      return findEnclosingParentClassName(callNode);
    default:
      break;
  }

  const scopeKey = findEnclosingScopeKey(callNode);

  if (scopeKey) {
    // Per-branch narrowing outranks the flat scope bindings.
    const branchOverrides = patternOverrides?.get(scopeKey)?.get(varName);
    if (branchOverrides) {
      const callOffset = callNode.startIndex;
      const hit = branchOverrides.find(
        (candidate) => callOffset >= candidate.rangeStart && callOffset <= candidate.rangeEnd,
      );
      if (hit) return fastStripNullable(hit.typeName);
    }

    const localType = env.get(scopeKey)?.get(varName);
    if (localType) return fastStripNullable(localType);
  }

  const fileLevelType = env.get(FILE_SCOPE)?.get(varName);
  return fileLevelType ? fastStripNullable(fileLevelType) : undefined;
};

/**
 * Resolves `self`/`this` to a type name: climbs the ancestors of `node` and
 * returns the text of the name node of the first class container that has one
 * (the `name` field, else the first `type_identifier` child). Containers
 * without a name node are skipped and the climb continues.
 */
const findEnclosingClassName = (node: SyntaxNode): string | undefined => {
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    if (!CLASS_CONTAINER_TYPES.has(ancestor.type)) continue;
    const nameNode = ancestor.childForFieldName('name') ?? findTypeIdentifierChild(ancestor);
    if (nameNode) return nameNode.text;
  }
  return undefined;
};

/** Keywords that refer to the current instance across languages. */
const THIS_RECEIVERS = new Set(['this', 'self', '$this', 'Me']);

/**
 * If a pending assignment's receiver is this/self/$this/Me, substitute the
 * enclosing class name. Returns the item unchanged for non-receiver kinds
 * or when the receiver is not a this-keyword. Properties are readonly in the
 * discriminated union, so a new object is returned when substitution occurs.
*/
const substituteThisReceiver = (item: PendingAssignment, node: SyntaxNode): PendingAssignment => {
  // Only fieldAccess / methodCallResult items carry a receiver.
  if (item.kind === 'fieldAccess' || item.kind === 'methodCallResult') {
    if (THIS_RECEIVERS.has(item.receiver)) {
      const className = findEnclosingClassName(node);
      if (className) return { ...item, receiver: className };
    }
  }
  return item;
};

/**
 * Resolve `super` / `base` / `parent`: find the nearest enclosing class-like
 * container of `node` and return whatever parent class name can be read from
 * that container's heritage syntax (see extractParentClassFromNode).
 *
 * Only the nearest container is inspected; if it declares no parent the result
 * is `undefined` and outer containers are not consulted.
 *
 * Heritage shapes handled, per tree-sitter grammar:
 * - Java/Ruby: `superclass` field → type_identifier/constant
 * - Python: `superclasses` field → argument_list → first identifier
 * - TypeScript/JS: unnamed `class_heritage` child → `extends_clause` → identifier
 * - C#: unnamed `base_list` child → first identifier
 * - PHP: unnamed `base_clause` child → name
 * - Kotlin: unnamed `delegation_specifier` child → constructor_invocation → user_type → type_identifier
 * - C++: unnamed `base_class_clause` child → type_identifier
 * - Swift: unnamed `inheritance_specifier` child → user_type → type_identifier
 */
const findEnclosingParentClassName = (node: SyntaxNode): string | undefined => {
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    if (CLASS_CONTAINER_TYPES.has(ancestor.type)) {
      return extractParentClassFromNode(ancestor);
    }
  }
  return undefined;
};

/**
 * Read the parent/superclass name off a class declaration node.
 *
 * Named fields are tried first (`superclass`, then `superclasses`); after that
 * the node's direct children are scanned in order for a grammar-specific
 * heritage node. The first name found is returned; `undefined` means no
 * recognised heritage syntax was present.
 */
const extractParentClassFromNode = (classNode: SyntaxNode): string | undefined => {
  // Java / Ruby: `superclass` field wrapping a type_identifier, generic_type or constant.
  const superclassField = classNode.childForFieldName('superclass');
  if (superclassField) {
    const target =
      superclassField.childForFieldName('type') ?? superclassField.firstNamedChild ?? superclassField;
    return extractSimpleTypeName(target) ?? target.text;
  }

  // Python: `superclasses` is an argument_list of identifiers or attribute nodes
  // (e.g. models.Model); only the first entry is used.
  const superclassesField = classNode.childForFieldName('superclasses');
  if (superclassesField) {
    const firstBase = superclassesField.firstNamedChild;
    if (firstBase) return extractSimpleTypeName(firstBase) ?? firstBase.text;
  }

  // Remaining grammars expose heritage as an unnamed direct child of the class node.
  for (let i = 0; i < classNode.childCount; i++) {
    const child = classNode.child(i);
    if (!child) continue;
    const kind = child.type;

    if (kind === 'class_heritage') {
      // TypeScript: class_heritage > extends_clause > type_identifier
      // JavaScript: class_heritage > identifier (no extends_clause wrapper)
      for (let j = 0; j < child.childCount; j++) {
        const part = child.child(j);
        if (part?.type === 'extends_clause') {
          const extended = part.firstNamedChild;
          if (extended) return extractSimpleTypeName(extended) ?? extended.text;
        }
        if (part?.type === 'identifier' || part?.type === 'type_identifier') {
          return part.text;
        }
      }
    } else if (kind === 'base_list') {
      // C#: base_list > identifier, or generic_name > identifier for BaseClass<T>
      const firstBase = child.firstNamedChild;
      if (firstBase) {
        const genericName =
          firstBase.type === 'generic_name'
            ? (firstBase.childForFieldName('name') ?? firstBase.firstNamedChild)
            : null;
        return genericName ? genericName.text : firstBase.text;
      }
    } else if (kind === 'base_clause') {
      // PHP: base_clause > name
      const baseName = child.firstNamedChild;
      if (baseName) return baseName.text;
    } else if (kind === 'base_class_clause') {
      // C++: base_class_clause > type_identifier (an access_specifier may precede it)
      for (let j = 0; j < child.childCount; j++) {
        const part = child.child(j);
        if (part?.type === 'type_identifier') return part.text;
      }
    } else if (kind === 'delegation_specifier') {
      // Kotlin: delegation_specifier > constructor_invocation > user_type > type_identifier,
      // or a plain user_type (conformance written without parentheses).
      const delegate = child.firstNamedChild;
      let userType: SyntaxNode | null | undefined = null;
      if (delegate?.type === 'constructor_invocation') {
        userType = delegate.firstNamedChild;
      } else if (delegate?.type === 'user_type') {
        userType = delegate;
      }
      if (userType?.type === 'user_type') {
        const typeId = userType.firstNamedChild;
        if (typeId) return typeId.text;
      }
    } else if (kind === 'inheritance_specifier') {
      // Swift: inheritance_specifier > user_type > type_identifier
      const userType = child.childForFieldName('inherits_from') ?? child.firstNamedChild;
      if (userType?.type === 'user_type') {
        const typeId = userType.firstNamedChild;
        if (typeId) return typeId.text;
      }
    }
  }

  return undefined;
};

/**
 * Build the scope key of the nearest enclosing named function of `node`:
 * `<funcName>@<startIndex of the function node>`. Function nodes for which
 * extractFunctionName yields no name are skipped. Returns `undefined` when
 * no named function encloses the node.
 */
const findEnclosingScopeKey = (node: SyntaxNode): string | undefined => {
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    if (!FUNCTION_NODE_TYPES.has(ancestor.type)) continue;
    const { funcName } = extractFunctionName(ancestor);
    if (funcName) return `${funcName}@${ancestor.startIndex}`;
  }
  return undefined;
};

/**
 * Create a lookup that checks both local AST class names AND the SymbolTable's
 * global index. This allows extractInitializer functions to distinguish
 * constructor calls from function calls (e.g.
Kotlin `User()` vs `getUser()`)
 * using cross-file type information when available.
 *
 * Only `.has()` is exposed — the SymbolTable doesn't support iteration.
 * Results are memoized to avoid redundant lookupFuzzy scans across declarations.
 */
const createClassNameLookup = (
  localNames: Set<string>,
  symbolTable?: SymbolTable,
): ClassNameLookup => {
  // Without a SymbolTable the local set already is the whole answer.
  if (!symbolTable) return localNames;

  // name → "the SymbolTable knows a Class/Enum/Struct with this name"
  const globalHits = new Map<string, boolean>();

  const isGlobalClass = (name: string): boolean => {
    const known = globalHits.get(name);
    if (known !== undefined) return known;

    let found = false;
    for (const def of symbolTable.lookupFuzzy(name)) {
      if (def.type === 'Class' || def.type === 'Enum' || def.type === 'Struct') {
        found = true;
        break;
      }
    }
    globalHits.set(name, found);
    return found;
  };

  return {
    // Local names short-circuit; they are never written to the memo.
    has: (name: string): boolean => localNames.has(name) || isGlobalClass(name),
  };
};

/**
 * (Describes buildTypeEnv, which is defined further down in this file.)
 *
 * Build a TypeEnvironment from a tree-sitter AST for a given language.
 * Single-pass: collects class/struct names, type bindings, AND constructor
 * bindings that couldn't be resolved locally — all in one AST walk.
 *
 * When a symbolTable is provided (call-processor path), class names from across
 * the project are available for constructor inference in languages like Kotlin
 * where constructors are syntactically identical to function calls.
 */

/**
 * Node types whose subtrees are skipped entirely by the type-env walk because
 * they can NEVER contain type-relevant descendants (declarations, parameters,
 * for-loops, class definitions, pattern bindings).
 * Conservative leaf-only set — verified safe across all 12 supported language grammars.
 * IMPORTANT: Do NOT add expression containers (arguments, binary_expression, etc.) —
 * they can contain arrow functions with typed parameters.
 */
const SKIP_SUBTREE_TYPES = new Set([
  // Plain string literals. template_string is deliberately absent: its interpolations
  // can hold arrow functions with typed parameters, e.g. `${(x: T) => x}`.
  'string',
  'string_literal',
  'string_content',
  'string_fragment',
  'heredoc_body',
  // Comments
  'comment',
  'line_comment',
  'block_comment',
  // Numeric / boolean / null literals
  'number',
  'integer_literal',
  'float_literal',
  'true',
  'false',
  'null',
  // Regex
  'regex',
  'regex_pattern',
]);

/** Symbol kinds accepted as a receiver's class when resolving fields and methods. */
const CLASS_LIKE_TYPES = new Set(['Class', 'Struct', 'Interface']);

/**
 * Build a memoized "type name → class-like definitions" function for use during
 * fixpoint iteration. The SymbolTable is immutable during type resolution, so a
 * cached answer never goes stale; caching avoids repeating the lookupFuzzy +
 * filter scan on every iteration. Without a SymbolTable every name maps to `[]`.
 */
const createClassDefCache = (symbolTable?: SymbolTable) => {
  const defsByName = new Map<string, Array<{ nodeId: string; type: string }>>();

  return (typeName: string) => {
    const hit = defsByName.get(typeName);
    if (hit !== undefined) return hit;

    const defs: Array<{ nodeId: string; type: string }> = symbolTable
      ? symbolTable.lookupFuzzy(typeName).filter(d => CLASS_LIKE_TYPES.has(d.type))
      : [];
    defsByName.set(typeName, defs);
    return defs;
  };
};

/**
 * AST node types that represent an explicit constructor expression.
 * Not covered:
 *  - C# `implicit_object_creation_expression` (`new()`): the type is inferred, not written.
 *  - Kotlin constructors: they are plain `call_expression` nodes (no `new` keyword).
 */
const CONSTRUCTOR_EXPR_TYPES = new Set([
  'new_expression', // TS/JS/C++: new Dog()
  'object_creation_expression', // Java/C#: new Dog()
]);

/** Extract the constructor class name from a declaration node's initializer.
 * Searches for new_expression / object_creation_expression in the node's subtree.
 * Returns the class name or undefined if no constructor is found.
 * Depth-limited to 5 to avoid expensive traversals.
*/
const extractConstructorTypeName = (node: SyntaxNode, depth = 0): string | undefined => {
  if (depth > 5) return undefined;

  if (CONSTRUCTOR_EXPR_TYPES.has(node.type)) {
    // Where the class lives, in priority order:
    //   `type` field        — Java/C# object_creation_expression
    //   `constructor` field — TS/JS new_expression
    //   first named child   — fallback when neither field is present
    const classNode =
      node.childForFieldName('type') ??
      node.childForFieldName('constructor') ??
      node.firstNamedChild;
    if (classNode) return extractSimpleTypeName(classNode);
  }

  for (let i = 0; i < node.namedChildCount; i++) {
    const child = node.namedChild(i);
    if (!child) continue;

    // Nested functions, classes and call expressions are opaque: a constructor
    // found inside them (e.g. processAll(new Dog())) is not this declaration's initializer.
    const kind = child.type;
    const isOpaque =
      FUNCTION_NODE_TYPES.has(kind) || CLASS_CONTAINER_TYPES.has(kind) || CALL_EXPRESSION_TYPES.has(kind);
    if (isOpaque) continue;

    const found = extractConstructorTypeName(child, depth + 1);
    if (found) return found;
  }

  return undefined;
};

/** Max depth for MRO parent chain walking. Real-world inheritance rarely exceeds 3-4 levels. */
const MAX_MRO_DEPTH = 5;

/**
 * Report whether `child` is a strict descendant of `parent` according to `parentMap`
 * (class name → direct parent names).
 *
 * Level-by-level breadth-first climb from `child`, at most MAX_MRO_DEPTH levels,
 * never revisiting a class. Returns `false` when no map is given or when
 * `child === parent`.
 */
export const isSubclassOf = (
  child: string,
  parent: string,
  parentMap: ReadonlyMap<string, readonly string[]> | undefined,
): boolean => {
  if (!parentMap) return false;
  if (child === parent) return false;

  const seen = new Set<string>([child]);
  let frontier: string[] = [child];
  let level = 0;

  while (level < MAX_MRO_DEPTH && frontier.length > 0) {
    const upcoming: string[] = [];
    for (const cls of frontier) {
      for (const ancestor of parentMap.get(cls) ?? []) {
        if (ancestor === parent) return true;
        if (seen.has(ancestor)) continue;
        seen.add(ancestor);
        upcoming.push(ancestor);
      }
    }
    frontier = upcoming;
    level += 1;
  }

  return false;
};

/**
 * Search the ancestors of `typeName` for a member, nearest level first.
 *
 * For each not-yet-visited ancestor, `getClassDefs` is asked for its definitions;
 * `lookupOnClass` is invoked only when exactly one definition exists (ambiguous
 * or unknown ancestors are still climbed through, just not queried). The first
 * result that is not `undefined` is returned. The climb stops after
 * MAX_MRO_DEPTH levels. Returns `undefined` when no map is given or nothing matched.
 *
 * Used by resolveFieldType and resolveMethodReturnType when the direct lookup misses.
 */
const walkParentChain = <T>(
  typeName: string,
  parentMap: ReadonlyMap<string, readonly string[]> | undefined,
  getClassDefs: (name: string) => Array<{ nodeId: string; type: string }>,
  lookupOnClass: (nodeId: string) => T | undefined,
): T | undefined => {
  if (!parentMap) return undefined;

  const seen = new Set<string>([typeName]);
  let frontier: string[] = [typeName];
  let level = 0;

  while (level < MAX_MRO_DEPTH && frontier.length > 0) {
    const upcoming: string[] = [];
    for (const cls of frontier) {
      for (const ancestor of parentMap.get(cls) ?? []) {
        if (seen.has(ancestor)) continue;
        seen.add(ancestor);

        const defs = getClassDefs(ancestor);
        if (defs.length === 1) {
          const found = lookupOnClass(defs[0].nodeId);
          if (found !== undefined) return found;
        }
        upcoming.push(ancestor);
      }
    }
    frontier = upcoming;
    level += 1;
  }

  return undefined;
};

/** Resolve a field's declared type given a receiver variable and field name.
 * Uses SymbolTable to find the class nodeId for the receiver's type, then
 * looks up the field via the eagerly-populated fieldByOwner index.
* Falls back to MRO parent chain walking if direct lookup fails (Phase 11A).
 */
const resolveFieldType = (
  receiver: string,
  field: string,
  scopeEnv: ReadonlyMap<string, string>,
  symbolTable?: SymbolTable,
  getClassDefs?: (typeName: string) => Array<{ nodeId: string; type: string }>,
  parentMap?: ReadonlyMap<string, readonly string[]>,
): string | undefined => {
  if (!symbolTable) return undefined;

  const receiverType = scopeEnv.get(receiver);
  if (!receiverType) return undefined;

  // Prefer the caller's (memoized) lookup; otherwise scan the SymbolTable directly.
  const lookup =
    getClassDefs ??
    ((name: string) => symbolTable.lookupFuzzy(name).filter(d => CLASS_LIKE_TYPES.has(d.type)));

  // The receiver type must map to exactly one class-like definition.
  const classDefs = lookup(receiverType);
  if (classDefs.length !== 1) return undefined;

  // Field declared on the class itself: its declared type is final, even if
  // extractReturnTypeName cannot reduce it to a name.
  const ownDeclared = symbolTable.lookupFieldByOwner(classDefs[0].nodeId, field)?.declaredType;
  if (ownDeclared) return extractReturnTypeName(ownDeclared);

  // Otherwise look for an inherited field on the ancestors.
  return walkParentChain(receiverType, parentMap, lookup, (nodeId) => {
    const declared = symbolTable.lookupFieldByOwner(nodeId, field)?.declaredType;
    return declared ? extractReturnTypeName(declared) : undefined;
  });
};

/**
 * Resolve the return type of `receiver.method()`.
 *
 * The receiver's type is read from `scopeEnv` and mapped to its class-like
 * definitions. Callables named `method` owned by any of those definitions are
 * then considered:
 *  - exactly one  → its return type (or `undefined` if it has none);
 *  - none         → the ancestors are searched via walkParentChain (Phase 11A),
 *                   accepting an ancestor only if it owns exactly one such
 *                   callable with a return type;
 *  - several      → `undefined` (ambiguous).
 */
const resolveMethodReturnType = (
  receiver: string,
  method: string,
  scopeEnv: ReadonlyMap<string, string>,
  symbolTable?: SymbolTable,
  getClassDefs?: (typeName: string) => Array<{ nodeId: string; type: string }>,
  parentMap?: ReadonlyMap<string, readonly string[]>,
): string | undefined => {
  if (!symbolTable) return undefined;

  const receiverType = scopeEnv.get(receiver);
  if (!receiverType) return undefined;

  const lookup =
    getClassDefs ??
    ((name: string) => symbolTable.lookupFuzzy(name).filter(d => CLASS_LIKE_TYPES.has(d.type)));

  const classDefs = lookup(receiverType);
  if (classDefs.length === 0) return undefined;

  // Callables with the right name that belong to one of the receiver's class definitions.
  const ownerIds = new Set(classDefs.map(d => d.nodeId));
  const ownMethods = symbolTable
    .lookupFuzzyCallable(method)
    .filter(d => d.ownerId && ownerIds.has(d.ownerId));

  if (ownMethods.length === 0) {
    // Direct miss: try inherited methods.
    return walkParentChain(receiverType, parentMap, lookup, (nodeId) => {
      const inherited = symbolTable
        .lookupFuzzyCallable(method)
        .filter(d => d.ownerId === nodeId);
      if (inherited.length !== 1 || !inherited[0].returnType) return undefined;
      return extractReturnTypeName(inherited[0].returnType);
    });
  }

  // Overloads / duplicates are ambiguous.
  if (ownMethods.length > 1) return undefined;

  const declaredReturn = ownMethods[0].returnType;
  return declaredReturn ? extractReturnTypeName(declaredReturn) : undefined;
};

/**
 * Unified fixpoint propagation: iterate over ALL pending items (copy, callResult,
 * fieldAccess, methodCallResult) until no new bindings are produced.
 * Handles arbitrary-depth mixed chains:
 * const user = getUser(); // callResult → User
 * const addr = user.address; // fieldAccess → Address (depends on user)
 * const city = addr.getCity(); // methodCallResult → City (depends on addr)
 * const alias = city; // copy → City (depends on city)
 * Data flow: SymbolTable (immutable) + scopeEnv → resolve → scopeEnv.
 * Termination: finite entries, each bound at most once (first-writer-wins), max 10 iterations.
*/ const MAX_FIXPOINT_ITERATIONS = 10; const resolveFixpointBindings = ( pendingItems: Array<{ scope: string } & PendingAssignment>, env: TypeEnv, returnTypeLookup: ReturnTypeLookup, symbolTable?: SymbolTable, parentMap?: ReadonlyMap<string, readonly string[]>, ): void => { if (pendingItems.length === 0) return; const getClassDefs = createClassDefCache(symbolTable); const resolved = new Set<number>(); for (let iter = 0; iter < MAX_FIXPOINT_ITERATIONS; iter++) { let changed = false; for (let i = 0; i < pendingItems.length; i++) { if (resolved.has(i)) continue; const item = pendingItems[i]; const scopeEnv = env.get(item.scope); if (!scopeEnv || scopeEnv.has(item.lhs)) { resolved.add(i); continue; } let typeName: string | undefined; switch (item.kind) { case 'callResult': typeName = returnTypeLookup.lookupReturnType(item.callee); break; case 'copy': typeName = scopeEnv.get(item.rhs) ?? env.get(FILE_SCOPE)?.get(item.rhs); break; case 'fieldAccess': typeName = resolveFieldType(item.receiver, item.field, scopeEnv, symbolTable, getClassDefs, parentMap); break; case 'methodCallResult': typeName = resolveMethodReturnType(item.receiver, item.method, scopeEnv, symbolTable, getClassDefs, parentMap); break; default: { // Exhaustive check: TypeScript will error here if a new PendingAssignment // kind is added without handling it in the switch. const _exhaustive: never = item; break; } } if (typeName) { scopeEnv.set(item.lhs, typeName); resolved.add(i); changed = true; } } if (!changed) break; if (iter === MAX_FIXPOINT_ITERATIONS - 1 && process.env.GITNEXUS_DEBUG) { const unresolved = pendingItems.length - resolved.size; if (unresolved > 0) { console.warn(`[type-env] fixpoint hit iteration cap (${MAX_FIXPOINT_ITERATIONS}), ${unresolved} items unresolved`); } } } }; /** * Options for buildTypeEnv. * Uses an options object to allow future extensions without positional parameter sprawl. 
*/
export interface BuildTypeEnvOptions {
  /**
   * Project-wide symbol index. When present it enables return-type lookup for
   * calls, field/method resolution during the fixpoint, and cross-file class
   * names for constructor inference.
   */
  symbolTable?: SymbolTable;
  /** Class name → direct parent class names; used to resolve inherited fields and methods. */
  parentMap?: ReadonlyMap<string, readonly string[]>;
}

/**
 * Build the type environment for one parsed file.
 *
 * Phases:
 *  1. A single AST walk that records explicit type bindings (Tier 0), constructor-inferred
 *     bindings (Tier 1), pattern-binding narrowings, pending assignments (Tier 2),
 *     unresolved for-loops, and unverified constructor bindings.
 *  2. Fixpoint propagation over the pending assignments.
 *  3. Replay of the for-loops that produced no binding during the walk, followed by a
 *     second fixpoint over the items that are still unbound.
 *
 * @param tree     Parsed tree; only `rootNode` is used.
 * @param language Selects the per-language extractor config from `typeConfigs`.
 * @param options  Optional SymbolTable and parent map (see BuildTypeEnvOptions).
 * @returns The lookup function plus the raw `env`, the collected `constructorBindings`,
 *          and the `constructorTypeMap` (`scope\0varName` → constructor type).
 */
export const buildTypeEnv = (
  tree: { rootNode: SyntaxNode },
  language: SupportedLanguages,
  options?: BuildTypeEnvOptions,
): TypeEnvironment => {
  const symbolTable = options?.symbolTable;
  const parentMap = options?.parentMap;
  const env: TypeEnv = new Map();
  const patternOverrides: PatternOverrides = new Map();
  // Phase P: maps `scope\0varName` → constructor type when a declaration has BOTH
  // a base type annotation AND a more specific constructor initializer.
  // e.g., `Animal a = new Dog()` → constructorTypeMap.set('func@42\0a', 'Dog')
  const constructorTypeMap = new Map<string, string>();
  const localClassNames = new Set<string>();
  const classNames = createClassNameLookup(localClassNames, symbolTable);
  const config = typeConfigs[language];
  const bindings: ConstructorBinding[] = [];

  // Build ReturnTypeLookup from optional SymbolTable.
  // Conservative: returns undefined when callee is ambiguous (0 or 2+ matches).
  const returnTypeLookup: ReturnTypeLookup = {
    lookupReturnType(callee: string): string | undefined {
      if (!symbolTable) return undefined;
      if (isBuiltInOrNoise(callee)) return undefined;
      const callables = symbolTable.lookupFuzzyCallable(callee);
      if (callables.length !== 1) return undefined;
      const rawReturn = callables[0].returnType;
      if (!rawReturn) return undefined;
      return extractReturnTypeName(rawReturn);
    },
    lookupRawReturnType(callee: string): string | undefined {
      if (!symbolTable) return undefined;
      if (isBuiltInOrNoise(callee)) return undefined;
      const callables = symbolTable.lookupFuzzyCallable(callee);
      if (callables.length !== 1) return undefined;
      return callables[0].returnType;
    }
  };

  // Pre-compute combined set of node types that need extractTypeBinding.
  // Single Set.has() replaces 3 separate checks per node in walk().
  const interestingNodeTypes = new Set<string>();
  TYPED_PARAMETER_TYPES.forEach(t => interestingNodeTypes.add(t));
  config.declarationNodeTypes.forEach(t => interestingNodeTypes.add(t));
  config.forLoopNodeTypes?.forEach(t => interestingNodeTypes.add(t));

  // Tier 2: unified fixpoint propagation — collects copy, callResult, fieldAccess, and
  // methodCallResult items during walk(), then iterates until no new bindings are produced.
  // Handles arbitrary-depth mixed chains: callResult → fieldAccess → methodCallResult → copy.
  const pendingItems: Array<{ scope: string } & PendingAssignment> = [];

  // For-loop nodes whose iterable was unresolved at walk-time. Replayed after the fixpoint
  // resolves the iterable's type, bridging the walk-time/fixpoint gap (Phase 10 / ex-9B).
  const pendingForLoops: Array<{ node: SyntaxNode; scope: string }> = [];

  // Maps `scope\0varName` → the type annotation AST node from the original declaration.
  // Allows pattern extractors to navigate back to the declaration's generic type arguments
  // (e.g., to extract T from Result<T, E> for `if let Ok(x) = res`).
  // NOTE: This is a SUPERSET of scopeEnv — entries exist even when extractSimpleTypeName
  // returns undefined for container types (User[], []User, List[User]). This is intentional:
  // for-loop Strategy 1 needs the raw AST type node for exactly those container types.
  const declarationTypeNodes = new Map<string, SyntaxNode>();

  /** Return the binding map for `scope`, creating and registering an empty one on first use. */
  const ensureScopeEnv = (scope: string): Map<string, string> => {
    let scopeEnv = env.get(scope);
    if (!scopeEnv) {
      scopeEnv = new Map<string, string>();
      env.set(scope, scopeEnv);
    }
    return scopeEnv;
  };

  /** Append a position-indexed narrowing for `varName` in `scope`, valid over `range`. */
  const recordPatternOverride = (
    scope: string,
    varName: string,
    range: { startIndex: number; endIndex: number },
    typeName: string,
  ): void => {
    let varMap = patternOverrides.get(scope);
    if (!varMap) {
      varMap = new Map();
      patternOverrides.set(scope, varMap);
    }
    let ranges = varMap.get(varName);
    if (!ranges) {
      ranges = [];
      varMap.set(varName, ranges);
    }
    ranges.push({ rangeStart: range.startIndex, rangeEnd: range.endIndex, typeName });
  };

  /**
   * Try to extract a (variableName → typeName) binding from a single AST node.
   *
   * Resolution tiers (first match wins):
   * - Tier 0: explicit type annotations via extractDeclaration / extractForLoopBinding
   * - Tier 1: constructor-call inference via extractInitializer (fallback)
   *
   * Side effect: populates declarationTypeNodes for variables that have an explicit
   * type annotation field on the declaration node. This allows pattern extractors to
   * retrieve generic type arguments from the original declaration (e.g., extracting T
   * from Result<T, E> for `if let Ok(x) = res`).
   */
  const extractTypeBinding = (node: SyntaxNode, scopeEnv: Map<string, string>, scope: string): void => {
    // ── Typed parameters ────────────────────────────────────────────────────
    if (TYPED_PARAMETER_TYPES.has(node.type)) {
      // Capture the raw type annotation BEFORE extractParameter.
      // Most languages use 'name' field; Rust uses 'pattern'; TS uses 'pattern' for some param types.
      // Kotlin `parameter` nodes use positional children instead of named fields,
      // so we fall back to scanning children by type when childForFieldName returns null.
      const typeNode = node.childForFieldName('type');
      if (typeNode) {
        const nameNode = node.childForFieldName('name')
          ?? node.childForFieldName('pattern')
          // Python typed_parameter: name is a positional child (identifier), not a named field
          ?? (node.firstNamedChild?.type === 'identifier' ? node.firstNamedChild : null);
        if (nameNode) {
          const varName = extractVarName(nameNode);
          if (varName && !declarationTypeNodes.has(`${scope}\0${varName}`)) {
            declarationTypeNodes.set(`${scope}\0${varName}`, typeNode);
          }
        }
      } else {
        // Fallback: positional children (Kotlin `parameter` → simple_identifier + user_type).
        // The first identifier-like child is the name, the first type-like child is the type.
        let fallbackName: SyntaxNode | null = null;
        let fallbackType: SyntaxNode | null = null;
        for (let i = 0; i < node.namedChildCount; i++) {
          const child = node.namedChild(i);
          if (!child) continue;
          if (!fallbackName && (child.type === 'simple_identifier' || child.type === 'identifier')) {
            fallbackName = child;
          }
          if (!fallbackType && (child.type === 'user_type' || child.type === 'type_identifier'
            || child.type === 'generic_type' || child.type === 'parameterized_type'
            || child.type === 'nullable_type')) {
            fallbackType = child;
          }
        }
        if (fallbackName && fallbackType) {
          const varName = extractVarName(fallbackName);
          if (varName && !declarationTypeNodes.has(`${scope}\0${varName}`)) {
            declarationTypeNodes.set(`${scope}\0${varName}`, fallbackType);
          }
        }
      }
      config.extractParameter(node, scopeEnv);
      return;
    }

    // ── For-each loops ──────────────────────────────────────────────────────
    // For-each loop variable bindings (Java/C#/Kotlin): explicit element types in the AST.
    // Checked before declarationNodeTypes — loop variables are not declarations.
    if (config.forLoopNodeTypes?.has(node.type)) {
      if (config.extractForLoopBinding) {
        const sizeBefore = scopeEnv.size;
        const forLoopCtx: ForLoopExtractorContext = { scopeEnv, declarationTypeNodes, scope, returnTypeLookup };
        config.extractForLoopBinding(node, forLoopCtx);
        // If no new binding was produced, the iterable's type may not yet be resolved.
        // Store for post-fixpoint replay (Phase 10 / ex-9B loop-fixpoint bridge).
        if (scopeEnv.size === sizeBefore) {
          pendingForLoops.push({ node, scope });
        }
      }
      return;
    }

    // ── Declarations ────────────────────────────────────────────────────────
    if (config.declarationNodeTypes.has(node.type)) {
      // Capture the raw type annotation AST node BEFORE extractDeclaration.
      // This decouples type node capture from scopeEnv success — container types
      // (User[], []User, List[User]) that fail extractSimpleTypeName still get
      // their AST type node recorded for Strategy 1 for-loop resolution.
      // Try direct type field first (works for Go var_spec, Python assignment, Rust
      // let_declaration), then unwrap wrapper nodes (C# field_declaration and
      // local_declaration_statement wrap their type inside a variable_declaration child).
      let typeNode = node.childForFieldName('type');
      if (!typeNode) {
        // Use manual loop instead of namedChildren.find() to avoid array allocation on hot path.
        let wrapped = node.childForFieldName('declaration');
        if (!wrapped) {
          for (let i = 0; i < node.namedChildCount; i++) {
            const c = node.namedChild(i);
            if (c?.type === 'variable_declaration') {
              wrapped = c;
              break;
            }
          }
        }
        if (wrapped) {
          typeNode = wrapped.childForFieldName('type');
          // Kotlin: variable_declaration stores the type as user_type / nullable_type
          // child rather than a named 'type' field.
          if (!typeNode) {
            for (let i = 0; i < wrapped.namedChildCount; i++) {
              const c = wrapped.namedChild(i);
              if (c && (c.type === 'user_type' || c.type === 'nullable_type')) {
                typeNode = c;
                break;
              }
            }
          }
        }
      }
      if (typeNode) {
        const nameNode = node.childForFieldName('name')
          ?? node.childForFieldName('left')
          ?? node.childForFieldName('pattern');
        if (nameNode) {
          const varName = extractVarName(nameNode);
          if (varName && !declarationTypeNodes.has(`${scope}\0${varName}`)) {
            declarationTypeNodes.set(`${scope}\0${varName}`, typeNode);
          }
        }
      }

      // Run the language-specific declaration extractor (may or may not add to scopeEnv).
      // sizeBefore is -1 when no type node was found, which disables both
      // "newly-added keys" passes below.
      const sizeBefore = typeNode ? scopeEnv.size : -1;
      config.extractDeclaration(node, scopeEnv);

      // Fallback: for multi-declarator languages (TS, C#, Java) where the type field
      // is on variable_declarator children, capture newly-added keys.
      // Map preserves insertion order, so new keys are always at the end —
      // skip the first sizeBefore entries to find only newly-added variables.
      if (sizeBefore >= 0 && scopeEnv.size > sizeBefore) {
        let skip = sizeBefore;
        for (const varName of scopeEnv.keys()) {
          if (skip > 0) {
            skip--;
            continue;
          }
          if (!declarationTypeNodes.has(`${scope}\0${varName}`)) {
            declarationTypeNodes.set(`${scope}\0${varName}`, typeNode);
          }
        }
      }

      // Tier 1: constructor-call inference as fallback.
      // Always called when available — each language's extractInitializer
      // internally skips declarators that already have explicit annotations,
      // so this handles mixed cases like `const a: A = x, b = new B()`.
      if (config.extractInitializer) {
        config.extractInitializer(node, scopeEnv, classNames);
      }

      // Phase P: detect constructor-visible virtual dispatch.
      // When a declaration has BOTH a type annotation AND a constructor initializer,
      // record the constructor type for receiver override at call resolution time.
      // e.g., `Animal a = new Dog()` → constructorTypeMap.set('scope\0a', 'Dog')
      if (sizeBefore >= 0 && scopeEnv.size > sizeBefore) {
        let ctorSkip = sizeBefore;
        for (const varName of scopeEnv.keys()) {
          if (ctorSkip > 0) {
            ctorSkip--;
            continue;
          }
          const declaredType = scopeEnv.get(varName);
          if (!declaredType) continue;
          const ctorType = extractConstructorTypeName(node)
            ?? config.detectConstructorType?.(node, classNames);
          if (!ctorType || ctorType === declaredType) continue;
          // Unwrap wrapper types (e.g., C++ shared_ptr<Animal> → Animal) so the
          // comparison is against the effective declared type. Language-specific via config hook.
          const declTypeNode = declarationTypeNodes.get(`${scope}\0${varName}`);
          const effectiveDeclaredType = (declTypeNode && config.unwrapDeclaredType)
            ? (config.unwrapDeclaredType(declaredType, declTypeNode) ?? declaredType)
            : declaredType;
          if (ctorType !== effectiveDeclaredType) {
            constructorTypeMap.set(`${scope}\0${varName}`, ctorType);
          }
        }
      }
    }
  };

  /** Depth-first walk; `currentScope` is the scope key inherited from the parent node. */
  const walk = (node: SyntaxNode, currentScope: string): void => {
    // Fast skip: subtrees that can never contain type-relevant nodes (leaf-like literals).
    if (SKIP_SUBTREE_TYPES.has(node.type)) return;

    // Collect class/struct names as we encounter them (used by extractInitializer
    // to distinguish constructor calls from function calls, e.g. C++ `User()` vs `getUser()`)
    // Currently only C++ uses this locally; other languages rely on the SymbolTable path.
    if (CLASS_CONTAINER_TYPES.has(node.type)) {
      // Most languages use 'name' field; Kotlin uses a type_identifier child instead
      const nameNode = node.childForFieldName('name') ?? findTypeIdentifierChild(node);
      if (nameNode) localClassNames.add(nameNode.text);
    }

    // Detect scope boundaries (function/method definitions)
    let scope = currentScope;
    if (FUNCTION_NODE_TYPES.has(node.type)) {
      const { funcName } = extractFunctionName(node);
      if (funcName) scope = `${funcName}@${node.startIndex}`;
    }

    // Only create scope map and call extractTypeBinding for interesting node types.
    // Single Set.has() replaces 3 separate checks inside extractTypeBinding.
    if (interestingNodeTypes.has(node.type)) {
      extractTypeBinding(node, ensureScopeEnv(scope), scope);
    }

    // Pattern binding extraction: handles constructs that introduce NEW typed variables
    // via pattern matching (e.g. `if let Some(x) = opt`, `x instanceof T t`)
    // or narrow existing variables within a branch (null-check narrowing).
    // Runs after Tier 0/1 so scopeEnv already contains the source variable's type.
    // Conservative: extractor returns undefined when source type is unknown.
    if (config.extractPatternBinding
      && (!config.patternBindingNodeTypes || config.patternBindingNodeTypes.has(node.type))) {
      // Ensure scopeEnv exists for pattern binding reads/writes
      const scopeEnv = ensureScopeEnv(scope);
      const patternBinding = config.extractPatternBinding(node, scopeEnv, declarationTypeNodes, scope);
      if (patternBinding) {
        if (patternBinding.narrowingRange) {
          // Explicit narrowing range (null-check narrowing): always store in patternOverrides
          // using the extractor-provided range (typically the if-body block).
          recordPatternOverride(
            scope,
            patternBinding.varName,
            patternBinding.narrowingRange,
            patternBinding.typeName,
          );
        } else if (config.allowPatternBindingOverwrite) {
          // Position-indexed: store per-branch binding for smart-cast narrowing.
          // Each when arm / switch case gets its own type for the variable,
          // preventing cross-arm contamination (e.g., Kotlin when/is).
          const branchNode = findNarrowingBranchScope(node);
          if (branchNode) {
            recordPatternOverride(scope, patternBinding.varName, branchNode, patternBinding.typeName);
          }
          // Also store in flat scopeEnv as fallback (last arm wins — same as before
          // for code that doesn't use position-indexed lookup).
          scopeEnv.set(patternBinding.varName, patternBinding.typeName);
        } else if (!scopeEnv.has(patternBinding.varName)) {
          // First-writer-wins for languages without smart-cast overwrite (Java instanceof, etc.)
          scopeEnv.set(patternBinding.varName, patternBinding.typeName);
        }
      }
    }

    // Tier 2: collect plain-identifier RHS assignments for post-walk propagation.
    // Delegates to per-language extractPendingAssignment — AST shapes differ widely
    // (JS uses variable_declarator/name/value, Rust uses let_declaration/pattern/value,
    // Python uses assignment/left/right, Go uses short_var_declaration/expression_list).
    // May return a single item or an array (for destructuring: N fieldAccess items).
    if (config.extractPendingAssignment && config.declarationNodeTypes.has(node.type)) {
      // scopeEnv is guaranteed to exist here because declarationNodeTypes is a subset
      // of interestingNodeTypes, so the scope map was already created above.
      const scopeEnv = env.get(scope);
      if (scopeEnv) {
        const pending = config.extractPendingAssignment(node, scopeEnv);
        if (pending) {
          const items = Array.isArray(pending) ? pending : [pending];
          for (const item of items) {
            // Substitute this/self/$this/Me receivers with enclosing class name
            const resolved = substituteThisReceiver(item, node);
            pendingItems.push({ scope, ...resolved });
          }
        }
      }
    }

    // Scan for constructor bindings that couldn't be resolved locally.
    // Only collect if TypeEnv didn't already resolve this binding.
    if (config.scanConstructorBinding) {
      const result = config.scanConstructorBinding(node);
      if (result) {
        const scopeEnv = env.get(scope);
        if (!scopeEnv?.has(result.varName)) {
          bindings.push({ scope, ...result });
        }
      }
    }

    // Recurse into children
    for (let i = 0; i < node.childCount; i++) {
      const child = node.child(i);
      if (child) walk(child, scope);
    }
  };

  walk(tree.rootNode, FILE_SCOPE);
  resolveFixpointBindings(pendingItems, env, returnTypeLookup, symbolTable, parentMap);

  // Post-fixpoint for-loop replay (Phase 10 / ex-9B loop-fixpoint bridge):
  // For-loop nodes whose iterables were unresolved at walk-time may now be
  // resolvable because the fixpoint bound the iterable's type.
  // Example: `const users = getUsers(); for (const u of users) { u.save(); }`
  //   - walk-time: users untyped → u unresolved
  //   - fixpoint: users → User[]
  //   - replay: users now typed → u → User
  if (pendingForLoops.length > 0 && config.extractForLoopBinding) {
    for (const { node, scope } of pendingForLoops) {
      const scopeEnv = ensureScopeEnv(scope);
      config.extractForLoopBinding(node, { scopeEnv, declarationTypeNodes, scope, returnTypeLookup });
    }

    // Re-run the fixpoint over the items that are still unbound: some of them may
    // depend on loop variables that the replay has just typed.
    const stillUnbound = pendingItems.filter((item) => {
      const scopeEnv = env.get(item.scope);
      return scopeEnv !== undefined && !scopeEnv.has(item.lhs);
    });
    if (stillUnbound.length > 0) {
      // parentMap must be forwarded here as well; without it the second pass
      // cannot resolve fields/methods inherited from a parent class.
      resolveFixpointBindings(stillUnbound, env, returnTypeLookup, symbolTable, parentMap);
    }
  }

  return {
    lookup: (varName, callNode) => lookupInEnv(env, varName, callNode, patternOverrides),
    constructorBindings: bindings,
    env,
    constructorTypeMap,
  };
};

/**
 * Unverified constructor binding: a `val x = Callee()` pattern where we
 * couldn't confirm the callee is a class (because it's defined in another file).
* The caller must verify `calleeName` against the SymbolTable before trusting.
 */
export interface ConstructorBinding {
  /** Function scope key (matches TypeEnv scope keys) */
  scope: string;
  /** Variable name that received the constructor result */
  varName: string;
  /** Name of the callee (potential class constructor) */
  calleeName: string;
  /** Enclosing class name when callee is a method on a known receiver (e.g. $this) */
  receiverClassName?: string;
}

================================================
FILE: gitnexus/src/core/ingestion/type-extractors/c-cpp.ts
================================================
import type { SyntaxNode } from '../utils.js';
import type { LanguageTypeConfig, ParameterExtractor, TypeBindingExtractor, InitializerExtractor, ClassNameLookup, ConstructorBindingScanner, PendingAssignmentExtractor, ForLoopExtractor, LiteralTypeInferrer, ConstructorTypeDetector, DeclaredTypeUnwrapper } from './types.js';
import { extractSimpleTypeName, extractVarName, resolveIterableElementType, methodToTypeArgPosition, type TypeArgPosition } from './shared.js';

/** AST node types treated as variable declarations for C/C++.
 * Exposed to the type-env builder as `typeConfig.declarationNodeTypes`. */
const DECLARATION_NODE_TYPES: ReadonlySet<string> = new Set([
  'declaration',
]);

/** Smart pointer factory function names that create a typed object. */
const SMART_PTR_FACTORIES = new Set([
  'make_shared',
  'make_unique',
  'make_shared_for_overwrite',
]);

/** Smart pointer wrapper type names. When the declared type is a smart pointer,
 * the inner template type is extracted for virtual dispatch comparison. */
const SMART_PTR_WRAPPERS = new Set(['shared_ptr', 'unique_ptr', 'weak_ptr']);

/** Extract the first type name from a template_argument_list child.
 * Unwraps type_descriptor wrappers common in tree-sitter-cpp ASTs.
 * Returns undefined if no template arguments or no type found.
*/
export const extractFirstTemplateTypeArg = (parentNode: SyntaxNode): string | undefined => {
  const templateArgs = parentNode.children.find((c: any) => c.type === 'template_argument_list');
  if (!templateArgs?.firstNamedChild) return undefined;
  let argNode: any = templateArgs.firstNamedChild;
  // type_descriptor wraps the actual type specifier in its 'type' field.
  if (argNode.type === 'type_descriptor') {
    const inner = argNode.childForFieldName('type');
    if (inner) argNode = inner;
  }
  return extractSimpleTypeName(argNode) ?? undefined;
};

/** True when a declaration's type node is a deduced placeholder:
 * `auto`, `decltype(auto)`, or any placeholder_type_specifier. */
const isAutoTypeNode = (typeNode: SyntaxNode): boolean =>
  typeNode.text === 'auto' ||
  typeNode.text === 'decltype(auto)' ||
  typeNode.type === 'placeholder_type_specifier';

/** Strip a single pointer/reference declarator layer (`*x`, `&x`) and return the wrapped node.
 * Other node types are returned unchanged; null when the wrapper has no named child. */
const unwrapPointerOrReferenceDeclarator = (nameNode: SyntaxNode): SyntaxNode | null =>
  nameNode.type === 'pointer_declarator' || nameNode.type === 'reference_declarator'
    ? nameNode.firstNamedChild
    : nameNode;

/** C++: Type x = ...; Type* x; Type& x;
 * Binds the declared variable name to the simple name of its declared type in `env`.
 * Does nothing when the type or the variable name cannot be extracted. */
const extractDeclaration: TypeBindingExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  const typeNode = node.childForFieldName('type');
  if (!typeNode) return;
  const typeName = extractSimpleTypeName(typeNode);
  if (!typeName) return;

  const declarator = node.childForFieldName('declarator');
  if (!declarator) return;

  // init_declarator: Type x = value
  const nameNode = declarator.type === 'init_declarator'
    ? declarator.childForFieldName('declarator')
    : declarator;
  if (!nameNode) return;

  // Handle pointer/reference declarators
  const finalName = unwrapPointerOrReferenceDeclarator(nameNode);
  if (!finalName) return;

  const varName = extractVarName(finalName);
  if (varName) env.set(varName, typeName);
};

/** C++: auto x = new User(); auto x = User();
 * Infers the variable's type from its initializer when the declared type is auto.
 * `classNames` is consulted to tell constructor calls apart from plain function calls. */
const extractInitializer: InitializerExtractor = (node: SyntaxNode, env: Map<string, string>, classNames: ClassNameLookup): void => {
  const typeNode = node.childForFieldName('type');
  if (!typeNode) return;

  // Only handle auto/placeholder — typed declarations are handled by extractDeclaration
  if (!isAutoTypeNode(typeNode)) return;

  const declarator = node.childForFieldName('declarator');
  if (!declarator) return;

  // Must be an init_declarator (i.e., has an initializer value)
  if (declarator.type !== 'init_declarator') return;
  const value = declarator.childForFieldName('value');
  if (!value) return;

  // Resolve the variable name, unwrapping pointer/reference declarators
  const nameNode = declarator.childForFieldName('declarator');
  if (!nameNode) return;
  const finalName = unwrapPointerOrReferenceDeclarator(nameNode);
  if (!finalName) return;
  const varName = extractVarName(finalName);
  if (!varName) return;

  // auto x = new User() — new_expression
  if (value.type === 'new_expression') {
    const ctorType = value.childForFieldName('type');
    if (ctorType) {
      const typeName = extractSimpleTypeName(ctorType);
      if (typeName) env.set(varName, typeName);
    }
    return;
  }

  // auto x = User() — call_expression where function is a type name
  // tree-sitter-cpp may parse the constructor name as type_identifier or identifier.
  // For plain identifiers, verify against known class names from the file's AST
  // to distinguish constructor calls (User()) from function calls (getUser()).
  if (value.type === 'call_expression') {
    const func = value.childForFieldName('function');
    if (!func) return;
    if (func.type === 'type_identifier') {
      const typeName = func.text;
      if (typeName) env.set(varName, typeName);
    } else if (func.type === 'identifier') {
      const text = func.text;
      if (text && classNames.has(text)) env.set(varName, text);
    } else {
      // auto x = std::make_shared<Dog>() — smart pointer factory via template_function.
      // AST: call_expression > function: qualified_identifier > template_function
      //   or: call_expression > function: template_function (unqualified)
      const templateFunc = func.type === 'template_function'
        ? func
        : (func.type === 'qualified_identifier' || func.type === 'scoped_identifier')
          ? func.namedChildren.find((c: any) => c.type === 'template_function') ?? null
          : null;
      if (templateFunc) {
        const nameNode = templateFunc.firstNamedChild;
        if (nameNode) {
          const funcName = (nameNode.type === 'qualified_identifier' || nameNode.type === 'scoped_identifier')
            ? nameNode.lastNamedChild?.text ?? ''
            : nameNode.text;
          if (SMART_PTR_FACTORIES.has(funcName)) {
            const typeName = extractFirstTemplateTypeArg(templateFunc);
            if (typeName) env.set(varName, typeName);
          }
        }
      }
    }
    return;
  }

  // auto x = User{} — compound_literal_expression (brace initialization)
  // AST: compound_literal_expression > type_identifier + initializer_list
  if (value.type === 'compound_literal_expression') {
    const typeId = value.firstNamedChild;
    const typeName = typeId ? extractSimpleTypeName(typeId) : undefined;
    if (typeName) env.set(varName, typeName);
  }
};

/** C/C++: parameter_declaration → type declarator
 * Binds the parameter name to the simple name of its type; other node shapes fall back
 * to generic `name`/`pattern` + `type` fields. */
const extractParameter: ParameterExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  let nameNode: SyntaxNode | null = null;
  let typeNode: SyntaxNode | null = null;

  if (node.type === 'parameter_declaration') {
    typeNode = node.childForFieldName('type');
    const declarator = node.childForFieldName('declarator');
    if (declarator) {
      nameNode = unwrapPointerOrReferenceDeclarator(declarator);
    }
  } else {
    nameNode = node.childForFieldName('name') ?? node.childForFieldName('pattern');
    typeNode = node.childForFieldName('type');
  }

  if (!nameNode || !typeNode) return;
  const varName = extractVarName(nameNode);
  const typeName = extractSimpleTypeName(typeNode);
  if (varName && typeName) env.set(varName, typeName);
};

/** C/C++: auto x = User() / auto x = ns::User() where the callee is a plain or qualified
 * identifier (not type_identifier). Returns the unverified { varName, calleeName } pair,
 * or undefined when the node is not such a declaration or either name is missing. */
const scanConstructorBinding: ConstructorBindingScanner = (node) => {
  if (node.type !== 'declaration') return undefined;
  const typeNode = node.childForFieldName('type');
  if (!typeNode) return undefined;
  if (!isAutoTypeNode(typeNode)) return undefined;

  const declarator = node.childForFieldName('declarator');
  if (!declarator || declarator.type !== 'init_declarator') return undefined;
  const value = declarator.childForFieldName('value');
  if (!value || value.type !== 'call_expression') return undefined;
  const func = value.childForFieldName('function');
  if (!func) return undefined;

  // Callee name: last segment of a qualified name, or the identifier itself.
  let calleeName: string;
  if (func.type === 'qualified_identifier' || func.type === 'scoped_identifier') {
    const last = func.lastNamedChild;
    if (!last) return undefined;
    calleeName = last.text;
  } else if (func.type === 'identifier') {
    calleeName = func.text;
  } else {
    return undefined;
  }

  // Variable name resolution is shared by both callee shapes, so the empty-name guard
  // applies to qualified callees as well.
  const nameNode = declarator.childForFieldName('declarator');
  if (!nameNode) return undefined;
  const finalName = unwrapPointerOrReferenceDeclarator(nameNode);
  if (!finalName) return undefined;
  const varName = finalName.text;
  if (!varName) return undefined;
  return { varName, calleeName };
};

/** C++: auto alias = user → declaration with auto type + init_declarator where value is identifier.
 * Also reports field access, free-function call and receiver method call initializers.
 * Returns undefined when the variable is already typed in `scopeEnv` or the RHS shape is unsupported. */
const extractPendingAssignment: PendingAssignmentExtractor = (node, scopeEnv) => {
  if (node.type !== 'declaration') return undefined;
  const typeNode = node.childForFieldName('type');
  if (!typeNode) return undefined;
  // Only handle auto — typed declarations already resolved by extractDeclaration
  if (!isAutoTypeNode(typeNode)) return undefined;

  const declarator = node.childForFieldName('declarator');
  if (!declarator || declarator.type !== 'init_declarator') return undefined;
  const value = declarator.childForFieldName('value');
  if (!value) return undefined;

  const nameNode = declarator.childForFieldName('declarator');
  if (!nameNode) return undefined;
  const finalName = unwrapPointerOrReferenceDeclarator(nameNode);
  if (!finalName) return undefined;
  const lhs = extractVarName(finalName);
  if (!lhs || scopeEnv.has(lhs)) return undefined;

  if (value.type === 'identifier') return { kind: 'copy', lhs, rhs: value.text };

  // field_expression RHS → fieldAccess (a.field)
  if (value.type === 'field_expression') {
    const obj = value.firstNamedChild;
    const field = value.lastNamedChild;
    if (obj?.type === 'identifier' && field?.type === 'field_identifier') {
      return { kind: 'fieldAccess', lhs, receiver: obj.text, field: field.text };
    }
  }

  // call_expression RHS
  if (value.type === 'call_expression') {
    const funcNode = value.childForFieldName('function');
    if (funcNode?.type === 'identifier') {
      return { kind: 'callResult', lhs, callee: funcNode.text };
    }
    // method call with receiver: call_expression → function: field_expression
    if (funcNode?.type === 'field_expression') {
      const obj = funcNode.firstNamedChild;
      const field = funcNode.lastNamedChild;
      if (obj?.type === 'identifier' && field?.type === 'field_identifier') {
        return { kind: 'methodCallResult', lhs, receiver: obj.text, method: field.text };
      }
    }
  }
  return undefined;
};

// --- For-loop Tier 1c ---

const FOR_LOOP_NODE_TYPES: ReadonlySet<string> = new Set(['for_range_loop']);

/** Extract template type arguments from a C++ template_type node.
 * C++ template_type uses template_argument_list (not type_arguments), and each
 * argument is a type_descriptor with a 'type' field containing the type_specifier.
*/
const extractCppTemplateTypeArgs = (templateTypeNode: SyntaxNode): string[] => {
  const argList = templateTypeNode.childForFieldName('arguments');
  if (!argList || argList.type !== 'template_argument_list') return [];

  const names: string[] = [];
  const total = argList.namedChildCount;
  let index = 0;
  while (index < total) {
    const candidate = argList.namedChild(index);
    index += 1;
    if (!candidate) continue;
    // A type_descriptor keeps the real type specifier in its 'type' field;
    // when that field is absent the descriptor itself is used.
    const unwrapped = candidate.type === 'type_descriptor'
      ? candidate.childForFieldName('type') ?? candidate
      : candidate;
    const simpleName = extractSimpleTypeName(unwrapped);
    if (simpleName) names.push(simpleName);
  }
  return names;
};

/** Resolve the element type named by a C++ type annotation node.
 * - template_type (vector<User>, map<string, User>): the first or last template
 *   argument, chosen by `pos`.
 * - reference/pointer/type_descriptor wrappers and qualified/scoped names
 *   (std::vector<User>): recurse into the last named child.
 * Gives up with undefined for any other node type, or once `depth` exceeds 50. */
const extractCppElementTypeFromTypeNode = (typeNode: SyntaxNode, pos: TypeArgPosition = 'last', depth = 0): string | undefined => {
  if (depth > 50) return undefined;

  switch (typeNode.type) {
    case 'template_type': {
      const typeArgs = extractCppTemplateTypeArgs(typeNode);
      if (typeArgs.length < 1) return undefined;
      return pos === 'first' ? typeArgs[0] : typeArgs[typeArgs.length - 1];
    }
    case 'reference_type':
    case 'pointer_type':
    case 'type_descriptor':
    case 'qualified_identifier':
    case 'scoped_type_identifier': {
      const wrapped = typeNode.lastNamedChild;
      return wrapped ? extractCppElementTypeFromTypeNode(wrapped, pos, depth + 1) : undefined;
    }
    default:
      return undefined;
  }
};

/** Walk up from a for-range-loop to the enclosing function_definition and search parameters
 * for one named `iterableName`.
Returns the element type from its annotation.
 *
 * Declarators are unwrapped through every pointer/reference layer using the grammar's
 * `declarator` field (falling back to the last named child), so qualified pointers
 * (`T* const p`), nested pointers (`T** p`) and functions whose own declarator is
 * wrapped (`T* f(...)`, `T& f(...)`) are matched. Parameters with default values
 * (optional_parameter_declaration) are searched too.
 *
 * Only the nearest enclosing function_definition is searched. Returns undefined when
 * it declares no parameter named `iterableName` or the parameter's type yields no
 * element type. `pos` selects the first or last template argument.
 */
const findCppParamElementType = (iterableName: string, startNode: SyntaxNode, pos: TypeArgPosition = 'last'): string | undefined => {
  // Peel pointer/reference declarator wrappers until the wrapped declarator is reached.
  // pointer_declarator exposes the wrapped node as field 'declarator' (type qualifiers may
  // precede it); reference_declarator has no field, so its last named child is used.
  const peelDeclarator = (declNode: SyntaxNode): SyntaxNode => {
    let layer = declNode;
    while (layer.type === 'pointer_declarator' || layer.type === 'reference_declarator') {
      const inner = layer.childForFieldName('declarator') ?? layer.lastNamedChild;
      if (!inner) break;
      layer = inner;
    }
    return layer;
  };

  let current: SyntaxNode | null = startNode.parent;
  while (current) {
    if (current.type === 'function_definition') {
      // function_definition > declarator (function_declarator) > parameters (parameter_list)
      const fnDeclarator = current.childForFieldName('declarator');
      const paramsNode = fnDeclarator
        ? peelDeclarator(fnDeclarator).childForFieldName('parameters')
        : null;
      if (paramsNode) {
        for (let i = 0; i < paramsNode.namedChildCount; i++) {
          const param = paramsNode.namedChild(i);
          if (!param) continue;
          if (param.type !== 'parameter_declaration' && param.type !== 'optional_parameter_declaration') continue;
          const paramDeclarator = param.childForFieldName('declarator');
          if (!paramDeclarator) continue;
          // Unwrap reference/pointer declarators: vector<User>& users → users
          if (peelDeclarator(paramDeclarator).text !== iterableName) continue;
          const typeNode = param.childForFieldName('type');
          if (typeNode) return extractCppElementTypeFromTypeNode(typeNode, pos);
        }
      }
      // Stop at the nearest enclosing function; outer scopes are not searched.
      break;
    }
    current = current.parent;
  }
  return undefined;
};

/** C++: for (auto& user : users) — extract loop variable binding.
 * Handles explicit types (for (User& user : users)) and auto (for (auto& user : users)).
 * For auto, resolves element type from the iterable's container type.
*/
const extractForLoopBinding: ForLoopExtractor = (node, { scopeEnv, declarationTypeNodes, scope }): void => {
  if (node.type !== 'for_range_loop') return;

  const typeNode = node.childForFieldName('type');
  const declaratorNode = node.childForFieldName('declarator');
  const rightNode = node.childForFieldName('right');
  if (!typeNode || !declaratorNode || !rightNode) return;

  // Unwrap reference/pointer declarator to get the loop variable name
  let nameNode = declaratorNode;
  if (nameNode.type === 'reference_declarator' || nameNode.type === 'pointer_declarator') {
    nameNode = nameNode.firstNamedChild ?? nameNode;
  }

  // Handle structured bindings: auto& [key, value] or auto [key, value]
  // Bind the last identifier (value heuristic for [key, value] patterns).
  // nameNode is either declaratorNode itself or its unwrapped child, so this one check
  // covers both the bare and the reference-wrapped form.
  let loopVarName: string | undefined;
  if (nameNode.type === 'structured_binding_declarator') {
    const lastChild = nameNode.lastNamedChild;
    if (lastChild?.type === 'identifier') {
      loopVarName = lastChild.text;
    }
  }

  const varName = loopVarName ?? extractVarName(nameNode);
  if (!varName) return;

  // Check if the type is auto/placeholder — if not, use the explicit type directly
  const isAuto = typeNode.type === 'placeholder_type_specifier'
    || typeNode.text === 'auto'
    || typeNode.text === 'const auto'
    || typeNode.text === 'decltype(auto)';

  if (!isAuto) {
    // Explicit type: for (User& user : users) — extract directly
    const typeName = extractSimpleTypeName(typeNode);
    if (typeName) scopeEnv.set(varName, typeName);
    return;
  }

  // auto/const auto/auto& — resolve from the iterable's container type
  // Extract iterable name + optional method
  let iterableName: string | undefined;
  let methodName: string | undefined;
  if (rightNode.type === 'identifier') {
    iterableName = rightNode.text;
  } else if (rightNode.type === 'field_expression') {
    const prop = rightNode.lastNamedChild;
    if (prop) iterableName = prop.text;
  } else if (rightNode.type === 'call_expression') {
    // users.begin() is NOT used in range-for, but container.items() etc. might be
    const fieldExpr = rightNode.childForFieldName('function');
    if (fieldExpr?.type === 'field_expression') {
      const obj = fieldExpr.firstNamedChild;
      if (obj?.type === 'identifier') iterableName = obj.text;
      const field = fieldExpr.lastNamedChild;
      if (field?.type === 'field_identifier') methodName = field.text;
    }
  } else if (rightNode.type === 'pointer_expression') {
    // Dereference: for (auto& user : *ptr) → pointer_expression > identifier
    // Only handles simple *identifier; *this->field and **ptr are not resolved.
    const operand = rightNode.lastNamedChild;
    if (operand?.type === 'identifier') iterableName = operand.text;
  }
  if (!iterableName) return;

  const containerTypeName = scopeEnv.get(iterableName);
  const typeArgPos = methodToTypeArgPosition(methodName, containerTypeName);
  const elementType = resolveIterableElementType(
    iterableName, node, scopeEnv, declarationTypeNodes, scope,
    extractCppElementTypeFromTypeNode, findCppParamElementType,
    typeArgPos,
  );
  if (elementType) scopeEnv.set(varName, elementType);
};

/** Infer the type of a literal AST node for C++ overload disambiguation.
 * Returns 'float' | 'double' | 'long' | 'int' for number literals, 'string', 'char',
 * 'bool', 'null', or undefined for non-literal nodes.
 *
 * Hexadecimal literals are classified separately: in `0xFF` / `0xFE` the letters are
 * digits, not a float suffix or exponent. A hex literal is floating only when it has a
 * binary exponent (`p`/`P`), e.g. `0x1.8p3`. */
const inferLiteralType: LiteralTypeInferrer = (node) => {
  switch (node.type) {
    case 'number_literal': {
      const t = node.text;
      const isHex = t.startsWith('0x') || t.startsWith('0X');
      if (isHex) {
        // Hex floating literal: the exponent marker is mandatory, so a trailing f/F
        // after it is a genuine suffix.
        if (t.includes('p') || t.includes('P')) {
          return t.endsWith('f') || t.endsWith('F') ? 'float' : 'double';
        }
      } else {
        // Float suffixes
        if (t.endsWith('f') || t.endsWith('F')) return 'float';
        if (t.includes('.') || t.includes('e') || t.includes('E')) return 'double';
      }
      // Long suffix (l, L, ll, LL), optionally followed by an unsigned marker (LU, llu)
      if (/[lL]{1,2}[uU]?$/.test(t)) return 'long';
      return 'int';
    }
    case 'string_literal':
    case 'raw_string_literal':
    case 'concatenated_string':
      return 'string';
    case 'char_literal':
      return 'char';
    case 'true':
    case 'false':
      return 'bool';
    case 'null':
    case 'nullptr':
      return 'null';
    default:
      return undefined;
  }
};

/** C++: detect constructor type from smart pointer factory calls (make_shared<Dog>()).
 * Extracts the template type argument as the constructor type for virtual dispatch.
 * Accepts both call shapes:
 *   call_expression > function: template_function                        (make_shared<Dog>())
 *   call_expression > function: qualified_identifier > template_function  (std::make_shared<Dog>())
 * Returns undefined for anything that is not a known smart pointer factory call. */
const detectCppConstructorType: ConstructorTypeDetector = (node, classNames) => {
  // Navigate to the initializer value in the declaration
  const declarator = node.childForFieldName('declarator');
  const initDecl = declarator?.type === 'init_declarator' ? declarator : undefined;
  if (!initDecl) return undefined;
  const value = initDecl.childForFieldName('value');
  if (!value || value.type !== 'call_expression') return undefined;

  // Locate the template_function, directly or nested inside a qualified name
  const func = value.childForFieldName('function');
  if (!func) return undefined;
  const templateFunc = func.type === 'template_function'
    ? func
    : (func.type === 'qualified_identifier' || func.type === 'scoped_identifier')
      ? func.namedChildren.find((c: any) => c.type === 'template_function') ?? null
      : null;
  if (!templateFunc) return undefined;

  // Extract function name (possibly qualified: std::make_shared)
  const nameNode = templateFunc.firstNamedChild;
  if (!nameNode) return undefined;
  let funcName: string;
  if (nameNode.type === 'qualified_identifier' || nameNode.type === 'scoped_identifier') {
    funcName = nameNode.lastNamedChild?.text ?? '';
  } else {
    funcName = nameNode.text;
  }
  if (!SMART_PTR_FACTORIES.has(funcName)) return undefined;

  // Extract template type argument
  return extractFirstTemplateTypeArg(templateFunc);
};

/** Unwrap a C++ smart pointer declared type to its inner template type.
 * E.g., shared_ptr<Animal> → Animal. Returns the original name if not a smart pointer. */
const unwrapCppDeclaredType: DeclaredTypeUnwrapper = (declaredType, typeNode) => {
  if (!SMART_PTR_WRAPPERS.has(declaredType)) return declaredType;
  if (typeNode.type !== 'template_type') return declaredType;
  return extractFirstTemplateTypeArg(typeNode) ?? declaredType;
};

export const typeConfig: LanguageTypeConfig = {
  declarationNodeTypes: DECLARATION_NODE_TYPES,
  forLoopNodeTypes: FOR_LOOP_NODE_TYPES,
  extractDeclaration,
  extractParameter,
  extractInitializer,
  scanConstructorBinding,
  extractForLoopBinding,
  extractPendingAssignment,
  inferLiteralType,
  detectConstructorType: detectCppConstructorType,
  unwrapDeclaredType: unwrapCppDeclaredType,
};

================================================
FILE: gitnexus/src/core/ingestion/type-extractors/csharp.ts
================================================
import type { SyntaxNode } from '../utils.js';
import type { ConstructorBindingScanner, ForLoopExtractor, LanguageTypeConfig, ParameterExtractor, TypeBindingExtractor, PendingAssignmentExtractor, PatternBindingExtractor, LiteralTypeInferrer } from './types.js';
import { extractSimpleTypeName, extractVarName, findChildByType, unwrapAwait, extractGenericTypeArgs, resolveIterableElementType, methodToTypeArgPosition, extractElementTypeFromString, type TypeArgPosition } from './shared.js';

/** Known container property accessors that operate on the container itself (e.g., dict.Keys, dict.Values) */
const KNOWN_CONTAINER_PROPS: ReadonlySet<string> = new Set(['Keys', 'Values']);

/** AST node types treated as variable declarations for C#. */
const DECLARATION_NODE_TYPES: ReadonlySet<string> = new Set([
  'local_declaration_statement',
  'variable_declaration',
  'field_declaration',
]);

/** C#: Type x = ...; var x = new Type();
 * Binds every declarator in the declaration to the declared type. For `var`, the type is
 * inferred only from a single `new Type()` initializer. */
const extractDeclaration: TypeBindingExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  // C# tree-sitter: local_declaration_statement > variable_declaration > ...
  // Recursively descend through wrapper nodes
  for (let i = 0; i < node.namedChildCount; i++) {
    const child = node.namedChild(i);
    if (!child) continue;
    if (child.type === 'variable_declaration' || child.type === 'local_declaration_statement') {
      extractDeclaration(child, env);
      return;
    }
  }

  // At variable_declaration level: first child is type, rest are variable_declarators
  let typeNode: SyntaxNode | null = null;
  const declarators: SyntaxNode[] = [];

  for (let i = 0; i < node.namedChildCount; i++) {
    const child = node.namedChild(i);
    if (!child) continue;

    if (!typeNode && child.type !== 'variable_declarator' && child.type !== 'equals_value_clause') {
      // First non-declarator child is the type (identifier, implicit_type, generic_name, etc.)
      typeNode = child;
    }
    if (child.type === 'variable_declarator') {
      declarators.push(child);
    }
  }

  if (!typeNode || declarators.length === 0) return;

  // Handle 'var x = new Foo()' — infer from object_creation_expression
  let typeName: string | undefined;
  if (typeNode.type === 'implicit_type' && typeNode.text === 'var') {
    // Try to infer from initializer: var x = new Foo()
    // tree-sitter-c-sharp may put object_creation_expression as direct child
    // or inside equals_value_clause depending on grammar version
    if (declarators.length === 1) {
      const initializer = findChildByType(declarators[0], 'object_creation_expression')
        ?? findChildByType(declarators[0], 'equals_value_clause')?.firstNamedChild;
      if (initializer?.type === 'object_creation_expression') {
        const ctorType = initializer.childForFieldName('type');
        if (ctorType) typeName = extractSimpleTypeName(ctorType);
      }
    }
  } else {
    typeName = extractSimpleTypeName(typeNode);
  }

  if (!typeName) return;
  for (const decl of declarators) {
    const nameNode = decl.childForFieldName('name') ?? decl.firstNamedChild;
    if (nameNode) {
      const varName = extractVarName(nameNode);
      if (varName) env.set(varName, typeName);
    }
  }
};

/** C#: parameter → type name */
const extractParameter: ParameterExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  let nameNode: SyntaxNode | null = null;
  let typeNode: SyntaxNode | null = null;

  if (node.type === 'parameter') {
    typeNode = node.childForFieldName('type');
    nameNode = node.childForFieldName('name');
  } else {
    nameNode = node.childForFieldName('name') ?? node.childForFieldName('pattern');
    typeNode = node.childForFieldName('type');
  }

  if (!nameNode || !typeNode) return;
  const varName = extractVarName(nameNode);
  const typeName = extractSimpleTypeName(typeNode);
  if (varName && typeName) env.set(varName, typeName);
};

/** C#: var x = SomeFactory(...) → bind x to SomeFactory (constructor-like call) */
const scanConstructorBinding: ConstructorBindingScanner = (node) => {
  if (node.type !== 'variable_declaration') return undefined;

  // Find type and declarator children by iterating (C# grammar doesn't expose 'type' as a named field)
  let typeNode: SyntaxNode | null = null;
  let declarator: SyntaxNode | null = null;
  for (let i = 0; i < node.namedChildCount; i++) {
    const child = node.namedChild(i);
    if (!child) continue;
    if (child.type === 'variable_declarator') {
      if (!declarator) declarator = child;
    } else if (!typeNode) {
      typeNode = child;
    }
  }

  // Only handle implicit_type (var) — explicit types handled by extractDeclaration
  if (!typeNode || typeNode.type !== 'implicit_type') return undefined;
  if (!declarator) return undefined;

  const nameNode = declarator.childForFieldName('name') ?? declarator.firstNamedChild;
  if (!nameNode || nameNode.type !== 'identifier') return undefined;

  // Find the initializer value: either inside equals_value_clause or as a direct child
  // (tree-sitter-c-sharp puts invocation_expression directly inside variable_declarator)
  let value: SyntaxNode | null = null;
  for (let i = 0; i < declarator.namedChildCount; i++) {
    const child = declarator.namedChild(i);
    if (!child) continue;
    if (child.type === 'equals_value_clause') {
      value = child.firstNamedChild;
      break;
    }
    if (child.type === 'invocation_expression' || child.type === 'object_creation_expression' || child.type === 'await_expression') {
      value = child;
      break;
    }
  }
  if (!value) return undefined;

  // Unwrap await: `var user = await svc.GetUserAsync()` → await_expression wraps invocation_expression
  value = unwrapAwait(value);
  if (!value) return undefined;

  // Skip object_creation_expression (new User()) — handled by extractInitializer
  if (value.type === 'object_creation_expression') return undefined;
  if (value.type !== 'invocation_expression') return undefined;

  const func = value.firstNamedChild;
  if (!func) return undefined;
  const calleeName = extractSimpleTypeName(func);
  if (!calleeName) return undefined;
  return { varName: nameNode.text, calleeName };
};

const FOR_LOOP_NODE_TYPES: ReadonlySet<string> = new Set([
  'foreach_statement',
]);

/** Extract element type from a C# type annotation AST node.
 * Handles generic_name (List<User>), array_type (User[]), nullable_type (?).
 * `pos` selects which type arg: 'first' for keys, 'last' for values (default).
*/
const extractCSharpElementTypeFromTypeNode = (typeNode: SyntaxNode, pos: TypeArgPosition = 'last', depth = 0): string | undefined => {
  // Recursion guard for pathologically nested nullable wrappers.
  if (depth > 50) return undefined;

  switch (typeNode.type) {
    // List<User>, IEnumerable<User>, Dictionary<string, User>
    // (C# uses generic_name, not generic_type)
    case 'generic_name': {
      const typeArgs = findChildByType(typeNode, 'type_argument_list');
      if (!typeArgs || typeArgs.namedChildCount < 1) return undefined;
      const wantedIndex = pos === 'first' ? 0 : typeArgs.namedChildCount - 1;
      const chosenArg = typeArgs.namedChild(wantedIndex);
      return chosenArg ? extractSimpleTypeName(chosenArg) : undefined;
    }
    // User[]
    case 'array_type': {
      const elementNode = typeNode.firstNamedChild;
      return elementNode ? extractSimpleTypeName(elementNode) : undefined;
    }
    // List<User>? → List<User> → User
    case 'nullable_type': {
      const underlying = typeNode.firstNamedChild;
      return underlying
        ? extractCSharpElementTypeFromTypeNode(underlying, pos, depth + 1)
        : undefined;
    }
    default:
      return undefined;
  }
};

/** Walk up from a foreach to the enclosing method and search parameters.
*/
const findCSharpParamElementType = (iterableName: string, startNode: SyntaxNode, pos: TypeArgPosition = 'last'): string | undefined => {
  // Climb to the nearest enclosing method or local function; only that
  // declaration's parameter list is inspected.
  for (let ancestor: SyntaxNode | null = startNode.parent; ancestor; ancestor = ancestor.parent) {
    const ownsParameters =
      ancestor.type === 'method_declaration' || ancestor.type === 'local_function_statement';
    if (!ownsParameters) continue;

    const parameterList = ancestor.childForFieldName('parameters');
    if (!parameterList) return undefined;

    for (let idx = 0; idx < parameterList.namedChildCount; idx++) {
      const candidate = parameterList.namedChild(idx);
      if (!candidate || candidate.type !== 'parameter') continue;
      if (candidate.childForFieldName('name')?.text !== iterableName) continue;
      const declaredType = candidate.childForFieldName('type');
      if (declaredType) return extractCSharpElementTypeFromTypeNode(declaredType, pos);
    }
    // The walk never continues past the first enclosing function-like node.
    return undefined;
  }
  return undefined;
};

/**
 * C#: bind the loop variable of a `foreach` statement.
 *
 * - Explicit type (`foreach (User user in users)`): the declared type is recorded.
 * - Tier 1c, implicit type (`foreach (var user in users)`): the element type is
 *   resolved from the iterable expression (identifier, member access, or call).
 *
 * Writes at most one entry into `scopeEnv`; returns nothing.
 */
const extractForLoopBinding: ForLoopExtractor = (node, { scopeEnv, declarationTypeNodes, scope, returnTypeLookup }): void => {
  const declaredType = node.childForFieldName('type');
  const loopVarNode = node.childForFieldName('left');
  if (!declaredType || !loopVarNode) return;

  const varName = extractVarName(loopVarNode);
  if (!varName) return;

  // Anything other than the literal `var` keyword is an explicit type annotation.
  const isImplicitVar = declaredType.type === 'implicit_type' && declaredType.text === 'var';
  if (!isImplicitVar) {
    const explicitType = extractSimpleTypeName(declaredType);
    if (explicitType) scopeEnv.set(varName, explicitType);
    return;
  }

  // Tier 1c: work out which variable (and optionally which member) is iterated.
  const iterable = node.childForFieldName('right');
  let iterableName: string | undefined;
  let methodName: string | undefined;
  let callExprElementType: string | undefined;

  if (iterable?.type === 'identifier') {
    iterableName = iterable.text;
  } else if (iterable?.type === 'member_access_expression') {
    const receiver = iterable.childForFieldName('expression');
    const member = iterable.childForFieldName('name');
    const memberName = member?.type === 'identifier' ? member.text : undefined;
    if (memberName) {
      if (!KNOWN_CONTAINER_PROPS.has(memberName)) {
        // Bare member access (this.users, repo.users): look the member itself up in scopeEnv.
        iterableName = memberName;
      } else {
        // Container property (data.Keys, data.Values): the receiver is the container.
        methodName = memberName;
        if (receiver?.type === 'identifier') {
          iterableName = receiver.text;
        } else if (receiver?.type === 'member_access_expression') {
          // Nested access (this.data.Values): receiver is `this.data`, keep `data`.
          const innerName = receiver.childForFieldName('name');
          if (innerName) iterableName = innerName.text;
        }
      }
    }
  } else if (iterable?.type === 'invocation_expression') {
    const callee = iterable.firstNamedChild;
    if (callee?.type === 'identifier') {
      // Direct call (GetUsers()): derive the element type from the declared return type.
      const rawReturn = returnTypeLookup.lookupRawReturnType(callee.text);
      if (rawReturn) callExprElementType = extractElementTypeFromString(rawReturn);
    } else if (callee?.type === 'member_access_expression') {
      // Method call on a receiver (data.Select(...)).
      const receiver = callee.childForFieldName('expression');
      const member = callee.childForFieldName('name');
      if (receiver?.type === 'identifier') iterableName = receiver.text;
      if (member?.type === 'identifier') methodName = member.text;
    }
  }

  if (!iterableName && !callExprElementType) return;

  let elementType = callExprElementType;
  if (!elementType) {
    // iterableName is guaranteed truthy here by the guard above.
    const containerTypeName = scopeEnv.get(iterableName!);
    const typeArgPos = methodToTypeArgPosition(methodName, containerTypeName);
    elementType = resolveIterableElementType(
      iterableName!,
      node,
      scopeEnv,
      declarationTypeNodes,
      scope,
      extractCSharpElementTypeFromTypeNode,
      findCSharpParamElementType,
      typeArgPos,
    );
  }

  if (elementType) scopeEnv.set(varName, elementType);
};

/**
 * C# pattern binding extractor for `obj is Type variable` (type pattern).
 *
 * AST structure:
 *   is_pattern_expression
 *     expression: (the variable being tested)
 *     pattern: declaration_pattern
 *       type: (the declared type)
 *       name: single_variable_designation > identifier (the new variable name)
 *
 * Conservative: returns undefined when the pattern field is absent, is not a
 * declaration_pattern, or when the type/name cannot be extracted.
 * No scopeEnv lookup is needed — the pattern explicitly declares the new variable's type.
 */

/** Node types at which the upward search for an enclosing `if_statement` gives up. */
const CSHARP_IF_SEARCH_BOUNDARIES: ReadonlySet<string> = new Set([
  'block',
  'method_declaration',
  'constructor_declaration',
  'local_function_statement',
  'lambda_expression',
]);

/**
 * Locate the truthy-branch body of the `if` statement enclosing a C# null-check.
 *
 * Walks up from `expr`. On reaching an `if_statement`, returns its
 * `consequence` field, or failing that its first `block` child. Returns
 * undefined when a block or function-like boundary is reached first, or when
 * the root is reached.
 */
const findCSharpIfConsequenceBlock = (expr: SyntaxNode): SyntaxNode | undefined => {
  for (let ancestor = expr.parent; ancestor; ancestor = ancestor.parent) {
    if (ancestor.type === 'if_statement') {
      const consequence = ancestor.childForFieldName('consequence');
      if (consequence) return consequence;
      // Fallback when the 'consequence' field is not exposed: first block child.
      for (let idx = 0; idx < ancestor.childCount; idx++) {
        const candidate = ancestor.child(idx);
        if (candidate?.type === 'block') return candidate;
      }
      return undefined;
    }
    if (CSHARP_IF_SEARCH_BOUNDARIES.has(ancestor.type)) return undefined;
  }
  return undefined;
};

/** Check if a C# declaration type node represents a nullable type.
 *  Checks for nullable_type AST node or '?' in the type text (e.g., User?).
*/
const isCSharpNullableDecl = (declTypeNode: SyntaxNode): boolean =>
  declTypeNode.type === 'nullable_type' || declTypeNode.text.includes('?');

/**
 * C# pattern / null-check extractor.
 *
 * Handles three node shapes:
 * - `is_pattern_expression`: `obj is User user` (type pattern) and
 *   `x is not null` (null-check narrowing).
 * - standalone `declaration_pattern` / `recursive_pattern`, as found in switch
 *   statements and switch expressions (`case User u:`, `User { Name: "Alice" } u =>`).
 * - `binary_expression` with `!=` against `null` (`x != null` or `null != x`).
 *
 * Type patterns yield `{ varName, typeName }`. Null-checks yield the variable's
 * already-resolved type plus a `narrowingRange` covering the enclosing if-body,
 * and only when the variable's declaration was nullable. Returns undefined in
 * every other case.
 */
const extractPatternBinding: PatternBindingExtractor = (node, scopeEnv, declarationTypeNodes, scope) => {
  /** `Type name` pattern → binding for the variable the pattern introduces. */
  const bindTypePattern = (patternNode: SyntaxNode) => {
    const typeNode = patternNode.childForFieldName('type');
    const nameNode = patternNode.childForFieldName('name');
    if (!typeNode || !nameNode) return undefined;
    const typeName = extractSimpleTypeName(typeNode);
    const varName = extractVarName(nameNode);
    if (!typeName || !varName) return undefined;
    return { varName, typeName };
  };

  /** Null-check on `varName` → same type, scoped to the enclosing if-body. */
  const narrowNonNull = (varName: string) => {
    const resolvedType = scopeEnv.get(varName);
    if (!resolvedType) return undefined;
    // Only narrow when the original declaration was nullable.
    const declTypeNode = declarationTypeNodes.get(`${scope}\0${varName}`);
    if (!declTypeNode || !isCSharpNullableDecl(declTypeNode)) return undefined;
    const ifBody = findCSharpIfConsequenceBlock(node);
    if (!ifBody) return undefined;
    return {
      varName,
      typeName: resolvedType,
      narrowingRange: { startIndex: ifBody.startIndex, endIndex: ifBody.endIndex },
    };
  };

  const isNullNode = (candidate: SyntaxNode): boolean =>
    candidate.type === 'null_literal' || candidate.text === 'null';

  switch (node.type) {
    case 'is_pattern_expression': {
      const pattern = node.childForFieldName('pattern');
      if (!pattern) return undefined;

      if (pattern.type === 'declaration_pattern' || pattern.type === 'recursive_pattern') {
        return bindTypePattern(pattern);
      }

      // `x is not null`: negated_pattern > constant_pattern > null_literal
      if (pattern.type !== 'negated_pattern') return undefined;
      const negated = pattern.firstNamedChild;
      if (!negated || negated.type !== 'constant_pattern') return undefined;
      const literal = negated.firstNamedChild ?? negated.firstChild;
      if (!literal || !isNullNode(literal)) return undefined;

      const subject = node.childForFieldName('expression');
      if (!subject || subject.type !== 'identifier') return undefined;
      return narrowNonNull(subject.text);
    }

    case 'declaration_pattern':
    case 'recursive_pattern':
      // Both pattern kinds expose the same 'type' and 'name' fields.
      return bindTypePattern(node);

    case 'binary_expression': {
      // The operator is an anonymous child; require `!=` specifically.
      const hasNotEquals = node.children.some(c => !c.isNamed && c.text === '!=');
      if (!hasNotEquals) return undefined;
      const lhs = node.namedChild(0);
      const rhs = node.namedChild(1);
      if (!lhs || !rhs) return undefined;
      // Accept either operand order: `x != null` or `null != x`.
      if (lhs.type === 'identifier' && isNullNode(rhs)) return narrowNonNull(lhs.text);
      if (rhs.type === 'identifier' && isNullNode(lhs)) return narrowNonNull(rhs.text);
      return undefined;
    }

    default:
      return undefined;
  }
};

/**
 * C#: `var alias = u` — classify the initializer of a `variable_declarator`.
 *
 * Scans the named children of `node` for declarators whose name is not yet in
 * `scopeEnv` and returns the first recognisable assignment:
 * - identifier RHS               → `copy`
 * - `a.Field`                    → `fieldAccess`
 * - `Foo()` / `await Foo()`      → `callResult`
 * - `a.GetC()` / `await a.GetC()`→ `methodCallResult`
 *
 * `is_pattern_expression` and `field_declaration` nodes are rejected up front.
 * Returns undefined when nothing matches.
 */
const extractPendingAssignment: PendingAssignmentExtractor = (node, scopeEnv) => {
  if (node.type === 'is_pattern_expression' || node.type === 'field_declaration') return undefined;

  for (let idx = 0; idx < node.namedChildCount; idx++) {
    const declarator = node.namedChild(idx);
    if (!declarator || declarator.type !== 'variable_declarator') continue;

    const nameNode = declarator.childForFieldName('name');
    if (!nameNode) continue;
    const lhs = nameNode.text;
    if (scopeEnv.has(lhs)) continue;

    // The value normally sits inside an equals_value_clause; otherwise fall
    // back to the declarator's last named child.
    const valueClause = declarator.children.find(c => c.type === 'equals_value_clause');
    const valueNode = valueClause?.firstNamedChild ?? declarator.namedChild(declarator.namedChildCount - 1);
    if (!valueNode) continue;

    const isPlainIdentifier = valueNode.type === 'identifier' || valueNode.type === 'simple_identifier';
    // The fallback can land on the name itself when there is no initializer.
    if (isPlainIdentifier && valueNode !== nameNode) {
      return { kind: 'copy', lhs, rhs: valueNode.text };
    }

    if (valueNode.type === 'member_access_expression') {
      const receiver = valueNode.childForFieldName('expression');
      const member = valueNode.childForFieldName('name');
      if (receiver?.type === 'identifier' && member?.type === 'identifier') {
        return { kind: 'fieldAccess', lhs, receiver: receiver.text, field: member.text };
      }
    }

    // `await Foo()` is classified exactly like `Foo()`: peel one await layer first.
    const callNode = valueNode.type === 'await_expression' ? valueNode.firstNamedChild : valueNode;
    if (callNode?.type === 'invocation_expression') {
      const target = callNode.firstNamedChild;
      if (target?.type === 'identifier_name' || target?.type === 'identifier') {
        return { kind: 'callResult', lhs, callee: target.text };
      }
      if (target?.type === 'member_access_expression') {
        const receiver = target.childForFieldName('expression');
        const member = target.childForFieldName('name');
        if (receiver?.type === 'identifier' && member?.type === 'identifier') {
          return { kind: 'methodCallResult', lhs, receiver: receiver.text, method: member.text };
        }
      }
    }
  }

  return undefined;
};

/** Infer the type of a literal AST node for C# overload disambiguation.
*/
// Integer suffixes are case- and order-insensitive in C#: `U` → uint,
// `L` → long, `UL`/`LU` → ulong. Real suffixes: `f` → float, `m` → decimal,
// anything else (including `d`) → double. Returns undefined for non-literals.
const inferLiteralType: LiteralTypeInferrer = (node) => {
  switch (node.type) {
    case 'integer_literal': {
      const suffix = (/[uUlL]+$/.exec(node.text)?.[0] ?? '').toLowerCase();
      const isUnsigned = suffix.includes('u');
      const isLong = suffix.includes('l');
      if (isUnsigned) return isLong ? 'ulong' : 'uint';
      return isLong ? 'long' : 'int';
    }
    case 'real_literal':
      if (node.text.endsWith('f') || node.text.endsWith('F')) return 'float';
      if (node.text.endsWith('m') || node.text.endsWith('M')) return 'decimal';
      return 'double';
    case 'string_literal':
    case 'verbatim_string_literal':
    case 'raw_string_literal':
    case 'interpolated_string_expression':
      return 'string';
    case 'character_literal':
      return 'char';
    case 'boolean_literal':
      return 'bool';
    case 'null_literal':
      return 'null';
    default:
      return undefined;
  }
};

/** C# type-extraction configuration: node-type sets plus the extractor callbacks defined in this file. */
export const typeConfig: LanguageTypeConfig = {
  declarationNodeTypes: DECLARATION_NODE_TYPES,
  forLoopNodeTypes: FOR_LOOP_NODE_TYPES,
  patternBindingNodeTypes: new Set(['is_pattern_expression', 'declaration_pattern', 'recursive_pattern', 'binary_expression']),
  extractDeclaration,
  extractParameter,
  scanConstructorBinding,
  extractForLoopBinding,
  extractPendingAssignment,
  extractPatternBinding,
  inferLiteralType,
};

================================================
FILE: gitnexus/src/core/ingestion/type-extractors/go.ts
================================================
import type { SyntaxNode } from '../utils.js';
import type { ConstructorBindingScanner, ForLoopExtractor, LanguageTypeConfig, ParameterExtractor, TypeBindingExtractor, PendingAssignmentExtractor } from './types.js';
import { extractSimpleTypeName, extractVarName, extractElementTypeFromString, extractGenericTypeArgs, findChildByType, resolveIterableElementType, methodToTypeArgPosition, type TypeArgPosition } from './shared.js';

/** Go node types that are treated as variable declarations. */
const DECLARATION_NODE_TYPES: ReadonlySet<string> = new Set([
  'var_declaration',
  'var_spec',
  'short_var_declaration',
]);

/**
 * Collect every identifier a Go declaration places in front of `typeNode`.
 *
 * Grouped declarations such as `a, b User` repeat the `name` field, and
 * `childForFieldName('name')` only yields the first occurrence. Only direct
 * `identifier` children that start before the type node are returned, so
 * initializer expressions after the type are never picked up.
 */
const collectGoDeclaredNames = (declNode: SyntaxNode, typeNode: SyntaxNode): SyntaxNode[] => {
  const names: SyntaxNode[] = [];
  for (let i = 0; i < declNode.namedChildCount; i++) {
    const child = declNode.namedChild(i);
    if (child && child.type === 'identifier' && child.startIndex < typeNode.startIndex) {
      names.push(child);
    }
  }
  return names;
};

/**
 * Bind `primaryName` and every additional grouped name of `declNode` to the
 * simple type name of `typeNode`. Does nothing when the type cannot be reduced
 * to a simple name.
 */
const bindGoDeclaredNames = (
  declNode: SyntaxNode,
  primaryName: SyntaxNode,
  typeNode: SyntaxNode,
  env: Map<string, string>,
): void => {
  const typeName = extractSimpleTypeName(typeNode);
  if (!typeName) return;
  // Always bind the field-resolved name first.
  const primary = extractVarName(primaryName);
  if (primary) env.set(primary, typeName);
  // Then bind any grouped names; re-setting the primary is harmless.
  for (const extra of collectGoDeclaredNames(declNode, typeNode)) {
    const name = extractVarName(extra);
    if (name) env.set(name, typeName);
  }
};

/**
 * Go: `var x Foo` (and grouped `var a, b Foo`).
 * A `var_declaration` is unpacked into its direct `var_spec` children; a
 * `var_spec` without an explicit type is ignored.
 */
const extractGoVarDeclaration = (node: SyntaxNode, env: Map<string, string>): void => {
  if (node.type === 'var_declaration') {
    for (let i = 0; i < node.namedChildCount; i++) {
      const spec = node.namedChild(i);
      if (spec?.type === 'var_spec') extractGoVarDeclaration(spec, env);
    }
    return;
  }

  // var_spec: name[, name...] type [= value]
  const nameNode = node.childForFieldName('name');
  const typeNode = node.childForFieldName('type');
  if (!nameNode || !typeNode) return;
  bindGoDeclaredNames(node, nameNode, typeNode, env);
};

/** Flatten an `expression_list` into its named children; any other node becomes a one-element list. */
const flattenGoExpressionList = (node: SyntaxNode): SyntaxNode[] =>
  node.type === 'expression_list' ? node.namedChildren : [node];

/**
 * Pick the AST node that names the type produced by a short-var-declaration value:
 * - `User{}` / `&User{}`          → the composite literal's type
 * - `new(User)`                   → the first argument
 * - `make([]User, n)`             → the slice element type
 * - `make(map[K]User)`            → the map value type
 * - `iface.(User)`                → the asserted type
 * Returns null for anything else (including other calls).
 */
const goBindingTypeNode = (value: SyntaxNode): SyntaxNode | null => {
  // Unwrap the address-of operator around a composite literal.
  const operand = value.firstNamedChild;
  const target = value.type === 'unary_expression' && operand?.type === 'composite_literal' ? operand : value;

  switch (target.type) {
    case 'call_expression': {
      const callee = target.childForFieldName('function')?.text;
      const firstArg = target.childForFieldName('arguments')?.firstNamedChild ?? null;
      if (callee === 'new') return firstArg;
      if (callee === 'make' && firstArg) {
        if (firstArg.type === 'slice_type') return firstArg.childForFieldName('element');
        if (firstArg.type === 'map_type') return firstArg.childForFieldName('value');
      }
      return null;
    }
    case 'type_assertion_expression':
    case 'composite_literal':
      return target.childForFieldName('type');
    default:
      return null;
  }
};

/** Go: `x := Foo{...}` — infer types from the right-hand values, pairing LHS and RHS positionally. */
const extractGoShortVarDeclaration = (node: SyntaxNode, env: Map<string, string>): void => {
  const left = node.childForFieldName('left');
  const right = node.childForFieldName('right');
  if (!left || !right) return;

  const lhsNodes = flattenGoExpressionList(left);
  const rhsNodes = flattenGoExpressionList(right);

  // Only positions present on both sides can be paired.
  const count = Math.min(lhsNodes.length, rhsNodes.length);
  for (let i = 0; i < count; i++) {
    const typeSource = goBindingTypeNode(rhsNodes[i]);
    if (!typeSource) continue;
    const typeName = extractSimpleTypeName(typeSource);
    if (!typeName) continue;
    const varName = extractVarName(lhsNodes[i]);
    if (varName) env.set(varName, typeName);
  }
};

/** Go declaration entry point: dispatches on the declaration node type. */
const extractDeclaration: TypeBindingExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  if (node.type === 'var_declaration' || node.type === 'var_spec') {
    extractGoVarDeclaration(node, env);
  } else if (node.type === 'short_var_declaration') {
    extractGoShortVarDeclaration(node, env);
  }
};

/**
 * Go: parameter → `name type`.
 * Grouped parameters (`a, b User`) bind every listed name to the shared type.
 */
const extractParameter: ParameterExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  const nameNode =
    node.type === 'parameter'
      ? node.childForFieldName('name')
      : node.childForFieldName('name') ?? node.childForFieldName('pattern');
  const typeNode = node.childForFieldName('type');
  if (!nameNode || !typeNode) return;
  bindGoDeclaredNames(node, nameNode, typeNode, env);
};

/** Second-variable names that mark a `value, err := f()` style multi-return. */
const GO_ERROR_COMPANION_NAMES: ReadonlySet<string> = new Set(['_', 'err', 'ok', 'error']);

/**
 * Go: `user := NewUser(...)` — report the callee of a call-expression assignment.
 *
 * Accepted shapes: a single identifier on the left, or two variables where the
 * second is `_`, `err`, `ok` or `error`. The right side must be exactly one
 * call expression. `new` and `make` are skipped because extractDeclaration
 * already handles them.
 */
const scanConstructorBinding: ConstructorBindingScanner = (node) => {
  if (node.type !== 'short_var_declaration') return undefined;
  const left = node.childForFieldName('left');
  const right = node.childForFieldName('right');
  if (!left || !right) return undefined;

  const targets = flattenGoExpressionList(left);
  const values = flattenGoExpressionList(right);

  const isSingleTarget = targets.length === 1;
  const isValueWithCompanion = targets.length === 2 && GO_ERROR_COMPANION_NAMES.has(targets[1].text);
  if (!isSingleTarget && !isValueWithCompanion) return undefined;
  if (targets[0].type !== 'identifier') return undefined;
  if (values.length !== 1 || values[0].type !== 'call_expression') return undefined;

  const func = values[0].childForFieldName('function');
  if (!func) return undefined;
  if (func.text === 'new' || func.text === 'make') return undefined;

  const calleeName = extractSimpleTypeName(func);
  if (!calleeName) return undefined;
  return { varName: targets[0].text, calleeName };
};

/** Go uses one node type for every loop form. */
const FOR_LOOP_NODE_TYPES: ReadonlySet<string> = new Set([
  'for_statement',
]);

/** Go function/method node types that carry a parameter list. */
const GO_FUNCTION_NODE_TYPES = new Set([
  'function_declaration',
  'method_declaration',
  'func_literal',
]);

/**
 * Extract the element type from a Go type annotation AST node.
 * Handles:
 *   slice_type / array_type  `[]User`, `[10]User`   → `element` field
 *   map_type                 `map[string]User`      → `value` field
 *   channel_type             `chan User`            → `value` field or last named child
 *   generic_type             `Cache[string, User]`  → first or last type argument, per `pos`
 * Anything else, or a matching node missing the expected child, falls back to
 * text-based extraction via extractElementTypeFromString.
 */
const extractGoElementTypeFromTypeNode = (typeNode: SyntaxNode, pos: TypeArgPosition = 'last'): string | undefined => {
  switch (typeNode.type) {
    case 'slice_type':
    case 'array_type': {
      const element = typeNode.childForFieldName('element');
      if (element) return extractSimpleTypeName(element);
      break;
    }
    case 'map_type': {
      // In a two-variable range the second variable receives the map value.
      const value = typeNode.childForFieldName('value');
      if (value) return extractSimpleTypeName(value);
      break;
    }
    case 'channel_type': {
      const value = typeNode.childForFieldName('value') ?? typeNode.lastNamedChild;
      if (value) return extractSimpleTypeName(value);
      break;
    }
    case 'generic_type': {
      // Position-aware: 'first' selects the key-like argument, otherwise the last.
      const args = extractGenericTypeArgs(typeNode);
      if (args.length >= 1) return pos === 'first' ? args[0] : args[args.length - 1];
      break;
    }
  }
  return extractElementTypeFromString(typeNode.text, pos);
};

/**
 * Check whether `iterableName` is declared as a channel. Used to decide whether
 * a single-variable range yields the element (channels) or the index/key
 * (slices/maps). Prefers the recorded declaration type node; otherwise falls
 * back to a `chan ` prefix on the scopeEnv type string.
 */
const isChannelType = (
  iterableName: string,
  scopeEnv: ReadonlyMap<string, string>,
  declarationTypeNodes?: ReadonlyMap<string, SyntaxNode>,
  scope?: string,
): boolean => {
  if (declarationTypeNodes && scope) {
    const typeNode = declarationTypeNodes.get(`${scope}\0${iterableName}`);
    if (typeNode) return typeNode.type === 'channel_type';
  }
  const recordedType = scopeEnv.get(iterableName);
  return !!recordedType && recordedType.startsWith('chan ');
};

/**
 * Walk up the AST from a for-statement to the nearest enclosing function,
 * method or func literal, then search its parameters for `iterableName`.
 * Returns the element type extracted from that parameter's type annotation,
 * or undefined when no such parameter exists.
 *
 * Grouped parameters (`func f(a, b []User)`) are matched on every listed name,
 * not only the first one.
 */
const findGoParamElementType = (iterableName: string, startNode: SyntaxNode, pos: TypeArgPosition = 'last'): string | undefined => {
  for (let ancestor: SyntaxNode | null = startNode.parent; ancestor; ancestor = ancestor.parent) {
    if (!GO_FUNCTION_NODE_TYPES.has(ancestor.type)) continue;

    const paramsNode = ancestor.childForFieldName('parameters');
    if (!paramsNode) return undefined;

    for (let i = 0; i < paramsNode.namedChildCount; i++) {
      const paramDecl = paramsNode.namedChild(i);
      if (!paramDecl || paramDecl.type !== 'parameter_declaration') continue;
      const typeNode = paramDecl.childForFieldName('type');
      if (!typeNode) continue;
      const declaresIterable =
        paramDecl.childForFieldName('name')?.text === iterableName ||
        collectGoDeclaredNames(paramDecl, typeNode).some(n => n.text === iterableName);
      if (declaresIterable) return extractGoElementTypeFromTypeNode(typeNode, pos);
    }
    // Only the innermost enclosing function is consulted.
    return undefined;
  }
  return undefined;
};

/**
 * Go: for _, user := range users where users has a
known slice type.
 *
 * Go uses a single `for_statement` node for all for-loop forms. Range-based
 * loops are recognised by a `range_clause` child; C-style loops (with
 * `for_clause`) and infinite loops (no clause) are ignored.
 *
 * Tier 1c: resolves the element type via three strategies in priority order:
 *   1. declarationTypeNodes — raw type annotation AST node
 *   2. scopeEnv string      — extractElementTypeFromString on the stored type
 *   3. AST walk             — walks up to the enclosing function's parameters to read []User directly
 * For `_, user := range users`, the loop variable is the second identifier in
 * the `left` expression_list (index is discarded, value is the element).
 */
const extractForLoopBinding: ForLoopExtractor = (node, { scopeEnv, declarationTypeNodes, scope, returnTypeLookup }): void => {
  if (node.type !== 'for_statement') return;

  // Only range loops carry a range_clause; every other loop form is skipped.
  const rangeClause = node.namedChildren.find(c => c.type === 'range_clause');
  if (!rangeClause) return;

  // Identify what is being ranged over (the clause's `right` field).
  const iterable = rangeClause.childForFieldName('right');
  let iterableName: string | undefined;
  let callExprElementType: string | undefined;

  if (iterable?.type === 'identifier') {
    iterableName = iterable.text;
  } else if (iterable?.type === 'selector_expression') {
    const field = iterable.childForFieldName('field');
    if (field) iterableName = field.text;
  } else if (iterable?.type === 'call_expression') {
    // Range over a call result: `range getItems()` or `range repo.All()`.
    const fn = iterable.childForFieldName('function');
    let callee: string | undefined;
    if (fn?.type === 'identifier') {
      callee = fn.text;
    } else if (fn?.type === 'selector_expression') {
      callee = fn.childForFieldName('field')?.text;
    }
    if (callee) {
      const rawReturn = returnTypeLookup.lookupRawReturnType(callee);
      if (rawReturn) callExprElementType = extractElementTypeFromString(rawReturn);
    }
  }

  if (!iterableName && !callExprElementType) return;

  let elementType = callExprElementType;
  if (!elementType) {
    // iterableName is guaranteed truthy here by the guard above.
    const containerTypeName = scopeEnv.get(iterableName!);
    const typeArgPos = methodToTypeArgPosition(undefined, containerTypeName);
    elementType = resolveIterableElementType(
      iterableName!,
      node,
      scopeEnv,
      declarationTypeNodes,
      scope,
      extractGoElementTypeFromTypeNode,
      findGoParamElementType,
      typeArgPos,
    );
  }
  if (!elementType) return;

  // Go range semantics for the `left` field:
  //   Slice/Array/String: single-var → INDEX (int); two-var → (index, element)
  //   Map:                single-var → KEY;         two-var → (key, value)
  //   Channel:            single-var → ELEMENT (channels have no index)
  const leftNode = rangeClause.childForFieldName('left');
  if (!leftNode) return;

  const isList = leftNode.type === 'expression_list';
  let loopVarNode: SyntaxNode | null;
  if (isList && leftNode.namedChildCount >= 2) {
    // Two-var form (`_, user` / `i, user`): the second variable holds the element/value.
    loopVarNode = leftNode.namedChild(1);
  } else {
    // Single-var form binds the element only for channels. Call-expression
    // iterables (no iterableName) are conservatively treated as non-channels:
    // at worst a binding is missed, never an incorrect one created.
    const yieldsElement = !!iterableName && isChannelType(iterableName, scopeEnv, declarationTypeNodes, scope);
    if (!yieldsElement) return; // index-only range on slice/map — skip
    loopVarNode = isList ? leftNode.namedChild(0) : leftNode;
  }

  if (!loopVarNode) return;
  // The blank identifier never receives a binding.
  if (loopVarNode.text === '_') return;

  const loopVarName = extractVarName(loopVarNode);
  if (loopVarName) scopeEnv.set(loopVarName, elementType);
};

/** Go: alias := u (short_var_declaration) or var b = u (var_spec) */
const extractPendingAssignment: PendingAssignmentExtractor = (node, scopeEnv) => {
  if (node.type === 'short_var_declaration') {
    const left = node.childForFieldName('left');
    const right = node.childForFieldName('right');
    if (!left || !right) return undefined;
    const lhsNode = left.type === 'expression_list' ? left.firstNamedChild : left;
    const rhsNode = right.type === 'expression_list' ? right.firstNamedChild : right;
    if (!lhsNode || !rhsNode) return undefined;
    if (lhsNode.type !== 'identifier') return undefined;
    const lhs = lhsNode.text;
    if (scopeEnv.has(lhs)) return undefined;
    if (rhsNode.type === 'identifier') return { kind: 'copy', lhs, rhs: rhsNode.text };
    // selector_expression RHS → fieldAccess (a.field)
    if (rhsNode.type === 'selector_expression') {
      const operand = rhsNode.childForFieldName('operand');
      const field = rhsNode.childForFieldName('field');
      if (operand?.type === 'identifier' && field) {
        return { kind: 'fieldAccess', lhs, receiver: operand.text, field: field.text };
      }
    }
    // call_expression RHS
    if (rhsNode.type === 'call_expression') {
      const funcNode = rhsNode.childForFieldName('function');
      if (funcNode?.type === 'identifier') {
        return { kind: 'callResult', lhs, callee: funcNode.text };
      }
      // method call with receiver: call_expression → function: selector_expression
      if (funcNode?.type === 'selector_expression') {
        const operand = funcNode.childForFieldName('operand');
        const field = funcNode.childForFieldName('field');
        if (operand?.type === 'identifier' && field) {
          return { kind: 'methodCallResult', lhs,
receiver: operand.text, method: field.text }; } } } return undefined; } if (node.type === 'var_spec' || node.type === 'var_declaration') { // var_declaration contains var_spec children; var_spec has name + expression_list value const specs: SyntaxNode[] = []; if (node.type === 'var_declaration') { for (let i = 0; i < node.namedChildCount; i++) { const c = node.namedChild(i); if (c?.type === 'var_spec') specs.push(c); } } else { specs.push(node); } for (const spec of specs) { const nameNode = spec.childForFieldName('name'); if (!nameNode || nameNode.type !== 'identifier') continue; const lhs = nameNode.text; if (scopeEnv.has(lhs)) continue; // Check if the last named child is a bare identifier (no type annotation between name and value) let exprList: SyntaxNode | null = null; for (let i = 0; i < spec.childCount; i++) { if (spec.child(i)?.type === 'expression_list') { exprList = spec.child(i); break; } } const rhsNode = exprList?.firstNamedChild; if (rhsNode?.type === 'identifier') return { kind: 'copy', lhs, rhs: rhsNode.text }; // selector_expression RHS → fieldAccess if (rhsNode?.type === 'selector_expression') { const operand = rhsNode.childForFieldName('operand'); const field = rhsNode.childForFieldName('field'); if (operand?.type === 'identifier' && field) { return { kind: 'fieldAccess', lhs, receiver: operand.text, field: field.text }; } } // call_expression RHS if (rhsNode?.type === 'call_expression') { const funcNode = rhsNode.childForFieldName('function'); if (funcNode?.type === 'identifier') { return { kind: 'callResult', lhs, callee: funcNode.text }; } if (funcNode?.type === 'selector_expression') { const operand = funcNode.childForFieldName('operand'); const field = funcNode.childForFieldName('field'); if (operand?.type === 'identifier' && field) { return { kind: 'methodCallResult', lhs, receiver: operand.text, method: field.text }; } } } } } return undefined; }; export const typeConfig: LanguageTypeConfig = { declarationNodeTypes: 
DECLARATION_NODE_TYPES, forLoopNodeTypes: FOR_LOOP_NODE_TYPES, extractDeclaration, extractParameter, scanConstructorBinding, extractForLoopBinding, extractPendingAssignment, }; ================================================ FILE: gitnexus/src/core/ingestion/type-extractors/index.ts ================================================ /** * Per-language type extraction configurations. * Assembled here into a dispatch map keyed by SupportedLanguages. */ import { SupportedLanguages } from '../../../config/supported-languages.js'; import type { LanguageTypeConfig } from './types.js'; import { typeConfig as typescriptConfig } from './typescript.js'; import { javaTypeConfig, kotlinTypeConfig } from './jvm.js'; import { typeConfig as csharpConfig } from './csharp.js'; import { typeConfig as goConfig } from './go.js'; import { typeConfig as rustConfig } from './rust.js'; import { typeConfig as pythonConfig } from './python.js'; import { typeConfig as swiftConfig } from './swift.js'; import { typeConfig as cCppConfig } from './c-cpp.js'; import { typeConfig as phpConfig } from './php.js'; import { typeConfig as rubyConfig } from './ruby.js'; export const typeConfigs = { [SupportedLanguages.JavaScript]: typescriptConfig, [SupportedLanguages.TypeScript]: typescriptConfig, [SupportedLanguages.Java]: javaTypeConfig, [SupportedLanguages.Kotlin]: kotlinTypeConfig, [SupportedLanguages.CSharp]: csharpConfig, [SupportedLanguages.Go]: goConfig, [SupportedLanguages.Rust]: rustConfig, [SupportedLanguages.Python]: pythonConfig, [SupportedLanguages.Swift]: swiftConfig, [SupportedLanguages.C]: cCppConfig, [SupportedLanguages.CPlusPlus]: cCppConfig, [SupportedLanguages.PHP]: phpConfig, [SupportedLanguages.Ruby]: rubyConfig, } satisfies Record<SupportedLanguages, LanguageTypeConfig>; export type { LanguageTypeConfig, TypeBindingExtractor, ParameterExtractor, ConstructorBindingScanner, ForLoopExtractor, PendingAssignmentExtractor, PatternBindingExtractor, } from './types.js'; export { 
TYPED_PARAMETER_TYPES, extractSimpleTypeName, extractGenericTypeArgs, extractVarName, findChildByType, extractRubyConstructorAssignment } from './shared.js'; ================================================ FILE: gitnexus/src/core/ingestion/type-extractors/jvm.ts ================================================ import type { SyntaxNode } from '../utils.js'; import type { LanguageTypeConfig, ParameterExtractor, TypeBindingExtractor, InitializerExtractor, ClassNameLookup, ConstructorBindingScanner, ForLoopExtractor, PendingAssignmentExtractor, PatternBindingExtractor, LiteralTypeInferrer, ConstructorTypeDetector } from './types.js'; import { extractSimpleTypeName, extractVarName, findChildByType, extractGenericTypeArgs, resolveIterableElementType, methodToTypeArgPosition, extractElementTypeFromString, type TypeArgPosition } from './shared.js'; // ── Java ────────────────────────────────────────────────────────────────── const JAVA_DECLARATION_NODE_TYPES: ReadonlySet<string> = new Set([ 'local_variable_declaration', 'field_declaration', ]); /** Java: Type x = ...; Type x; */ const extractJavaDeclaration: TypeBindingExtractor = (node: SyntaxNode, env: Map<string, string>): void => { const typeNode = node.childForFieldName('type'); if (!typeNode) return; const typeName = extractSimpleTypeName(typeNode); if (!typeName || typeName === 'var') return; // skip Java 10 var — handled by extractInitializer // Find variable_declarator children for (let i = 0; i < node.namedChildCount; i++) { const child = node.namedChild(i); if (child?.type !== 'variable_declarator') continue; const nameNode = child.childForFieldName('name'); if (nameNode) { const varName = extractVarName(nameNode); if (varName) env.set(varName, typeName); } } }; /** Java 10+: var x = new User() — infer type from object_creation_expression */ const extractJavaInitializer: InitializerExtractor = (node: SyntaxNode, env: Map<string, string>, _classNames: ClassNameLookup): void => { for (let i = 0; i < 
node.namedChildCount; i++) { const child = node.namedChild(i); if (child?.type !== 'variable_declarator') continue; const nameNode = child.childForFieldName('name'); const valueNode = child.childForFieldName('value'); if (!nameNode || !valueNode) continue; // Skip declarators that already have a binding from extractDeclaration const varName = extractVarName(nameNode); if (!varName || env.has(varName)) continue; if (valueNode.type !== 'object_creation_expression') continue; const ctorType = valueNode.childForFieldName('type'); if (!ctorType) continue; const typeName = extractSimpleTypeName(ctorType); if (typeName) env.set(varName, typeName); } }; /** Java: formal_parameter → type name */ const extractJavaParameter: ParameterExtractor = (node: SyntaxNode, env: Map<string, string>): void => { let nameNode: SyntaxNode | null = null; let typeNode: SyntaxNode | null = null; if (node.type === 'formal_parameter') { typeNode = node.childForFieldName('type'); nameNode = node.childForFieldName('name'); } else { // Generic fallback nameNode = node.childForFieldName('name') ?? 
node.childForFieldName('pattern'); typeNode = node.childForFieldName('type'); } if (!nameNode || !typeNode) return; const varName = extractVarName(nameNode); const typeName = extractSimpleTypeName(typeNode); if (varName && typeName) env.set(varName, typeName); }; /** Java: var x = SomeFactory.create() — constructor binding for `var` with method_invocation */ const scanJavaConstructorBinding: ConstructorBindingScanner = (node) => { if (node.type !== 'local_variable_declaration') return undefined; const typeNode = node.childForFieldName('type'); if (!typeNode) return undefined; if (typeNode.text !== 'var') return undefined; const declarator = findChildByType(node, 'variable_declarator'); if (!declarator) return undefined; const nameNode = declarator.childForFieldName('name'); const value = declarator.childForFieldName('value'); if (!nameNode || !value) return undefined; if (value.type === 'object_creation_expression') return undefined; if (value.type !== 'method_invocation') return undefined; const methodName = value.childForFieldName('name'); if (!methodName) return undefined; return { varName: nameNode.text, calleeName: methodName.text }; }; const JAVA_FOR_LOOP_NODE_TYPES: ReadonlySet<string> = new Set([ 'enhanced_for_statement', ]); /** Extract element type from a Java type annotation AST node. * Handles generic_type (List<User>), array_type (User[]). */ const extractJavaElementTypeFromTypeNode = (typeNode: SyntaxNode, pos: TypeArgPosition = 'last'): string | undefined => { if (typeNode.type === 'generic_type') { const args = extractGenericTypeArgs(typeNode); if (args.length >= 1) return pos === 'first' ? args[0] : args[args.length - 1]; } if (typeNode.type === 'array_type') { const elemNode = typeNode.firstNamedChild; if (elemNode) return extractSimpleTypeName(elemNode); } return undefined; }; /** Walk up from a for-each to the enclosing method_declaration and search parameters. 
*/
const findJavaParamElementType = (iterableName: string, startNode: SyntaxNode, pos: TypeArgPosition = 'last'): string | undefined => {
  let current: SyntaxNode | null = startNode.parent;
  while (current) {
    if (current.type === 'method_declaration' || current.type === 'constructor_declaration') {
      const paramsNode = current.childForFieldName('parameters');
      if (paramsNode) {
        for (let i = 0; i < paramsNode.namedChildCount; i++) {
          const param = paramsNode.namedChild(i);
          if (!param || param.type !== 'formal_parameter') continue;
          const nameNode = param.childForFieldName('name');
          if (nameNode?.text !== iterableName) continue;
          const typeNode = param.childForFieldName('type');
          if (typeNode) return extractJavaElementTypeFromTypeNode(typeNode, pos);
        }
      }
      // Only the nearest enclosing method/constructor is inspected.
      break;
    }
    current = current.parent;
  }
  return undefined;
};

/** Java: for (User user : users) — extract loop variable binding.
 *  Tier 1c: for `for (var user : users)`, resolves element type from iterable. */
const extractJavaForLoopBinding: ForLoopExtractor = (node, { scopeEnv, declarationTypeNodes, scope, returnTypeLookup }): void => {
  const typeNode = node.childForFieldName('type');
  const nameNode = node.childForFieldName('name');
  if (!typeNode || !nameNode) return;
  const varName = extractVarName(nameNode);
  if (!varName) return;

  // Explicit type (existing behavior): for (User user : users)
  const typeName = extractSimpleTypeName(typeNode);
  if (typeName && typeName !== 'var') {
    scopeEnv.set(varName, typeName);
    return;
  }

  // Tier 1c: var — resolve from iterable's container type
  const iterableNode = node.childForFieldName('value');
  if (!iterableNode) return;

  let iterableName: string | undefined;
  let methodName: string | undefined;
  let callExprElementType: string | undefined;
  if (iterableNode.type === 'identifier') {
    iterableName = iterableNode.text;
  } else if (iterableNode.type === 'field_access') {
    const field = iterableNode.childForFieldName('field');
    if (field) iterableName = field.text;
  } else if (iterableNode.type === 'method_invocation') {
    // data.keySet() → method_invocation > object: identifier + name: identifier
    // Also handles this.data.values() → object is field_access, extract inner field name
    const obj = iterableNode.childForFieldName('object');
    const name = iterableNode.childForFieldName('name');
    if (obj?.type === 'identifier') {
      iterableName = obj.text;
    } else if (obj?.type === 'field_access') {
      const innerField = obj.childForFieldName('field');
      if (innerField) iterableName = innerField.text;
    } else if (!obj && name) {
      // Direct function call: for (var u : getUsers()) — no receiver object
      const rawReturn = returnTypeLookup.lookupRawReturnType(name.text);
      if (rawReturn) callExprElementType = extractElementTypeFromString(rawReturn);
    }
    if (name) methodName = name.text;
  }
  if (!iterableName && !callExprElementType) return;

  let elementType: string | undefined;
  if (callExprElementType) {
    elementType = callExprElementType;
  } else {
    const containerTypeName = scopeEnv.get(iterableName!);
    const typeArgPos = methodToTypeArgPosition(methodName, containerTypeName);
    elementType = resolveIterableElementType(
      iterableName!, node, scopeEnv, declarationTypeNodes, scope,
      extractJavaElementTypeFromTypeNode, findJavaParamElementType,
      typeArgPos,
    );
  }
  if (elementType) scopeEnv.set(varName, elementType);
};

/** Java: var alias = u → local_variable_declaration > variable_declarator with name/value.
 *  Returns the first declarator that can be classified; declarators whose name already
 *  has a binding in scopeEnv are skipped. */
const extractJavaPendingAssignment: PendingAssignmentExtractor = (node, scopeEnv) => {
  for (let i = 0; i < node.namedChildCount; i++) {
    const child = node.namedChild(i);
    if (!child || child.type !== 'variable_declarator') continue;
    const nameNode = child.childForFieldName('name');
    const valueNode = child.childForFieldName('value');
    if (!nameNode || !valueNode) continue;
    const lhs = nameNode.text;
    if (scopeEnv.has(lhs)) continue;
    if (valueNode.type === 'identifier' || valueNode.type === 'simple_identifier') return { kind: 'copy', lhs, rhs: valueNode.text };
    // field_access RHS → fieldAccess (a.field)
    if (valueNode.type === 'field_access') {
      const obj = valueNode.childForFieldName('object');
      const field = valueNode.childForFieldName('field');
      if (obj?.type === 'identifier' && field) {
        return { kind: 'fieldAccess', lhs, receiver: obj.text, field: field.text };
      }
    }
    // method_invocation RHS
    if (valueNode.type === 'method_invocation') {
      const objField = valueNode.childForFieldName('object');
      if (!objField) {
        // No receiver → callResult
        const nameField = valueNode.childForFieldName('name');
        if (nameField?.type === 'identifier') {
          return { kind: 'callResult', lhs, callee: nameField.text };
        }
      } else if (objField.type === 'identifier') {
        // With receiver → methodCallResult
        const nameField = valueNode.childForFieldName('name');
        if (nameField?.type === 'identifier') {
          return { kind: 'methodCallResult', lhs, receiver: objField.text, method: nameField.text };
        }
      }
    }
  }
  return undefined;
};

/**
 * Java 16+ `instanceof` pattern variable: `x instanceof User user`
 *
 * AST structure:
 *   instanceof_expression
 *     left: expression (the variable being tested)
 *     instanceof keyword
 *     right: type (the type to test against)
 *     name: identifier (the pattern variable — optional, Java 16+)
 *
 * Conservative: returns undefined when the `name` field is absent (plain instanceof
 * without pattern variable, e.g. `x instanceof User`) or when the type cannot be
 * extracted. The source variable's existing type is NOT used — the pattern explicitly
 * declares the new type, so no scopeEnv lookup is needed.
 *
 * Also handles `type_pattern` nodes (switch patterns), reading type and name positionally.
 */
const extractJavaPatternBinding: PatternBindingExtractor = (node) => {
  if (node.type === 'type_pattern') {
    // Java 17+ switch pattern: case User u -> ...
    // type_pattern has positional children (NO named fields):
    //   namedChild(0) = type (type_identifier, e.g., User)
    //   namedChild(1) = identifier (e.g., u)
    const typeNode = node.namedChild(0);
    const nameNode = node.namedChild(1);
    if (!typeNode || !nameNode) return undefined;
    const typeName = extractSimpleTypeName(typeNode);
    const varName = extractVarName(nameNode);
    if (!typeName || !varName) return undefined;
    return { varName, typeName };
  }
  if (node.type !== 'instanceof_expression') return undefined;
  const nameNode = node.childForFieldName('name');
  if (!nameNode) return undefined;
  const typeNode = node.childForFieldName('right');
  if (!typeNode) return undefined;
  const typeName = extractSimpleTypeName(typeNode);
  const varName = extractVarName(nameNode);
  if (!typeName || !varName) return undefined;
  return { varName, typeName };
};

/** Infer the type of a literal AST node for Java/Kotlin overload disambiguation.
 *  Returns undefined for node types that are not recognised literals. */
const inferJvmLiteralType: LiteralTypeInferrer = (node) => {
  switch (node.type) {
    case 'decimal_integer_literal':
    case 'integer_literal':
    case 'hex_integer_literal':
    case 'octal_integer_literal':
    case 'binary_integer_literal':
      // Check for long suffix
      if (node.text.endsWith('L') || node.text.endsWith('l')) return 'long';
      return 'int';
    case 'decimal_floating_point_literal':
    case 'real_literal':
      if (node.text.endsWith('f') || node.text.endsWith('F')) return 'float';
      return 'double';
    case 'string_literal':
    case 'line_string_literal':
    case 'multi_line_string_literal':
      return 'String';
    case 'character_literal':
      return 'char';
    case 'true':
    case 'false':
    case 'boolean_literal':
      return 'boolean';
    case 'null_literal':
      return 'null';
    default:
      return undefined;
  }
};

export const javaTypeConfig: LanguageTypeConfig = {
  declarationNodeTypes: JAVA_DECLARATION_NODE_TYPES,
  forLoopNodeTypes: JAVA_FOR_LOOP_NODE_TYPES,
  patternBindingNodeTypes: new Set(['instanceof_expression', 'type_pattern']),
  extractDeclaration: extractJavaDeclaration,
  extractParameter: extractJavaParameter,
  extractInitializer: extractJavaInitializer,
  scanConstructorBinding: scanJavaConstructorBinding,
  extractForLoopBinding: extractJavaForLoopBinding,
  extractPendingAssignment: extractJavaPendingAssignment,
  extractPatternBinding: extractJavaPatternBinding,
  inferLiteralType: inferJvmLiteralType,
};

// ── Kotlin ────────────────────────────────────────────────────────────────

const KOTLIN_DECLARATION_NODE_TYPES: ReadonlySet<string> = new Set([
  'property_declaration',
  'variable_declaration',
]);

/** Kotlin: val x: Foo = ...
 *  Binds the declared variable to its annotated type. Both plain (`user_type`) and
 *  nullable (`nullable_type`, e.g. `Foo?`) annotations are recognised on
 *  variable_declaration nodes, whether nested in a property_declaration or standalone. */
const extractKotlinDeclaration: TypeBindingExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  if (node.type === 'property_declaration') {
    // Kotlin property_declaration: name/type are inside a variable_declaration child
    const varDecl = findChildByType(node, 'variable_declaration');
    if (varDecl) {
      const nameNode = findChildByType(varDecl, 'simple_identifier');
      const typeNode = findChildByType(varDecl, 'user_type')
        ?? findChildByType(varDecl, 'nullable_type');
      if (!nameNode || !typeNode) return;
      const varName = extractVarName(nameNode);
      const typeName = extractSimpleTypeName(typeNode);
      if (varName && typeName) env.set(varName, typeName);
      return;
    }
    // Fallback: try direct fields
    const nameNode = node.childForFieldName('name')
      ?? findChildByType(node, 'simple_identifier');
    const typeNode = node.childForFieldName('type')
      ?? findChildByType(node, 'user_type');
    if (!nameNode || !typeNode) return;
    const varName = extractVarName(nameNode);
    const typeName = extractSimpleTypeName(typeNode);
    if (varName && typeName) env.set(varName, typeName);
  } else if (node.type === 'variable_declaration') {
    // variable_declaration directly inside functions.
    // Accept nullable annotations too, matching the property_declaration branch above.
    const nameNode = findChildByType(node, 'simple_identifier');
    const typeNode = findChildByType(node, 'user_type')
      ?? findChildByType(node, 'nullable_type');
    if (nameNode && typeNode) {
      const varName = extractVarName(nameNode);
      const typeName = extractSimpleTypeName(typeNode);
      if (varName && typeName) env.set(varName, typeName);
    }
  }
};

/** Kotlin: parameter / formal_parameter → type name.
 *  Kotlin's tree-sitter grammar uses positional children (simple_identifier, user_type)
 *  rather than named fields (name, type) on `parameter` nodes, so we fall back to
 *  findChildByType when childForFieldName returns null. */
const extractKotlinParameter: ParameterExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  let nameNode: SyntaxNode | null = null;
  let typeNode: SyntaxNode | null = null;

  if (node.type === 'formal_parameter') {
    typeNode = node.childForFieldName('type');
    nameNode = node.childForFieldName('name');
  } else {
    nameNode = node.childForFieldName('name') ?? node.childForFieldName('pattern');
    typeNode = node.childForFieldName('type');
  }

  // Fallback: Kotlin `parameter` nodes use positional children, not named fields
  if (!nameNode) nameNode = findChildByType(node, 'simple_identifier');
  if (!typeNode) typeNode = findChildByType(node, 'user_type')
    ?? findChildByType(node, 'nullable_type');

  if (!nameNode || !typeNode) return;
  const varName = extractVarName(nameNode);
  const typeName = extractSimpleTypeName(typeNode);
  if (varName && typeName) env.set(varName, typeName);
};

/** Find the constructor callee name in a Kotlin property_declaration's initializer.
 * Returns the class name if the callee is a verified class constructor, undefined otherwise.
*/
const findKotlinConstructorCallee = (node: SyntaxNode, classNames: ClassNameLookup): string | undefined => {
  if (node.type !== 'property_declaration') return undefined;
  // Prefer the `value` field; otherwise take the first call_expression child.
  const value = node.childForFieldName('value') ?? findChildByType(node, 'call_expression');
  if (!value || value.type !== 'call_expression') return undefined;
  // Only bare-identifier callees (`User(...)`) qualify; qualified calls are ignored.
  const callee = value.firstNamedChild;
  if (!callee || callee.type !== 'simple_identifier') return undefined;
  const calleeName = callee.text;
  if (!calleeName || !classNames.has(calleeName)) return undefined;
  return calleeName;
};

/** Kotlin: val user = User() — infer type from call_expression when callee is a known class.
 *  Kotlin constructors are syntactically identical to function calls, so we verify
 *  against classNames (which may include cross-file SymbolTable lookups).
 *
 *  Declarations that carry an explicit annotation — plain (`user_type`) or nullable
 *  (`nullable_type`, e.g. `val a: Animal? = Dog()`) — are skipped so the declared type
 *  is not replaced by the constructor's type. */
const extractKotlinInitializer: InitializerExtractor = (node: SyntaxNode, env: Map<string, string>, classNames: ClassNameLookup): void => {
  // Skip if there's an explicit type annotation — Tier 0 already handled it
  const varDecl = findChildByType(node, 'variable_declaration');
  if (varDecl) {
    const annotation = findChildByType(varDecl, 'user_type')
      ?? findChildByType(varDecl, 'nullable_type');
    if (annotation) return;
  }

  const calleeName = findKotlinConstructorCallee(node, classNames);
  if (!calleeName) return;

  // Extract the variable name from the variable_declaration inside property_declaration
  const nameNode = varDecl
    ? findChildByType(varDecl, 'simple_identifier')
    : findChildByType(node, 'simple_identifier');
  if (!nameNode) return;
  const varName = extractVarName(nameNode);
  if (varName) env.set(varName, calleeName);
};

/** Kotlin: detect constructor type from call_expression in typed declarations.
 * Unlike extractKotlinInitializer (which SKIPS typed declarations), this detects
 * the constructor type EVEN when a type annotation exists, enabling virtual dispatch
 * for patterns like `val a: Animal = Dog()`.
*/
const detectKotlinConstructorType: ConstructorTypeDetector = (node, classNames) => {
  return findKotlinConstructorCallee(node, classNames);
};

/** Kotlin: val x = User(...) — constructor binding for property_declaration with call_expression.
 *  Returns `{ varName, calleeName }` for un-annotated declarations initialised by a call:
 *  a bare call (`User()`) yields the callee identifier, a qualified call
 *  (`service.getUser()`) yields the trailing method name. Declarations with an explicit
 *  annotation — plain (`user_type`) or nullable (`nullable_type`) — yield undefined. */
const scanKotlinConstructorBinding: ConstructorBindingScanner = (node) => {
  if (node.type !== 'property_declaration') return undefined;
  const varDecl = findChildByType(node, 'variable_declaration');
  if (!varDecl) return undefined;
  // An explicit annotation already fixes the type; `Foo?` counts just like `Foo`.
  const annotation = findChildByType(varDecl, 'user_type')
    ?? findChildByType(varDecl, 'nullable_type');
  if (annotation) return undefined;
  const callExpr = findChildByType(node, 'call_expression');
  if (!callExpr) return undefined;
  const callee = callExpr.firstNamedChild;
  if (!callee) return undefined;

  let calleeName: string | undefined;
  if (callee.type === 'simple_identifier') {
    calleeName = callee.text;
  } else if (callee.type === 'navigation_expression') {
    // Extract method name from qualified call: service.getUser() → getUser
    const suffix = callee.lastNamedChild;
    if (suffix?.type === 'navigation_suffix') {
      const methodName = suffix.lastNamedChild;
      if (methodName?.type === 'simple_identifier') {
        calleeName = methodName.text;
      }
    }
  }
  if (!calleeName) return undefined;
  const nameNode = findChildByType(varDecl, 'simple_identifier');
  if (!nameNode) return undefined;
  return { varName: nameNode.text, calleeName };
};

const KOTLIN_FOR_LOOP_NODE_TYPES: ReadonlySet<string> = new Set([
  'for_statement',
]);

/** Extract element type from a Kotlin type annotation AST node (user_type wrapping generic).
 *  Kotlin: user_type → [type_identifier, type_arguments → [type_projection → user_type]]
 *  Handles the type_projection wrapper that Kotlin uses for generic type arguments.
 *  `pos` selects the first or the last type argument. */
const extractKotlinElementTypeFromTypeNode = (typeNode: SyntaxNode, pos: TypeArgPosition = 'last'): string | undefined => {
  if (typeNode.type === 'user_type') {
    const argsNode = findChildByType(typeNode, 'type_arguments');
    if (argsNode && argsNode.namedChildCount >= 1) {
      const targetArg = pos === 'first'
        ? argsNode.namedChild(0)
        : argsNode.namedChild(argsNode.namedChildCount - 1);
      if (!targetArg) return undefined;
      // Kotlin wraps type args in type_projection — unwrap to get the inner type
      const inner = targetArg.type === 'type_projection'
        ? targetArg.firstNamedChild
        : targetArg;
      if (inner) return extractSimpleTypeName(inner);
    }
  }
  return undefined;
};

/** Walk up from a for-loop to the enclosing function_declaration and search parameters.
 *  Kotlin parameters use positional children (simple_identifier, user_type), not named fields. */
const findKotlinParamElementType = (iterableName: string, startNode: SyntaxNode, pos: TypeArgPosition = 'last'): string | undefined => {
  let current: SyntaxNode | null = startNode.parent;
  while (current) {
    if (current.type === 'function_declaration') {
      const paramsNode = findChildByType(current, 'function_value_parameters');
      if (paramsNode) {
        for (let i = 0; i < paramsNode.namedChildCount; i++) {
          const param = paramsNode.namedChild(i);
          if (!param || param.type !== 'parameter') continue;
          const nameNode = findChildByType(param, 'simple_identifier');
          if (nameNode?.text !== iterableName) continue;
          const typeNode = findChildByType(param, 'user_type');
          if (typeNode) return extractKotlinElementTypeFromTypeNode(typeNode, pos);
        }
      }
      // Only the nearest enclosing function_declaration is inspected.
      break;
    }
    current = current.parent;
  }
  return undefined;
};

/** Kotlin: for (user: User in users) — extract loop variable binding.
 * Tier 1c: for `for (user in users)` without annotation, resolves from iterable.
/**
 * Kotlin: bind the loop variable of a `for` statement to a type in `scopeEnv`.
 *
 * - Explicit annotation — `for (user: User in users)`: the annotated type is stored directly.
 * - Tier 1c — `for (user in users)`: the element type is derived from the iterable, which may be
 *   a plain identifier, a property/method access (`data.keys`, `data.values()`), or a direct
 *   function call (`getUsers()`) whose raw return type comes from `returnTypeLookup`.
 *
 * For a bare property access whose receiver has no known type (`repo.items`), the property name
 * is retried as the iterable. A bare property access on `this` (`this.users`) has no receiver
 * variable at all, so the property is used as the iterable immediately; previously that case
 * left `iterableName` unset and the function returned before the fallback could run.
 *
 * Nothing is written to `scopeEnv` when no type can be determined.
 */
const extractKotlinForLoopBinding: ForLoopExtractor = (node, ctx): void => {
  const { scopeEnv, declarationTypeNodes, scope, returnTypeLookup } = ctx;

  const varDecl = findChildByType(node, 'variable_declaration');
  if (!varDecl) return;
  const nameNode = findChildByType(varDecl, 'simple_identifier');
  if (!nameNode) return;
  const varName = extractVarName(nameNode);
  if (!varName) return;

  // Explicit type annotation: for (user: User in users)
  const typeNode = findChildByType(varDecl, 'user_type');
  if (typeNode) {
    const typeName = extractSimpleTypeName(typeNode);
    if (typeName) scopeEnv.set(varName, typeName);
    return;
  }

  // Tier 1c: no annotation — resolve from the iterable's container type.
  // Kotlin for-loop children: [variable_declaration, iterable_expr, control_structure_body];
  // the iterable is the first recognised named child after the variable_declaration.
  let iterableName: string | undefined;
  let methodName: string | undefined;
  let fallbackIterableName: string | undefined;
  let callExprElementType: string | undefined;

  let foundVarDecl = false;
  for (let i = 0; i < node.namedChildCount; i++) {
    const child = node.namedChild(i);
    if (child === varDecl) {
      foundVarDecl = true;
      continue;
    }
    if (!foundVarDecl || !child) continue;

    if (child.type === 'simple_identifier') {
      iterableName = child.text;
      break;
    }

    if (child.type === 'navigation_expression') {
      // data.keys → navigation_expression > simple_identifier(data) + navigation_suffix > simple_identifier(keys)
      const obj = child.firstNamedChild;
      const suffix = findChildByType(child, 'navigation_suffix');
      const prop = suffix ? findChildByType(suffix, 'simple_identifier') : null;
      const hasCallSuffix = suffix ? findChildByType(suffix, 'call_suffix') !== null : false;
      // Always try object as iterable + property as method first (handles data.values, data.keys).
      if (obj?.type === 'simple_identifier') iterableName = obj.text;
      if (prop) methodName = prop.text;
      if (!hasCallSuffix && prop) {
        if (obj?.type === 'this_expression') {
          // this.users — there is no receiver variable to look up, so the property IS the iterable.
          iterableName = prop.text;
          methodName = undefined;
        } else {
          // repo.items — remember the property in case the receiver has no type in scope.
          fallbackIterableName = prop.text;
        }
      }
      break;
    }

    if (child.type === 'call_expression') {
      // data.values() → call_expression > navigation_expression > simple_identifier + navigation_suffix
      const callee = child.firstNamedChild;
      if (callee?.type === 'navigation_expression') {
        const obj = callee.firstNamedChild;
        if (obj?.type === 'simple_identifier') iterableName = obj.text;
        const suffix = findChildByType(callee, 'navigation_suffix');
        if (suffix) {
          const prop = findChildByType(suffix, 'simple_identifier');
          if (prop) methodName = prop.text;
        }
      } else if (callee?.type === 'simple_identifier') {
        // Direct function call: for (u in getUsers())
        const rawReturn = returnTypeLookup.lookupRawReturnType(callee.text);
        if (rawReturn) callExprElementType = extractElementTypeFromString(rawReturn);
      }
      break;
    }
  }

  // A direct call already yielded the element type.
  if (callExprElementType) {
    scopeEnv.set(varName, callExprElementType);
    return;
  }
  if (!iterableName) return;

  let containerTypeName = scopeEnv.get(iterableName);
  // Fallback: the receiver has no type in scope, so try the property as the iterable name.
  if (!containerTypeName && fallbackIterableName) {
    iterableName = fallbackIterableName;
    methodName = undefined;
    containerTypeName = scopeEnv.get(iterableName);
  }

  const typeArgPos = methodToTypeArgPosition(methodName, containerTypeName);
  const elementType = resolveIterableElementType(
    iterableName,
    node,
    scopeEnv,
    declarationTypeNodes,
    scope,
    extractKotlinElementTypeFromTypeNode,
    findKotlinParamElementType,
    typeArgPos,
  );
  if (elementType) scopeEnv.set(varName, elementType);
};
/** Split a `receiver.member` navigation_expression into its receiver node and trailing member node. */
const splitKotlinNavigation = (nav: SyntaxNode) => {
  const receiver = nav.firstNamedChild;
  const tail = nav.lastNamedChild;
  // The member normally sits inside a navigation_suffix; otherwise the last child is used as-is.
  const member = tail?.type === 'navigation_suffix' ? tail.lastNamedChild : tail;
  return { receiver, member };
};

/**
 * Scan the children of `owner` that follow the "=" token and classify the first right-hand side
 * that has a recognised shape:
 *   simple_identifier                    → copy
 *   navigation_expression (a.field)      → fieldAccess
 *   call_expression with bare callee     → callResult
 *   call_expression with a.method callee → methodCallResult
 * Children that do not fit any shape are skipped and the scan continues.
 */
const classifyKotlinAssignmentRhs = (owner: SyntaxNode, lhs: string): ReturnType<PendingAssignmentExtractor> => {
  let pastEquals = false;
  for (let i = 0; i < owner.childCount; i++) {
    const rhs = owner.child(i);
    if (!rhs) continue;
    if (rhs.type === '=') {
      pastEquals = true;
      continue;
    }
    if (!pastEquals) continue;

    switch (rhs.type) {
      case 'simple_identifier':
        return { kind: 'copy', lhs, rhs: rhs.text };

      case 'navigation_expression': {
        const { receiver, member } = splitKotlinNavigation(rhs);
        if (receiver?.type === 'simple_identifier' && member?.type === 'simple_identifier') {
          return { kind: 'fieldAccess', lhs, receiver: receiver.text, field: member.text };
        }
        break;
      }

      case 'call_expression': {
        const callee = rhs.firstNamedChild;
        if (callee?.type === 'simple_identifier') {
          return { kind: 'callResult', lhs, callee: callee.text };
        }
        if (callee?.type === 'navigation_expression') {
          const { receiver, member } = splitKotlinNavigation(callee);
          if (receiver?.type === 'simple_identifier' && member?.type === 'simple_identifier') {
            return { kind: 'methodCallResult', lhs, receiver: receiver.text, method: member.text };
          }
        }
        break;
      }
    }
  }
  return undefined;
};

/**
 * Kotlin: `val alias = u` — describe an assignment whose left-hand side has no type in
 * `scopeEnv` yet.
 *
 * Two node shapes are accepted:
 * - property_declaration: the name is the first named child of its variable_declaration
 *   (must be a simple_identifier); the right-hand side is searched among the node's own children.
 * - variable_declaration: the name is its simple_identifier child; the right-hand side is
 *   searched among the children of the parent node.
 *
 * Returns undefined for any other node type, when the name cannot be found, when the name is
 * already present in `scopeEnv`, or when no right-hand side has a recognised shape.
 */
const extractKotlinPendingAssignment: PendingAssignmentExtractor = (node, scopeEnv) => {
  switch (node.type) {
    case 'property_declaration': {
      const decl = findChildByType(node, 'variable_declaration');
      if (!decl) return undefined;
      const ident = decl.firstNamedChild;
      if (ident?.type !== 'simple_identifier') return undefined;
      if (scopeEnv.has(ident.text)) return undefined;
      return classifyKotlinAssignmentRhs(node, ident.text);
    }

    case 'variable_declaration': {
      const ident = findChildByType(node, 'simple_identifier');
      if (!ident) return undefined;
      if (scopeEnv.has(ident.text)) return undefined;
      // The "=" and the value live on the parent, not on the variable_declaration itself.
      const owner = node.parent;
      return owner ? classifyKotlinAssignmentRhs(owner, ident.text) : undefined;
    }

    default:
      return undefined;
  }
};
/** Return the nearest strict ancestor of `node` whose type equals `type`, or undefined if there is none. */
const findAncestorByType = (node: SyntaxNode, type: string): SyntaxNode | undefined => {
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    if (ancestor.type === type) return ancestor;
  }
  return undefined;
};

/**
 * Kotlin pattern bindings.
 *
 * - `type_test` (the `is Type` check of a when/is smart cast): binds the subject of the enclosing
 *   `when_expression` to the tested type.
 * - `equality_expression` of the form `x != null`: when `x` has a type in `scopeEnv` and the text
 *   of its recorded declaration type contains `?` or `null`, returns that type together with a
 *   narrowing range covering the first `control_structure_body` of the enclosing `if_expression`.
 *
 * Returns undefined whenever any of these conditions is not met.
 */
const extractKotlinPatternBinding: PatternBindingExtractor = (node, scopeEnv, declarationTypeNodes, scope) => {
  switch (node.type) {
    case 'type_test': {
      const testedType = node.lastNamedChild;
      if (!testedType) return undefined;
      const typeName = extractSimpleTypeName(testedType);
      if (!typeName) return undefined;

      const enclosingWhen = findAncestorByType(node, 'when_expression');
      if (!enclosingWhen) return undefined;
      // The first named child of the when_expression is the subject (possibly a wrapper node).
      const subjectWrapper = enclosingWhen.namedChild(0);
      const subject = subjectWrapper?.firstNamedChild ?? subjectWrapper;
      if (!subject) return undefined;

      const varName = extractVarName(subject);
      return varName ? { varName, typeName } : undefined;
    }

    case 'equality_expression': {
      // Kotlin AST: equality_expression > simple_identifier, "!=" [anon], "null" [anon].
      // `null` is an anonymous node in tree-sitter-kotlin (not `null_literal`), so the
      // children are inspected positionally by their text.
      let sawNotEquals = false;
      let sawNull = false;
      let identifier: SyntaxNode | undefined;
      for (const part of node.children) {
        if (part.type === 'simple_identifier') identifier = part;
        if (part.isNamed) continue;
        if (part.text === '!=') sawNotEquals = true;
        else if (part.text === 'null') sawNull = true;
      }
      if (!sawNotEquals || !identifier || !sawNull) return undefined;

      const varName = identifier.text;
      const resolvedType = scopeEnv.get(varName);
      if (!resolvedType) return undefined;

      // Only narrow variables whose declared type was nullable.
      const declaredType = declarationTypeNodes.get(`${scope}\0${varName}`);
      if (!declaredType) return undefined;
      const declaredText = declaredType.text;
      const looksNullable = declaredText.includes('?') || declaredText.includes('null');
      if (!looksNullable) return undefined;

      // The consequence is the first control_structure_body child of the enclosing if.
      const enclosingIf = findAncestorByType(node, 'if_expression');
      if (!enclosingIf) return undefined;
      const consequence = enclosingIf.children.find(c => c?.type === 'control_structure_body');
      if (!consequence) return undefined;

      return {
        varName,
        typeName: resolvedType,
        narrowingRange: { startIndex: consequence.startIndex, endIndex: consequence.endIndex },
      };
    }

    default:
      return undefined;
  }
};
/**
 * Type-extraction configuration for Kotlin: wires the Kotlin-specific node-type sets and
 * extractor functions into the language-agnostic LanguageTypeConfig contract.
 */
export const kotlinTypeConfig: LanguageTypeConfig = {
  // Node types that trigger each extractor.
  declarationNodeTypes: KOTLIN_DECLARATION_NODE_TYPES,
  forLoopNodeTypes: KOTLIN_FOR_LOOP_NODE_TYPES,
  patternBindingNodeTypes: new Set(['type_test', 'equality_expression']),

  // Pattern bindings (smart casts, null checks) may replace an existing binding.
  allowPatternBindingOverwrite: true,

  // Declarations, parameters and initializers.
  extractDeclaration: extractKotlinDeclaration,
  extractParameter: extractKotlinParameter,
  extractInitializer: extractKotlinInitializer,
  scanConstructorBinding: scanKotlinConstructorBinding,

  // Loops, deferred assignments and pattern bindings.
  extractForLoopBinding: extractKotlinForLoopBinding,
  extractPendingAssignment: extractKotlinPendingAssignment,
  extractPatternBinding: extractKotlinPatternBinding,

  // Literal and constructor inference.
  inferLiteralType: inferJvmLiteralType,
  detectConstructorType: detectKotlinConstructorType,
};
================================================
FILE: gitnexus/src/core/ingestion/type-extractors/php.ts
================================================
import type { SyntaxNode } from '../utils.js';
import type { LanguageTypeConfig, ParameterExtractor, TypeBindingExtractor, InitializerExtractor, ClassNameLookup, ConstructorBindingScanner, ReturnTypeExtractor, PendingAssignmentExtractor, ForLoopExtractor } from './types.js';
import { extractSimpleTypeName, extractVarName, extractCalleeName, resolveIterableElementType, extractElementTypeFromString } from './shared.js';

/** PHP node types that are handed to the declaration extractor. */
const DECLARATION_NODE_TYPES: ReadonlySet<string> = new Set([
  'assignment_expression', // constructor inference: $x = new User()
  'property_declaration',  // PHP 7.4+ typed properties: private UserRepo $repo;
  'method_declaration',    // PHPDoc @param on class methods
  'function_definition',   // PHPDoc @param on top-level functions
]);

/**
 * Find the closest `class_declaration` strictly above `node`.
 * Returns null when the node is not nested inside a class declaration.
 */
const findEnclosingClass = (node: SyntaxNode): SyntaxNode | null => {
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    if (ancestor.type === 'class_declaration') return ancestor;
  }
  return null;
};
/**
 * Resolve PHP self/static/parent to the actual class name.
 * - self/static → name of the enclosing class
 * - parent      → superclass named in the enclosing class's base_clause
 *
 * Returns undefined for any other keyword, when `node` is not inside a class declaration,
 * or when the class has no usable name / base clause.
 */
const resolvePhpKeyword = (keyword: string, node: SyntaxNode): string | undefined => {
  if (keyword !== 'self' && keyword !== 'static' && keyword !== 'parent') return undefined;

  const cls = findEnclosingClass(node);
  if (!cls) return undefined;

  if (keyword !== 'parent') return cls.childForFieldName('name')?.text;

  // parent: the base_clause child holds the superclass name.
  for (let i = 0; i < cls.namedChildCount; i++) {
    const child = cls.namedChild(i);
    if (child?.type !== 'base_clause') continue;
    const parentName = child.firstNamedChild;
    if (parentName) return extractSimpleTypeName(parentName);
  }
  return undefined;
};

/**
 * Reduce a PHPDoc / PHP type string to the single simple type name stored in the scope env.
 *
 *   ?User            → User      (nullable prefix dropped)
 *   User[]           → User      (one array suffix dropped)
 *   User|null        → User      (null / false / void / mixed union members dropped)
 *   User[]|null      → User      (array suffix dropped from the surviving member)
 *   \App\Models\User → User      (namespace dropped)
 *   Collection<User> → result of extractElementTypeFromString on the generic
 *
 * Returns undefined for unions with more than one remaining member, for uninformative types
 * (mixed, void, self, static, object) and for anything that is not a plain identifier afterwards.
 *
 * At most ONE `[]` is removed, so nested arrays such as `User[][]` stay unresolved. Previously the
 * suffix was only removed from the end of the whole string, so `User[]|null` was rejected while
 * the equivalent `null|User[]` resolved to `User`.
 */
const normalizePhpType = (raw: string): string | undefined => {
  // Strip nullable prefix: ?User → User
  const nonNullable = raw.startsWith('?') ? raw.slice(1) : raw;

  // Strip a trailing array suffix from the whole string: User[] → User
  const trimmed = nonNullable.replace(/\[\]$/, '');
  const strippedAtEnd = trimmed !== nonNullable;

  // Drop uninformative union members: User|null → User
  const members = trimmed.split('|').filter(m => m !== 'null' && m !== 'false' && m !== 'void' && m !== 'mixed');
  if (members.length !== 1) return undefined;

  // If no suffix was removed above, the surviving member may still carry one (User[]|null).
  const survivor = strippedAtEnd ? members[0] : members[0].replace(/\[\]$/, '');

  // Strip namespace: \App\Models\User → User
  const segments = survivor.split('\\');
  const type = segments[segments.length - 1];

  // Skip uninformative types
  if (type === 'mixed' || type === 'void' || type === 'self' || type === 'static' || type === 'object') return undefined;

  // Generic: Collection<User> → User. PHP's direct scopeEnv lookup treats the stored value as
  // the element type, so the inner type is stored rather than the container name.
  if (/^\w+\s*</.test(type)) return extractElementTypeFromString(type) ?? undefined;

  return /^\w+$/.test(type) ? type : undefined;
};
/**
 * Node types that are stepped over when walking backwards from a declaration to its doc-comment.
 * PHP 8+ attributes (#[Route(...)]) appear as named siblings between the PHPDoc and the member.
 */
const SKIP_NODE_TYPES: ReadonlySet<string> = new Set(['attribute_list', 'attribute']);

/** PHPDoc `@param Type $name` (standard order): group 1 = type, group 2 = name without `$`. */
const PHPDOC_PARAM_RE = /@param\s+(\S+)\s+\$(\w+)/g;

/** PHPDoc `@param $name Type` (name first): group 1 = name without `$`, group 2 = type. */
const PHPDOC_PARAM_ALT_RE = /@param\s+\$(\w+)\s+(\S+)/g;

/** PHPDoc `@var Type`: group 1 = type. */
const PHPDOC_VAR_RE = /@var\s+(\S+)/;

/**
 * Determine the element type of a class property.
 *
 * 1. Walk backwards over the preceding siblings of `propDecl`. The first comment containing a
 *    `@var` tag wins and its type is passed through normalizePhpType (e.g. User[] → User).
 *    Unnamed nodes and the node types in SKIP_NODE_TYPES are stepped over; any other named
 *    node ends the walk.
 * 2. Otherwise fall back to the property's native `type` field, ignoring a bare `array`
 *    because its element type is unknown.
 *
 * Returns undefined when neither source yields a type.
 */
const extractClassPropertyElementType = (propDecl: SyntaxNode): string | undefined => {
  // Strategy 1: PHPDoc @var on a preceding comment sibling
  for (let prev = propDecl.previousSibling; prev; prev = prev.previousSibling) {
    if (prev.type === 'comment') {
      const varTag = PHPDOC_VAR_RE.exec(prev.text);
      if (varTag) return normalizePhpType(varTag[1]);
      continue;
    }
    if (prev.isNamed && !SKIP_NODE_TYPES.has(prev.type)) break;
  }

  // Strategy 2: PHP 7.4+ native type field
  const nativeType = propDecl.childForFieldName('type');
  if (!nativeType) return undefined;
  const nativeName = extractSimpleTypeName(nativeType);
  return nativeName && nativeName !== 'array' ? nativeName : undefined;
};
/**
 * Look up a property by name inside a class and return its element type.
 *
 * The class body is the node in the `body` field of `classNode`; if that field is absent, the
 * last named child is used, but only when it is a `declaration_list`. Each
 * `property_declaration` in the body is searched for a `property_element` whose first named
 * child (the variable_name) reads `$<propName>`; the first hit is handed to
 * extractClassPropertyElementType and its result is returned as-is.
 *
 * Used as Strategy C in extractForLoopBinding for `$this->property` iterables.
 *
 * @param propName  property name without the leading `$`
 * @param classNode the class_declaration node to search
 */
const findClassPropertyElementType = (propName: string, classNode: SyntaxNode): string | undefined => {
  let classBody = classNode.childForFieldName('body');
  if (!classBody) {
    // Fallback: last named child, only if it is a declaration_list.
    const lastChild = classNode.namedChild(classNode.namedChildCount - 1);
    classBody = lastChild?.type === 'declaration_list' ? lastChild : null;
  }
  if (!classBody) return undefined;

  const wantedName = '$' + propName;
  for (let i = 0; i < classBody.namedChildCount; i++) {
    const member = classBody.namedChild(i);
    if (member?.type !== 'property_declaration') continue;

    for (let j = 0; j < member.namedChildCount; j++) {
      const element = member.namedChild(j);
      if (element?.type !== 'property_element') continue;
      if (element.firstNamedChild?.text === wantedName) {
        return extractClassPropertyElementType(member);
      }
    }
  }
  return undefined;
};
/**
 * Gather PHPDoc `@param` bindings from the comments that precede a method or function.
 *
 * Preceding siblings are walked backwards: comment texts are collected, unnamed nodes and the
 * node types in SKIP_NODE_TYPES are stepped over, and any other named node ends the walk. The
 * collected comments are joined in source order and scanned twice: first for the standard
 * `@param Type $name` order, then for the alternate `@param $name Type` order, which never
 * overrides an entry found by the first scan. Types that normalizePhpType rejects are skipped.
 *
 * @returns map of `$name` (with the `$` prefix, as PHP variables appear in the env) → type name
 */
const collectPhpDocParams = (methodNode: SyntaxNode): Map<string, string> => {
  const bindings = new Map<string, string>();

  // Collected nearest-first, then reversed into source order.
  const docTexts: string[] = [];
  for (let prev = methodNode.previousSibling; prev; prev = prev.previousSibling) {
    if (prev.type === 'comment') {
      docTexts.push(prev.text);
    } else if (prev.isNamed && !SKIP_NODE_TYPES.has(prev.type)) {
      break;
    }
  }
  if (docTexts.length === 0) return bindings;
  const docBlock = docTexts.reverse().join('\n');

  // Standard order: @param Type $name
  PHPDOC_PARAM_RE.lastIndex = 0;
  for (const hit of docBlock.matchAll(PHPDOC_PARAM_RE)) {
    const typeName = normalizePhpType(hit[1]);
    if (typeName) bindings.set('$' + hit[2], typeName);
  }

  // Alternate order: @param $name Type — the standard format takes priority.
  PHPDOC_PARAM_ALT_RE.lastIndex = 0;
  for (const hit of docBlock.matchAll(PHPDOC_PARAM_ALT_RE)) {
    const key = '$' + hit[1];
    if (bindings.has(key)) continue;
    const typeName = normalizePhpType(hit[2]);
    if (typeName) bindings.set(key, typeName);
  }

  return bindings;
};
/**
 * PHP declaration extractor.
 *
 * - method_declaration / function_definition: pre-populates `env` with the PHPDoc `@param`
 *   types found by collectPhpDocParams, never replacing an entry that already exists.
 * - property_declaration (PHP 7.4+ typed properties, e.g. `private UserRepo $repo;`): binds the
 *   first property_element's variable to the declared type.
 *
 * Every other node type is ignored.
 */
const extractDeclaration: TypeBindingExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  const isCallable = node.type === 'method_declaration' || node.type === 'function_definition';
  if (isCallable) {
    collectPhpDocParams(node).forEach((typeName, paramName) => {
      if (!env.has(paramName)) env.set(paramName, typeName);
    });
    return;
  }
  if (node.type !== 'property_declaration') return;

  const declaredType = node.childForFieldName('type');
  if (!declaredType) return;
  const typeName = extractSimpleTypeName(declaredType);
  if (!typeName) return;

  // The variable name lives in property_element > variable_name; only the first element is used.
  for (let i = 0; i < node.namedChildCount; i++) {
    const element = node.namedChild(i);
    if (element?.type !== 'property_element') continue;
    const variable = element.firstNamedChild;
    if (variable) {
      const varName = extractVarName(variable);
      if (varName) env.set(varName, typeName);
    }
    break;
  }
};

/**
 * PHP: `$x = new User()` — infer the variable's type from an object_creation_expression.
 *
 * The class name is the first named child of the creation expression. The keywords
 * self / static / parent are resolved to real class names through resolvePhpKeyword; when
 * that fails, nothing is bound.
 */
const extractInitializer: InitializerExtractor = (node: SyntaxNode, env: Map<string, string>, _classNames: ClassNameLookup): void => {
  if (node.type !== 'assignment_expression') return;

  const target = node.childForFieldName('left');
  const value = node.childForFieldName('right');
  if (!target || !value || value.type !== 'object_creation_expression') return;

  // tree-sitter-php uses 'name' or 'qualified_name' nodes for the instantiated class.
  const classRef = value.firstNamedChild;
  if (!classRef) return;
  const writtenName = extractSimpleTypeName(classRef);
  if (!writtenName) return;

  const isKeyword = writtenName === 'self' || writtenName === 'static' || writtenName === 'parent';
  const resolvedType = isKeyword ? resolvePhpKeyword(writtenName, node) : writtenName;
  if (!resolvedType) return;

  const varName = extractVarName(target);
  if (varName) env.set(varName, resolvedType);
};
/**
 * PHP parameter extractor: `Type $name`.
 *
 * The type always comes from the `type` field. The name comes from the `name` field; for nodes
 * other than `simple_parameter` the `pattern` field is tried as well. An existing entry in
 * `env` is never overwritten, so a PHPDoc-derived type (e.g. `@param User[] $users` → User)
 * wins over the less specific native annotation (e.g. `array`).
 */
const extractParameter: ParameterExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  const typeNode: SyntaxNode | null = node.childForFieldName('type');
  let nameNode: SyntaxNode | null = node.childForFieldName('name');
  if (!nameNode && node.type !== 'simple_parameter') {
    nameNode = node.childForFieldName('pattern');
  }
  if (!nameNode || !typeNode) return;

  const varName = extractVarName(nameNode);
  if (!varName || env.has(varName)) return;

  const typeName = extractSimpleTypeName(typeNode);
  if (typeName) env.set(varName, typeName);
};

/**
 * PHP: `$x = someFactory()` or `$x = $this->getUser()` — describe a variable that is bound to
 * the return value of a call.
 *
 * Only assignments whose left side is a `variable_name` are considered.
 * - function_call_expression → `{ varName, calleeName }`
 * - member_call_expression   → `{ varName, calleeName, receiverClassName }`, where
 *   `receiverClassName` is the enclosing class name when the receiver text is `$this`, `self`
 *   or `static`, and undefined otherwise.
 *
 * `new User()` (object_creation_expression) and every other right-hand side yield undefined;
 * constructor calls are handled by extractInitializer.
 */
const scanConstructorBinding: ConstructorBindingScanner = (node) => {
  if (node.type !== 'assignment_expression') return undefined;

  const target = node.childForFieldName('left');
  const value = node.childForFieldName('right');
  if (!target || !value) return undefined;
  if (target.type !== 'variable_name') return undefined;

  switch (value.type) {
    case 'function_call_expression': {
      const calleeName = extractCalleeName(value);
      return calleeName ? { varName: target.text, calleeName } : undefined;
    }

    case 'member_call_expression': {
      const method = value.childForFieldName('name');
      if (!method) return undefined;

      // Qualify with the enclosing class when the receiver is $this/self/static.
      const receiverText = value.childForFieldName('object')?.text;
      let receiverClassName: string | undefined;
      if (receiverText === '$this' || receiverText === 'self' || receiverText === 'static') {
        const className = findEnclosingClass(node)?.childForFieldName('name');
        if (className) receiverClassName = className.text;
      }
      return { varName: target.text, calleeName: method.text, receiverClassName };
    }

    default:
      return undefined;
  }
};
/** Regex to extract PHPDoc @return annotations: `@return User` (group 1 = type). */
const PHPDOC_RETURN_RE = /@return\s+(\S+)/;

/**
 * Normalize a PHPDoc return type for storage in the SymbolTable.
 *
 * Unlike normalizePhpType, array and generic notation is preserved so the element type can be
 * extracted later for for-loop resolution:
 *   \App\Models\User[]            → User[]
 *   ?User                         → User
 *   User[]|null                   → User[]
 *   Collection<User>              → Collection<User>
 *   Collection<\App\Models\User>  → Collection<User>
 *
 * Namespace qualifiers are removed from every identifier in the type, not just from the front.
 * Previously only the text after the last backslash was kept, which cut a generic with a
 * namespaced argument down to `User>` and caused it to be rejected.
 *
 * Returns undefined for unions with more than one informative member, for uninformative types
 * (mixed, void, self, static, object, array) and for anything that is neither `Name`, `Name[]`
 * nor a generic starting with `Name<`.
 */
const normalizePhpReturnType = (raw: string): string | undefined => {
  // Strip nullable prefix: ?User[] → User[]
  const nonNullable = raw.startsWith('?') ? raw.slice(1) : raw;

  // Strip union with null/false/void/mixed: User[]|null → User[]
  const members = nonNullable.split('|').filter(m => m !== 'null' && m !== 'false' && m !== 'void' && m !== 'mixed');
  if (members.length !== 1) return undefined;

  // Strip namespace qualifiers per identifier: \App\Models\User[] → User[]
  const type = members[0].replace(/\\?(?:\w+\\)*(\w+)/g, '$1');

  // Skip uninformative types
  if (type === 'mixed' || type === 'void' || type === 'self' || type === 'static' || type === 'object' || type === 'array') return undefined;

  const isNameOrArray = /^\w+(\[\])?$/.test(type);
  const isGeneric = /^\w+\s*</.test(type);
  return isNameOrArray || isGeneric ? type : undefined;
};
/**
 * Extract the return type from a PHPDoc `@return Type` annotation preceding `node`.
 *
 * Preceding siblings are walked backwards: the first comment that contains an `@return` tag
 * decides the result (its type passed through normalizePhpReturnType, which keeps array
 * notation such as `User[]`). Unnamed nodes and the node types in SKIP_NODE_TYPES are stepped
 * over; any other named node ends the walk.
 */
const extractReturnType: ReturnTypeExtractor = (node) => {
  for (let prev = node.previousSibling; prev; prev = prev.previousSibling) {
    if (prev.type === 'comment') {
      const returnTag = PHPDOC_RETURN_RE.exec(prev.text);
      if (returnTag) return normalizePhpReturnType(returnTag[1]);
      continue;
    }
    if (prev.isNamed && !SKIP_NODE_TYPES.has(prev.type)) break;
  }
  return undefined;
};

/**
 * PHP: `$alias = $user` — describe an assignment to a variable that has no type in `scopeEnv`.
 *
 * Names keep their `$` prefix in lhs/rhs/receiver, exactly as the node text provides them.
 * Recognised right-hand sides:
 *   variable_name                         → copy
 *   member_access_expression ($a->field)  → fieldAccess
 *   function_call_expression (bare name)  → callResult
 *   member_call_expression ($a->method()) → methodCallResult
 * For the member forms the receiver must itself be a `variable_name`.
 *
 * Returns undefined for non-assignments, non-variable targets, targets already in `scopeEnv`,
 * and unrecognised right-hand sides.
 */
const extractPendingAssignment: PendingAssignmentExtractor = (node, scopeEnv) => {
  if (node.type !== 'assignment_expression') return undefined;

  const target = node.childForFieldName('left');
  const value = node.childForFieldName('right');
  if (!target || !value || target.type !== 'variable_name') return undefined;

  const lhs = target.text;
  if (!lhs || scopeEnv.has(lhs)) return undefined;

  switch (value.type) {
    case 'variable_name': {
      const rhs = value.text;
      if (rhs) return { kind: 'copy', lhs, rhs };
      break;
    }

    case 'member_access_expression': {
      const receiver = value.childForFieldName('object');
      const field = value.childForFieldName('name');
      if (receiver?.type === 'variable_name' && field) {
        return { kind: 'fieldAccess', lhs, receiver: receiver.text, field: field.text };
      }
      break;
    }

    case 'function_call_expression': {
      // Bare function calls only.
      const callee = value.childForFieldName('function');
      if (callee?.type === 'name') return { kind: 'callResult', lhs, callee: callee.text };
      break;
    }

    case 'member_call_expression': {
      const receiver = value.childForFieldName('object');
      const method = value.childForFieldName('name');
      if (receiver?.type === 'variable_name' && method) {
        return { kind: 'methodCallResult', lhs, receiver: receiver.text, method: method.text };
      }
      break;
    }
  }
  return undefined;
};
/** PHP node types that are handed to the for-loop extractor. */
const FOR_LOOP_NODE_TYPES: ReadonlySet<string> = new Set([
  'foreach_statement',
]);

/**
 * Extract an element type from a PHP type annotation AST node.
 *
 * Always returns undefined: PHP AST type nodes carry no generic parameters (`array<User>` exists
 * only in PHPDoc), so neither a primitive_type `array` nor a named_type such as `Collection`
 * encodes an element type. The function exists to satisfy the callback signature expected by
 * resolveIterableElementType.
 */
const extractPhpElementTypeFromTypeNode = (_typeNode: SyntaxNode): string | undefined => undefined;

/**
 * Look for `iterableName` among the parameters of the function or method enclosing a foreach.
 *
 * Climbs from `startNode` to the nearest `method_declaration` / `function_definition`, then scans
 * its `parameters` for a `simple_parameter` with that name and a `type` field, and passes the
 * type node to extractPhpElementTypeFromTypeNode. Because that helper currently always returns
 * undefined, so does this function.
 */
const findPhpParamElementType = (iterableName: string, startNode: SyntaxNode): string | undefined => {
  // Nearest enclosing callable.
  let callable: SyntaxNode | null = startNode.parent;
  while (callable && callable.type !== 'method_declaration' && callable.type !== 'function_definition') {
    callable = callable.parent;
  }
  if (!callable) return undefined;

  const paramList = callable.childForFieldName('parameters');
  if (!paramList) return undefined;

  for (let idx = 0; idx < paramList.namedChildCount; idx++) {
    const candidate = paramList.namedChild(idx);
    if (candidate?.type !== 'simple_parameter') continue;
    if (candidate.childForFieldName('name')?.text !== iterableName) continue;
    const declaredType = candidate.childForFieldName('type');
    if (declaredType) return extractPhpElementTypeFromTypeNode(declaredType);
  }
  return undefined;
};
/**
 * PHP: `foreach ($users as $user)` — bind the loop variable to a type in `scopeEnv`.
 *
 * AST structure (tree-sitter-php): foreach_statement has no named fields for the iterable or
 * the value (only `body`), so the non-body named children are taken positionally:
 *   [0] the iterable expression (e.g. $users)
 *   [1] the value expression, or a `pair` ($key => $value) whose last named child is the value
 * A `by_ref` wrapper (&$v) around the value is unwrapped.
 *
 * Resolution order:
 *   - Call iterables (`getUsers()`, `$this->getUsers()`): element type extracted from the raw
 *     return type found through `returnTypeLookup`.
 *   - Strategy A: resolveIterableElementType (constructor-binding cases that keep container types).
 *   - Strategy B: the iterable's own scopeEnv entry, because PHPDoc normalization stores
 *     `User[]` as `User`, i.e. the stored type already is the element type.
 *   - Strategy C: for `$this->property`, the property declaration in the enclosing class body
 *     (@var PHPDoc or native type).
 *
 * Nothing is written when no strategy yields a type.
 */
const extractForLoopBinding: ForLoopExtractor = (node, { scopeEnv, declarationTypeNodes, scope, returnTypeLookup }): void => {
  if (node.type !== 'foreach_statement') return;

  // Non-body named children: first the iterable, then the value or pair.
  const bodyNode = node.childForFieldName('body');
  const operands: SyntaxNode[] = [];
  for (let i = 0; i < node.namedChildCount; i++) {
    const child = node.namedChild(i);
    if (child && child !== bodyNode) operands.push(child);
  }
  if (operands.length < 2) return;
  const iterableNode = operands[0];
  const valueOrPair = operands[1];

  // $key => $value: the value is the last named child of the pair.
  const valueNode = valueOrPair.type === 'pair'
    ? valueOrPair.namedChild(valueOrPair.namedChildCount - 1)
    : valueOrPair;
  if (!valueNode) return;
  // &$v: unwrap the reference.
  const loopVarNode = valueNode.type === 'by_ref' ? (valueNode.firstNamedChild ?? valueNode) : valueNode;

  const varName = extractVarName(loopVarNode);
  if (!varName) return;

  // Classify the iterable (PHP variable names include the $ prefix).
  let iterableName: string | undefined;
  let callExprElementType: string | undefined;
  switch (iterableNode.type) {
    case 'variable_name':
      iterableName = iterableNode.text;
      break;

    case 'member_access_expression': {
      // Properties are stored in scopeEnv with a $ prefix ($users), but the member name
      // comes without it (users) — add the $ so the lookup matches.
      const member = iterableNode.childForFieldName('name');
      if (member) iterableName = '$' + member.text;
      break;
    }

    case 'function_call_expression': {
      // foreach (getUsers() as $user)
      const calleeName = extractCalleeName(iterableNode);
      if (calleeName) {
        const rawReturn = returnTypeLookup.lookupRawReturnType(calleeName);
        if (rawReturn) callExprElementType = extractElementTypeFromString(rawReturn);
      }
      break;
    }

    case 'member_call_expression': {
      // foreach ($this->getUsers() as $user)
      const method = iterableNode.childForFieldName('name');
      if (method) {
        const rawReturn = returnTypeLookup.lookupRawReturnType(method.text);
        if (rawReturn) callExprElementType = extractElementTypeFromString(rawReturn);
      }
      break;
    }
  }

  // A call expression already yielded the element type.
  if (callExprElementType) {
    scopeEnv.set(varName, callExprElementType);
    return;
  }
  if (!iterableName) return;

  // Strategy A: container types retained by constructor bindings.
  const fromContainer = resolveIterableElementType(
    iterableName,
    node,
    scopeEnv,
    declarationTypeNodes,
    scope,
    extractPhpElementTypeFromTypeNode,
    findPhpParamElementType,
    undefined,
  );
  if (fromContainer) {
    scopeEnv.set(varName, fromContainer);
    return;
  }

  // Strategy B: the stored type of the iterable is already the element type.
  const storedType = scopeEnv.get(iterableName);
  if (storedType) {
    scopeEnv.set(varName, storedType);
    return;
  }

  // Strategy C: $this->property — the type is declared on the class body
  // (/** @var User[] */ private $users), not in the method's scopeEnv.
  if (iterableNode.type !== 'member_access_expression') return;
  if (iterableNode.childForFieldName('object')?.text !== '$this') return;
  const propName = iterableNode.childForFieldName('name')?.text;
  if (!propName) return;
  const classNode = findEnclosingClass(iterableNode);
  if (!classNode) return;
  const fromProperty = findClassPropertyElementType(propName, classNode);
  if (fromProperty) scopeEnv.set(varName, fromProperty);
};
type is already the element type from PHPDoc annotations. const iterableType = scopeEnv.get(iterableName); if (iterableType) { scopeEnv.set(varName, iterableType); return; } // Strategy C: $this->property — scan the enclosing class body for the property // declaration and extract its element type from @var PHPDoc or native type. // This handles the common PHP pattern where the property type is declared on the // class body (/** @var User[] */ private $users) but the foreach is in a method // whose scopeEnv does not contain the property type. if (iterableNode?.type === 'member_access_expression') { const obj = iterableNode.childForFieldName('object'); if (obj?.text === '$this') { const nameNode = iterableNode.childForFieldName('name'); const propName = nameNode?.text; if (propName) { const classNode = findEnclosingClass(iterableNode); if (classNode) { const elementType = findClassPropertyElementType(propName, classNode); if (elementType) scopeEnv.set(varName, elementType); } } } } }; export const typeConfig: LanguageTypeConfig = { declarationNodeTypes: DECLARATION_NODE_TYPES, forLoopNodeTypes: FOR_LOOP_NODE_TYPES, extractDeclaration, extractParameter, extractInitializer, scanConstructorBinding, extractReturnType, extractForLoopBinding, extractPendingAssignment, }; ================================================ FILE: gitnexus/src/core/ingestion/type-extractors/python.ts ================================================ import type { SyntaxNode } from '../utils.js'; import type { LanguageTypeConfig, ParameterExtractor, TypeBindingExtractor, InitializerExtractor, ClassNameLookup, ConstructorBindingScanner, PendingAssignmentExtractor, PatternBindingExtractor, ForLoopExtractor } from './types.js'; import { extractSimpleTypeName, extractVarName, extractElementTypeFromString, extractGenericTypeArgs, resolveIterableElementType, methodToTypeArgPosition, type TypeArgPosition } from './shared.js'; const DECLARATION_NODE_TYPES: ReadonlySet<string> = new Set([ 'assignment', 
'named_expression', 'expression_statement', ]); /** Python: x: Foo = ... (PEP 484 annotated assignment) or x: Foo (standalone annotation). * * tree-sitter-python grammar produces two distinct shapes: * * 1. Annotated assignment with value: `name: str = ""` * Node type: `assignment` * Fields: left=identifier, type=identifier/type, right=value * * 2. Standalone annotation (no value): `name: str` * Node type: `expression_statement` * Child: `type` node with fields name=identifier, type=identifier/type * * Both appear at file scope and inside class bodies (PEP 526 class variable annotations). */ const extractDeclaration: TypeBindingExtractor = (node: SyntaxNode, env: Map<string, string>): void => { if (node.type === 'expression_statement') { // Standalone annotation: expression_statement > type { name: identifier, type: identifier } const typeChild = node.firstNamedChild; if (!typeChild || typeChild.type !== 'type') return; const nameNode = typeChild.childForFieldName('name'); const typeNode = typeChild.childForFieldName('type'); if (!nameNode || !typeNode) return; const varName = extractVarName(nameNode); const inner = typeNode.type === 'type' ? (typeNode.firstNamedChild ?? typeNode) : typeNode; const typeName = extractSimpleTypeName(inner) ?? inner.text; if (varName && typeName) env.set(varName, typeName); return; } // Annotated assignment: left : type = value const left = node.childForFieldName('left'); const typeNode = node.childForFieldName('type'); if (!left || !typeNode) return; const varName = extractVarName(left); // extractSimpleTypeName handles identifiers and qualified names. // Python 3.10+ union syntax `User | None` is parsed as binary_operator, // which extractSimpleTypeName doesn't handle. Fall back to raw text so // stripNullable can process it at lookup time (e.g., "User | None" → "User"). const inner = typeNode.type === 'type' ? (typeNode.firstNamedChild ?? typeNode) : typeNode; const typeName = extractSimpleTypeName(inner) ?? 
inner.text; if (varName && typeName) env.set(varName, typeName); }; /** Python: parameter with type annotation */ const extractParameter: ParameterExtractor = (node: SyntaxNode, env: Map<string, string>): void => { let nameNode: SyntaxNode | null = null; let typeNode: SyntaxNode | null = null; if (node.type === 'parameter') { nameNode = node.childForFieldName('name'); typeNode = node.childForFieldName('type'); } else { nameNode = node.childForFieldName('name') ?? node.childForFieldName('pattern'); typeNode = node.childForFieldName('type'); // Python typed_parameter: name is a positional child (identifier), not a named field if (!nameNode && node.type === 'typed_parameter') { nameNode = node.firstNamedChild?.type === 'identifier' ? node.firstNamedChild : null; } } if (!nameNode || !typeNode) return; const varName = extractVarName(nameNode); const typeName = extractSimpleTypeName(typeNode); if (varName && typeName) env.set(varName, typeName); }; /** Python: user = User("alice") — infer type from call when callee is a known class. * Python constructors are syntactically identical to function calls, so we verify * against classNames (which may include cross-file SymbolTable lookups). 
/**
 * Python: user = User("alice") — infer the variable's type from a call whose
 * callee is a known class. Python constructors are syntactically identical to
 * function calls, so the callee name is verified against `classNames`.
 * Also handles walrus operator: if (user := User("alice")):
 *
 * Does nothing for annotated assignments (extractDeclaration handles those)
 * or for variables that already have an entry in `env`.
 */
const extractInitializer: InitializerExtractor = (node: SyntaxNode, env: Map<string, string>, classNames: ClassNameLookup): void => {
  let target: SyntaxNode | null;
  let value: SyntaxNode | null;
  switch (node.type) {
    case 'named_expression':
      // Walrus operator: (user := User("alice"))
      // tree-sitter-python: named_expression has 'name' and 'value' fields
      target = node.childForFieldName('name');
      value = node.childForFieldName('value');
      break;
    case 'assignment':
      // Skip if already has type annotation — extractDeclaration handled it
      if (node.childForFieldName('type')) return;
      target = node.childForFieldName('left');
      value = node.childForFieldName('right');
      break;
    default:
      return;
  }
  if (!target || !value) return;

  const varName = extractVarName(target);
  if (!varName || env.has(varName)) return;
  if (value.type !== 'call') return;

  // Support both direct calls (User()) and qualified calls (models.User())
  // tree-sitter-python: direct → identifier, qualified → attribute
  const callee = value.childForFieldName('function');
  const calleeName = callee ? extractSimpleTypeName(callee) : undefined;
  if (calleeName && classNames.has(calleeName)) env.set(varName, calleeName);
};

/** Python: user = User("alice") — scan assignment/walrus for constructor-like calls.
 * Returns {varName, calleeName} without checking classNames (caller validates).
 */
/**
 * Python: user = User("alice") — scan an assignment or walrus expression for a
 * constructor-like call.
 * Returns {varName, calleeName} without checking classNames (caller validates),
 * or undefined when the node is annotated, the target is not a plain identifier,
 * or the right-hand side is not a call.
 */
const scanConstructorBinding: ConstructorBindingScanner = (node) => {
  const isWalrus = node.type === 'named_expression';
  if (!isWalrus && node.type !== 'assignment') return undefined;
  // Annotated assignments carry an explicit type; nothing to infer.
  if (!isWalrus && node.childForFieldName('type')) return undefined;

  const target = node.childForFieldName(isWalrus ? 'name' : 'left');
  const value = node.childForFieldName(isWalrus ? 'value' : 'right');
  if (!target || !value) return undefined;
  if (target.type !== 'identifier' || value.type !== 'call') return undefined;

  const callee = value.childForFieldName('function');
  if (!callee) return undefined;
  const calleeName = extractSimpleTypeName(callee);
  return calleeName ? { varName: target.text, calleeName } : undefined;
};

const FOR_LOOP_NODE_TYPES: ReadonlySet<string> = new Set([
  'for_statement',
]);

/** Python function/method node types that carry a parameters list. */
const PY_FUNCTION_NODE_TYPES = new Set([
  'function_definition',
  'decorated_definition',
]);

/**
 * Extract element type from a Python type annotation AST node.
 * Handles:
 *   subscript "List[User]" → extractElementTypeFromString("List[User]") → "User"
 *   generic_type → extractGenericTypeArgs, then a `type_parameter` child scan;
 *                  `pos` selects the first or the last type argument
 * Falls back to text-based extraction.
 */
const extractPyElementTypeFromAnnotation = (typeNode: SyntaxNode, pos: TypeArgPosition = 'last'): string | undefined => {
  // Unwrap 'type' wrapper node to get to the actual type (e.g., type > generic_type)
  const inner = typeNode.type === 'type' ? (typeNode.firstNamedChild ?? typeNode) : typeNode;
  const wantFirst = pos === 'first';

  // Python subscript: List[User], Sequence[User] — use raw text
  if (inner.type === 'subscript') {
    return extractElementTypeFromString(inner.text, pos);
  }

  // generic_type: dict[str, User] — tree-sitter-python uses type_parameter child
  if (inner.type === 'generic_type') {
    // Try standard extractGenericTypeArgs first (handles type_arguments)
    const typeArgs = extractGenericTypeArgs(inner);
    if (typeArgs.length >= 1) return wantFirst ? typeArgs[0] : typeArgs[typeArgs.length - 1];

    // Fallback: look for type_parameter child (tree-sitter-python specific)
    for (let idx = 0; idx < inner.namedChildCount; idx++) {
      const child = inner.namedChild(idx);
      if (child?.type !== 'type_parameter') continue;
      const picked = wantFirst ? child.firstNamedChild : child.lastNamedChild;
      if (picked) return extractSimpleTypeName(picked);
    }
  }

  // Fallback: raw text extraction (handles User[], [User], etc.)
  return extractElementTypeFromString(inner.text, pos);
};

/**
 * Walk up the AST from a for-statement to find the enclosing function definition,
 * then search its parameters for one named `iterableName`.
 * Returns the element type extracted from its type annotation, or undefined.
 *
 * Handles both `parameter` and `typed_parameter` node types in tree-sitter-python.
 * `typed_parameter` may not expose the name as a `name` field — falls back to
 * checking the first identifier-type named child.
 */
/**
 * Walk up the AST from `startNode` to the nearest enclosing `function_definition`,
 * then search its parameters for one named `iterableName`.
 *
 * The parameter name is read from the `name` field, or from the first named child
 * when that child is an identifier. The annotation is read from the `type` field.
 * For `typed_parameter` nodes only, the last named child is used as a positional
 * fallback when no `type` field is exposed. The fallback is deliberately not applied
 * to other parameter kinds: for an untyped `x=default`, the last named child is the
 * default value, not an annotation.
 *
 * @param iterableName name of the parameter being iterated
 * @param startNode    node inside the function (typically the for-statement)
 * @param pos          which type argument to extract from the annotation
 * @returns the element type from the parameter's annotation, or undefined when no
 *          enclosing function, matching parameter, or annotation is found
 */
const findPyParamElementType = (iterableName: string, startNode: SyntaxNode, pos: TypeArgPosition = 'last'): string | undefined => {
  // Only the nearest enclosing function is consulted.
  let current: SyntaxNode | null = startNode.parent;
  while (current && current.type !== 'function_definition') current = current.parent;
  const paramsNode = current?.childForFieldName('parameters');
  if (!paramsNode) return undefined;

  for (let i = 0; i < paramsNode.namedChildCount; i++) {
    const param = paramsNode.namedChild(i);
    if (!param) continue;

    // Try named `name` field first (parameter node), then first identifier child
    // (typed_parameter node may store name as first positional child)
    const nameNode = param.childForFieldName('name')
      ?? (param.firstNamedChild?.type === 'identifier' ? param.firstNamedChild : null);
    if (nameNode?.text !== iterableName) continue;

    // Try `type` field, then — for typed_parameter only — the last named child.
    const positionalType = param.type === 'typed_parameter' && param.namedChildCount >= 2
      ? param.namedChild(param.namedChildCount - 1)
      : null;
    const typeAnnotation = param.childForFieldName('type') ?? positionalType;
    if (typeAnnotation && typeAnnotation !== nameNode) {
      return extractPyElementTypeFromAnnotation(typeAnnotation, pos);
    }
  }
  return undefined;
};

/**
 * Extracts iterableName and methodName from a call expression like `data.items()`.
 * Returns undefined if the call's function is not an `attribute` whose first named
 * child is an identifier. `methodName` is left undefined when the attribute's last
 * named child is not a distinct identifier.
 */
const extractMethodCall = (callNode: SyntaxNode): { iterableName: string; methodName?: string } | undefined => {
  const fn = callNode.childForFieldName('function');
  if (fn?.type !== 'attribute') return undefined;
  const receiver = fn.firstNamedChild;
  if (receiver?.type !== 'identifier') return undefined;
  const member = fn.lastNamedChild;
  const methodName = (member?.type === 'identifier' && member !== receiver) ? member.text : undefined;
  return { iterableName: receiver.text, methodName };
};

/**
 * Collects all identifier nodes from a pattern, descending into nested tuple_patterns.
 * For `i, (k, v)` returns [i, k, v]. For `key, value` returns [key, value].
 */
/**
 * Collects all identifier nodes from a pattern, descending into nested tuple_patterns.
 * For `i, (k, v)` returns [i, k, v]. For `key, value` returns [key, value].
 * Named children of any other type are ignored.
 */
const collectPatternIdentifiers = (pattern: SyntaxNode): SyntaxNode[] => {
  const found: SyntaxNode[] = [];
  for (let idx = 0; idx < pattern.namedChildCount; idx++) {
    const child = pattern.namedChild(idx);
    if (!child) continue;
    if (child.type === 'identifier') {
      found.push(child);
    } else if (child.type === 'tuple_pattern') {
      found.push(...collectPatternIdentifiers(child));
    }
  }
  return found;
};

/**
 * Python: for user in users: where users has a known container type annotation.
 *
 * AST node: `for_statement` with `left` (loop variable) and `right` (iterable).
 *
 * Tier 1c: resolves the element type via three strategies in priority order:
 *   1. declarationTypeNodes — raw type annotation AST node (covers stored container types)
 *   2. scopeEnv string — extractElementTypeFromString on the stored type
 *   3. AST walk — walks up to the enclosing function's parameters to read List[User] directly
 *
 * A direct call iterable (`for user in get_users()`) is resolved from the callee's
 * raw return type instead.
 *
 * Also handles `enumerate(iterable)` — unwraps the outer call and skips the integer
 * index variable so the value variable still resolves to the element type. When the
 * loop target over `enumerate(...)` is a single name, nothing is bound: that name
 * receives the (index, element) tuple, not the element.
 */
const extractForLoopBinding: ForLoopExtractor = (node, { scopeEnv, declarationTypeNodes, scope, returnTypeLookup }): void => {
  if (node.type !== 'for_statement') return;

  const rightNode = node.childForFieldName('right');
  let iterableName: string | undefined;
  let methodName: string | undefined;
  let callExprElementType: string | undefined;
  let isEnumerate = false;

  // Extract iterable info from the `right` field — may be identifier, attribute, or call.
  if (rightNode?.type === 'identifier') {
    iterableName = rightNode.text;
  } else if (rightNode?.type === 'attribute') {
    // obj.users — look the iterable up by its property name.
    const prop = rightNode.lastNamedChild;
    if (prop) iterableName = prop.text;
  } else if (rightNode?.type === 'call') {
    const fn = rightNode.childForFieldName('function');
    if (fn?.type === 'identifier' && fn.text === 'enumerate') {
      // enumerate(iterable) or enumerate(d.items()) — unwrap to inner iterable.
      isEnumerate = true;
      const innerArg = rightNode.childForFieldName('arguments')?.firstNamedChild;
      if (innerArg?.type === 'identifier') {
        iterableName = innerArg.text;
      } else if (innerArg?.type === 'call') {
        const extracted = extractMethodCall(innerArg);
        if (extracted) ({ iterableName, methodName } = extracted);
      }
    } else if (fn?.type === 'attribute') {
      // data.items() → call > function: attribute > identifier('data') + identifier('items')
      const extracted = extractMethodCall(rightNode);
      if (extracted) ({ iterableName, methodName } = extracted);
    } else if (fn?.type === 'identifier') {
      // Direct function call: for user in get_users() (Phase 7.3 — return-type path)
      const rawReturn = returnTypeLookup.lookupRawReturnType(fn.text);
      if (rawReturn) callExprElementType = extractElementTypeFromString(rawReturn);
    }
  }

  // Prefer the call-derived element type; otherwise resolve through the iterable's name.
  let elementType: string | undefined = callExprElementType;
  if (!elementType) {
    if (!iterableName) return;
    const containerTypeName = scopeEnv.get(iterableName);
    const typeArgPos = methodToTypeArgPosition(methodName, containerTypeName);
    elementType = resolveIterableElementType(
      iterableName, node, scopeEnv, declarationTypeNodes, scope,
      extractPyElementTypeFromAnnotation, findPyParamElementType,
      typeArgPos,
    );
  }
  if (!elementType) return;

  // The loop variable is the `left` field — identifier or pattern_list.
  const leftNode = node.childForFieldName('left');
  if (!leftNode) return;

  if (leftNode.type === 'pattern_list' || leftNode.type === 'tuple_pattern') {
    // Tuple unpacking: `key, value` or `i, (k, v)` or `(k, v)` — bind the last identifier to element type.
    // With enumerate, skip binding if there's only one var (just the index, no value to bind).
    const vars = collectPatternIdentifiers(leftNode);
    if (vars.length > 0 && (!isEnumerate || vars.length > 1)) {
      scopeEnv.set(vars[vars.length - 1].text, elementType);
    }
    return;
  }

  // `for pair in enumerate(xs)`: `pair` is an (index, element) tuple, so binding it
  // to the element type would be wrong — mirror the single-variable guard above.
  if (isEnumerate) return;

  const loopVarName = extractVarName(leftNode);
  if (loopVarName) scopeEnv.set(loopVarName, elementType);
};

/** Python: alias = u → assignment with left/right fields.
 *  Also handles walrus operator: alias := u → named_expression with name/value fields.
 *
 *  Returns a pending-assignment record for an identifier target that is not yet in
 *  scopeEnv: `copy` (identifier RHS), `fieldAccess` (a.field), `callResult` (f()),
 *  or `methodCallResult` (a.m()). Returns undefined for every other shape.
 */
const extractPendingAssignment: PendingAssignmentExtractor = (node, scopeEnv) => {
  let left: SyntaxNode | null;
  let right: SyntaxNode | null;
  if (node.type === 'assignment') {
    left = node.childForFieldName('left');
    right = node.childForFieldName('right');
  } else if (node.type === 'named_expression') {
    left = node.childForFieldName('name');
    right = node.childForFieldName('value');
  } else {
    return undefined;
  }
  if (!left || !right) return undefined;

  const lhs = left.type === 'identifier' ? left.text : undefined;
  if (!lhs || scopeEnv.has(lhs)) return undefined;

  if (right.type === 'identifier') return { kind: 'copy', lhs, rhs: right.text };

  // attribute RHS → fieldAccess (a.field)
  if (right.type === 'attribute') {
    const obj = right.firstNamedChild;
    const field = right.lastNamedChild;
    if (obj?.type === 'identifier' && field?.type === 'identifier' && obj !== field) {
      return { kind: 'fieldAccess', lhs, receiver: obj.text, field: field.text };
    }
  }

  // call RHS
  if (right.type === 'call') {
    const funcNode = right.childForFieldName('function');
    if (funcNode?.type === 'identifier') {
      return { kind: 'callResult', lhs, callee: funcNode.text };
    }
    // method call with receiver: call → function: attribute
    if (funcNode?.type === 'attribute') {
      const obj = funcNode.firstNamedChild;
      const method = funcNode.lastNamedChild;
      if (obj?.type === 'identifier' && method?.type === 'identifier' && obj !== method) {
        return { kind: 'methodCallResult', lhs, receiver: obj.text, method: method.text };
      }
    }
  }
  return undefined;
};

/**
 * Python match/case `as` pattern binding: `case
 */
/**
 * Python match/case `as` pattern binding: `case User() as u:`
 *
 * Shape expected by this extractor (`as_pattern` with at least two named children):
 *   namedChild(0): case_pattern  ← wraps class_pattern (or is class_pattern directly)
 *     class_pattern
 *       dotted_name / identifier  ← the class name (e.g. "User")
 *   bound name: the `alias` field when the grammar exposes it, otherwise the last
 *               named child; it must be an `identifier` node (e.g. "u")
 *
 * Conservative: returns undefined when:
 * - The node is not an `as_pattern`
 * - The bound-name node is not an identifier
 * - The pattern side is not a class_pattern (e.g. guard or literal match)
 * - The variable was already bound in scopeEnv
 */
const extractPatternBinding: PatternBindingExtractor = (node, scopeEnv) => {
  if (node.type !== 'as_pattern' || node.namedChildCount < 2) return undefined;

  // Try named field first (future grammar versions may expose it), fall back to positional.
  const patternSide = node.namedChild(0);
  const nameNode = node.childForFieldName('alias') ?? node.namedChild(node.namedChildCount - 1);
  if (!patternSide || !nameNode) return undefined;
  if (nameNode.type !== 'identifier') return undefined;

  const varName = nameNode.text;
  if (!varName || scopeEnv.has(varName)) return undefined;

  // Locate the class_pattern: either the pattern itself, or the first class_pattern
  // one level down inside a case_pattern wrapper.
  const locateClassPattern = (candidate: SyntaxNode): SyntaxNode | null => {
    if (candidate.type === 'class_pattern') return candidate;
    if (candidate.type !== 'case_pattern') return null;
    for (let idx = 0; idx < candidate.namedChildCount; idx++) {
      const inner = candidate.namedChild(idx);
      if (inner?.type === 'class_pattern') return inner;
    }
    return null;
  };
  const classPattern = locateClassPattern(patternSide);
  if (!classPattern) return undefined;

  // class_pattern children: dotted_name (the class name) + optional keyword_pattern args.
  const classNameNode = classPattern.firstNamedChild;
  if (!classNameNode) return undefined;
  const isNameLike = classNameNode.type === 'dotted_name' || classNameNode.type === 'identifier';
  if (!isNameLike) return undefined;

  const typeName = classNameNode.text;
  return typeName ? { varName, typeName } : undefined;
};

const PATTERN_BINDING_NODE_TYPES: ReadonlySet<string> = new Set(['as_pattern']);

export const typeConfig: LanguageTypeConfig = {
  declarationNodeTypes: DECLARATION_NODE_TYPES,
  forLoopNodeTypes: FOR_LOOP_NODE_TYPES,
  patternBindingNodeTypes: PATTERN_BINDING_NODE_TYPES,
  extractDeclaration,
  extractParameter,
  extractInitializer,
  scanConstructorBinding,
  extractForLoopBinding,
  extractPendingAssignment,
  extractPatternBinding,
};

================================================
FILE: gitnexus/src/core/ingestion/type-extractors/ruby.ts
================================================
import type { LanguageTypeConfig, ParameterExtractor, TypeBindingExtractor, InitializerExtractor, ClassNameLookup, ConstructorBindingScanner, ReturnTypeExtractor, PendingAssignmentExtractor, ForLoopExtractor } from './types.js';
import { extractRubyConstructorAssignment, extractSimpleTypeName, extractElementTypeFromString, extractVarName, resolveIterableElementType } from './shared.js';
import type { SyntaxNode } from '../utils.js';

/**
 * Ruby type extractor — YARD annotation parsing.
 *
 * Ruby has no static type system, but the YARD documentation convention
 * provides de facto type annotations via comments:
 *
 *   # @param name [String] the user's name
 *   # @param repo [UserRepo] the repository
 *   # @return [User]
 *   def create(name, repo)
 *     repo.save
 *   end
 *
 * This extractor parses `@param name [Type]` patterns from comment nodes
 * preceding method definitions and binds parameter names to their types.
 */
/**
 * Resolution tiers:
 * - Tier 0: YARD @param annotations (extractDeclaration pre-populates env)
 * - Tier 1: Constructor inference via `user = User.new` (handled by scanConstructorBinding in typeConfig)
 */

/** Regex to extract @param annotations: `@param name [Type]` */
const YARD_PARAM_RE = /@param\s+(\w+)\s+\[([^\]]+)\]/g;
/** Alternate YARD order: `@param [Type] name` */
const YARD_PARAM_ALT_RE = /@param\s+\[([^\]]+)\]\s+(\w+)/g;
/** Regex to extract @return annotations: `@return [Type]` */
const YARD_RETURN_RE = /@return\s+\[([^\]]+)\]/;

/**
 * Extract the simple type name from a YARD type string.
 * Handles:
 * - Simple types: "String" → "String"
 * - Qualified types: "Models::User" → "User"
 * - Generic types: "Array<User>" → "Array", "Array<Models::User>" → "Array",
 *   "Array(String, Integer)" → "Array", "Hash{Symbol => User}" → "Hash"
 * - Nullable types: "String, nil" → "String"
 * - Union types: "String, Integer" → undefined (ambiguous)
 *
 * @param yardType raw text found between the brackets of a YARD tag
 * @returns the outermost simple type name, or undefined when the string is empty,
 *          an ambiguous union, or not a plain identifier
 */
const extractYardTypeName = (yardType: string): string | undefined => {
  const trimmed = yardType.trim();

  // Split on top-level commas only. Nesting is tracked for every bracket style the
  // generic forms use (<>, {}, ()), so commas inside Hash<Symbol, User>,
  // Array(String, Integer) or Hash{K, V => T} never split the type.
  const parts: string[] = [];
  let depth = 0;
  let start = 0;
  for (let i = 0; i < trimmed.length; i++) {
    const ch = trimmed[i];
    if (ch === '<' || ch === '{' || ch === '(') {
      depth++;
    } else if (ch === '>' || ch === '}' || ch === ')') {
      // The `>` of a hash arrow (`=>`) separates key and value; it closes nothing.
      const isHashArrow = ch === '>' && trimmed[i - 1] === '=';
      // Never go below zero: a stray closer must not hide later top-level commas.
      if (!isHashArrow && depth > 0) depth--;
    } else if (ch === ',' && depth === 0) {
      parts.push(trimmed.slice(start, i).trim());
      start = i + 1;
    }
  }
  parts.push(trimmed.slice(start).trim());

  // Handle nullable: "Type, nil" or "nil, Type"
  const candidates = parts.filter((p) => p !== '' && p !== 'nil');
  const [typePart] = candidates;
  if (candidates.length !== 1 || typePart === undefined) return undefined; // ambiguous union

  // Drop the generic suffix BEFORE resolving `::`, so qualified type arguments
  // ("Array<Models::User>") cannot be mistaken for the outer type's namespace.
  const bracketAt = typePart.search(/[<{(]/);
  const base = (bracketAt === -1 ? typePart : typePart.slice(0, bracketAt)).trim();

  // Handle qualified: "Models::User" → "User"
  const sep = base.lastIndexOf('::');
  const simple = sep === -1 ? base : base.slice(sep + 2);

  // Simple identifier check
  return /^\w+$/.test(simple) ? simple : undefined;
};

/**
 * Collect YARD @param annotations from comment nodes preceding a method
 */
/**
 * Collect YARD @param annotations from comment nodes preceding a method definition.
 * Returns a map of paramName → typeName (simplified via extractYardTypeName).
 *
 * In tree-sitter-ruby, comments are sibling nodes that appear before the method node.
 * We walk backwards through preceding siblings collecting consecutive comment nodes.
 * Both `@param name [Type]` and `@param [Type] name` are recognised; the former wins
 * when both describe the same parameter.
 */

/**
 * True when no named node precedes `node` among its siblings, i.e. `node` is the
 * first statement of its parent. Only such a method can own comments that were
 * attached in front of the surrounding `body_statement`.
 */
const isFirstNamedSibling = (node: SyntaxNode): boolean => {
  for (let sib = node.previousSibling; sib; sib = sib.previousSibling) {
    if (sib.isNamed) return false;
  }
  return true;
};

const collectYardParams = (methodNode: SyntaxNode): Map<string, string> => {
  const params = new Map<string, string>();

  // In tree-sitter-ruby, YARD comments preceding a method inside a class body
  // are placed as children of the `class` node, NOT as siblings of the `method`
  // inside `body_statement`. The AST structure is:
  //
  //   class
  //     constant = "ClassName"
  //     comment = "# @param ..."     ← sibling of body_statement
  //     comment = "# @param ..."     ← sibling of body_statement
  //     body_statement
  //       method                     ← method is here, no preceding siblings
  //
  // For top-level methods (outside classes), comments ARE direct siblings.
  // We handle both by checking: if method has no preceding comment siblings,
  // look at parent (body_statement) siblings instead.
  const commentTexts: string[] = [];

  const collectComments = (startNode: SyntaxNode): void => {
    let sibling = startNode.previousSibling;
    while (sibling) {
      if (sibling.type === 'comment') {
        commentTexts.unshift(sibling.text);
      } else if (sibling.isNamed) {
        break;
      }
      sibling = sibling.previousSibling;
    }
  };

  // Try method's own siblings first (top-level methods)
  collectComments(methodNode);

  // If no comments found and parent is body_statement, check parent's siblings.
  // Restricted to the FIRST statement of the body: comments in front of the
  // body_statement describe that statement only, so a later undocumented method
  // must not inherit them.
  if (
    commentTexts.length === 0
    && methodNode.parent?.type === 'body_statement'
    && isFirstNamedSibling(methodNode)
  ) {
    collectComments(methodNode.parent);
  }

  // Parse all comment lines for @param annotations
  const commentBlock = commentTexts.join('\n');
  let match: RegExpExecArray | null;

  // Reset regex state (the regexes are global and shared)
  YARD_PARAM_RE.lastIndex = 0;
  while ((match = YARD_PARAM_RE.exec(commentBlock)) !== null) {
    const paramName = match[1];
    const rawType = match[2];
    const typeName = extractYardTypeName(rawType);
    if (typeName) {
      params.set(paramName, typeName);
    }
  }

  // Also check alternate YARD order: @param [Type] name
  YARD_PARAM_ALT_RE.lastIndex = 0;
  while ((match = YARD_PARAM_ALT_RE.exec(commentBlock)) !== null) {
    const rawType = match[1];
    const paramName = match[2];
    if (params.has(paramName)) continue; // standard format takes priority
    const typeName = extractYardTypeName(rawType);
    if (typeName) {
      params.set(paramName, typeName);
    }
  }

  return params;
};

/**
 * Ruby node types that may carry type bindings.
 * - `method`/`singleton_method`: YARD @param annotations (via extractDeclaration)
 * - `assignment`: Constructor inference like `user = User.new` (via extractInitializer;
 *   extractDeclaration returns early for these nodes)
 */
const DECLARATION_NODE_TYPES: ReadonlySet<string> = new Set([
  'method',
  'singleton_method',
  'assignment',
]);

/**
 * Extract YARD annotations from method definitions.
 * Pre-populates the scope env with parameter types before the
 * standard parameter walk (which won't find types since Ruby has none).
 * Nodes other than `method` / `singleton_method` are ignored.
 */
const extractDeclaration: TypeBindingExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  if (node.type !== 'method' && node.type !== 'singleton_method') return;

  // Pre-populate env with YARD type bindings for each parameter
  for (const [paramName, typeName] of collectYardParams(node)) {
    env.set(paramName, typeName);
  }
};

/**
 * Ruby parameter extraction.
 * Ruby parameters (identifiers inside method_parameters) have no inline
 * type annotations. YARD types are already populated by extractDeclaration,
 * so this is a no-op — the bindings are already in the env.
 *
 * We still register this to maintain the LanguageTypeConfig contract.
 */
const extractParameter: ParameterExtractor = (_node: SyntaxNode, _env: Map<string, string>): void => {
  // Ruby parameters have no type annotations.
  // YARD types are pre-populated by extractDeclaration.
};

/**
 * Ruby constructor inference: user = User.new or service = Models::User.new
 * Uses the shared extractRubyConstructorAssignment helper for AST matching,
 * then resolves against locally-known class names. Existing env entries are
 * never overwritten.
 */
const extractInitializer: InitializerExtractor = (node, env, classNames): void => {
  const result = extractRubyConstructorAssignment(node);
  if (!result) return;
  if (env.has(result.varName)) return;
  if (classNames.has(result.calleeName)) {
    env.set(result.varName, result.calleeName);
  }
};

/**
 * Extract return type from YARD `@return [Type]` annotation preceding a method.
 * Reuses the same comment-walking strategy as collectYardParams: try direct
 * siblings first, fall back to parent (body_statement) siblings for class methods.
 * The fallback applies only when the method is the first statement of the body,
 * so later methods do not pick up the first method's annotation.
 */
const extractReturnType: ReturnTypeExtractor = (node) => {
  // Scan comments immediately preceding `startNode`, nearest first; stop at the
  // first named non-comment sibling.
  const search = (startNode: SyntaxNode): string | undefined => {
    let sibling = startNode.previousSibling;
    while (sibling) {
      if (sibling.type === 'comment') {
        const match = YARD_RETURN_RE.exec(sibling.text);
        if (match) return extractYardTypeName(match[1]);
      } else if (sibling.isNamed) {
        break;
      }
      sibling = sibling.previousSibling;
    }
    return undefined;
  };

  const result = search(node);
  if (result) return result;

  if (node.parent?.type === 'body_statement' && isFirstNamedSibling(node)) {
    return search(node.parent);
  }
  return undefined;
};

/**
 * Ruby constructor binding scanner: captures both `user = User.new` and
 * plain call assignments like `user = get_user()`.
 * The `.new` pattern returns the class name directly; plain calls return the
 * callee name for return-type inference via SymbolTable lookup.
 */
/**
 * Ruby constructor binding scanner: captures both `user = User.new` and
 * plain call assignments like `user = get_user()`.
 * The `.new` pattern yields the class name directly; a plain call yields the
 * callee name so the caller can infer the type from that callee's return type.
 * Returns undefined for anything that is not an assignment of a call to an
 * identifier or constant.
 */
const scanConstructorBinding: ConstructorBindingScanner = (node) => {
  // Try the .new pattern first (returns class name directly)
  const viaNew = extractRubyConstructorAssignment(node);
  if (viaNew) return viaNew;

  // Plain call assignment: user = get_user() / user = Models.create()
  if (node.type !== 'assignment') return undefined;
  const target = node.childForFieldName('left');
  const value = node.childForFieldName('right');
  if (!target || !value) return undefined;

  const targetIsName = target.type === 'identifier' || target.type === 'constant';
  if (!targetIsName || value.type !== 'call') return undefined;

  const methodNode = value.childForFieldName('method');
  if (!methodNode) return undefined;
  const calleeName = extractSimpleTypeName(methodNode);
  return calleeName ? { varName: target.text, calleeName } : undefined;
};

/** Ruby method node types that carry a parameter list. */
const RUBY_METHOD_NODE_TYPES = new Set(['method', 'singleton_method']);

const FOR_LOOP_NODE_TYPES: ReadonlySet<string> = new Set(['for']);

/**
 * Collect raw YARD @param type strings from comment nodes preceding a method.
 * Unlike collectYardParams which returns simplified type names, this returns the
 * raw bracket content (e.g., "Array<User>" not "Array") for element type extraction.
 */
/**
 * Collect raw YARD @param type strings from comment nodes preceding a method.
 * Returns a map of paramName → raw bracket content (e.g., "Array<User>" rather than
 * a simplified "Array"), so callers can extract element types from it.
 *
 * Comments are taken from the method's own preceding siblings. If there are none
 * and the method is the first statement of a `body_statement`, the comments
 * preceding that `body_statement` are used instead. A later, undocumented method
 * must not inherit the annotations written for the first method of the class.
 */
const collectYardRawParams = (methodNode: SyntaxNode): Map<string, string> => {
  const params = new Map<string, string>();
  const commentTexts: string[] = [];

  // Gather the run of comment siblings immediately before `startNode`, in source order.
  const collectComments = (startNode: SyntaxNode): void => {
    let sibling = startNode.previousSibling;
    while (sibling) {
      if (sibling.type === 'comment') {
        commentTexts.unshift(sibling.text);
      } else if (sibling.isNamed) {
        break;
      }
      sibling = sibling.previousSibling;
    }
  };

  // True when a named node precedes the method inside its parent.
  const hasNamedPredecessor = (): boolean => {
    for (let sib = methodNode.previousSibling; sib; sib = sib.previousSibling) {
      if (sib.isNamed) return true;
    }
    return false;
  };

  collectComments(methodNode);
  if (
    commentTexts.length === 0
    && methodNode.parent?.type === 'body_statement'
    && !hasNamedPredecessor()
  ) {
    collectComments(methodNode.parent);
  }

  const commentBlock = commentTexts.join('\n');
  let match: RegExpExecArray | null;

  // The regexes are global and shared, so reset their scan position before each use.
  YARD_PARAM_RE.lastIndex = 0;
  while ((match = YARD_PARAM_RE.exec(commentBlock)) !== null) {
    params.set(match[1], match[2]);
  }

  // Alternate order `@param [Type] name`; the standard order takes priority.
  YARD_PARAM_ALT_RE.lastIndex = 0;
  while ((match = YARD_PARAM_ALT_RE.exec(commentBlock)) !== null) {
    if (!params.has(match[2])) params.set(match[2], match[1]);
  }
  return params;
};

/**
 * Walk up the AST from a for-statement to find the enclosing method,
 * then search its YARD @param annotations for one named `iterableName`.
 * Returns the element type extracted from the raw YARD type string, or undefined
 * when there is no enclosing method or no annotation for that name.
 *
 * Example: `@param users [Array<User>]` → extracts "User" from "Array<User>".
 */
const findRubyParamElementType = (iterableName: string, startNode: SyntaxNode): string | undefined => {
  // Only the nearest enclosing method is consulted.
  let current: SyntaxNode | null = startNode.parent;
  while (current && !RUBY_METHOD_NODE_TYPES.has(current.type)) current = current.parent;
  if (!current) return undefined;

  const rawType = collectYardRawParams(current).get(iterableName);
  return rawType ? extractElementTypeFromString(rawType) : undefined;
};

/**
 * Ruby: for user in users ... end
 *
 * tree-sitter-ruby `for` node structure:
 *   pattern field: the loop variable (identifier)
 *   value field: `in` node whose child is the iterable expression
 *
 * Tier 1c: resolves the element type via:
 *   1. scopeEnv string — extractElementTypeFromString on the stored type
 *   2.
 */
/**
 * Ruby: for user in users ... end
 *
 * tree-sitter-ruby `for` node structure:
 *   pattern field: the loop variable (identifier)
 *   value field: `in` node whose child is the iterable expression
 *
 * Tier 1c: resolves the element type via:
 *   1. scopeEnv string — extractElementTypeFromString on the stored type
 *   2. AST walk — walks up to the enclosing method's YARD @param to read Array<User> directly
 *
 * Ruby has no static types on loop variables, so this mainly works when the
 * iterable has a YARD-annotated container type (e.g., `@param users [Array<User>]`).
 */
const extractForLoopBinding: ForLoopExtractor = (node, { scopeEnv, declarationTypeNodes, scope }): void => {
  if (node.type !== 'for') return;

  // The loop variable is the `pattern` field (identifier).
  const patternNode = node.childForFieldName('pattern');
  const loopVarName = patternNode ? extractVarName(patternNode) : undefined;
  if (!loopVarName) return;

  // The iterable is inside the `value` field which is an `in` node wrapping the expression.
  const iterableNode = node.childForFieldName('value')?.firstNamedChild;
  let iterableName: string | undefined;
  switch (iterableNode?.type) {
    case 'identifier':
      iterableName = iterableNode.text;
      break;
    case 'call':
      // For a call iterable, look it up under the called method's name.
      iterableName = iterableNode.childForFieldName('method')?.text;
      break;
  }
  if (!iterableName) return;

  // Ruby has no extractFromTypeNode (no AST type annotations), pass a no-op.
  const noTypeNodeExtractor = (): string | undefined => undefined;

  const elementType = resolveIterableElementType(
    iterableName, node, scopeEnv, declarationTypeNodes, scope,
    noTypeNodeExtractor, findRubyParamElementType,
    undefined,
  );
  if (elementType) scopeEnv.set(loopVarName, elementType);
};

/**
 * Ruby: alias_user = user → assignment with left/right identifier fields.
 * Only handles plain identifier RHS (not calls, not literals).
 * Skips if LHS already has a resolved type in scopeEnv.
 */
*/
const extractPendingAssignment: PendingAssignmentExtractor = (node, scopeEnv) => {
  if (node.type !== 'assignment') return undefined;

  // Only a plain identifier on the left that has no type recorded yet is of interest.
  const target = node.childForFieldName('left');
  if (target?.type !== 'identifier') return undefined;
  const lhs = target.text;
  if (scopeEnv.has(lhs)) return undefined;

  const rhs = node.childForFieldName('right');
  if (!rhs) return undefined;

  switch (rhs.type) {
    case 'identifier':
      return { kind: 'copy', lhs, rhs: rhs.text };

    // Ruby uses method calls for both field access and method calls.
    case 'call':
    case 'method_call': {
      const callee = rhs.childForFieldName('method');
      if (callee?.type !== 'identifier') return undefined;

      const receiver = rhs.childForFieldName('receiver');
      if (!receiver) {
        // No receiver → bare function call.
        return { kind: 'callResult', lhs, callee: callee.text };
      }
      if (receiver.type === 'identifier') {
        // Identifier receiver → a.method
        return { kind: 'methodCallResult', lhs, receiver: receiver.text, method: callee.text };
      }
      return undefined;
    }

    default:
      return undefined;
  }
};

export const typeConfig: LanguageTypeConfig = {
  declarationNodeTypes: DECLARATION_NODE_TYPES,
  forLoopNodeTypes: FOR_LOOP_NODE_TYPES,
  extractDeclaration,
  extractParameter,
  extractInitializer,
  scanConstructorBinding,
  extractReturnType,
  extractForLoopBinding,
  extractPendingAssignment,
};

================================================
FILE: gitnexus/src/core/ingestion/type-extractors/rust.ts
================================================
import type { SyntaxNode } from '../utils.js';
import type { LanguageTypeConfig, ParameterExtractor, TypeBindingExtractor, InitializerExtractor, ClassNameLookup, ConstructorBindingScanner, PendingAssignmentExtractor, PendingAssignment, PatternBindingExtractor, ForLoopExtractor } from './types.js';
import { extractSimpleTypeName, extractVarName,
hasTypeAnnotation, unwrapAwait, extractGenericTypeArgs, resolveIterableElementType, methodToTypeArgPosition, extractElementTypeFromString, type TypeArgPosition } from './shared.js';

const DECLARATION_NODE_TYPES: ReadonlySet<string> = new Set([
  'let_declaration',
  'let_condition',
]);

/**
 * Find the implementing type of the nearest enclosing `impl` block.
 * An impl_item without a `type` field is skipped and the walk continues upward.
 */
const findEnclosingImplType = (node: SyntaxNode): string | undefined => {
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    if (ancestor.type !== 'impl_item') continue;
    // The 'type' field holds the implementing type (e.g., `impl User { ... }`).
    const implType = ancestor.childForFieldName('type');
    if (implType) return extractSimpleTypeName(implType);
  }
  return undefined;
};

/**
 * Read the type name from the 'type' field of a struct_pattern.
 * Covers both simple `User { .. }` and scoped `Message::Data { .. }`.
 */
const extractStructPatternType = (structPattern: SyntaxNode): string | undefined => {
  const typeField = structPattern.childForFieldName('type');
  return typeField ? extractSimpleTypeName(typeField) : undefined;
};

/**
 * Scan a pattern for captured_pattern nodes (x @ StructType { .. }) and record
 * variable → type bindings in `env`. Recurses only through tuple_struct_pattern
 * children; recursion stops once `depth` exceeds 50.
 */
const extractCapturedPatternBindings = (pattern: SyntaxNode, env: Map<string, string>, depth = 0): void => {
  if (depth > 50) return;

  switch (pattern.type) {
    case 'captured_pattern': {
      // captured_pattern: identifier @ inner_pattern — the identifier comes first.
      const binder = pattern.firstNamedChild;
      if (binder?.type !== 'identifier') return;

      // The first struct_pattern child supplies the type; nothing after it is considered.
      for (let idx = 0; idx < pattern.namedChildCount; idx += 1) {
        const candidate = pattern.namedChild(idx);
        if (candidate?.type !== 'struct_pattern') continue;
        const structType = extractStructPatternType(candidate);
        if (structType) env.set(binder.text, structType);
        return;
      }
      return;
    }

    case 'tuple_struct_pattern': {
      // e.g., Some(user @ User { .. }) — look for captures nested in the arguments.
      for (let idx = 0; idx < pattern.namedChildCount; idx += 1) {
        const nested = pattern.namedChild(idx);
        if (nested) extractCapturedPatternBindings(nested, env, depth + 1);
      }
      return;
    }

    default:
      return;
  }
};

/** Rust: let x: Foo = ... | if let / while let pattern bindings */
const extractDeclaration: TypeBindingExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  const pattern = node.childForFieldName('pattern');
  if (!pattern) return;

  if (node.type === 'let_condition') {
    // if let / while let: bindings come from the pattern itself.
    //
    // Supported:
    //   - captured_pattern: `if let user @ User { .. } = expr` → user: User
    //   - tuple_struct_pattern wrapping a captured_pattern:
    //     `if let Some(user @ User { .. }) = expr` → user: User
    //
    // NOT supported here (requires generic unwrapping — Phase 3):
    //   - `if let Some(x) = opt` where opt: Option<T> → x: T
    //
    // A struct_pattern without capture (`if let User { name } = expr`) only
    // destructures fields; their types are unknown without field-type
    // resolution, so nothing is recorded for it.
    extractCapturedPatternBindings(pattern, env);
    return;
  }

  // Standard let_declaration: let x: Foo = ...
  const annotation = node.childForFieldName('type');
  if (!annotation) return;
  const boundName = extractVarName(pattern);
  const boundType = extractSimpleTypeName(annotation);
  if (boundName && boundType) env.set(boundName, boundType);
};

/** Rust: let x = User::new(), let x = User::default(), let x = User { ... }, or let x = UnitStruct */
const extractInitializer: InitializerExtractor = (node: SyntaxNode, env: Map<string, string>, classNames: ClassNameLookup): void => {
  // An explicit type annotation means Tier 0 already handled this declaration.
  if (node.childForFieldName('type') !== null) return;

  const pattern = node.childForFieldName('pattern');
  const value = node.childForFieldName('value');
  if (!pattern || !value) return;

  // Shared tail of the struct-literal and constructor-call paths: take the simple
  // name of `typeSource`, map `Self` to the enclosing impl's type, then bind it.
  const bindResolvedType = (typeSource: SyntaxNode): void => {
    const rawType = extractSimpleTypeName(typeSource);
    if (!rawType) return;
    const resolved = rawType === 'Self' ? findEnclosingImplType(node) : rawType;
    const target = extractVarName(pattern);
    if (target && resolved) env.set(target, resolved);
  };

  // Struct literal: let user = User { name: "alice", age: 30 }
  // tree-sitter-rust: struct_expression whose 'name' field holds the type.
  if (value.type === 'struct_expression') {
    const structName = value.childForFieldName('name');
    if (structName) bindResolvedType(structName);
    return;
  }

  // Unit struct instantiation: let svc = UserService; (bare identifier known as a class name)
  if (value.type === 'identifier' && classNames.has(value.text)) {
    const target = extractVarName(pattern);
    if (target) env.set(target, value.text);
    return;
  }

  if (value.type !== 'call_expression') return;
  const callee = value.childForFieldName('function');
  if (callee?.type !== 'scoped_identifier') return;

  // Only ::new() and ::default() count as constructors. ::from(), ::with_capacity(),
  // etc. are deliberately excluded to avoid false positives (e.g. String::from("x")
  // is not necessarily the "String" type we want for method resolution).
  const ctorName = callee.childForFieldName('name')?.text;
  if (ctorName !== 'new' && ctorName !== 'default') return;

  const ownerPath = callee.childForFieldName('path');
  if (ownerPath) bindResolvedType(ownerPath);
};

/** Rust: parameter → pattern: type */
const extractParameter: ParameterExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  // `parameter` nodes keep the binding in 'pattern'; other node kinds are probed
  // for 'name' first and 'pattern' second.
  const binder: SyntaxNode | null = node.type === 'parameter'
    ? node.childForFieldName('pattern')
    : (node.childForFieldName('name') ?? node.childForFieldName('pattern'));
  const annotation: SyntaxNode | null = node.childForFieldName('type');
  if (!binder || !annotation) return;

  const paramName = extractVarName(binder);
  const paramType = extractSimpleTypeName(annotation);
  if (paramName && paramType) env.set(paramName, paramType);
};

/** Rust: let user = get_user("alice") — let_declaration with call_expression value, no type annotation.
 * Skips `let user: User = ...` (explicit type annotation — handled by extractDeclaration).
 * Skips `let user = User::new()` / `User::default()` (scoped_identifier callee ending in "new" or "default" — handled by extractInitializer).
 * Unwraps `let mut user = get_user()` by looking inside mut_pattern for the inner identifier.
*/
const scanConstructorBinding: ConstructorBindingScanner = (node) => {
  if (node.type !== 'let_declaration' || hasTypeAnnotation(node)) return undefined;

  // `let mut x = ...` wraps the identifier in a mut_pattern; look one level inside.
  const declared = node.childForFieldName('pattern');
  const binder = declared?.type === 'mut_pattern' ? declared.firstNamedChild : declared;
  if (binder?.type !== 'identifier') return undefined;

  // `let user = get_user().await` → the await_expression wraps the call_expression.
  const call = unwrapAwait(node.childForFieldName('value'));
  if (call?.type !== 'call_expression') return undefined;

  const callee = call.childForFieldName('function');
  if (!callee) return undefined;

  // Type::new() / Type::default() are left to extractInitializer.
  if (callee.type === 'scoped_identifier') {
    const lastSegment = callee.lastNamedChild?.text;
    if (lastSegment === 'new' || lastSegment === 'default') return undefined;
  }

  const calleeName = extractSimpleTypeName(callee);
  return calleeName ? { varName: binder.text, calleeName } : undefined;
};

/** Rust: let alias = u; → let_declaration with pattern + value fields.
 * Also handles struct destructuring: `let Point { x, y } = p` → N fieldAccess items.
 */
const extractPendingAssignment: PendingAssignmentExtractor = (node, scopeEnv) => {
  if (node.type !== 'let_declaration') return undefined;

  const pattern = node.childForFieldName('pattern');
  const value = node.childForFieldName('value');
  if (!pattern || !value) return undefined;

  // Struct destructuring from a plain identifier: `let Point { x, y } = receiver`.
  // Each field_pattern child becomes one fieldAccess item.
  if (pattern.type === 'struct_pattern' && value.type === 'identifier') {
    const source = value.text;
    const pending: PendingAssignment[] = [];

    for (let idx = 0; idx < pattern.namedChildCount; idx += 1) {
      const entry = pattern.namedChild(idx);
      if (entry?.type !== 'field_pattern') continue;

      const fieldNameNode = entry.childForFieldName('name');
      if (!fieldNameNode) continue;
      const aliasNode = entry.childForFieldName('pattern');

      let local: string | undefined;
      if (aliasNode) {
        // `Point { x: local_x }` → the bound variable comes from the inner pattern.
        local = extractVarName(aliasNode);
        if (!local) continue;
      } else {
        // Shorthand `Point { x }` → the variable shares the field's name.
        local = fieldNameNode.text;
      }

      if (scopeEnv.has(local)) continue;
      pending.push({ kind: 'fieldAccess', lhs: local, receiver: source, field: fieldNameNode.text });
    }

    return pending.length > 0 ? pending : undefined;
  }

  const lhs = extractVarName(pattern);
  if (!lhs || scopeEnv.has(lhs)) return undefined;

  // `let user = get_user().await` → look through the await to the call.
  const rhs = unwrapAwait(value) ?? value;

  switch (rhs.type) {
    case 'identifier':
      return { kind: 'copy', lhs, rhs: rhs.text };

    // a.field
    case 'field_expression': {
      const owner = rhs.firstNamedChild;
      const member = rhs.lastNamedChild;
      if (owner?.type === 'identifier' && member?.type === 'field_identifier') {
        return { kind: 'fieldAccess', lhs, receiver: owner.text, field: member.text };
      }
      return undefined;
    }

    // f(...) — simple identifier callees only
    case 'call_expression': {
      const callee = rhs.childForFieldName('function');
      return callee?.type === 'identifier'
        ? { kind: 'callResult', lhs, callee: callee.text }
        : undefined;
    }

    // receiver.method()
    case 'method_call_expression': {
      const owner = rhs.firstNamedChild;
      if (owner?.type !== 'identifier') return undefined;
      const method = rhs.childForFieldName('name') ?? rhs.namedChild(1);
      return method?.type === 'field_identifier'
        ? { kind: 'methodCallResult', lhs, receiver: owner.text, method: method.text }
        : undefined;
    }

    default:
      return undefined;
  }
};

/**
 * Rust pattern binding extractor for `if let` / `while let` conditions and `match` arms
 * that unwrap enum variants and introduce new typed variables.
 *
 * Supported patterns:
 * - `if let Some(x) = opt` → x: T (opt: Option<T>, T already in scopeEnv via NULLABLE_WRAPPER_TYPES)
 * - `if let Ok(x) = res` → x: T (res: Result<T, E>, T extracted from declarationTypeNodes)
 * - `if let Err(e) = res` → e: E (res: Result<T, E>, E extracted from declarationTypeNodes)
 *
 * These complement the captured_pattern support in extractDeclaration (which handles
 * `if let x @ Struct { .. } = expr` but NOT tuple struct unwrapping like Some(x) / Ok(x)).
*
 * Conservative: returns undefined when:
 * - The source variable's type is unknown (not in scopeEnv / declarationTypeNodes)
 * - The wrapper is not one of the handled variants (Some / Ok / Err)
 * - The value side is not a simple identifier
 */
const extractPatternBinding: PatternBindingExtractor = (
  node,
  scopeEnv,
  declarationTypeNodes,
  scope,
) => {
  let unwrapPattern: SyntaxNode | null = null;
  let matchedValue: SyntaxNode | null = null;

  switch (node.type) {
    case 'let_condition':
      unwrapPattern = node.childForFieldName('pattern');
      matchedValue = node.childForFieldName('value');
      break;

    case 'match_arm': {
      // The arm's 'pattern' field is a match_pattern wrapping the real pattern.
      const armPattern = node.childForFieldName('pattern');
      unwrapPattern = armPattern?.type === 'match_pattern' ? armPattern.firstNamedChild : armPattern;
      // match_arm → match_block → match_expression, whose 'value' field is the matched expression.
      const matchExpr = node.parent?.parent;
      if (matchExpr?.type === 'match_expression') {
        matchedValue = matchExpr.childForFieldName('value');
      }
      break;
    }

    default:
      break;
  }

  if (!unwrapPattern || !matchedValue) return undefined;

  // Only tuple_struct_pattern is handled: Some(x), Ok(x) or Err(e).
  if (unwrapPattern.type !== 'tuple_struct_pattern') return undefined;

  const wrapperTypeNode = unwrapPattern.childForFieldName('type');
  if (!wrapperTypeNode) return undefined;
  const wrapperName = extractSimpleTypeName(wrapperTypeNode);
  if (wrapperName !== 'Some' && wrapperName !== 'Ok' && wrapperName !== 'Err') return undefined;

  // The bound variable is the first identifier child that is not the wrapper's own type node.
  let innerVar: string | undefined;
  for (let idx = 0; idx < unwrapPattern.namedChildCount; idx += 1) {
    const child = unwrapPattern.namedChild(idx);
    if (!child || child === wrapperTypeNode) continue;
    if (child.type === 'identifier') {
      innerVar = child.text;
      break;
    }
  }
  if (!innerVar) return undefined;

  // The matched value must be a plain identifier so it can be looked up by name.
  if (matchedValue.type !== 'identifier') return undefined;
  const sourceVarName = matchedValue.text;
  if (!sourceVarName) return undefined;

  // Some(x): Option<T> is already stored as T in scopeEnv (via NULLABLE_WRAPPER_TYPES).
  if (wrapperName === 'Some') {
    const unwrappedType = scopeEnv.get(sourceVarName);
    return unwrappedType ? { varName: innerVar, typeName: unwrappedType } : undefined;
  }

  // Ok(x) / Err(e): scopeEnv only holds "Result", so read the generic args off the
  // declaration's type AST node. Ok → first arg (T), Err → second arg (E).
  const resultTypeNode = declarationTypeNodes.get(`${scope}\0${sourceVarName}`);
  if (!resultTypeNode) return undefined;

  const typeArgs = extractGenericTypeArgs(resultTypeNode);
  const argIndex = wrapperName === 'Err' ? 1 : 0;
  if (typeArgs.length <= argIndex) return undefined;
  return { varName: innerVar, typeName: typeArgs[argIndex] };
};

// --- For-loop Tier 1c ---

const FOR_LOOP_NODE_TYPES: ReadonlySet<string> = new Set(['for_expression']);

/** Extract element type from a Rust type annotation AST node.
 * Handles: generic_type (Vec<User>), reference_type (&[User]), array_type ([User; N]),
 * slice_type ([User]). For call-graph purposes, strips references (&User → User).
*/
const extractRustElementTypeFromTypeNode = (typeNode: SyntaxNode, pos: TypeArgPosition = 'last', depth = 0): string | undefined => {
  if (depth > 50) return undefined;

  switch (typeNode.type) {
    // Vec<User>, HashMap<K, V> — pick the first or last type argument according to `pos`.
    case 'generic_type': {
      const args = extractGenericTypeArgs(typeNode);
      if (args.length === 0) return undefined;
      return pos === 'first' ? args[0] : args[args.length - 1];
    }

    // &[User] or &Vec<User> — drop the reference and recurse into the last named child.
    case 'reference_type': {
      const referent = typeNode.lastNamedChild;
      return referent ? extractRustElementTypeFromTypeNode(referent, pos, depth + 1) : undefined;
    }

    // [User; N] and [User] — the element type is the first named child.
    case 'array_type':
    case 'slice_type': {
      const element = typeNode.firstNamedChild;
      return element ? extractSimpleTypeName(element) : undefined;
    }

    default:
      return undefined;
  }
};

/** Walk up from a for-loop to the nearest enclosing function_item and search its
 * parameters for one named `iterableName`. Returns the element type derived from
 * that parameter's type annotation, or undefined when nothing matches. */
const findRustParamElementType = (iterableName: string, startNode: SyntaxNode, pos: TypeArgPosition = 'last'): string | undefined => {
  let enclosingFn: SyntaxNode | null = startNode.parent;
  while (enclosingFn && enclosingFn.type !== 'function_item') {
    enclosingFn = enclosingFn.parent;
  }

  const params = enclosingFn?.childForFieldName('parameters');
  if (!params) return undefined;

  for (let idx = 0; idx < params.namedChildCount; idx += 1) {
    const param = params.namedChild(idx);
    if (param?.type !== 'parameter') continue;

    let binder = param.childForFieldName('pattern');
    if (!binder) continue;
    // Peel reference patterns (&users) and then `mut` to reach the identifier.
    if (binder.type === 'reference_pattern') binder = binder.lastNamedChild ?? binder;
    if (binder.type === 'mut_pattern') binder = binder.firstNamedChild ?? binder;
    if (binder.text !== iterableName) continue;

    const annotation = param.childForFieldName('type');
    if (annotation) return extractRustElementTypeFromTypeNode(annotation, pos);
  }
  return undefined;
};

/** Rust: for user in &users where users has a known container type.
 * Accepts `users`, `&users` / `&mut users`, `obj.users`, `users.iter()`-style
 * method calls, and direct calls such as `get_users()`. */
const extractForLoopBinding: ForLoopExtractor = (node, { scopeEnv, declarationTypeNodes, scope, returnTypeLookup }): void => {
  if (node.type !== 'for_expression') return;

  const patternNode = node.childForFieldName('pattern');
  const valueNode = node.childForFieldName('value');
  if (!patternNode || !valueNode) return;

  let iterableName: string | undefined;
  let methodName: string | undefined;
  let directElementType: string | undefined;

  switch (valueNode.type) {
    case 'reference_expression': {
      const borrowed = valueNode.lastNamedChild;
      if (borrowed?.type === 'identifier') iterableName = borrowed.text;
      break;
    }
    case 'identifier':
      iterableName = valueNode.text;
      break;
    case 'field_expression': {
      // Use the last named child (the accessed member) as the iterable name.
      const member = valueNode.lastNamedChild;
      if (member) iterableName = member.text;
      break;
    }
    case 'call_expression': {
      const callee = valueNode.childForFieldName('function');
      if (callee?.type === 'field_expression') {
        // users.iter() → field_expression > identifier + field_identifier
        const receiver = callee.firstNamedChild;
        if (receiver?.type === 'identifier') iterableName = receiver.text;
        // Method name: iter, keys, values, into_iter, etc.
        const accessor = callee.lastNamedChild;
        if (accessor?.type === 'field_identifier') methodName = accessor.text;
      } else if (callee?.type === 'identifier') {
        // Direct function call: for user in get_users()
        const rawReturn = returnTypeLookup.lookupRawReturnType(callee.text);
        if (rawReturn) directElementType = extractElementTypeFromString(rawReturn);
      }
      break;
    }
    default:
      break;
  }

  // A return-type-derived element type wins; otherwise resolve through the named iterable.
  let elementType = directElementType;
  if (!elementType) {
    if (!iterableName) return;
    const containerTypeName = scopeEnv.get(iterableName);
    elementType = resolveIterableElementType(
      iterableName,
      node,
      scopeEnv,
      declarationTypeNodes,
      scope,
      extractRustElementTypeFromTypeNode,
      findRustParamElementType,
      methodToTypeArgPosition(methodName, containerTypeName),
    );
  }
  if (!elementType) return;

  const loopVarName = extractVarName(patternNode);
  if (loopVarName) scopeEnv.set(loopVarName, elementType);
};

export const typeConfig: LanguageTypeConfig = {
  declarationNodeTypes: DECLARATION_NODE_TYPES,
  forLoopNodeTypes: FOR_LOOP_NODE_TYPES,
  patternBindingNodeTypes: new Set(['let_condition', 'match_arm']),
  extractDeclaration,
  extractInitializer,
  extractParameter,
  scanConstructorBinding,
  extractForLoopBinding,
  extractPendingAssignment,
  extractPatternBinding,
};

================================================
FILE: gitnexus/src/core/ingestion/type-extractors/shared.ts
================================================
import type { SyntaxNode } from '../utils.js';

/** Which type argument to extract from a multi-arg generic container.
 *  - 'first': key type (e.g., K from Map<K,V>) — used for .keys(), .keySet()
 *  - 'last': value type (e.g., V from Map<K,V>) — used for .values(), .items(), .iter() */
export type TypeArgPosition = 'first' | 'last';

// ---------------------------------------------------------------------------
// Container type descriptors — maps container base names to type parameter
// semantics per access method.
// Replaces the simple KEY_METHODS heuristic.
//
// For user-defined generics (MyCache<K,V> extends Map<K,V>), heritage-aware
// fallback can walk the EXTENDS chain to find a matching descriptor.
// ---------------------------------------------------------------------------

/** Describes which type parameter position each access method yields. */
interface ContainerDescriptor {
  /** Number of type parameters (1 = single-element, 2 = key-value) */
  arity: number;
  /** Methods that yield the first type parameter (key type for maps) */
  keyMethods: ReadonlySet<string>;
  /** Methods that yield the last type parameter (value type) */
  valueMethods: ReadonlySet<string>;
}

/** Empty set for containers that have no key-yielding methods */
const NO_KEYS: ReadonlySet<string> = new Set();

/** Standard key-yielding methods across languages */
const STD_KEY_METHODS: ReadonlySet<string> = new Set(['keys']);
const JAVA_KEY_METHODS: ReadonlySet<string> = new Set(['keySet']);
const CSHARP_KEY_METHODS: ReadonlySet<string> = new Set(['Keys']);

/** Standard value-yielding methods across languages */
const STD_VALUE_METHODS: ReadonlySet<string> = new Set(['values', 'get', 'pop', 'remove']);
const CSHARP_VALUE_METHODS: ReadonlySet<string> = new Set(['Values', 'TryGetValue']);
const SINGLE_ELEMENT_METHODS: ReadonlySet<string> = new Set([
  'iter', 'into_iter', 'iterator', 'get', 'first', 'last', 'pop', 'peek', 'poll', 'find', 'filter', 'map',
]);

/** Build a fresh arity-2 (key + value) descriptor from the given method sets. */
const keyValueContainer = (
  keyMethods: ReadonlySet<string>,
  valueMethods: ReadonlySet<string>,
): ContainerDescriptor => ({ arity: 2, keyMethods, valueMethods });

/** Build a fresh arity-1 descriptor: no key methods, the shared single-element value methods. */
const singleElementContainer = (): ContainerDescriptor => ({
  arity: 1,
  keyMethods: NO_KEYS,
  valueMethods: SINGLE_ELEMENT_METHODS,
});

const CONTAINER_DESCRIPTORS: ReadonlyMap<string, ContainerDescriptor> = new Map([
  // --- Map / Dict types (arity 2: key + value) ---
  ['Map', keyValueContainer(STD_KEY_METHODS, STD_VALUE_METHODS)],
  ['WeakMap', keyValueContainer(STD_KEY_METHODS, STD_VALUE_METHODS)],
  ['HashMap', keyValueContainer(STD_KEY_METHODS, STD_VALUE_METHODS)],
  ['BTreeMap', keyValueContainer(STD_KEY_METHODS, STD_VALUE_METHODS)],
  ['LinkedHashMap', keyValueContainer(JAVA_KEY_METHODS, STD_VALUE_METHODS)],
  ['TreeMap', keyValueContainer(JAVA_KEY_METHODS, STD_VALUE_METHODS)],
  ['dict', keyValueContainer(STD_KEY_METHODS, STD_VALUE_METHODS)],
  ['Dict', keyValueContainer(STD_KEY_METHODS, STD_VALUE_METHODS)],
  ['Dictionary', keyValueContainer(CSHARP_KEY_METHODS, CSHARP_VALUE_METHODS)],
  ['SortedDictionary', keyValueContainer(CSHARP_KEY_METHODS, CSHARP_VALUE_METHODS)],
  ['Record', keyValueContainer(STD_KEY_METHODS, STD_VALUE_METHODS)],
  ['OrderedDict', keyValueContainer(STD_KEY_METHODS, STD_VALUE_METHODS)],
  ['ConcurrentHashMap', keyValueContainer(JAVA_KEY_METHODS, STD_VALUE_METHODS)],
  ['ConcurrentDictionary', keyValueContainer(CSHARP_KEY_METHODS, CSHARP_VALUE_METHODS)],
  // --- Single-element containers (arity 1) ---
  ['Array', singleElementContainer()],
  ['List', singleElementContainer()],
  ['ArrayList', singleElementContainer()],
  ['LinkedList', singleElementContainer()],
  ['Vec', singleElementContainer()],
  ['VecDeque', singleElementContainer()],
  ['Set', singleElementContainer()],
  ['HashSet', singleElementContainer()],
  ['BTreeSet', singleElementContainer()],
  ['TreeSet', singleElementContainer()],
  ['Queue', singleElementContainer()],
  ['Deque', singleElementContainer()],
  ['Stack', singleElementContainer()],
  ['Sequence', singleElementContainer()],
  ['Iterable', singleElementContainer()],
  ['Iterator', singleElementContainer()],
  ['IEnumerable', singleElementContainer()],
  ['IList', singleElementContainer()],
  ['ICollection', singleElementContainer()],
  ['Collection', singleElementContainer()],
  ['ObservableCollection', singleElementContainer()],
  ['IEnumerator', singleElementContainer()],
  ['SortedSet', singleElementContainer()],
  ['Stream', singleElementContainer()],
  ['MutableList', singleElementContainer()],
  ['MutableSet', singleElementContainer()],
  ['LinkedHashSet', singleElementContainer()],
  ['ArrayDeque', singleElementContainer()],
  ['PriorityQueue', singleElementContainer()],
  ['MutableMap', keyValueContainer(STD_KEY_METHODS, STD_VALUE_METHODS)],
  ['list', singleElementContainer()],
  ['set', singleElementContainer()],
  ['tuple', singleElementContainer()],
  ['frozenset', singleElementContainer()],
]);

/** Determine which type arg to extract based on container type name and access method.
*
 * Resolution order:
 * 1. If container is known and method is in keyMethods → 'first'
 * 2. If container is known with arity 1 → 'last' (same as 'first' for single-arg)
 * 3. If container is unknown → fall back to method name heuristic
 * 4. Default: 'last' (value type)
 */
export function methodToTypeArgPosition(methodName: string | undefined, containerTypeName?: string): TypeArgPosition {
  const descriptor = containerTypeName ? CONTAINER_DESCRIPTORS.get(containerTypeName) : undefined;

  if (descriptor) {
    // Single-element container: the only type arg is also the last one.
    if (descriptor.arity === 1) return 'last';
    // Multi-element container: key-yielding methods select the first arg, everything else the value.
    return methodName && descriptor.keyMethods.has(methodName) ? 'first' : 'last';
  }

  // Unknown (or unspecified) container: decide from the method name alone.
  switch (methodName) {
    case 'keys':
    case 'keySet':
    case 'Keys':
      return 'first';
    default:
      return 'last';
  }
}

/** Look up the container descriptor for a type name. Exported for heritage-chain lookups. */
export function getContainerDescriptor(typeName: string): ContainerDescriptor | undefined {
  const descriptor = CONTAINER_DESCRIPTORS.get(typeName);
  return descriptor;
}

/**
 * Shared 3-strategy fallback for resolving the element type of a container variable.
 * Used by all for-loop extractors to resolve the loop variable's type from the iterable.
*
 * Strategy 1: declarationTypeNodes — raw AST type annotation node (handles container types
 *             where extractSimpleTypeName returned undefined, e.g., User[], List[User])
 * Strategy 2: scopeEnv string — extractElementTypeFromString on the stored type string
 * Strategy 3: AST walk — language-specific upward walk to enclosing function parameters
 *
 * Each strategy is tried only when the previous one produced nothing.
 *
 * @param extractFromTypeNode Language-specific function to extract element type from AST node
 * @param findParamElementType Optional language-specific AST walk to find parameter type
 * @param typeArgPos Which generic type arg to extract: 'first' for keys, 'last' for values (default)
 */
export function resolveIterableElementType(
  iterableName: string,
  node: SyntaxNode,
  scopeEnv: ReadonlyMap<string, string>,
  declarationTypeNodes: ReadonlyMap<string, SyntaxNode>,
  scope: string,
  extractFromTypeNode: (typeNode: SyntaxNode, pos?: TypeArgPosition) => string | undefined,
  findParamElementType?: (name: string, startNode: SyntaxNode, pos?: TypeArgPosition) => string | undefined,
  typeArgPos: TypeArgPosition = 'last',
): string | undefined {
  // Strategy 1: annotation node recorded for the current scope, else for file scope ('' prefix).
  const scopedKey = `${scope}\0${iterableName}`;
  const fileKey = `\0${iterableName}`;
  const annotation = declarationTypeNodes.get(scopedKey)
    ?? (scope === '' ? undefined : declarationTypeNodes.get(fileKey));
  const fromAnnotation = annotation ? extractFromTypeNode(annotation, typeArgPos) : undefined;
  if (fromAnnotation) return fromAnnotation;

  // Strategy 2: parse the element type out of the type string stored in scopeEnv.
  const storedType = scopeEnv.get(iterableName);
  const fromStoredType = storedType ? extractElementTypeFromString(storedType, typeArgPos) : undefined;
  if (fromStoredType) return fromStoredType;

  // Strategy 3: delegate to the language-specific parameter walk, when one was supplied.
  return findParamElementType?.(iterableName, node, typeArgPos);
}

/** Known single-arg nullable wrapper types that unwrap to their inner type
 * for receiver resolution. Optional<User> → "User", Option<User> → "User".
* Only nullable wrappers — NOT containers (List, Vec) or async wrappers (Promise, Future). * See WRAPPER_GENERICS below for the full set used in return-type inference. */ const NULLABLE_WRAPPER_TYPES = new Set([ 'Optional', // Java 'Option', // Rust, Scala 'Maybe', // Haskell-style, Kotlin Arrow ]); /** * Extract the simple type name from a type AST node. * Handles generic types (e.g., List<User> → List), qualified names * (e.g., models.User → User), and nullable types (e.g., User? → User). * Returns undefined for complex types (unions, intersections, function types). */ export const extractSimpleTypeName = (typeNode: SyntaxNode, depth = 0): string | undefined => { if (depth > 50 || typeNode.text.length > 2048) return undefined; // Direct type identifier (includes Ruby 'constant' for class names) if (typeNode.type === 'type_identifier' || typeNode.type === 'identifier' || typeNode.type === 'simple_identifier' || typeNode.type === 'constant') { return typeNode.text; } // Qualified/scoped names: take the last segment (e.g., models.User → User, Models::User → User) if (typeNode.type === 'scoped_identifier' || typeNode.type === 'qualified_identifier' || typeNode.type === 'scoped_type_identifier' || typeNode.type === 'qualified_name' || typeNode.type === 'qualified_type' || typeNode.type === 'member_expression' || typeNode.type === 'member_access_expression' || typeNode.type === 'attribute' || typeNode.type === 'scope_resolution' || typeNode.type === 'selector_expression') { const last = typeNode.lastNamedChild; if (last && (last.type === 'type_identifier' || last.type === 'identifier' || last.type === 'simple_identifier' || last.type === 'name' || last.type === 'constant' || last.type === 'property_identifier' || last.type === 'field_identifier')) { return last.text; } } // C++ template_type (e.g., vector<User>, map<string, User>): extract base name if (typeNode.type === 'template_type') { const base = typeNode.childForFieldName('name') ?? 
typeNode.firstNamedChild; if (base) return extractSimpleTypeName(base, depth + 1); } // Generic types: extract the base type (e.g., List<User> → List) // For nullable wrappers (Optional<User>, Option<User>), unwrap to inner type. if (typeNode.type === 'generic_type' || typeNode.type === 'parameterized_type' || typeNode.type === 'generic_name') { const base = typeNode.childForFieldName('name') ?? typeNode.childForFieldName('type') ?? typeNode.firstNamedChild; if (!base) return undefined; const baseName = extractSimpleTypeName(base, depth + 1); // Unwrap known nullable wrappers: Optional<User> → User, Option<User> → User if (baseName && NULLABLE_WRAPPER_TYPES.has(baseName)) { const args = extractGenericTypeArgs(typeNode); if (args.length >= 1) return args[0]; } return baseName; } // Nullable types (Kotlin User?, C# User?) if (typeNode.type === 'nullable_type') { const inner = typeNode.firstNamedChild; if (inner) return extractSimpleTypeName(inner, depth + 1); } // Nullable union types (TS/JS: User | null, User | undefined, User | null | undefined) // Extract the single non-null/undefined type from the union. 
if (typeNode.type === 'union_type') { const nonNullTypes: SyntaxNode[] = []; for (let i = 0; i < typeNode.namedChildCount; i++) { const child = typeNode.namedChild(i); if (!child) continue; // Skip null/undefined/void literal types const text = child.text; if (text === 'null' || text === 'undefined' || text === 'void') continue; nonNullTypes.push(child); } // Only unwrap if exactly one meaningful type remains if (nonNullTypes.length === 1) { return extractSimpleTypeName(nonNullTypes[0], depth + 1); } } // Type annotations that wrap the actual type (TS/Python: `: Foo`, Kotlin: user_type) if (typeNode.type === 'type_annotation' || typeNode.type === 'type' || typeNode.type === 'user_type') { const inner = typeNode.firstNamedChild; if (inner) return extractSimpleTypeName(inner, depth + 1); } // Pointer/reference types (C++, Rust): User*, &User, &mut User if (typeNode.type === 'pointer_type' || typeNode.type === 'reference_type') { // Skip mutable_specifier for Rust &mut references — firstNamedChild would be // `mutable_specifier` not the actual type. Walk named children to find the type. for (let i = 0; i < typeNode.namedChildCount; i++) { const child = typeNode.namedChild(i); if (child && child.type !== 'mutable_specifier') { return extractSimpleTypeName(child, depth + 1); } } } // Primitive/predefined types: string, int, float, bool, number, unknown, any // PHP: primitive_type; TS/JS: predefined_type // Java: integral_type (int/long/short/byte), floating_point_type (float/double), // boolean_type (boolean), void_type (void) if (typeNode.type === 'primitive_type' || typeNode.type === 'predefined_type' || typeNode.type === 'integral_type' || typeNode.type === 'floating_point_type' || typeNode.type === 'boolean_type' || typeNode.type === 'void_type') { return typeNode.text; } // PHP named_type / optional_type if (typeNode.type === 'named_type' || typeNode.type === 'optional_type') { const inner = typeNode.childForFieldName('name') ?? 
typeNode.firstNamedChild; if (inner) return extractSimpleTypeName(inner, depth + 1); } // Name node (PHP) if (typeNode.type === 'name') { return typeNode.text; } return undefined; }; /** * Extract variable name from a declarator or pattern node. * Returns the simple identifier text, or undefined for destructuring/complex patterns. */ export const extractVarName = (node: SyntaxNode): string | undefined => { if (node.type === 'identifier' || node.type === 'simple_identifier' || node.type === 'variable_name' || node.type === 'name' || node.type === 'constant' || node.type === 'property_identifier') { return node.text; } // variable_declarator (Java/C#): has a 'name' field if (node.type === 'variable_declarator') { const nameChild = node.childForFieldName('name'); if (nameChild) return extractVarName(nameChild); } // Rust: let mut x = ... — mut_pattern wraps an identifier if (node.type === 'mut_pattern') { const inner = node.firstNamedChild; if (inner) return extractVarName(inner); } return undefined; }; /** Node types for function/method parameters with type annotations */ export const TYPED_PARAMETER_TYPES = new Set([ 'required_parameter', // TS: (x: Foo) 'optional_parameter', // TS: (x?: Foo) 'formal_parameter', // Java/Kotlin 'parameter', // C#/Rust/Go/Python/Swift 'typed_parameter', // Python: def f(x: Foo) — distinct from 'parameter' in tree-sitter-python 'parameter_declaration', // C/C++ void f(Type name) 'simple_parameter', // PHP function(Foo $x) 'property_promotion_parameter', // PHP 8.0+ constructor promotion: __construct(private Foo $x) 'closure_parameter', // Rust: |user: User| — typed closure parameters ]); /** * Extract type arguments from a generic type node. * e.g., List<User, String> → ['User', 'String'], Vec<User> → ['User'] * * Used by extractSimpleTypeName to unwrap nullable wrappers (Optional<User> → User). 
* * Handles language-specific AST structures: * - TS/Java/Rust/Go: generic_type > type_arguments > type nodes * - C#: generic_type > type_argument_list > type nodes * - Kotlin: generic_type > type_arguments > type_projection > type nodes * * Note: Go slices/maps use slice_type/map_type, not generic_type — those are * NOT handled here. Use language-specific extractors for Go container types. * * @param typeNode A generic_type or parameterized_type AST node (or any node — * returns [] for non-generic types). * @returns Array of resolved type argument names. Unresolvable arguments are omitted. */ export const extractGenericTypeArgs = (typeNode: SyntaxNode, depth = 0): string[] => { if (depth > 50) return []; // Unwrap wrapper nodes that may sit above the generic_type if (typeNode.type === 'type_annotation' || typeNode.type === 'type' || typeNode.type === 'user_type' || typeNode.type === 'nullable_type' || typeNode.type === 'optional_type') { const inner = typeNode.firstNamedChild; if (inner) return extractGenericTypeArgs(inner, depth + 1); return []; } // Only process generic/parameterized type nodes (includes C#'s generic_name) if (typeNode.type !== 'generic_type' && typeNode.type !== 'parameterized_type' && typeNode.type !== 'generic_name') { return []; } // Find the type_arguments / type_argument_list child let argsNode: SyntaxNode | null = null; for (let i = 0; i < typeNode.namedChildCount; i++) { const child = typeNode.namedChild(i); if (child && (child.type === 'type_arguments' || child.type === 'type_argument_list')) { argsNode = child; break; } } if (!argsNode) return []; const result: string[] = []; for (let i = 0; i < argsNode.namedChildCount; i++) { let argNode = argsNode.namedChild(i); if (!argNode) continue; // Kotlin: type_arguments > type_projection > user_type > type_identifier if (argNode.type === 'type_projection') { argNode = argNode.firstNamedChild; if (!argNode) continue; } const name = extractSimpleTypeName(argNode); if (name) result.push(name); 
} return result; }; /** * Match Ruby constructor assignment: `user = User.new` or `service = Models::User.new`. * Returns { varName, calleeName } or undefined if the node is not a Ruby constructor assignment. * Handles both simple constants and scope_resolution (namespaced) receivers. */ export const extractRubyConstructorAssignment = ( node: SyntaxNode, ): { varName: string; calleeName: string } | undefined => { if (node.type !== 'assignment') return undefined; const left = node.childForFieldName('left'); const right = node.childForFieldName('right'); if (!left || !right) return undefined; if (left.type !== 'identifier' && left.type !== 'constant') return undefined; if (right.type !== 'call') return undefined; const method = right.childForFieldName('method'); if (!method || method.text !== 'new') return undefined; const receiver = right.childForFieldName('receiver'); if (!receiver) return undefined; let calleeName: string; if (receiver.type === 'constant') { calleeName = receiver.text; } else if (receiver.type === 'scope_resolution') { // Models::User → extract last segment "User" const last = receiver.lastNamedChild; if (!last || last.type !== 'constant') return undefined; calleeName = last.text; } else { return undefined; } return { varName: left.text, calleeName }; }; /** * Check if an AST node has an explicit type annotation. * Checks both named fields ('type') and child nodes ('type_annotation'). * Used by constructor binding scanners to skip annotated declarations. */ export const hasTypeAnnotation = (node: SyntaxNode): boolean => { if (node.childForFieldName('type')) return true; for (let i = 0; i < node.childCount; i++) { if (node.child(i)?.type === 'type_annotation') return true; } return false; }; /** Bare nullable keywords that should not produce a receiver binding. */ const NULLABLE_KEYWORDS = new Set(['null', 'undefined', 'void', 'None', 'nil']); /** * Strip nullable wrappers from a type name string. 
/* Used by both lookupInEnv (TypeEnv annotations) and extractReturnTypeName
 * (return-type text) to normalize types before receiver lookup: trims the
 * input, drops a trailing `?` and removes nullable members from a union.
 *
 *   "User | null"             → "User"
 *   "User | undefined"        → "User"
 *   "User | null | undefined" → "User"
 *   "User?"                   → "User"
 *   "User | Repo"             → undefined (genuine union — refuse)
 *   "null"                    → undefined
 */
export const stripNullable = (typeName: string): string | undefined => {
  let text = typeName.trim();
  if (!text) return undefined;
  if (NULLABLE_KEYWORDS.has(text)) return undefined;

  // Strip nullable suffix: User? → User
  if (text.endsWith('?')) text = text.slice(0, -1).trim();

  // Strip union with null/undefined/None/nil/void
  if (text.includes('|')) {
    const parts = text
      .split('|')
      .map(p => p.trim())
      .filter(p => p !== '' && !NULLABLE_KEYWORDS.has(p));
    if (parts.length === 1) return parts[0];
    return undefined; // genuine union or all-nullable — refuse
  }

  return text || undefined;
};

/**
 * Unwrap an await_expression to get the inner value.
 * Returns the node itself if not an await_expression, or null if input is null.
 */
export const unwrapAwait = (node: SyntaxNode | null): SyntaxNode | null => {
  if (!node) return null;
  return node.type === 'await_expression' ? node.firstNamedChild : node;
};

/**
 * Extract the callee name from a call_expression node.
 * Navigates to the 'function' field (or first named child) and extracts a simple type name.
 */
export const extractCalleeName = (callNode: SyntaxNode): string | undefined => {
  const func = callNode.childForFieldName('function') ?? callNode.firstNamedChild;
  if (!func) return undefined;
  return extractSimpleTypeName(func);
};

/** Find the first named child with the given node type */
export const findChildByType = (node: SyntaxNode, type: string): SyntaxNode | null => {
  for (let i = 0; i < node.namedChildCount; i++) {
    const child = node.namedChild(i);
    if (child?.type === type) return child;
  }
  return null;
};

// Internal helper: extract the first comma-separated argument from a string,
// respecting nested angle-bracket and square-bracket depth.
function extractFirstArg(args: string): string {
  let depth = 0;
  for (let i = 0; i < args.length; i++) {
    const ch = args[i];
    if (ch === '<' || ch === '[') depth++;
    else if (ch === '>' || ch === ']') depth--;
    else if (ch === ',' && depth === 0) return args.slice(0, i).trim();
  }
  return args.trim();
}

/**
 * Extract element type from a container type string.
 * Uses bracket-balanced parsing (no regex) for generic argument extraction.
 * Returns undefined for ambiguous or unparseable strings.
 *
 * Handles:
 * - Array<User>   → User  (generic angle brackets)
 * - User[]        → User  (array suffix)
 * - []User        → User  (Go slice prefix)
 * - List[User]    → User  (Python subscript)
 * - [User]        → User  (Swift array sugar)
 * - vector<User>  → User  (C++ container)
 * - Vec<User>     → User  (Rust container)
 *
 * For multi-argument generics (Map<K, V>), returns the first or last type arg
 * based on `pos` ('first' for keys, 'last' for values — default 'last').
 * Returns undefined when the extracted type is not a simple word.
 *
 * @param typeStr Raw type text; empty strings and strings longer than 2048 chars are rejected
 * @param pos Which top-level type argument to pick for multi-argument generics
 */
export function extractElementTypeFromString(typeStr: string, pos: TypeArgPosition = 'last'): string | undefined {
  if (!typeStr || typeStr.length > 2048) return undefined;

  // 1. Array suffix: User[] → User
  if (typeStr.endsWith('[]')) {
    const base = typeStr.slice(0, -2).trim();
    return base && /^\w+$/.test(base) ? base : undefined;
  }

  // 2. Go slice prefix: []User → User
  if (typeStr.startsWith('[]')) {
    const element = typeStr.slice(2).trim();
    return element && /^\w+$/.test(element) ? element : undefined;
  }

  // 3. Swift array sugar: [User] → User
  //    Must start with '[', end with ']', and contain no angle brackets.
  //    (List[User] does not start with '[' and is handled by step 4.)
  if (typeStr.startsWith('[') && typeStr.endsWith(']') && !typeStr.includes('<')) {
    const element = typeStr.slice(1, -1).trim();
    return element && /^\w+$/.test(element) ? element : undefined;
  }

  // 4. Generic bracket-balanced extraction: Array<User> / List[User] / Vec<User>
  //    Find the first opening bracket (< or [) and pick the one that appears first.
  const openAngle = typeStr.indexOf('<');
  const openSquare = typeStr.indexOf('[');

  let openIdx = -1;
  let closeChar = '';
  if (openAngle >= 0 && (openSquare < 0 || openAngle < openSquare)) {
    openIdx = openAngle;
    closeChar = '>';
  } else if (openSquare >= 0) {
    openIdx = openSquare;
    closeChar = ']';
  }
  if (openIdx < 0) return undefined;

  // Walk bracket-balanced from the character after the opening bracket to find
  // the matching close bracket, tracking depth for nested brackets.
  // All bracket types (<, >, [, ]) contribute to depth uniformly, but only the
  // selected closeChar can match at depth 0 (prevents cross-bracket miscounting).
  let depth = 0;
  const start = openIdx + 1;
  let lastCommaIdx = -1; // Track last top-level comma for 'last' position
  for (let i = start; i < typeStr.length; i++) {
    const ch = typeStr[i];
    if (ch === '<' || ch === '[') {
      depth++;
    } else if (ch === '>' || ch === ']') {
      if (depth === 0) {
        // At depth 0 — only match if it is our selected close bracket.
        if (ch !== closeChar) return undefined; // mismatched bracket = malformed
        if (pos === 'last' && lastCommaIdx >= 0) {
          // Return last arg (text after last comma)
          const lastArg = typeStr.slice(lastCommaIdx + 1, i).trim();
          return lastArg && /^\w+$/.test(lastArg) ? lastArg : undefined;
        }
        const inner = typeStr.slice(start, i).trim();
        const firstArg = extractFirstArg(inner);
        return firstArg && /^\w+$/.test(firstArg) ? firstArg : undefined;
      }
      depth--;
    } else if (ch === ',' && depth === 0) {
      if (pos === 'first') {
        // Return first arg (text before first comma)
        const arg = typeStr.slice(start, i).trim();
        return arg && /^\w+$/.test(arg) ? arg : undefined;
      }
      lastCommaIdx = i;
    }
  }

  return undefined;
}

// ── Return type text helpers ─────────────────────────────────────────────
// extractReturnTypeName works on raw return-type text already stored in
// SymbolDefinition (e.g. "User", "Promise<User>", "User | null", "*User").
// Extracts the base user-defined type name.

/** Primitive / built-in types that should NOT produce a receiver binding. */
const PRIMITIVE_TYPES = new Set([
  'string', 'number', 'boolean', 'void', 'int', 'float', 'double', 'long',
  'short', 'byte', 'char', 'bool', 'str', 'i8', 'i16', 'i32', 'i64',
  'u8', 'u16', 'u32', 'u64', 'f32', 'f64', 'usize', 'isize',
  'undefined', 'null', 'None', 'nil',
]);

/**
 * Generic wrappers that extractReturnTypeName sees through: for these base
 * names the first non-lifetime type argument is used instead of the base.
 */
const WRAPPER_GENERICS = new Set([
  'Promise', 'Observable', 'Future', 'CompletableFuture', 'Task', 'ValueTask', // async wrappers
  'Option', 'Some', 'Optional', 'Maybe', // nullable wrappers
  'Result', 'Either', // result wrappers
  // Rust smart pointers (Deref to inner type)
  'Rc', 'Arc', 'Weak', // pointer types
  'MutexGuard', 'RwLockReadGuard', 'RwLockWriteGuard', // guard types
  'Ref', 'RefMut', // RefCell guards
  'Cow', // copy-on-write
  // Containers (List, Array, Vec, Set, etc.) are intentionally excluded —
  // methods are called on the container, not the element type.
  // Non-wrapper generics return the base type (e.g., List) via the else branch.
]);

/**
 * Extracts the first type argument from a comma-separated generic argument string,
 * respecting nested angle brackets. For example:
 *   "Result<User, Error>"  →  "Result<User, Error>"  (no top-level comma)
 *   "User, Error"          →  "User"
 *   "Map<K, V>, string"    →  "Map<K, V>"
 */
function extractFirstGenericArg(args: string): string {
  let depth = 0;
  for (let i = 0; i < args.length; i++) {
    if (args[i] === '<') depth++;
    else if (args[i] === '>') depth--;
    else if (args[i] === ',' && depth === 0) return args.slice(0, i).trim();
  }
  return args.trim();
}

/**
 * Extract the first non-lifetime type argument from a generic argument string.
 * Skips Rust lifetime parameters (e.g., `'a`, `'_`) to find the actual type.
 *   "'_, User"       →  "User"
 *   "'a, User"       →  "User"
 *   "User, Error"    →  "User"  (no lifetime — delegates to extractFirstGenericArg)
 */
function extractFirstTypeArg(args: string): string {
  let remaining = args;
  while (remaining) {
    const first = extractFirstGenericArg(remaining);
    if (!first.startsWith("'")) return first;
    // Skip past this lifetime arg + the comma separator
    const commaIdx = remaining.indexOf(',', first.length);
    if (commaIdx < 0) return first; // only lifetimes — fall through
    remaining = remaining.slice(commaIdx + 1).trim();
  }
  return args.trim();
}

/**
 * Split a type string into its union members, cutting only at `|` characters
 * that sit outside angle brackets. Members are trimmed; nothing is filtered.
 *   "User | null"                       →  ["User", "null"]
 *   "Promise<User | null>"              →  ["Promise<User | null>"]
 *   "Promise<User | null> | undefined"  →  ["Promise<User | null>", "undefined"]
 */
function splitTopLevelUnionMembers(text: string): string[] {
  const members: string[] = [];
  let depth = 0;
  let memberStart = 0;
  for (let i = 0; i < text.length; i++) {
    const ch = text[i];
    if (ch === '<') {
      depth++;
    } else if (ch === '>') {
      // Clamp at zero so a stray '>' (e.g. from '=>') cannot hide later pipes.
      if (depth > 0) depth--;
    } else if (ch === '|' && depth === 0) {
      members.push(text.slice(memberStart, i).trim());
      memberStart = i + 1;
    }
  }
  members.push(text.slice(memberStart).trim());
  return members;
}

const MAX_RETURN_TYPE_INPUT_LENGTH = 2048;
const MAX_RETURN_TYPE_LENGTH = 512;

/**
 * Extract a simple type name from raw return-type text.
 * Handles common patterns:
 *   "User"                 → "User"
 *   "Promise<User>"        → "User"   (unwrap wrapper generics)
 *   "Promise<User | null>" → "User"   (nullable union inside a wrapper)
 *   "Option<User>"         → "User"
 *   "Result<User, Error>"  → "User"   (first type arg)
 *   "User | null"          → "User"   (strip nullable union)
 *   "User?"                → "User"   (strip nullable suffix)
 *   "*User"                → "User"   (Go pointer)
 *   "&User"                → "User"   (Rust reference)
 * Returns undefined for complex types or primitives.
 *
 * @param raw Return-type text; inputs longer than MAX_RETURN_TYPE_INPUT_LENGTH are rejected
 * @param depth Recursion depth used while unwrapping nested wrappers; callers normally omit it
 */
export const extractReturnTypeName = (raw: string, depth = 0): string | undefined => {
  if (depth > 10) return undefined;
  if (raw.length > MAX_RETURN_TYPE_INPUT_LENGTH) return undefined;
  let text = raw.trim();
  if (!text) return undefined;

  // Strip pointer/reference prefixes: *User, &User, &mut User
  text = text.replace(/^[&*]+\s*(mut\s+)?/, '');

  // Strip nullable suffix: User?
  text = text.replace(/\?$/, '');

  // Handle union types: "User | null" → "User".
  // Only top-level pipes separate union members; a pipe inside generic
  // arguments (Promise<User | null>) belongs to the argument and is resolved
  // by the recursive call made from the wrapper branch below.
  if (text.includes('|')) {
    const parts = splitTopLevelUnionMembers(text).filter(p =>
      p !== 'null' && p !== 'undefined' && p !== 'void' && p !== 'None' && p !== 'nil'
    );
    if (parts.length === 1) text = parts[0];
    else return undefined; // genuine union — too complex
  }

  // Handle generics: Promise<User> → unwrap if wrapper, else take base
  const genericMatch = text.match(/^(\w+)\s*<(.+)>$/);
  if (genericMatch) {
    const [, base, args] = genericMatch;
    if (WRAPPER_GENERICS.has(base)) {
      // Take the first non-lifetime type argument, using bracket-balanced splitting
      // so that nested generics like Result<User, Error> are not split at the inner
      // comma. Lifetime parameters (Rust 'a, '_) are skipped.
      const firstArg = extractFirstTypeArg(args);
      return extractReturnTypeName(firstArg, depth + 1);
    }
    // Non-wrapper generic: return the base type (e.g., Map<K,V> → Map)
    return PRIMITIVE_TYPES.has(base.toLowerCase()) ? undefined : base;
  }

  // Bare wrapper type without generic argument (e.g. Task, Promise, Option)
  // should not produce a binding — these are meaningless without a type parameter
  if (WRAPPER_GENERICS.has(text)) return undefined;

  // Handle qualified names: models.User → User, Models::User → User, \App\Models\User → User
  if (text.includes('::') || text.includes('.') || text.includes('\\')) {
    text = text.split(/::|[.\\]/).pop()!;
  }

  // Final check: skip primitives
  if (PRIMITIVE_TYPES.has(text) || PRIMITIVE_TYPES.has(text.toLowerCase())) return undefined;

  // Must start with an uppercase letter or underscore and be a plain identifier
  if (!/^[A-Z_]\w*$/.test(text)) return undefined;

  // If the final extracted type name is too long, reject it
  if (text.length > MAX_RETURN_TYPE_LENGTH) return undefined;

  return text;
};

// ── Property declared-type extraction ────────────────────────────────────
// Shared between parse-worker (worker path) and parsing-processor (sequential path).
/**
 * Extract the declared type of a property/field from its AST definition node.
 * Handles cross-language patterns:
 * - TypeScript: `name: Type` → type_annotation child
 * - Java: `Type name` → type child on field_declaration
 * - C#: `Type Name { get; set; }` → type child on property_declaration
 * - Go: `Name Type` → type child on field_declaration
 * - Kotlin: `var name: Type` → variable_declaration child with type field
 * - PHP: `@var Type` PHPDoc comment preceding the property
 *
 * The strategies below are tried in order and the first hit wins.
 *
 * @param definitionNode The property/field definition node, or null
 * @returns The normalized type name, or undefined if no type can be extracted.
 */
export const extractPropertyDeclaredType = (definitionNode: SyntaxNode | null): string | undefined => {
  if (!definitionNode) return undefined;

  // Strategy 1: Look for a named `type` field on the definition itself
  const typeNode = definitionNode.childForFieldName?.('type');
  if (typeNode) {
    const typeName = extractSimpleTypeName(typeNode);
    if (typeName) return typeName;
    // Fallback: use the raw text (for complex types like User[] or List<User>)
    const text = typeNode.text?.trim();
    if (text && text.length < 100) return text;
  }

  // Strategy 2: Walk children looking for type_annotation (TypeScript pattern)
  for (let i = 0; i < definitionNode.childCount; i++) {
    const child = definitionNode.child(i);
    if (!child) continue;
    if (child.type === 'type_annotation') {
      // Type annotation has the actual type as a child (the ':' token is skipped)
      for (let j = 0; j < child.childCount; j++) {
        const typeChild = child.child(j);
        if (typeChild && typeChild.type !== ':') {
          const typeName = extractSimpleTypeName(typeChild);
          if (typeName) return typeName;
          const text = typeChild.text?.trim();
          if (text && text.length < 100) return text;
        }
      }
    }
  }

  // Strategy 3: For Java field_declaration, the type is a sibling of variable_declarator
  // AST: (field_declaration type: (type_identifier) declarator: (variable_declarator ...))
  const parentDecl = definitionNode.parent;
  if (parentDecl) {
    const parentType = parentDecl.childForFieldName?.('type');
    if (parentType) {
      const typeName = extractSimpleTypeName(parentType);
      if (typeName) return typeName;
    }
  }

  // Strategy 4: Kotlin property_declaration — type is nested inside variable_declaration child
  // AST: (property_declaration (variable_declaration (simple_identifier) ":" (user_type (type_identifier))))
  // Kotlin's variable_declaration has NO named 'type' field — children are all positional.
  for (let i = 0; i < definitionNode.childCount; i++) {
    const child = definitionNode.child(i);
    if (child?.type === 'variable_declaration') {
      // Try named field first (works for other languages sharing this strategy)
      const varType = child.childForFieldName?.('type');
      if (varType) {
        const typeName = extractSimpleTypeName(varType);
        if (typeName) return typeName;
        const text = varType.text?.trim();
        if (text && text.length < 100) return text;
      }
      // Fallback: scan named children for user_type / type_identifier (Kotlin)
      for (let j = 0; j < child.namedChildCount; j++) {
        const varChild = child.namedChild(j);
        if (varChild && (varChild.type === 'user_type' || varChild.type === 'type_identifier'
          || varChild.type === 'nullable_type' || varChild.type === 'generic_type')) {
          const typeName = extractSimpleTypeName(varChild);
          if (typeName) return typeName;
        }
      }
    }
  }

  // Strategy 5: PHP @var PHPDoc — look for preceding comment with @var Type
  // Handles pre-PHP-7.4 code: /** @var Address */ public $address;
  const prevSibling = definitionNode.previousNamedSibling ?? definitionNode.parent?.previousNamedSibling;
  if (prevSibling?.type === 'comment') {
    const commentText = prevSibling.text;
    // The optional leading backslash lets fully-qualified names
    // (`@var \App\Models\User`) match as well as bare ones (`@var User`).
    const varMatch = commentText?.match(/@var\s+\\?([A-Z][\w\\]*)/);
    if (varMatch) {
      // Strip namespace prefix: \App\Models\User → User
      const raw = varMatch[1];
      const base = raw.includes('\\') ? raw.split('\\').pop()! : raw;
      if (base && /^[A-Z]\w*$/.test(base)) return base;
    }
  }

  return undefined;
};

================================================
FILE: gitnexus/src/core/ingestion/type-extractors/swift.ts
================================================
import type { SyntaxNode } from '../utils.js';
import type { LanguageTypeConfig, ParameterExtractor, TypeBindingExtractor, InitializerExtractor, ClassNameLookup, ConstructorBindingScanner } from './types.js';
import { extractSimpleTypeName, extractVarName, findChildByType, hasTypeAnnotation } from './shared.js';

/** Swift node types that are treated as declarations by this extractor. */
const DECLARATION_NODE_TYPES: ReadonlySet<string> = new Set([
  'property_declaration',
]);

/**
 * Swift: let x: Foo = ...
 * Records `varName → typeName` in env when the declaration has both a pattern
 * and a type annotation that reduces to a simple type name.
 */
const extractDeclaration: TypeBindingExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  // Swift property_declaration has pattern and type_annotation
  const pattern = node.childForFieldName('pattern')
    ?? findChildByType(node, 'pattern');
  const typeAnnotation = node.childForFieldName('type')
    ?? findChildByType(node, 'type_annotation');
  if (!pattern || !typeAnnotation) return;
  // Fall back to the raw pattern text when it is not a plain identifier node.
  const varName = extractVarName(pattern) ?? pattern.text;
  const typeName = extractSimpleTypeName(typeAnnotation);
  if (varName && typeName) env.set(varName, typeName);
};

/**
 * Swift: parameter → name: type
 * For `parameter` nodes the name comes from the 'name' field, else 'internal_name';
 * for any other node kind it comes from 'name', else 'pattern'.
 */
const extractParameter: ParameterExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  const fallbackNameField = node.type === 'parameter' ? 'internal_name' : 'pattern';
  const nameNode = node.childForFieldName('name') ?? node.childForFieldName(fallbackNameField);
  const typeNode = node.childForFieldName('type');

  if (!nameNode || !typeNode) return;
  const varName = extractVarName(nameNode);
  const typeName = extractSimpleTypeName(typeNode);
  if (varName && typeName) env.set(varName, typeName);
};

/** Swift: let user = User(name: "alice") — infer type from call when callee is a known class.
 *  Swift initializers are syntactically identical to function calls, so we verify
 *  against classNames (which may include cross-file SymbolTable lookups). */
const extractInitializer: InitializerExtractor = (node: SyntaxNode, env: Map<string, string>, classNames: ClassNameLookup): void => {
  if (node.type !== 'property_declaration') return;
  // Skip if has type annotation — extractDeclaration handled it
  if (node.childForFieldName('type') || findChildByType(node, 'type_annotation')) return;
  // Find pattern (variable name)
  const pattern = node.childForFieldName('pattern') ?? findChildByType(node, 'pattern');
  if (!pattern) return;
  const varName = extractVarName(pattern) ?? pattern.text;
  // Never overwrite a binding that is already present in env.
  if (!varName || env.has(varName)) return;
  // Find call_expression in the value
  const callExpr = findChildByType(node, 'call_expression');
  if (!callExpr) return;
  const callee = callExpr.firstNamedChild;
  if (!callee) return;
  // Direct call: User(name: "alice")
  if (callee.type === 'simple_identifier') {
    const calleeName = callee.text;
    if (calleeName && classNames.has(calleeName)) {
      env.set(varName, calleeName);
    }
    return;
  }
  // Explicit init: User.init(name: "alice") — navigation_expression with .init suffix
  if (callee.type === 'navigation_expression') {
    const receiver = callee.firstNamedChild;
    const suffix = callee.lastNamedChild;
    if (receiver?.type === 'simple_identifier' && suffix?.text === 'init') {
      const calleeName = receiver.text;
      if (calleeName && classNames.has(calleeName)) {
        env.set(varName, calleeName);
      }
    }
  }
};

/**
 * Swift: let user = User(name: "alice") — scan property_declaration for constructor binding.
 * Only un-annotated declarations whose value is a call_expression match. Unlike
 * extractInitializer, the callee is not checked against known class names here.
 */
const scanConstructorBinding: ConstructorBindingScanner = (node) => {
  if (node.type !== 'property_declaration') return undefined;
  if (hasTypeAnnotation(node)) return undefined;
  // Same lookup as extractDeclaration/extractInitializer: prefer a 'pattern'
  // field, otherwise take the first named child of type 'pattern'.
  const pattern = node.childForFieldName('pattern') ?? findChildByType(node, 'pattern');
  if (!pattern) return undefined;
  const varName = pattern.text;
  if (!varName) return undefined;
  const callExpr = findChildByType(node, 'call_expression');
  if (!callExpr) return undefined;
  const callee = callExpr.firstNamedChild;
  if (!callee) return undefined;
  if (callee.type === 'simple_identifier') {
    return { varName, calleeName: callee.text };
  }
  if (callee.type === 'navigation_expression') {
    const receiver = callee.firstNamedChild;
    const suffix = callee.lastNamedChild;
    if (receiver?.type === 'simple_identifier' && suffix?.text === 'init') {
      return { varName, calleeName: receiver.text };
    }
    // General qualified call: service.getUser() → extract method name.
    // tree-sitter-swift may wrap the identifier in navigation_suffix, so
    // check both direct simple_identifier and navigation_suffix > simple_identifier.
    if (suffix?.type === 'simple_identifier') {
      return { varName, calleeName: suffix.text };
    }
    if (suffix?.type === 'navigation_suffix') {
      const inner = suffix.lastNamedChild;
      if (inner?.type === 'simple_identifier') {
        return { varName, calleeName: inner.text };
      }
    }
  }
  return undefined;
};

/** Type-extraction hooks for Swift, bundled in the shape LanguageTypeConfig expects. */
export const typeConfig: LanguageTypeConfig = {
  declarationNodeTypes: DECLARATION_NODE_TYPES,
  extractDeclaration,
  extractParameter,
  extractInitializer,
  scanConstructorBinding,
};

================================================
FILE: gitnexus/src/core/ingestion/type-extractors/types.ts
================================================
import type { SyntaxNode } from '../utils.js';

/** Extracts type bindings from a declaration node into the env map */
export type TypeBindingExtractor = (node: SyntaxNode, env: Map<string, string>) => void;

/** Extracts type bindings from a parameter node into the env map */
export type ParameterExtractor = (node: SyntaxNode, env: Map<string, string>) => void;

/** Minimal interface for checking whether a name is a known class/struct.
 *  Narrower than ReadonlySet — only `.has()` is used by extractors. */
export type ClassNameLookup = { has(name: string): boolean };

/** Extracts type bindings from a constructor-call initializer, with access to known class names */
export type InitializerExtractor = (node: SyntaxNode, env: Map<string, string>, classNames: ClassNameLookup) => void;

/** Scans an AST node for untyped `var = callee()` patterns for return-type inference.
 *  Returns { varName, calleeName } if the node matches, undefined otherwise.
 *  `receiverClassName` — optional hint for method calls on known receivers
 *  (e.g. $this->getUser() in PHP provides the enclosing class name).
*/
export type ConstructorBindingScanner = (
  node: SyntaxNode,
) => { varName: string; calleeName: string; receiverClassName?: string } | undefined;

/** Produces a return type string for a method/function definition node.
 *  Intended for languages whose return types live in comments (e.g. YARD @return [Type])
 *  instead of AST fields. Yields undefined when no return type can be determined. */
export type ReturnTypeExtractor = (node: SyntaxNode) => string | undefined;

/** Maps a literal AST node to its type name for overload disambiguation.
 *  Yields the canonical type name (e.g. 'int', 'String', 'boolean'), or undefined
 *  for non-literal nodes. Only used when resolveCallTarget has multiple candidates
 *  with parameterTypes — ~1-3% of call sites. */
export type LiteralTypeInferrer = (node: SyntaxNode) => string | undefined;

/** Recognises constructor-style call expressions that don't use the `new` keyword.
 *  Yields the constructor class name if the node's initializer is a constructor call,
 *  undefined otherwise. Used for virtual dispatch in languages like Kotlin
 *  where constructors are syntactically identical to function calls, and C++
 *  where smart pointer factory functions (make_shared/make_unique) wrap constructors. */
export type ConstructorTypeDetector = (node: SyntaxNode, classNames: ClassNameLookup) => string | undefined;

/** Peels a declared type name down to its inner type for virtual dispatch comparison.
 *  E.g., C++ shared_ptr<Animal> → Animal. Yields undefined if no unwrapping applies. */
export type DeclaredTypeUnwrapper = (declaredType: string, typeNode: SyntaxNode) => string | undefined;

/** Narrow lookup interface for resolving a callee name → return type name.
 *  Backed by SymbolTable.lookupFuzzyCallable; passed via ForLoopExtractorContext.
 *  Conservative: returns undefined when the callee is ambiguous (0 or 2+ matches). */
export interface ReturnTypeLookup {
  /** Processed type name after stripping wrappers (e.g., 'User' from 'Promise<User>').
   *  Use for call-result variable bindings (`const b = foo()`). */
  lookupReturnType(callee: string): string | undefined;

  /** Raw return type as declared in the symbol (e.g., '[]User', 'List<User>').
   *  Use for iterable-element extraction (`for v := range foo()`). */
  lookupRawReturnType(callee: string): string | undefined;
}

/** Context object handed to a ForLoopExtractor.
 *  Bundles the four values that used to be passed positionally. */
export interface ForLoopExtractorContext {
  /** Mutable type-env for the current scope — extractor writes bindings here */
  scopeEnv: Map<string, string>;

  /** Maps `scope\0varName` to the declaration's type annotation AST node */
  declarationTypeNodes: ReadonlyMap<string, SyntaxNode>;

  /** Current scope key, e.g. `"process@42"` */
  scope: string;

  /** Resolves a callee name to its declared return type (undefined = unknown/ambiguous) */
  returnTypeLookup: ReturnTypeLookup;
}

/** Binds the loop variable of a for-each statement to its element type. */
export type ForLoopExtractor = (node: SyntaxNode, ctx: ForLoopExtractorContext) => void;

/** Discriminated union for pending Tier-2 propagation items.
 *  - `copy` — `const b = a` (identifier alias, propagate a's type to b)
 *  - `callResult` — `const b = foo()` (bind b to foo's declared return type)
 *  - `fieldAccess` — `const b = a.field` (bind b to field's declaredType on a's type)
 *  - `methodCallResult` — `const b = a.method()` (bind b to method's returnType on a's type) */
export type PendingAssignment =
  | { kind: 'copy'; lhs: string; rhs: string }
  | { kind: 'callResult'; lhs: string; callee: string }
  | { kind: 'fieldAccess'; lhs: string; receiver: string; field: string }
  | { kind: 'methodCallResult'; lhs: string; receiver: string; method: string };

/** Extracts a pending assignment for Tier 2 propagation.
 *  Returns a PendingAssignment when the RHS is a bare identifier (`copy`), a
 *  call expression (`callResult`), a field access (`fieldAccess`), or a
 *  method call with receiver (`methodCallResult`) and the LHS has no resolved type yet.
 *  May return an array of PendingAssignment items for destructuring patterns
 *  (e.g., `const { a, b } = obj` emits N fieldAccess items).
 *  Returns undefined if the node is not a matching assignment. */
export type PendingAssignmentExtractor = (
  node: SyntaxNode,
  scopeEnv: ReadonlyMap<string, string>,
) => PendingAssignment | PendingAssignment[] | undefined;

/** Outcome of a pattern binding extraction. */
export interface PatternBindingResult {
  varName: string;
  typeName: string;
  /** Optional: position range that should be used for the patternOverride.
   *  When present, the override uses this range instead of the auto-detected
   *  branch scope. Used by null-check narrowing to target the if-body specifically. */
  narrowingRange?: { startIndex: number; endIndex: number };
}

/** Extracts a typed variable binding from a pattern-matching construct.
 *  Returns { varName, typeName } for patterns that introduce NEW variables
 *  or narrow existing variables (null-check narrowing).
 *  Examples: `if let Some(user) = opt` (Rust), `x instanceof User user` (Java),
 *  `if (x != null)` (null-check narrowing in TS/Kotlin/C#).
 *  Conservative: returns undefined when the source variable's type is unknown.
 *
 *  @param scopeEnv Read-only view of already-resolved type bindings in the current scope.
 *  @param declarationTypeNodes Maps `scope\0varName` to the original declaration's type
 *  annotation AST node. Allows extracting generic type arguments (e.g., T from Result<T,E>)
 *  that are stripped during normal TypeEnv extraction.
 *  @param scope Current scope key (e.g. `"process@42"`) for declarationTypeNodes lookups.
*/
export type PatternBindingExtractor = (
  node: SyntaxNode,
  scopeEnv: ReadonlyMap<string, string>,
  declarationTypeNodes: ReadonlyMap<string, SyntaxNode>,
  scope: string,
) => PatternBindingResult | undefined;

/** Per-language type extraction configuration */
export interface LanguageTypeConfig {
  /** Allow pattern binding to overwrite existing scopeEnv entries.
   *  WARNING: Enables function-scope type pollution. Only for languages with
   *  smart-cast semantics (e.g., Kotlin `when/is`) where the subject variable
   *  already exists in scopeEnv from its declaration. */
  readonly allowPatternBindingOverwrite?: boolean;

  /** Node types that represent typed declarations for this language */
  declarationNodeTypes: ReadonlySet<string>;

  /** AST node types for for-each/for-in statements with explicit element types. */
  forLoopNodeTypes?: ReadonlySet<string>;

  /** Optional allowlist of AST node types on which extractPatternBinding should run.
   *  When present, extractPatternBinding is only invoked for nodes whose type is in this set,
   *  short-circuiting the call for all other node types. When absent, every node is passed to
   *  extractPatternBinding (legacy behaviour). */
  patternBindingNodeTypes?: ReadonlySet<string>;

  /** Extract a (varName → typeName) binding from a declaration node */
  extractDeclaration: TypeBindingExtractor;

  /** Extract a (varName → typeName) binding from a parameter node */
  extractParameter: ParameterExtractor;

  /** Extract a (varName → typeName) binding from a constructor-call initializer.
   *  Called as fallback when extractDeclaration produces no binding for a declaration node.
   *  Meant for languages with syntactic constructor markers (new, composite_literal, ::new);
   *  languages without one can verify the callee against classNames instead.
   *  Receives classNames — a lookup of the class/struct names known to the caller. */
  extractInitializer?: InitializerExtractor;

  /** Scan for untyped `var = callee()` assignments for return-type inference.
   *  Called on every AST node during buildTypeEnv walk; returns undefined for non-matches.
   *  The callee binding is unverified — the caller must confirm against the SymbolTable. */
  scanConstructorBinding?: ConstructorBindingScanner;

  /** Extract return type from comment-based annotations (e.g. YARD @return [Type]).
   *  Called as fallback when extractMethodSignature finds no AST-based return type. */
  extractReturnType?: ReturnTypeExtractor;

  /** Extract loop variable → type binding from a for-each AST node. */
  extractForLoopBinding?: ForLoopExtractor;

  /** Extract pending assignment(s) for Tier 2 propagation.
   *  Called on declaration/assignment nodes; returns a PendingAssignment when the RHS
   *  is a bare identifier (copy), call expression (callResult), field access (fieldAccess)
   *  or method call with receiver (methodCallResult) and the LHS has no resolved type yet.
   *  Language-specific because AST shapes differ widely. */
  extractPendingAssignment?: PendingAssignmentExtractor;

  /** Extract a typed variable binding from a pattern-matching construct.
   *  Called on every AST node (or only on patternBindingNodeTypes when that allowlist is set);
   *  returns { varName, typeName } when the node introduces a new typed variable via
   *  pattern matching (e.g. `if let Some(x) = opt`, `x instanceof T t`).
   *  The extractor receives the current scope's resolved bindings (read-only) to look up the
   *  source variable's type. Returns undefined for non-matching nodes or unknown source types.
*/
  extractPatternBinding?: PatternBindingExtractor;
  inferLiteralType?: LiteralTypeInferrer;
  detectConstructorType?: ConstructorTypeDetector;
  unwrapDeclaredType?: DeclaredTypeUnwrapper;
}

================================================
FILE: gitnexus/src/core/ingestion/type-extractors/typescript.ts
================================================
import type { SyntaxNode } from '../utils.js';
import type { LanguageTypeConfig, ParameterExtractor, TypeBindingExtractor, InitializerExtractor, ClassNameLookup, ConstructorBindingScanner, ReturnTypeExtractor, PendingAssignmentExtractor, PendingAssignment, ForLoopExtractor, PatternBindingExtractor, LiteralTypeInferrer } from './types.js';
import { extractSimpleTypeName, extractVarName, hasTypeAnnotation, unwrapAwait, extractCalleeName, extractElementTypeFromString, extractGenericTypeArgs, resolveIterableElementType, methodToTypeArgPosition, type TypeArgPosition } from './shared.js';

const DECLARATION_NODE_TYPES: ReadonlySet<string> = new Set([
  'lexical_declaration',
  'variable_declaration',
  'function_declaration', // JSDoc @param on function declarations
  'method_definition', // JSDoc @param on class methods
  'public_field_definition', // class field: private users: User[]
]);

/**
 * Reduce a raw JSDoc type expression (the text between `{` and `}`) to a single
 * simple type name, or undefined when the expression is ambiguous/unsupported.
 *
 * Examples: `?User` → `User`, `User|null` → `User`, `module:models.User` → `User`,
 * `Promise<User>` → `Promise`, `Promise<models.User>` → `Promise`, `Array.<User>` → `Array`.
 */
const normalizeJsDocType = (raw: string): string | undefined => {
  let type = raw.trim();
  // Strip JSDoc nullable/non-nullable prefixes: ?User → User, !User → User
  if (type.startsWith('?') || type.startsWith('!')) type = type.slice(1);
  // Strip union with null/undefined/void: User|null → User
  const parts = type.split('|').map(p => p.trim()).filter(p =>
    p !== 'null' && p !== 'undefined' && p !== 'void'
  );
  if (parts.length !== 1) return undefined; // ambiguous union
  type = parts[0];
  // Strip module: prefix — module:models.User → models.User
  if (type.startsWith('module:')) type = type.slice(7);
  // Strip generic wrapper: Promise<User> → Promise (base type, not inner).
  // This must happen BEFORE the dotted-path split below: dots inside the type
  // arguments (Promise<models.User>) or the Closure-style `Array.<User>` form
  // would otherwise leave a fragment such as `User>` that fails the identifier check.
  const genericStart = type.indexOf('<');
  if (genericStart !== -1) {
    type = type.slice(0, genericStart).trim().replace(/\.$/, '');
  }
  // Take last segment of dotted path: models.User → User
  const segments = type.split('.');
  type = segments[segments.length - 1];
  // Simple identifier check
  if (/^\w+$/.test(type)) return type;
  return undefined;
};

/** Regex to extract JSDoc @param annotations: `@param {Type} name` */
const JSDOC_PARAM_RE = /@param\s*\{([^}]+)\}\s+\[?(\w+)[\]=]?[^\s]*/g;

/**
 * Collect JSDoc @param type bindings from comment nodes preceding a function/method.
 * Walks backwards over previous siblings, gathering comments (decorators and unnamed
 * nodes are skipped) until another named node is reached.
 * Returns a map of paramName → typeName.
 */
const collectJsDocParams = (funcNode: SyntaxNode): Map<string, string> => {
  const commentTexts: string[] = [];
  let sibling = funcNode.previousSibling;
  while (sibling) {
    if (sibling.type === 'comment') {
      // unshift keeps the comments in source order
      commentTexts.unshift(sibling.text);
    } else if (sibling.isNamed && sibling.type !== 'decorator') {
      break;
    }
    sibling = sibling.previousSibling;
  }
  if (commentTexts.length === 0) return new Map();

  const params = new Map<string, string>();
  const commentBlock = commentTexts.join('\n');
  // The regex is global and module-level: reset its cursor before each scan.
  JSDOC_PARAM_RE.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = JSDOC_PARAM_RE.exec(commentBlock)) !== null) {
    const typeName = normalizeJsDocType(match[1]);
    const paramName = match[2];
    if (typeName) {
      params.set(paramName, typeName);
    }
  }
  return params;
};

/**
 * TypeScript: const x: Foo = ..., let x: Foo
 * Also: JSDoc @param annotations on function/method definitions (for .js files).
 */
const extractDeclaration: TypeBindingExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  // JSDoc @param on functions/methods — pre-populate env with param types
  if (node.type === 'function_declaration' || node.type === 'method_definition') {
    const jsDocParams = collectJsDocParams(node);
    for (const [paramName, typeName] of jsDocParams) {
      // Existing bindings win over JSDoc-derived ones
      if (!env.has(paramName)) env.set(paramName, typeName);
    }
    return;
  }

  // Class field: `private users: User[]` — public_field_definition has name + type fields directly.
  if (node.type === 'public_field_definition') {
    const nameNode = node.childForFieldName('name');
    const typeAnnotation = node.childForFieldName('type');
    if (!nameNode || !typeAnnotation) return;
    const varName = nameNode.text;
    if (!varName) return;
    const typeName = extractSimpleTypeName(typeAnnotation);
    if (typeName) env.set(varName, typeName);
    return;
  }

  // lexical_declaration / variable_declaration: one binding per annotated declarator
  for (let i = 0; i < node.namedChildCount; i++) {
    const declarator = node.namedChild(i);
    if (declarator?.type !== 'variable_declarator') continue;
    const nameNode = declarator.childForFieldName('name');
    const typeAnnotation = declarator.childForFieldName('type');
    if (!nameNode || !typeAnnotation) continue;
    const varName = extractVarName(nameNode);
    const typeName = extractSimpleTypeName(typeAnnotation);
    if (varName && typeName) env.set(varName, typeName);
  }
};

/** TypeScript: required_parameter / optional_parameter → name: type */
const extractParameter: ParameterExtractor = (node: SyntaxNode, env: Map<string, string>): void => {
  let nameNode: SyntaxNode | null = null;
  let typeNode: SyntaxNode | null = null;

  if (node.type === 'required_parameter' || node.type === 'optional_parameter') {
    nameNode = node.childForFieldName('pattern') ?? node.childForFieldName('name');
    typeNode = node.childForFieldName('type');
  } else {
    // Generic fallback
    nameNode = node.childForFieldName('name') ?? node.childForFieldName('pattern');
    typeNode = node.childForFieldName('type');
  }

  if (!nameNode || !typeNode) return;
  const varName = extractVarName(nameNode);
  const typeName = extractSimpleTypeName(typeNode);
  if (varName && typeName) env.set(varName, typeName);
};

/** TypeScript: const x = new User() — infer type from new_expression */
const extractInitializer: InitializerExtractor = (node: SyntaxNode, env: Map<string, string>, _classNames: ClassNameLookup): void => {
  for (let i = 0; i < node.namedChildCount; i++) {
    const declarator = node.namedChild(i);
    if (declarator?.type !== 'variable_declarator') continue;
    // Only activate when there is no explicit type annotation — extractDeclaration already
    // handles the annotated case and this function is called as a fallback.
    if (declarator.childForFieldName('type') !== null) continue;
    let valueNode = declarator.childForFieldName('value');
    // Unwrap `new User() as T`, `new User()!`, and double-cast `new User() as unknown as T`
    while (valueNode?.type === 'as_expression' || valueNode?.type === 'non_null_expression') {
      valueNode = valueNode.firstNamedChild;
    }
    if (valueNode?.type !== 'new_expression') continue;
    const constructorNode = valueNode.childForFieldName('constructor');
    if (!constructorNode) continue;
    const nameNode = declarator.childForFieldName('name');
    if (!nameNode) continue;
    const varName = extractVarName(nameNode);
    const typeName = extractSimpleTypeName(constructorNode);
    if (varName && typeName) env.set(varName, typeName);
  }
};

/**
 * TypeScript/JavaScript: const user = getUser() — variable_declarator with call_expression value.
 * Only matches unannotated declarators; annotated ones are handled by extractDeclaration.
 * await is unwrapped: const user = await fetchUser() → callee = 'fetchUser'.
*/
const scanConstructorBinding: ConstructorBindingScanner = (node) => {
  if (node.type !== 'variable_declarator') return undefined;
  if (hasTypeAnnotation(node)) return undefined;
  const nameNode = node.childForFieldName('name');
  // Destructuring patterns are not plain identifiers and are skipped here
  if (!nameNode || nameNode.type !== 'identifier') return undefined;
  const value = unwrapAwait(node.childForFieldName('value'));
  if (!value || value.type !== 'call_expression') return undefined;
  const calleeName = extractCalleeName(value);
  if (!calleeName) return undefined;
  return { varName: nameNode.text, calleeName };
};

/** Regex to extract @returns or @return from JSDoc comments: `@returns {Type}` */
const JSDOC_RETURN_RE = /@returns?\s*\{([^}]+)\}/;

/**
 * Minimal sanitization for JSDoc return types — preserves generic wrappers
 * (e.g. `Promise<User>`) so that extractReturnTypeName in call-processor
 * can apply WRAPPER_GENERICS unwrapping. Unlike normalizeJsDocType (which
 * strips generics), this only strips JSDoc-specific syntax markers.
 *
 * Nullish members of a top-level union are dropped (`User|null` → `User`),
 * mirroring how the `?User` prefix form is treated. Any union that still has
 * more than one member, or a `|` nested inside the remaining type, is rejected.
 */
const sanitizeReturnType = (raw: string): string | undefined => {
  let type = raw.trim();
  // Strip JSDoc nullable/non-nullable prefixes: ?User → User, !User → User
  if (type.startsWith('?') || type.startsWith('!')) type = type.slice(1);

  if (type.includes('|')) {
    // Split on `|` only at nesting depth 0 so Promise<A|B> stays in one piece.
    const members: string[] = [];
    let depth = 0;
    let current = '';
    for (const ch of type) {
      if (ch === '<' || ch === '(' || ch === '[' || ch === '{') depth++;
      else if (ch === '>' || ch === ')' || ch === ']' || ch === '}') depth = Math.max(0, depth - 1);
      if (ch === '|' && depth === 0) {
        members.push(current.trim());
        current = '';
      } else {
        current += ch;
      }
    }
    members.push(current.trim());
    const meaningful = members.filter(m => m !== 'null' && m !== 'undefined' && m !== 'void');
    if (meaningful.length !== 1) return undefined; // ambiguous union
    type = meaningful[0];
  }

  // Strip module: prefix — module:models.User → models.User
  if (type.startsWith('module:')) type = type.slice(7);
  // Reject unions that survive inside the remaining type (ambiguous)
  if (type.includes('|')) return undefined;
  if (!type) return undefined;
  return type;
};

/**
 * Extract return type from JSDoc `@returns {Type}` or `@return {Type}` annotation
 * preceding a function/method definition. Walks backwards through preceding siblings
 * looking for comment nodes containing the annotation.
*/
const extractReturnType: ReturnTypeExtractor = (node) => {
  for (let prev = node.previousSibling; prev; prev = prev.previousSibling) {
    if (prev.type === 'comment') {
      const found = JSDOC_RETURN_RE.exec(prev.text);
      if (found) return sanitizeReturnType(found[1]);
      continue;
    }
    // Any named node other than a decorator ends the run of leading comments
    if (prev.isNamed && prev.type !== 'decorator') break;
  }
  return undefined;
};

const FOR_LOOP_NODE_TYPES: ReadonlySet<string> = new Set([
  'for_in_statement',
]);

/** TS function/method node types that carry a parameters list. */
const TS_FUNCTION_NODE_TYPES = new Set([
  'function_declaration',
  'function_expression',
  'arrow_function',
  'method_definition',
  'generator_function',
  'generator_function_declaration',
]);

/**
 * Derive the element type from a TypeScript type annotation AST node.
 * Shapes handled directly:
 *   type_annotation ": User[]"      → array_type → element type name
 *   type_annotation ": Array<User>" → generic_type → first or last type argument (per `pos`)
 *   readonly User[]                 → readonly_type is unwrapped and re-examined
 * Anything else (or a shape with no usable child) goes through the text-based
 * extractElementTypeFromString fallback. Recursion is capped at depth 50.
 */
const extractTsElementTypeFromAnnotation = (typeAnnotation: SyntaxNode, pos: TypeArgPosition = 'last', depth = 0): string | undefined => {
  if (depth > 50) return undefined;

  // Step inside type_annotation (its own text includes the ': ' prefix)
  let target = typeAnnotation;
  if (typeAnnotation.type === 'type_annotation' && typeAnnotation.firstNamedChild) {
    target = typeAnnotation.firstNamedChild;
  }

  switch (target.type) {
    case 'readonly_type': {
      const wrapped = target.firstNamedChild;
      if (wrapped) return extractTsElementTypeFromAnnotation(wrapped, pos, depth + 1);
      break;
    }
    case 'array_type': {
      // First named child is the element type
      const element = target.firstNamedChild;
      if (element) return extractSimpleTypeName(element);
      break;
    }
    case 'generic_type': {
      // 'first' selects keys, 'last' selects values (Map<string, User>)
      const typeArgs = extractGenericTypeArgs(target);
      if (typeArgs.length >= 1) {
        return pos === 'first' ? typeArgs[0] : typeArgs[typeArgs.length - 1];
      }
      break;
    }
  }

  // Fallback: string-based extraction on the (unwrapped) node text
  return extractElementTypeFromString(target.text, pos);
};

/**
 * Search a statement_block (function body) for a variable_declarator named `iterableName`
 * that has a type annotation, preceding the given `beforeNode`.
 * Returns the element type from the type annotation, or undefined.
*/
const findTsLocalDeclElementType = (
  iterableName: string,
  blockNode: SyntaxNode,
  beforeNode: SyntaxNode,
  pos: TypeArgPosition = 'last',
): string | undefined => {
  for (let i = 0; i < blockNode.namedChildCount; i++) {
    const stmt = blockNode.namedChild(i);
    if (!stmt) continue;
    // Stop when we reach the for-loop itself
    if (stmt === beforeNode || stmt.startIndex >= beforeNode.startIndex) break;
    // Look for lexical_declaration or variable_declaration
    if (stmt.type !== 'lexical_declaration' && stmt.type !== 'variable_declaration') continue;
    for (let j = 0; j < stmt.namedChildCount; j++) {
      const decl = stmt.namedChild(j);
      if (decl?.type !== 'variable_declarator') continue;
      const nameNode = decl.childForFieldName('name');
      if (nameNode?.text !== iterableName) continue;
      const typeAnnotation = decl.childForFieldName('type');
      if (typeAnnotation) return extractTsElementTypeFromAnnotation(typeAnnotation, pos);
    }
  }
  return undefined;
};

/**
 * Walk up the AST from a for-loop node to find the enclosing function scope,
 * then search (1) its parameter list and (2) local declarations in the body
 * for a variable named `iterableName` with a container type annotation.
 * Local declarations are only searched when the loop's immediate parent is a statement_block.
 * Returns the element type extracted from the annotation, or undefined.
 */
const findTsIterableElementType = (iterableName: string, startNode: SyntaxNode, pos: TypeArgPosition = 'last'): string | undefined => {
  let current: SyntaxNode | null = startNode.parent;
  // Capture the immediate statement_block parent to search local declarations
  const blockNode = current?.type === 'statement_block' ? current : null;
  while (current) {
    if (TS_FUNCTION_NODE_TYPES.has(current.type)) {
      // Search function parameters
      const paramsNode = current.childForFieldName('parameters') ?? current.childForFieldName('formal_parameters');
      if (paramsNode) {
        for (let i = 0; i < paramsNode.namedChildCount; i++) {
          const param = paramsNode.namedChild(i);
          if (!param) continue;
          const patternNode = param.childForFieldName('pattern') ?? param.childForFieldName('name');
          if (patternNode?.text === iterableName) {
            const typeAnnotation = param.childForFieldName('type');
            if (typeAnnotation) return extractTsElementTypeFromAnnotation(typeAnnotation, pos);
          }
        }
      }
      // Search local declarations in the function body (statement_block)
      if (blockNode) {
        const result = findTsLocalDeclElementType(iterableName, blockNode, startNode, pos);
        if (result) return result;
      }
      break; // stop at the nearest function boundary
    }
    current = current.parent;
  }
  return undefined;
};

/**
 * TypeScript/JavaScript: for (const user of users) where users has a known array type.
 *
 * Both `for...of` and `for...in` use the same `for_in_statement` AST node in tree-sitter.
 * We differentiate by checking for the `of` keyword among the unnamed children.
 *
 * Tier 1c: resolves the element type via three strategies in priority order:
 *   1. declarationTypeNodes — raw type annotation AST node (covers Array<User> from declarations)
 *   2. scopeEnv string — extractElementTypeFromString on the stored type (covers locally annotated vars)
 *   3. AST walk — walks up to the enclosing function's parameters to read User[] annotations directly
 * Only handles `for...of`; `for...in` produces string keys, not element types.
 */
const extractForLoopBinding: ForLoopExtractor = (node, { scopeEnv, declarationTypeNodes, scope, returnTypeLookup }): void => {
  if (node.type !== 'for_in_statement') return;

  // Confirm this is `for...of`, not `for...in`, by scanning unnamed children for the keyword text.
  let isForOf = false;
  for (let i = 0; i < node.childCount; i++) {
    const child = node.child(i);
    if (child && !child.isNamed && child.text === 'of') {
      isForOf = true;
      break;
    }
  }
  if (!isForOf) return;

  // The iterable is the `right` field — may be identifier, member_expression, or call_expression.
  const rightNode = node.childForFieldName('right');
  let iterableName: string | undefined;
  let methodName: string | undefined;
  let callExprElementType: string | undefined;
  if (rightNode?.type === 'identifier') {
    iterableName = rightNode.text;
  } else if (rightNode?.type === 'member_expression') {
    const prop = rightNode.childForFieldName('property');
    if (prop) iterableName = prop.text;
  } else if (rightNode?.type === 'call_expression') {
    // entries.values() → call_expression > function: member_expression > object + property
    // this.repos.values() → nested member_expression: extract property from inner member
    // getUsers() → call_expression > function: identifier (Phase 7.3 — return-type path)
    const fn = rightNode.childForFieldName('function');
    if (fn?.type === 'member_expression') {
      const obj = fn.childForFieldName('object');
      const prop = fn.childForFieldName('property');
      if (obj?.type === 'identifier') {
        iterableName = obj.text;
      } else if (obj?.type === 'member_expression') {
        // this.repos.values() → obj = this.repos → extract 'repos'
        const innerProp = obj.childForFieldName('property');
        if (innerProp) iterableName = innerProp.text;
      }
      if (prop?.type === 'property_identifier') methodName = prop.text;
    } else if (fn?.type === 'identifier') {
      // Direct function call: for (const user of getUsers())
      const rawReturn = returnTypeLookup.lookupRawReturnType(fn.text);
      if (rawReturn) callExprElementType = extractElementTypeFromString(rawReturn);
    }
  }
  if (!iterableName && !callExprElementType) return;

  let elementType: string | undefined = callExprElementType;
  if (!elementType) {
    // Guaranteed by the guard above; re-checked so the compiler can narrow without `!`.
    if (!iterableName) return;
    // Look up the container's base type name for descriptor-aware resolution
    const containerTypeName = scopeEnv.get(iterableName);
    const typeArgPos = methodToTypeArgPosition(methodName, containerTypeName);
    elementType = resolveIterableElementType(
      iterableName, node, scopeEnv, declarationTypeNodes, scope,
      extractTsElementTypeFromAnnotation, findTsIterableElementType,
      typeArgPos,
    );
  }
  if (!elementType) return;

  // The loop variable is the `left` field.
  const leftNode = node.childForFieldName('left');
  if (!leftNode) return;

  // Handle destructured for-of: for (const [k, v] of entries)
  // AST: left = array_pattern directly (no variable_declarator wrapper)
  // Bind the LAST identifier to the element type (value in [key, value] patterns)
  if (leftNode.type === 'array_pattern') {
    const lastChild = leftNode.lastNamedChild;
    if (lastChild?.type === 'identifier') {
      scopeEnv.set(lastChild.text, elementType);
    }
    return;
  }

  if (leftNode.type === 'object_pattern') {
    // Object destructuring (e.g., `for (const { id } of users)`) destructures
    // into fields of the element type. Without field-level resolution, we cannot
    // bind individual properties to their correct types. Skip to avoid false bindings.
    return;
  }

  let loopVarNode: SyntaxNode | null = leftNode;
  // `const user` parses as: left → variable_declarator containing an identifier named `user`
  if (loopVarNode.type === 'variable_declarator') {
    loopVarNode = loopVarNode.childForFieldName('name') ?? loopVarNode.firstNamedChild;
  }
  if (!loopVarNode) return;

  const loopVarName = extractVarName(loopVarNode);
  if (loopVarName) scopeEnv.set(loopVarName, elementType);
};

/** TS/JS: const alias = u → variable_declarator with name/value fields.
 *  Also handles destructuring: `const { a, b } = obj` → N fieldAccess items.
 *
 *  Every declarator of the statement is examined, so `const a = x, b = foo()` yields
 *  one item per declarator. The return shape stays backward compatible: a statement that
 *  produces exactly one non-destructuring item returns that item directly; destructuring
 *  or multiple items return an array; no match returns undefined. */
const extractPendingAssignment: PendingAssignmentExtractor = (node, scopeEnv) => {
  const pending: PendingAssignment[] = [];
  // Destructuring has always been reported as an array, even for a single binding.
  let sawDestructuring = false;

  for (let i = 0; i < node.namedChildCount; i++) {
    const child = node.namedChild(i);
    if (!child || child.type !== 'variable_declarator') continue;
    const nameNode = child.childForFieldName('name');
    const valueNode = child.childForFieldName('value');
    if (!nameNode || !valueNode) continue;

    // Object destructuring: `const { address, name } = user`
    // Emits N fieldAccess items — one per destructured binding.
    if (nameNode.type === 'object_pattern' && valueNode.type === 'identifier') {
      const receiver = valueNode.text;
      const countBefore = pending.length;
      for (let j = 0; j < nameNode.namedChildCount; j++) {
        const prop = nameNode.namedChild(j);
        if (!prop) continue;
        if (prop.type === 'shorthand_property_identifier_pattern') {
          // `const { name } = user` → shorthand: varName = fieldName
          const varName = prop.text;
          if (!scopeEnv.has(varName)) {
            pending.push({ kind: 'fieldAccess', lhs: varName, receiver, field: varName });
          }
        } else if (prop.type === 'pair_pattern') {
          // `const { address: addr } = user` → pair_pattern: key=field, value=varName
          const keyNode = prop.childForFieldName('key');
          const valNode = prop.childForFieldName('value');
          if (keyNode && valNode) {
            const fieldName = keyNode.text;
            const varName = valNode.text;
            if (!scopeEnv.has(varName)) {
              pending.push({ kind: 'fieldAccess', lhs: varName, receiver, field: fieldName });
            }
          }
        }
      }
      if (pending.length > countBefore) sawDestructuring = true;
      continue;
    }

    const lhs = nameNode.text;
    if (scopeEnv.has(lhs)) continue;

    if (valueNode.type === 'identifier') {
      pending.push({ kind: 'copy', lhs, rhs: valueNode.text });
      continue;
    }

    // member_expression RHS → fieldAccess (a.field, this.field)
    if (valueNode.type === 'member_expression') {
      const obj = valueNode.childForFieldName('object');
      const prop = valueNode.childForFieldName('property');
      if (obj && prop?.type === 'property_identifier' &&
          (obj.type === 'identifier' || obj.type === 'this')) {
        pending.push({ kind: 'fieldAccess', lhs, receiver: obj.text, field: prop.text });
      }
      continue;
    }

    // Unwrap await: `const user = await fetchUser()` or `await a.getC()`
    const callNode = unwrapAwait(valueNode);
    if (!callNode || callNode.type !== 'call_expression') continue;
    const funcNode = callNode.childForFieldName('function');
    if (!funcNode) continue;

    // Simple call → callResult: getUser()
    if (funcNode.type === 'identifier') {
      pending.push({ kind: 'callResult', lhs, callee: funcNode.text });
      continue;
    }

    // Method call with receiver → methodCallResult: a.getC()
    if (funcNode.type === 'member_expression') {
      const obj = funcNode.childForFieldName('object');
      const prop = funcNode.childForFieldName('property');
      if (obj && prop?.type === 'property_identifier' &&
          (obj.type === 'identifier' || obj.type === 'this')) {
        pending.push({ kind: 'methodCallResult', lhs, receiver: obj.text, method: prop.text });
      }
    }
  }

  if (pending.length === 0) return undefined;
  if (pending.length === 1 && !sawDestructuring) return pending[0];
  return pending;
};

/** Null-check keywords that indicate a null-comparison in binary expressions. */
const NULL_CHECK_KEYWORDS = new Set(['null', 'undefined']);

/**
 * Find the if-body (consequence) block for a null-check binary_expression.
 * Walks up from the binary_expression through parenthesized_expression to if_statement,
 * then returns the consequence block (statement_block).
 *
 * AST structure: if_statement > parenthesized_expression > binary_expression
 *                if_statement > statement_block (consequence)
 */
const findIfConsequenceBlock = (binaryExpr: SyntaxNode): SyntaxNode | undefined => {
  // Walk up to find the if_statement (typically: binary_expression > parenthesized_expression > if_statement)
  let current = binaryExpr.parent;
  while (current) {
    if (current.type === 'if_statement') {
      // The consequence is the first statement_block child of if_statement
      for (let i = 0; i < current.childCount; i++) {
        const child = current.child(i);
        if (child?.type === 'statement_block') return child;
      }
      return undefined;
    }
    // Stop climbing at function/block boundaries — don't cross scope
    if (current.type === 'function_declaration' || current.type === 'function_expression' ||
        current.type === 'arrow_function' || current.type === 'method_definition') return undefined;
    current = current.parent;
  }
  return undefined;
};

/** TS instanceof narrowing: `x instanceof User` → bind x to User.
 *  Also handles null-check narrowing: `x !== null`, `x != undefined` etc.
 *  instanceof: first-writer-wins (no prior type binding).
 *  null-check: position-indexed narrowing via narrowingRange.
*/ const extractPatternBinding: PatternBindingExtractor = (node, scopeEnv, declarationTypeNodes, scope) => { if (node.type !== 'binary_expression') return undefined; // Check for instanceof first (existing behavior) const instanceofOp = node.children.find(c => !c.isNamed && c.text === 'instanceof'); if (instanceofOp) { const left = node.namedChild(0); const right = node.namedChild(1); if (left?.type !== 'identifier' || right?.type !== 'identifier') return undefined; return { varName: left.text, typeName: right.text }; } // Null-check narrowing: x !== null, x != null, x !== undefined, x != undefined const op = node.children.find(c => !c.isNamed && (c.text === '!==' || c.text === '!=')); if (!op) return undefined; const left = node.namedChild(0); const right = node.namedChild(1); if (!left || !right) return undefined; // Determine which side is the variable and which is null/undefined let varNode: SyntaxNode | undefined; let isNullCheck = false; if (left.type === 'identifier' && NULL_CHECK_KEYWORDS.has(right.text)) { varNode = left; isNullCheck = true; } else if (right.type === 'identifier' && NULL_CHECK_KEYWORDS.has(left.text)) { varNode = right; isNullCheck = true; } if (!isNullCheck || !varNode) return undefined; const varName = varNode.text; // Look up the variable's resolved type (already stripped of nullable by extractSimpleTypeName) const resolvedType = scopeEnv.get(varName); if (!resolvedType) return undefined; // Check if the original declaration type was nullable by looking at the raw AST type node. // extractSimpleTypeName already strips nullable markers, so we need the original to know // if narrowing is meaningful (i.e., the variable was declared as nullable). 
/** Literal AST node type → TypeScript type name, used for overload disambiguation. */
const TS_LITERAL_TYPE_BY_NODE_TYPE: ReadonlyMap<string, string> = new Map([
  ['number', 'number'],
  ['string', 'string'],
  ['template_string', 'string'],
  ['true', 'boolean'],
  ['false', 'boolean'],
  ['null', 'null'],
  ['undefined', 'undefined'],
  ['regex', 'RegExp'],
]);

/**
 * Infer the type of a literal AST node for TypeScript overload disambiguation.
 * Returns `undefined` for any node type that is not a recognised literal.
 */
const inferTsLiteralType: LiteralTypeInferrer = (node) =>
  TS_LITERAL_TYPE_BY_NODE_TYPE.get(node.type);
/**
 * Pick the definition node out of a tree-sitter query capture map.
 *
 * Keys are probed in the order given by DEFINITION_CAPTURE_KEYS; the value of
 * the first key holding a truthy entry is returned, or `null` when none does.
 */
export const getDefinitionNodeFromCaptures = (captureMap: Record<string, any>): SyntaxNode | null => {
  const matchedKey = DEFINITION_CAPTURE_KEYS.find((key) => Boolean(captureMap[key]));
  return matchedKey === undefined ? null : captureMap[matchedKey];
};
/**
 * Built-in function/method names that should not be tracked as call targets.
 * Covers JS/TS, Python, Kotlin, C/C++, Linux kernel helpers, C#/.NET, PHP,
 * Swift/iOS, Rust and Ruby.
 *
 * The set is shared by every language: a name listed under one language's
 * heading is filtered for all of them. Duplicates across headings are harmless
 * (a Set stores each name once) and are kept so each section reads on its own.
 *
 * 'open', 'read', 'write' and 'close' are intentionally absent from EVERY
 * section: they are real C POSIX calls that must stay visible in the call graph.
 */
export const BUILT_IN_NAMES = new Set([
  // JavaScript/TypeScript
  'console', 'log', 'warn', 'error', 'info', 'debug',
  'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
  'parseInt', 'parseFloat', 'isNaN', 'isFinite',
  'encodeURI', 'decodeURI', 'encodeURIComponent', 'decodeURIComponent',
  'JSON', 'parse', 'stringify',
  'Object', 'Array', 'String', 'Number', 'Boolean', 'Symbol', 'BigInt',
  'Map', 'Set', 'WeakMap', 'WeakSet',
  'Promise', 'resolve', 'reject', 'then', 'catch', 'finally',
  'Math', 'Date', 'RegExp', 'Error',
  'require', 'import', 'export', 'fetch', 'Response', 'Request',
  'useState', 'useEffect', 'useCallback', 'useMemo', 'useRef', 'useContext',
  'useReducer', 'useLayoutEffect', 'useImperativeHandle', 'useDebugValue',
  'createElement', 'createContext', 'createRef', 'forwardRef', 'memo', 'lazy',
  'map', 'filter', 'reduce', 'forEach', 'find', 'findIndex', 'some', 'every',
  'includes', 'indexOf', 'slice', 'splice', 'concat', 'join', 'split',
  'push', 'pop', 'shift', 'unshift', 'sort', 'reverse',
  'keys', 'values', 'entries', 'assign', 'freeze', 'seal',
  'hasOwnProperty', 'toString', 'valueOf',
  // Python
  'print', 'len', 'range', 'str', 'int', 'float', 'list', 'dict', 'set', 'tuple',
  'append', 'extend', 'update',
  // NOTE: 'open', 'read', 'write', 'close' removed — these are real C POSIX syscalls
  'type', 'isinstance', 'issubclass', 'getattr', 'setattr', 'hasattr',
  'enumerate', 'zip', 'sorted', 'reversed', 'min', 'max', 'sum', 'abs',
  // Kotlin stdlib
  'println', 'print', 'readLine', 'require', 'requireNotNull', 'check', 'assert', 'lazy', 'error',
  'listOf', 'mapOf', 'setOf', 'mutableListOf', 'mutableMapOf', 'mutableSetOf',
  'arrayOf', 'sequenceOf', 'also', 'apply', 'run', 'with', 'takeIf', 'takeUnless',
  'TODO', 'buildString', 'buildList', 'buildMap', 'buildSet',
  'repeat', 'synchronized',
  // Kotlin coroutine builders & scope functions
  'launch', 'async', 'runBlocking', 'withContext', 'coroutineScope',
  'supervisorScope', 'delay',
  // Kotlin Flow operators
  'flow', 'flowOf', 'collect', 'emit', 'onEach', 'catch',
  'buffer', 'conflate', 'distinctUntilChanged',
  'flatMapLatest', 'flatMapMerge', 'combine',
  'stateIn', 'shareIn', 'launchIn',
  // Kotlin infix stdlib functions
  'to', 'until', 'downTo', 'step',
  // C/C++ standard library
  'printf', 'fprintf', 'sprintf', 'snprintf', 'vprintf', 'vfprintf', 'vsprintf', 'vsnprintf',
  'scanf', 'fscanf', 'sscanf',
  'malloc', 'calloc', 'realloc', 'free', 'memcpy', 'memmove', 'memset', 'memcmp',
  'strlen', 'strcpy', 'strncpy', 'strcat', 'strncat', 'strcmp', 'strncmp', 'strstr', 'strchr', 'strrchr',
  'atoi', 'atol', 'atof', 'strtol', 'strtoul', 'strtoll', 'strtoull', 'strtod',
  'sizeof', 'offsetof', 'typeof',
  'assert', 'abort', 'exit', '_exit',
  'fopen', 'fclose', 'fread', 'fwrite', 'fseek', 'ftell', 'rewind', 'fflush', 'fgets', 'fputs',
  // Linux kernel common macros/helpers (not real call targets)
  'likely', 'unlikely', 'BUG', 'BUG_ON', 'WARN', 'WARN_ON', 'WARN_ONCE',
  'IS_ERR', 'PTR_ERR', 'ERR_PTR', 'IS_ERR_OR_NULL',
  'ARRAY_SIZE', 'container_of', 'list_for_each_entry', 'list_for_each_entry_safe',
  'min', 'max', 'clamp', 'abs', 'swap',
  'pr_info', 'pr_warn', 'pr_err', 'pr_debug', 'pr_notice', 'pr_crit', 'pr_emerg',
  'printk', 'dev_info', 'dev_warn', 'dev_err', 'dev_dbg',
  'GFP_KERNEL', 'GFP_ATOMIC',
  'spin_lock', 'spin_unlock', 'spin_lock_irqsave', 'spin_unlock_irqrestore',
  'mutex_lock', 'mutex_unlock', 'mutex_init',
  'kfree', 'kmalloc', 'kzalloc', 'kcalloc', 'krealloc', 'kvmalloc', 'kvfree',
  'get', 'put',
  // C# / .NET built-ins
  'Console', 'WriteLine', 'ReadLine', 'Write',
  'Task', 'Run', 'Wait', 'WhenAll', 'WhenAny', 'FromResult', 'Delay', 'ContinueWith',
  'ConfigureAwait', 'GetAwaiter', 'GetResult',
  'ToString', 'GetType', 'Equals', 'GetHashCode', 'ReferenceEquals',
  'Add', 'Remove', 'Contains', 'Clear', 'Count', 'Any', 'All',
  'Where', 'Select', 'SelectMany', 'OrderBy', 'OrderByDescending', 'GroupBy',
  'First', 'FirstOrDefault', 'Single', 'SingleOrDefault', 'Last', 'LastOrDefault',
  'ToList', 'ToArray', 'ToDictionary', 'AsEnumerable', 'AsQueryable',
  'Aggregate', 'Sum', 'Average', 'Min', 'Max', 'Distinct', 'Skip', 'Take',
  'String', 'Format', 'IsNullOrEmpty', 'IsNullOrWhiteSpace', 'Concat', 'Join',
  'Trim', 'TrimStart', 'TrimEnd', 'Split', 'Replace', 'StartsWith', 'EndsWith',
  'Convert', 'ToInt32', 'ToDouble', 'ToBoolean', 'ToByte',
  'Math', 'Abs', 'Ceiling', 'Floor', 'Round', 'Pow', 'Sqrt',
  'Dispose', 'Close',
  'TryParse', 'Parse',
  'AddRange', 'RemoveAt', 'RemoveAll', 'FindAll', 'Exists', 'TrueForAll',
  'ContainsKey', 'TryGetValue', 'AddOrUpdate',
  'Throw', 'ThrowIfNull',
  // PHP built-ins
  'echo', 'isset', 'empty', 'unset', 'list', 'array', 'compact', 'extract',
  'count', 'strlen', 'strpos', 'strrpos', 'substr', 'strtolower', 'strtoupper', 'trim',
  'ltrim', 'rtrim', 'str_replace', 'str_contains', 'str_starts_with', 'str_ends_with',
  'sprintf', 'vsprintf', 'printf', 'number_format',
  'array_map', 'array_filter', 'array_reduce', 'array_push', 'array_pop', 'array_shift',
  'array_unshift', 'array_slice', 'array_splice', 'array_merge', 'array_keys', 'array_values',
  'array_key_exists', 'in_array', 'array_search', 'array_unique', 'usort', 'rsort',
  'json_encode', 'json_decode', 'serialize', 'unserialize',
  'intval', 'floatval', 'strval', 'boolval', 'is_null', 'is_string', 'is_int', 'is_array',
  'is_object', 'is_numeric', 'is_bool', 'is_float',
  'var_dump', 'print_r', 'var_export',
  'date', 'time', 'strtotime', 'mktime', 'microtime',
  'file_exists', 'file_get_contents', 'file_put_contents', 'is_file', 'is_dir',
  'preg_match', 'preg_match_all', 'preg_replace', 'preg_split',
  'header', 'session_start', 'session_destroy', 'ob_start', 'ob_end_clean', 'ob_get_clean',
  'dd', 'dump',
  // Swift/iOS built-ins and standard library
  'print', 'debugPrint', 'dump', 'fatalError', 'precondition', 'preconditionFailure',
  'assert', 'assertionFailure', 'NSLog',
  'abs', 'min', 'max', 'zip', 'stride', 'sequence', 'repeatElement',
  'swap', 'withUnsafePointer', 'withUnsafeMutablePointer', 'withUnsafeBytes',
  'autoreleasepool', 'unsafeBitCast', 'unsafeDowncast', 'numericCast',
  'type', 'MemoryLayout',
  // Swift collection/string methods (common noise)
  'map', 'flatMap', 'compactMap', 'filter', 'reduce', 'forEach', 'contains',
  'first', 'last', 'prefix', 'suffix', 'dropFirst', 'dropLast',
  'sorted', 'reversed', 'enumerated', 'joined', 'split',
  'append', 'insert', 'remove', 'removeAll', 'removeFirst', 'removeLast',
  'isEmpty', 'count', 'index', 'startIndex', 'endIndex',
  // UIKit/Foundation common methods (noise in call graph)
  'addSubview', 'removeFromSuperview', 'layoutSubviews', 'setNeedsLayout',
  'layoutIfNeeded', 'setNeedsDisplay', 'invalidateIntrinsicContentSize',
  'addTarget', 'removeTarget', 'addGestureRecognizer',
  'addConstraint', 'addConstraints', 'removeConstraint', 'removeConstraints',
  'NSLocalizedString', 'Bundle',
  'reloadData', 'reloadSections', 'reloadRows', 'performBatchUpdates',
  'register', 'dequeueReusableCell', 'dequeueReusableSupplementaryView',
  'beginUpdates', 'endUpdates', 'insertRows', 'deleteRows', 'insertSections', 'deleteSections',
  'present', 'dismiss', 'pushViewController', 'popViewController', 'popToRootViewController',
  'performSegue', 'prepare',
  // GCD / async
  'DispatchQueue', 'async', 'sync', 'asyncAfter',
  'Task', 'withCheckedContinuation', 'withCheckedThrowingContinuation',
  // Combine
  'sink', 'store', 'assign', 'receive', 'subscribe',
  // Notification / KVO
  'addObserver', 'removeObserver', 'post', 'NotificationCenter',
  // Rust standard library (common noise in call graphs)
  // 'read' / 'write' are deliberately NOT listed here — see the NOTE in the
  // Python section: the set is language-agnostic and those names must remain
  // trackable for C code.
  'unwrap', 'expect', 'unwrap_or', 'unwrap_or_else', 'unwrap_or_default',
  'ok', 'err', 'is_ok', 'is_err', 'map', 'map_err', 'and_then', 'or_else',
  'clone', 'to_string', 'to_owned', 'into', 'from', 'as_ref', 'as_mut',
  'iter', 'into_iter', 'collect', 'map', 'filter', 'fold', 'for_each',
  'len', 'is_empty', 'push', 'pop', 'insert', 'remove', 'contains',
  'format', 'writeln', 'panic', 'unreachable', 'todo', 'unimplemented',
  'vec', 'println', 'eprintln', 'dbg',
  'lock', 'try_lock',
  'spawn', 'join', 'sleep',
  'Some', 'None', 'Ok', 'Err',
  // Ruby built-ins and Kernel methods
  'puts', 'p', 'pp', 'raise', 'fail',
  'require', 'require_relative', 'load', 'autoload',
  'include', 'extend', 'prepend',
  'attr_accessor', 'attr_reader', 'attr_writer',
  'public', 'private', 'protected', 'module_function',
  'lambda', 'proc', 'block_given?',
  'nil?', 'is_a?', 'kind_of?', 'instance_of?', 'respond_to?',
  'freeze', 'frozen?', 'dup', 'tap', 'yield_self',
  // Ruby enumerables
  'each', 'select', 'reject', 'detect', 'collect',
  'inject', 'flat_map', 'each_with_object', 'each_with_index',
  'any?', 'all?', 'none?', 'count', 'first', 'last',
  'sort_by', 'min_by', 'max_by',
  'group_by', 'partition', 'compact', 'flatten', 'uniq',
]);
/**
 * Go: when `candidate` is a `method_declaration` with a receiver such as
 * `func (u *User) Save()`, return the id of the receiver's struct (`User`).
 * Returns `undefined` when the node is not such a method or the receiver type
 * is not a plain (optionally pointer-wrapped) identifier.
 */
const goReceiverStructId = (candidate: any, filePath: string): string | undefined => {
  if (candidate.type !== 'method_declaration') return undefined;

  // receiver is a parameter_list: (u *User) or (u User)
  const receiver = candidate.childForFieldName?.('receiver');
  if (!receiver) return undefined;

  const paramDecl = receiver.namedChildren?.find?.((c: any) => c.type === 'parameter_declaration');
  if (!paramDecl) return undefined;

  const typeNode = paramDecl.childForFieldName?.('type');
  if (!typeNode) return undefined;

  // Unwrap pointer_type (*User → User)
  const target = typeNode.type === 'pointer_type' ? typeNode.firstNamedChild : typeNode;
  if (!target) return undefined;
  if (target.type !== 'type_identifier' && target.type !== 'identifier') return undefined;

  return generateId('Struct', `${filePath}:${target.text}`);
};

/**
 * Go: when `candidate` is a `type_declaration` wrapping a struct or interface
 * (`type User struct { ... }`), return the id of that named type.
 */
const goTypeDeclarationId = (candidate: any, filePath: string): string | undefined => {
  if (candidate.type !== 'type_declaration') return undefined;

  const typeSpec = candidate.children?.find((c: any) => c.type === 'type_spec');
  if (!typeSpec) return undefined;

  const typeBody = typeSpec.childForFieldName?.('type');
  const isStruct = typeBody?.type === 'struct_type';
  if (!isStruct && typeBody?.type !== 'interface_type') return undefined;

  const nameNode = typeSpec.childForFieldName?.('name');
  if (!nameNode) return undefined;

  return generateId(isStruct ? 'Struct' : 'Interface', `${filePath}:${nameNode.text}`);
};

/**
 * Generic class-like containers (see CLASS_CONTAINER_TYPES): return the id built
 * from the container's name and its label in CONTAINER_TYPE_TO_LABEL
 * (defaulting to 'Class').
 */
const classContainerId = (candidate: any, filePath: string): string | undefined => {
  if (!CLASS_CONTAINER_TYPES.has(candidate.type)) return undefined;

  const isTypeName = (c: any): boolean => c.type === 'type_identifier' || c.type === 'identifier';

  // Rust impl_item: for `impl Trait for Struct {}`, pick the type after `for`.
  if (candidate.type === 'impl_item') {
    const kids: any[] = candidate.children ?? [];
    const forAt = kids.findIndex((c: any) => c.text === 'for');
    if (forAt !== -1) {
      const implTarget = kids.slice(forAt + 1).find(isTypeName);
      if (implTarget) return generateId('Impl', `${filePath}:${implTarget.text}`);
    }
    // Plain `impl Struct {}` falls through to the generic name lookup below.
  }

  const nameNode =
    candidate.childForFieldName?.('name') ??
    candidate.children?.find((c: any) => isTypeName(c) || c.type === 'name' || c.type === 'constant');
  if (!nameNode) return undefined;

  const label = CONTAINER_TYPE_TO_LABEL[candidate.type] || 'Class';
  return generateId(label, `${filePath}:${nameNode.text}`);
};

/**
 * Walk up the AST from `node` and return the generated id of the nearest
 * enclosing class/struct/interface/impl, or `null` if there is none.
 *
 * At each ancestor three resolvers are tried in order: Go method receiver,
 * Go type declaration, then the generic class-like container lookup.
 */
export const findEnclosingClassId = (node: any, filePath: string): string | null => {
  for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
    const id =
      goReceiverStructId(ancestor, filePath) ??
      goTypeDeclarationId(ancestor, filePath) ??
      classContainerId(ancestor, filePath);
    if (id !== undefined) return id;
  }
  return null;
};
/** Result shape of extractFunctionName. */
type ExtractedFunctionName = { funcName: string | null; label: string };

/** Return the first direct child of `parent` whose type is one of `types`, or null. */
const firstChildWithType = (parent: SyntaxNode, types: readonly string[]): SyntaxNode | null => {
  for (let i = 0; i < parent.childCount; i++) {
    const candidate = parent.child(i);
    if (candidate && types.includes(candidate.type)) return candidate;
  }
  return null;
};

/** Look up `field` on `parent`; if absent, fall back to the first child whose type is in `types`. */
const fieldOrTypedChild = (parent: SyntaxNode, field: string, types: readonly string[]): SyntaxNode | null =>
  parent.childForFieldName?.(field) || firstChildWithType(parent, types);

/** Text of the `name` field (or first child of the given types), or null when there is none. */
const nameTextOf = (owner: SyntaxNode, types: readonly string[]): string | null =>
  fieldOrTypedChild(owner, 'name', types)?.text ?? null;

/**
 * C/C++: resolve the name through the declarator chain
 * function_definition -> [pointer_declarator | reference_declarator]* ->
 * function_declarator -> qualified_identifier | identifier | field_identifier |
 * parenthesized_declarator.
 * Returns `funcName: null` when the node has no such chain.
 */
const nameFromDeclaratorChain = (node: SyntaxNode): ExtractedFunctionName => {
  const notFound: ExtractedFunctionName = { funcName: null, label: 'Function' };
  // `Class::method` — the member name is the qualified identifier's `name`.
  const fromQualified = (qualified: SyntaxNode): ExtractedFunctionName => {
    const memberName = nameTextOf(qualified, ['identifier']);
    return memberName ? { funcName: memberName, label: 'Method' } : notFound;
  };

  let declarator = fieldOrTypedChild(node, 'declarator', ['function_declarator']);
  // Unwrap pointer/reference wrappers to reach the function_declarator.
  while (declarator && (declarator.type === 'pointer_declarator' || declarator.type === 'reference_declarator')) {
    declarator = fieldOrTypedChild(declarator, 'declarator', [
      'function_declarator', 'pointer_declarator', 'reference_declarator',
    ]);
  }
  if (!declarator) return notFound;

  const inner = fieldOrTypedChild(declarator, 'declarator', [
    'qualified_identifier', 'identifier', 'field_identifier', 'parenthesized_declarator',
  ]);
  if (!inner) return notFound;

  switch (inner.type) {
    case 'qualified_identifier':
      return fromQualified(inner);
    case 'identifier':
      return { funcName: inner.text, label: 'Function' };
    case 'field_identifier':
      // field_identifier is used for method names inside C++ class bodies
      return { funcName: inner.text, label: 'Method' };
    case 'parenthesized_declarator': {
      const nested = firstChildWithType(inner, ['qualified_identifier', 'identifier']);
      if (nested?.type === 'qualified_identifier') return fromQualified(nested);
      if (nested?.type === 'identifier') return { funcName: nested.text, label: 'Function' };
      return notFound;
    }
    default:
      return notFound;
  }
};

/**
 * Extract function name and label from a function_definition or similar AST node.
 * Handles C/C++ qualified_identifier (ClassName::MethodName) and other language patterns.
 *
 * @param node - a function-like definition node (see FUNCTION_NODE_TYPES).
 * @returns `funcName` — the declared name, or `null` when none can be found
 *   (never `undefined`); `label` — 'Function', 'Method' or 'Constructor'.
 */
export const extractFunctionName = (node: SyntaxNode): ExtractedFunctionName => {
  // Swift init/deinit
  if (node.type === 'init_declaration' || node.type === 'deinit_declaration') {
    return {
      funcName: node.type === 'init_declaration' ? 'init' : 'deinit',
      label: 'Constructor',
    };
  }

  if (FUNCTION_DECLARATION_TYPES.has(node.type)) {
    const viaDeclarator = nameFromDeclaratorChain(node);
    if (viaDeclarator.funcName) return viaDeclarator;
    // Fallback for languages without a declarator chain
    // (Kotlin and Swift use simple_identifier).
    return {
      funcName: nameTextOf(node, ['identifier', 'property_identifier', 'simple_identifier']),
      label: viaDeclarator.label,
    };
  }

  switch (node.type) {
    case 'impl_item': {
      // Name comes from the first function_item child of the impl block.
      const funcItem = firstChildWithType(node, ['function_item']);
      return funcItem
        ? { funcName: nameTextOf(funcItem, ['identifier']), label: 'Method' }
        : { funcName: null, label: 'Function' };
    }
    case 'method_definition':
      return { funcName: nameTextOf(node, ['property_identifier']), label: 'Method' };
    case 'method_declaration':
    case 'constructor_declaration':
    case 'method':
    case 'singleton_method':
      return { funcName: nameTextOf(node, ['identifier']), label: 'Method' };
    case 'arrow_function':
    case 'function_expression': {
      // Anonymous functions are named after the variable they are assigned to.
      const parent = node.parent;
      return {
        funcName: parent?.type === 'variable_declarator' ? nameTextOf(parent, ['identifier']) : null,
        label: 'Function',
      };
    }
    default:
      return { funcName: null, label: 'Function' };
  }
};
/**
 * Search the direct children of `parent` for nodes of type `siblingType` and
 * return the first of *their* direct children whose type is `childType`.
 * Every matching sibling is tried in order; `null` is returned when none of
 * them contains such a child.
 *
 * Used for Kotlin AST traversal where visibility_modifier lives inside a
 * modifiers sibling.
 */
export const findSiblingChild = (parent: any, siblingType: string, childType: string): any | null => {
  // Materialise a node's direct children via the childCount/child(i) API.
  const directChildren = (owner: any): any[] =>
    Array.from({ length: owner.childCount }, (_, idx) => owner.child(idx));

  for (const sibling of directChildren(parent)) {
    if (sibling?.type !== siblingType) continue;
    const match = directChildren(sibling).find((c) => c?.type === childType);
    if (match) return match;
  }
  return null;
};
/**
 * Map a file name or path to its SupportedLanguages value.
 *
 * Matching is by (case-sensitive) extension, plus a small list of
 * extensionless Ruby file names (RUBY_EXTENSIONLESS_FILES) matched against the
 * path's last segment. Both `/` and `\` are accepted as path separators so
 * that e.g. `C:\proj\Rakefile` is recognised as well as `proj/Rakefile`.
 *
 * @param filename - a bare file name or a full path.
 * @returns the detected language, or `null` when the file is not supported.
 */
export const getLanguageFromFilename = (filename: string): SupportedLanguages | null => {
  const hasExt = (...exts: string[]): boolean => exts.some((ext) => filename.endsWith(ext));

  // TypeScript (including TSX)
  if (hasExt('.tsx', '.ts')) return SupportedLanguages.TypeScript;
  // JavaScript (including JSX)
  if (hasExt('.jsx', '.js')) return SupportedLanguages.JavaScript;
  // Python
  if (hasExt('.py')) return SupportedLanguages.Python;
  // Java
  if (hasExt('.java')) return SupportedLanguages.Java;
  // C source files
  if (hasExt('.c')) return SupportedLanguages.C;
  // C++ (all common extensions, including .h)
  // .h is parsed as C++ because tree-sitter-cpp is a strict superset of C, so pure-C
  // headers parse correctly, and C++ headers (classes, templates) are handled properly.
  if (hasExt('.cpp', '.cc', '.cxx', '.h', '.hpp', '.hxx', '.hh')) return SupportedLanguages.CPlusPlus;
  // C#
  if (hasExt('.cs')) return SupportedLanguages.CSharp;
  // Go
  if (hasExt('.go')) return SupportedLanguages.Go;
  // Rust
  if (hasExt('.rs')) return SupportedLanguages.Rust;
  // Kotlin
  if (hasExt('.kt', '.kts')) return SupportedLanguages.Kotlin;
  // PHP (all common extensions)
  if (hasExt('.php', '.phtml', '.php3', '.php4', '.php5', '.php8')) return SupportedLanguages.PHP;
  // Ruby (extensions)
  if (hasExt('.rb', '.rake', '.gemspec')) return SupportedLanguages.Ruby;
  // Ruby (extensionless files) — compare the last path segment, whichever
  // separator the path uses.
  const basename = filename.split(/[\\/]/).pop() || filename;
  if (RUBY_EXTENSIONLESS_FILES.has(basename)) return SupportedLanguages.Ruby;
  // Swift
  if (hasExt('.swift')) return SupportedLanguages.Swift;

  return null;
};
/**
 * Extract parameter count and return type text from an AST method/function node.
 * Works across languages by looking for common AST patterns.
 *
 * @param node - a function/method definition node, or a parameter-list node
 *   itself. `null`/`undefined` yields a signature with `parameterCount: 0` and
 *   everything else undefined.
 * @returns
 *   - `parameterCount`: number of counted parameters, or `undefined` when the
 *     signature is variadic (rest/splat/vararg/`...`).
 *   - `requiredParameterCount`: only set when some parameters have defaults
 *     (i.e. it is smaller than `parameterCount`) and the signature is not variadic.
 *   - `parameterTypes`: per-parameter simple type names ('unknown' where none
 *     was found), or `undefined` when no parameter had an extractable type.
 *   - `returnType`: raw text of the return type, when one is found.
 */
export const extractMethodSignature = (node: SyntaxNode | null | undefined): MethodSignature => {
  let parameterCount: number | undefined = 0;
  let requiredCount = 0;
  let returnType: string | undefined;
  let isVariadic = false;
  const paramTypes: string[] = [];

  if (!node) return { parameterCount, requiredParameterCount: undefined, parameterTypes: undefined, returnType };

  const paramListTypes = new Set([
    'formal_parameters', 'parameters', 'parameter_list',
    'function_parameters', 'method_parameters', 'function_value_parameters',
  ]);

  // Node types that indicate variadic/rest parameters
  const VARIADIC_PARAM_TYPES = new Set([
    'variadic_parameter_declaration',  // Go: ...string
    'variadic_parameter',              // Rust: extern "C" fn(...)
    'spread_parameter',                // Java: Object... args
    'list_splat_pattern',              // Python: *args
    'dictionary_splat_pattern',        // Python: **kwargs
    'rest_pattern',                    // JavaScript: (...args) — direct child of formal_parameters
    'splat_parameter',                 // Ruby: *args
    'hash_splat_parameter',            // Ruby: **opts
  ]);

  /** AST node types that represent parameters with default values. */
  const OPTIONAL_PARAM_TYPES = new Set([
    'optional_parameter',             // TypeScript, Ruby: (x?: number), (x: number = 5), def f(x = 5)
    'default_parameter',              // Python: def f(x=5)
    'typed_default_parameter',        // Python: def f(x: int = 5)
    'optional_parameter_declaration', // C++: void f(int x = 5)
  ]);

  /** Check if a parameter node has a default value (handles Kotlin, C#, Swift, PHP
   *  where defaults are expressed as child nodes rather than distinct node types). */
  const hasDefaultValue = (paramNode: SyntaxNode): boolean => {
    if (OPTIONAL_PARAM_TYPES.has(paramNode.type)) return true;
    // C#, Swift, PHP: check for '=' token or equals_value_clause child
    for (let i = 0; i < paramNode.childCount; i++) {
      const c = paramNode.child(i);
      if (!c) continue;
      if (c.type === '=' || c.type === 'equals_value_clause') return true;
    }
    // Kotlin: default values are siblings of the parameter node, not children.
    // The AST is: parameter, =, <literal> — all at function_value_parameters level.
    // Check if the immediately following sibling is '=' (default value separator).
    const sib = paramNode.nextSibling;
    if (sib && sib.type === '=') return true;
    return false;
  };

  /** Find a parameter-list node: direct children first, then a depth-first search. */
  const findParameterList = (current: SyntaxNode): SyntaxNode | null => {
    for (const child of current.children) {
      if (paramListTypes.has(child.type)) return child;
    }
    for (const child of current.children) {
      const nested = findParameterList(child);
      if (nested) return nested;
    }
    return null;
  };

  const parameterList = (
    paramListTypes.has(node.type) ? node                // node itself IS the parameter list (e.g. C# primary constructors)
      : node.childForFieldName?.('parameters')
        ?? findParameterList(node)
  );

  if (parameterList && paramListTypes.has(parameterList.type)) {
    for (const param of parameterList.namedChildren) {
      if (param.type === 'comment') continue;
      // Receiver parameters (Python/Rust `self`) are not call arguments.
      if (param.text === 'self' || param.text === '&self' || param.text === '&mut self' ||
          param.type === 'self_parameter') {
        continue;
      }
      // Kotlin: default values are siblings of the parameter node inside
      // function_value_parameters, so they appear as named children (e.g.
      // string_literal, integer_literal, boolean_literal, call_expression).
      // Skip any named child that isn't a parameter-like or modifier node.
      if (param.type.endsWith('_literal') || param.type === 'call_expression'
          || param.type === 'navigation_expression' || param.type === 'prefix_expression'
          || param.type === 'parenthesized_expression') {
        continue;
      }
      // Check for variadic parameter types
      if (VARIADIC_PARAM_TYPES.has(param.type)) {
        isVariadic = true;
        continue;
      }
      // TypeScript: rest parameter — required_parameter containing rest_pattern
      if (param.type === 'required_parameter' || param.type === 'optional_parameter') {
        for (const child of param.children) {
          if (child.type === 'rest_pattern') {
            isVariadic = true;
            break;
          }
        }
        if (isVariadic) continue;
      }
      // Kotlin: vararg modifier on a regular parameter
      if (param.type === 'parameter' || param.type === 'formal_parameter') {
        const prev = param.previousSibling;
        if (prev?.type === 'parameter_modifiers' && prev.text.includes('vararg')) {
          isVariadic = true;
        }
      }

      // Extract parameter type name for overload disambiguation.
      // Works for Java (formal_parameter), Kotlin (parameter), C# (parameter),
      // C++ (parameter_declaration). Uses childForFieldName('type') which is the
      // standard tree-sitter field for typed parameters across these languages.
      // Kotlin uses positional children instead of 'type' field — fall back to
      // searching for user_type/nullable_type/predefined_type children.
      const paramTypeNode = param.childForFieldName('type');
      if (paramTypeNode) {
        const typeName = extractSimpleTypeName(paramTypeNode);
        paramTypes.push(typeName ?? 'unknown');
      } else {
        // Kotlin: parameter → [simple_identifier, user_type|nullable_type]
        let found = false;
        for (const child of param.namedChildren) {
          if (child.type === 'user_type' || child.type === 'nullable_type'
              || child.type === 'type_identifier' || child.type === 'predefined_type') {
            const typeName = extractSimpleTypeName(child);
            paramTypes.push(typeName ?? 'unknown');
            found = true;
            break;
          }
        }
        if (!found) paramTypes.push('unknown');
      }

      if (!hasDefaultValue(param)) requiredCount++;
      parameterCount++;
    }
    // C/C++: bare `...` token in parameter list (not a named child — check all children)
    if (!isVariadic) {
      for (const child of parameterList.children) {
        if (!child.isNamed && child.text === '...') {
          isVariadic = true;
          break;
        }
      }
    }
  }

  // Return type extraction — language-specific field names
  // Go: 'result' field is either a type_identifier or parameter_list (multi-return)
  const goResult = node.childForFieldName?.('result');
  if (goResult) {
    if (goResult.type === 'parameter_list') {
      // Multi-return: extract first parameter's type only (e.g. (*User, error) → *User)
      const firstParam = goResult.firstNamedChild;
      if (firstParam?.type === 'parameter_declaration') {
        const typeNode = firstParam.childForFieldName('type');
        if (typeNode) returnType = typeNode.text;
      } else if (firstParam) {
        // Unnamed return types: (string, error) — first child is a bare type node
        returnType = firstParam.text;
      }
    } else {
      returnType = goResult.text;
    }
  }

  // Rust: 'return_type' field — the value IS the type node (e.g. primitive_type, type_identifier).
  // Skip if the node is a type_annotation (TS/Python), which is handled by the generic loop below.
  if (!returnType) {
    const rustReturn = node.childForFieldName?.('return_type');
    if (rustReturn && rustReturn.type !== 'type_annotation') {
      returnType = rustReturn.text;
    }
  }

  // C/C++: 'type' field on function_definition
  if (!returnType) {
    const cppType = node.childForFieldName?.('type');
    if (cppType && cppType.text !== 'void') {
      returnType = cppType.text;
    }
  }

  // C#: 'returns' field on method_declaration
  if (!returnType) {
    const csReturn = node.childForFieldName?.('returns');
    if (csReturn && csReturn.text !== 'void') {
      returnType = csReturn.text;
    }
  }

  // TS/Rust/Python/C#/Kotlin: type_annotation or return_type child
  if (!returnType) {
    for (const child of node.children) {
      if (child.type === 'type_annotation' || child.type === 'return_type') {
        const typeNode = child.children.find((c) => c.isNamed);
        if (typeNode) returnType = typeNode.text;
      }
    }
  }

  // Kotlin: fun getUser(): User — return type is a bare user_type child of
  // function_declaration. The Kotlin grammar does NOT wrap it in type_annotation
  // or return_type; it appears as a direct child after function_value_parameters.
  // Note: Kotlin uses function_value_parameters (not a field), so we find it by type.
  if (!returnType) {
    let paramsEnd = -1;
    for (let i = 0; i < node.childCount; i++) {
      const child = node.child(i);
      if (!child) continue;
      if (child.type === 'function_value_parameters' || child.type === 'value_parameters') {
        paramsEnd = child.endIndex;
      }
      if (paramsEnd >= 0 && child.type === 'user_type' && child.startIndex > paramsEnd) {
        returnType = child.text;
        break;
      }
    }
  }

  // A variadic signature has no fixed arity.
  if (isVariadic) parameterCount = undefined;

  // Only include parameterTypes when at least one type was successfully extracted.
  // Use undefined (not []) to avoid empty array allocations for untyped parameters.
  const hasTypes = paramTypes.length > 0 && paramTypes.some(t => t !== 'unknown');
  // Only set requiredParameterCount when it differs from total — saves memory on the common case.
  const requiredParameterCount = (!isVariadic && requiredCount < (parameterCount ?? 0))
    ? requiredCount : undefined;

  return { parameterCount, requiredParameterCount, parameterTypes: hasTypes ? paramTypes : undefined, returnType };
};
const requiredParameterCount = (!isVariadic && requiredCount < (parameterCount ?? 0)) ? requiredCount : undefined;
return { parameterCount, requiredParameterCount, parameterTypes: hasTypes ? paramTypes : undefined, returnType };
};

/**
 * Counts the arguments passed at a call site.
 *
 * The argument container is looked up in three places, in order: the call node's
 * `arguments` field, a direct child whose type is in CALL_ARGUMENT_LIST_TYPES, and
 * finally a grandchild of that kind under any named child (Kotlin/Swift wrap the
 * arguments in a suffix node: call_expression → call_suffix → value_arguments).
 *
 * @param callNode - The captured call node; may be null/undefined.
 * @returns The number of named, non-comment children of the argument container,
 *          or undefined when there is no call node or no container was found.
 */
export const countCallArguments = (callNode: SyntaxNode | null | undefined): number | undefined => {
  if (!callNode) return undefined;

  const isArgumentList = (candidate: SyntaxNode): boolean =>
    CALL_ARGUMENT_LIST_TYPES.has(candidate.type);

  // Most grammars expose the argument list directly on the call node.
  let argumentList: SyntaxNode | null | undefined =
    callNode.childForFieldName('arguments') ?? callNode.children.find(isArgumentList);

  // Otherwise look exactly one level deeper, under named children only.
  if (!argumentList) {
    for (const wrapper of callNode.children) {
      if (!wrapper.isNamed) continue;
      const inner = wrapper.children.find(isArgumentList);
      if (inner) {
        argumentList = inner;
        break;
      }
    }
  }

  if (!argumentList) return undefined;

  // Punctuation (anonymous nodes) and comments are not arguments.
  return argumentList.children.filter((arg) => arg.isNamed && arg.type !== 'comment').length;
};

// ── Call-form discrimination (Phase 1, Step D) ─────────────────────────

/**
 * Node types that wrap the callee name in a member access.
 * A call whose name node has a parent of one of these types is a member call.
 */
const MEMBER_ACCESS_NODE_TYPES = new Set([
  'member_expression',          // TS/JS: obj.method()
  'attribute',                  // Python: obj.method()
  'member_access_expression',   // C#: obj.Method()
  'field_expression',           // Rust/C++: obj.method() / ptr->method()
  'selector_expression',        // Go: obj.Method()
  'navigation_suffix',          // Kotlin/Swift: obj.method() — nameNode sits inside navigation_suffix
  'member_binding_expression',  // C#: user?.Method() — null-conditional access
]);

/**
 * Call node types that always denote a constructor invocation.
 * Limited to the patterns the tree-sitter queries already capture as @call.
 */
const CONSTRUCTOR_CALL_NODE_TYPES = new Set([
  'constructor_invocation',               // Kotlin: Foo()
  'new_expression',                       // TS/JS/C++: new Foo()
  'object_creation_expression',           // Java/C#/PHP: new Foo()
  'implicit_object_creation_expression',  // C# 9: User u = new(...)
  'composite_literal',                    // Go: User{...}
  'struct_expression',                    // Rust: User { ... }
]);

/**
 * Parent node types of a callee name in a scoped/qualified call
 * (e.g. Foo::new() in Rust, Foo::bar() in C++).
 */
const SCOPED_CALL_NODE_TYPES = new Set([
  'scoped_identifier',     // Rust: Foo::new()
  'qualified_identifier',  // C++: ns::func()
]);

type CallForm = 'free' | 'member' | 'constructor';

/**
 * Classifies a captured call site as a free call, a member call, or a constructor call.
 *
 * The decision is made purely from the node types found between `callNode` (@call)
 * and `nameNode` (@call.name); the checks run in a fixed priority order and the
 * first one that matches wins.
 *
 * @param callNode - Node captured as the call.
 * @param nameNode - Node captured as the callee name.
 * @returns The inferred form, or undefined when none of the rules apply.
 */
export const inferCallForm = (
  callNode: SyntaxNode,
  nameNode: SyntaxNode,
): CallForm | undefined => {
  // 1. The call node type itself marks a constructor invocation.
  if (CONSTRUCTOR_CALL_NODE_TYPES.has(callNode.type)) return 'constructor';

  // 2. The callee name is wrapped in a member-access node.
  const nameParent = nameNode.parent;
  if (nameParent && MEMBER_ACCESS_NODE_TYPES.has(nameParent.type)) return 'member';

  // 3–4b. Grammars where the call node type (plus, for some, a receiver field)
  //       carries the member/free distinction.
  switch (callNode.type) {
    case 'member_call_expression':           // PHP $obj->method()
    case 'nullsafe_member_call_expression':  // PHP $obj?->method()
    case 'scoped_call_expression':           // PHP static call Foo::bar()
      return 'member';
    case 'method_invocation':                // Java: member only when an 'object' field exists
      if (callNode.childForFieldName('object')) return 'member';
      break;
    case 'call':                             // Ruby: member only when a 'receiver' field exists
      if (callNode.childForFieldName('receiver')) return 'member';
      break;
    default:
      break;
  }

  // 5. Scoped calls (Rust Foo::new(), C++ ns::func()) are treated as free calls:
  //    the receiver is a type, not an instance — handled differently in Phase 3.
  if (nameParent && SCOPED_CALL_NODE_TYPES.has(nameParent.type)) return 'free';

  // 6. Fallback: the name is a child or grandchild of the call node.
  const isNearCallNode = nameNode.parent === callNode || nameParent?.parent === callNode;
  return isNearCallNode ? 'free' : undefined;
};

/**
 * Extract the receiver identifier for member calls.
 * Only captures simple identifiers — returns undefined for complex expressions
 * like getUser().save() or arr[0].method().
*/
/**
 * Receiver node types whose source text can be used directly as the receiver name.
 * Anything else (calls, subscripts, nested member accesses, …) is rejected by
 * extractReceiverName.
 */
const SIMPLE_RECEIVER_TYPES = new Set([
  'identifier',
  'simple_identifier',
  'variable_name',     // PHP $variable (tree-sitter-php)
  'name',              // PHP name node
  'this',              // TS/JS/Java/C# this.method()
  'self',              // Rust/Python self.method()
  'super',             // TS/JS/Java/Kotlin/Ruby super.method()
  'super_expression',  // Kotlin wraps super in super_expression
  'base',              // C# base.Method()
  'parent',            // PHP parent::method()
  'constant',          // Ruby CONSTANT.method() (uppercase identifiers)
]);

/**
 * Extract the raw receiver AST node for a member call.
 * Unlike extractReceiverName, this returns the receiver node regardless of its type —
 * including call_expression / method_invocation nodes that appear in chained calls
 * like `svc.getUser().save()`.
 *
 * @param nameNode - Node captured as the callee name (@call.name).
 * @returns The receiver node, or undefined when the name node has no parent or
 *          no receiver can be located (e.g. top-level free calls).
 */
export const extractReceiverNode = (
  nameNode: SyntaxNode,
): SyntaxNode | undefined => {
  const parent = nameNode.parent;
  if (!parent) return undefined;

  // PHP member calls and Java method_invocation keep the receiver on the call node.
  // When the name is a direct child of the call, `parent` IS that call node, so
  // fall back to `parent` itself if there is no grandparent.
  const callNode = parent.parent ?? parent;

  // Standard receiver field names used across grammars.
  let receiver: SyntaxNode | null =
    parent.childForFieldName('object')        // TS/JS member_expression, Python attribute, PHP, Java
    ?? parent.childForFieldName('value')      // Rust field_expression
    ?? parent.childForFieldName('operand')    // Go selector_expression
    ?? parent.childForFieldName('expression') // C# member_access_expression
    ?? parent.childForFieldName('argument');  // C++ field_expression

  // Java method_invocation: 'object' field is on the call node, not on nameNode's parent.
  if (!receiver && callNode.type === 'method_invocation') {
    receiver = callNode.childForFieldName('object');
  }

  // PHP: member_call_expression has 'object' on the call node.
  if (!receiver && (callNode.type === 'member_call_expression' || callNode.type === 'nullsafe_member_call_expression')) {
    receiver = callNode.childForFieldName('object');
  }

  // Ruby: call node has a 'receiver' field.
  if (!receiver && parent.type === 'call') {
    receiver = parent.childForFieldName('receiver');
  }

  // PHP scoped_call_expression (parent::method(), self::method()):
  // nameNode's direct parent IS the scoped_call_expression (name is a direct child).
  if (!receiver && (parent.type === 'scoped_call_expression' || callNode.type === 'scoped_call_expression')) {
    const scopedCall = parent.type === 'scoped_call_expression' ? parent : callNode;
    receiver = scopedCall.childForFieldName('scope');
    // relative_scope wraps 'parent'/'self'/'static' — unwrap to get the keyword.
    if (receiver?.type === 'relative_scope') {
      receiver = receiver.firstChild;
    }
  }

  // C# null-conditional: user?.Save() → conditional_access_expression wraps member_binding_expression.
  if (!receiver && parent.type === 'member_binding_expression') {
    const condAccess = parent.parent;
    if (condAccess?.type === 'conditional_access_expression') {
      receiver = condAccess.firstNamedChild;
    }
  }

  // Kotlin/Swift: the receiver is the first named child of navigation_expression
  // that is not the navigation_suffix holding the name.
  if (!receiver && parent.type === 'navigation_suffix') {
    const navExpr = parent.parent;
    if (navExpr?.type === 'navigation_expression') {
      for (const child of navExpr.children) {
        if (child.isNamed && child !== parent) {
          receiver = child;
          break;
        }
      }
    }
  }

  return receiver ?? undefined;
};

/**
 * Extract the receiver identifier for a member call.
 *
 * Locates the receiver with extractReceiverNode (so both functions always agree
 * on which node is the receiver) and then accepts it only when it is a simple
 * receiver (see SIMPLE_RECEIVER_TYPES). The one complex form that is accepted is
 * Python's `super().method()`, whose receiver is the call `super()`.
 *
 * @param nameNode - Node captured as the callee name (@call.name).
 * @returns The receiver text, the literal 'super' for Python `super()`, or
 *          undefined for missing or complex receivers such as getUser().save()
 *          or arr[0].method().
 */
export const extractReceiverName = (
  nameNode: SyntaxNode,
): string | undefined => {
  const receiver = extractReceiverNode(nameNode);
  if (!receiver) return undefined;

  // Only capture simple identifiers — refuse complex expressions.
  if (SIMPLE_RECEIVER_TYPES.has(receiver.type)) {
    return receiver.text;
  }

  // Python super().method(): receiver is a call node `super()` — extract the function name.
  if (receiver.type === 'call') {
    const func = receiver.childForFieldName('function');
    if (func?.text === 'super') return 'super';
  }

  return undefined;
};

/**
 * Reports whether verbose ingestion logging is switched on.
 *
 * Reads the GITNEXUS_VERBOSE environment variable; the values `1`, `true` and `yes`
 * enable it, compared case-insensitively and ignoring surrounding whitespace.
 * Unset, empty, or any other value means disabled.
 */
export const isVerboseIngestionEnabled = (): boolean => {
  const raw = process.env.GITNEXUS_VERBOSE;
  if (!raw) return false;
  // Trim so that values such as "true " (e.g. from a .env file) are still recognised.
  const value = raw.trim().toLowerCase();
  return value === '1' || value === 'true' || value === 'yes';
};

// ── Chained-call extraction ───────────────────────────────────────────────

/** Node types representing call expressions across supported languages. */
export const CALL_EXPRESSION_TYPES = new Set([
  'call_expression',                  // TS/JS/C/C++/Go/Rust (also Kotlin/Swift, handled via navigation_expression)
  'method_invocation',                // Java
  'member_call_expression',           // PHP
  'nullsafe_member_call_expression',  // PHP ?.
  'call',                             // Python/Ruby
  'invocation_expression',            // C#
]);

/**
 * Hard limit on chain depth to prevent runaway recursion.
 * For `a.b().c().d()`, the chain has depth 2 (b and c before d).
 */
export const MAX_CHAIN_DEPTH = 3;

/**
 * Walk a receiver AST node that is itself a call expression, accumulating the
 * chain of intermediate method names up to MAX_CHAIN_DEPTH.
*
 * For `svc.getUser().save()`, called with the receiver of `save` (getUser() call):
 *   returns { chain: ['getUser'], baseReceiverName: 'svc' }
 *
 * For `a.b().c().d()`, called with the receiver of `d` (c() call):
 *   returns { chain: ['b', 'c'], baseReceiverName: 'a' }
 *
 * The receiver of each call is looked up on the callee node through the fields
 * `object`, `value`, `operand`, `argument` (C/C++ field_expression) and `expression`,
 * then on the call node itself for Java, PHP and Ruby.
 *
 * Returns undefined when no method name could be extracted at all. When the walk
 * stops before reaching a non-call receiver (depth cap hit, or no receiver found),
 * the collected chain is returned with `baseReceiverName: undefined`.
 */
export function extractCallChain(
  receiverCallNode: SyntaxNode,
): { chain: string[]; baseReceiverName: string | undefined } | undefined {
  const chain: string[] = [];
  let current: SyntaxNode = receiverCallNode;

  while (CALL_EXPRESSION_TYPES.has(current.type) && chain.length < MAX_CHAIN_DEPTH) {
    // Extract the method name from this call node.
    const funcNode = current.childForFieldName?.('function')
      ?? current.childForFieldName?.('name')
      ?? current.childForFieldName?.('method'); // Ruby `call` node
    let methodName: string | undefined;
    let innerReceiver: SyntaxNode | null = null;

    if (funcNode) {
      // member_expression / attribute: last named child is the method identifier
      methodName = funcNode.lastNamedChild?.text ?? funcNode.text;
    }

    // Kotlin/Swift: call_expression exposes callee as firstNamedChild, not a field.
    // navigation_expression: method name is in navigation_suffix → simple_identifier.
    if (!funcNode && current.type === 'call_expression') {
      const callee = current.firstNamedChild;
      if (callee?.type === 'navigation_expression') {
        const suffix = callee.lastNamedChild;
        if (suffix?.type === 'navigation_suffix') {
          methodName = suffix.lastNamedChild?.text;
          // The receiver is the part of navigation_expression before the suffix
          for (let i = 0; i < callee.namedChildCount; i++) {
            const child = callee.namedChild(i);
            if (child && child.type !== 'navigation_suffix') {
              innerReceiver = child;
              break;
            }
          }
        }
      }
    }

    if (!methodName) break;
    chain.unshift(methodName); // build chain outermost-last

    // Walk into the receiver of this call to continue the chain.
    // Same field list (and order) as extractMixedChain, including `argument`,
    // so C/C++ chains such as a.b().c() resolve their base receiver too.
    if (!innerReceiver && funcNode) {
      innerReceiver = funcNode.childForFieldName?.('object')
        ?? funcNode.childForFieldName?.('value')
        ?? funcNode.childForFieldName?.('operand')
        ?? funcNode.childForFieldName?.('argument') // C/C++ field_expression
        ?? funcNode.childForFieldName?.('expression')
        ?? null;
    }
    // Java method_invocation: object field is on the call node
    if (!innerReceiver && current.type === 'method_invocation') {
      innerReceiver = current.childForFieldName?.('object') ?? null;
    }
    // PHP member_call_expression
    if (!innerReceiver && (current.type === 'member_call_expression' || current.type === 'nullsafe_member_call_expression')) {
      innerReceiver = current.childForFieldName?.('object') ?? null;
    }
    // Ruby `call` node: receiver field is on the call node itself
    if (!innerReceiver && current.type === 'call') {
      innerReceiver = current.childForFieldName?.('receiver') ?? null;
    }

    if (!innerReceiver) break;

    if (CALL_EXPRESSION_TYPES.has(innerReceiver.type)) {
      current = innerReceiver; // continue walking
    } else {
      // Reached a non-call node — treat its text as the base receiver
      return { chain, baseReceiverName: innerReceiver.text || undefined };
    }
  }

  return chain.length > 0 ? { chain, baseReceiverName: undefined } : undefined;
}

/** Node types representing member/field access across languages. */
const FIELD_ACCESS_NODE_TYPES = new Set([
  'member_expression',          // TS/JS
  'member_access_expression',   // C#
  'selector_expression',        // Go
  'field_expression',           // Rust/C++
  'field_access',               // Java
  'attribute',                  // Python
  'navigation_expression',      // Kotlin/Swift
  'member_binding_expression',  // C# null-conditional (user?.Address)
]);

/** One step in a mixed receiver chain. */
export type MixedChainStep = { kind: 'field' | 'call'; name: string };

/**
 * Walk a receiver AST node that may interleave field accesses and method calls,
 * building a unified chain of steps up to MAX_CHAIN_DEPTH.
*
 * For `svc.getUser().address.save()`, called with the receiver of `save`
 * (`svc.getUser().address`, a field access node):
 *   returns { chain: [{ kind:'call', name:'getUser' }, { kind:'field', name:'address' }],
 *             baseReceiverName: 'svc' }
 *
 * For `user.getAddress().city.getName()`, called with receiver of `getName`
 * (`user.getAddress().city`):
 *   returns { chain: [{ kind:'call', name:'getAddress' }, { kind:'field', name:'city' }],
 *             baseReceiverName: 'user' }
 *
 * Pure field chains and pure call chains are special cases (all steps same kind).
 *
 * Returns undefined when no step was collected (e.g. the receiver is already a
 * plain identifier). When the walk stops early — depth cap reached, a name could
 * not be extracted, or no inner receiver/object was found — the steps collected
 * so far are returned with `baseReceiverName: undefined`.
 */
export function extractMixedChain(
  receiverNode: SyntaxNode,
): { chain: MixedChainStep[]; baseReceiverName: string | undefined } | undefined {
  // Steps are prepended while walking inwards, so the result is ordered base-first.
  const chain: MixedChainStep[] = [];
  let current: SyntaxNode = receiverNode;

  while (chain.length < MAX_CHAIN_DEPTH) {
    if (CALL_EXPRESSION_TYPES.has(current.type)) {
      // ── Call expression: extract method name + inner receiver ────────────
      const funcNode = current.childForFieldName?.('function')
        ?? current.childForFieldName?.('name')
        ?? current.childForFieldName?.('method');
      let methodName: string | undefined;
      let innerReceiver: SyntaxNode | null = null;

      if (funcNode) {
        // For a member-access callee the last named child is the method identifier;
        // a bare callee has no named children, so its own text is used.
        methodName = funcNode.lastNamedChild?.text ?? funcNode.text;
      }

      // Kotlin/Swift: call_expression → navigation_expression
      // (no callee field; name lives in the trailing navigation_suffix)
      if (!funcNode && current.type === 'call_expression') {
        const callee = current.firstNamedChild;
        if (callee?.type === 'navigation_expression') {
          const suffix = callee.lastNamedChild;
          if (suffix?.type === 'navigation_suffix') {
            methodName = suffix.lastNamedChild?.text;
            // First named child that is not a suffix is the receiver expression.
            for (let i = 0; i < callee.namedChildCount; i++) {
              const child = callee.namedChild(i);
              if (child && child.type !== 'navigation_suffix') {
                innerReceiver = child;
                break;
              }
            }
          }
        }
      }

      // Without a method name this step cannot be represented — stop walking.
      if (!methodName) break;
      chain.unshift({ kind: 'call', name: methodName });

      // Receiver lookup on the callee node, by the field names the grammars use.
      if (!innerReceiver && funcNode) {
        innerReceiver = funcNode.childForFieldName?.('object')
          ?? funcNode.childForFieldName?.('value')
          ?? funcNode.childForFieldName?.('operand')
          ?? funcNode.childForFieldName?.('argument') // C/C++ field_expression
          ?? funcNode.childForFieldName?.('expression')
          ?? null;
      }
      // Java: receiver is the 'object' field of the call node itself.
      if (!innerReceiver && current.type === 'method_invocation') {
        innerReceiver = current.childForFieldName?.('object') ?? null;
      }
      // PHP: receiver is the 'object' field of the member call node.
      if (!innerReceiver && (current.type === 'member_call_expression' || current.type === 'nullsafe_member_call_expression')) {
        innerReceiver = current.childForFieldName?.('object') ?? null;
      }
      // Python/Ruby `call` node: try its 'receiver' field.
      if (!innerReceiver && current.type === 'call') {
        innerReceiver = current.childForFieldName?.('receiver') ?? null;
      }

      if (!innerReceiver) break;

      if (CALL_EXPRESSION_TYPES.has(innerReceiver.type) || FIELD_ACCESS_NODE_TYPES.has(innerReceiver.type)) {
        // Receiver is itself a call or a field access — keep walking inwards.
        current = innerReceiver;
      } else {
        // Anything else ends the chain; empty text is reported as undefined.
        return { chain, baseReceiverName: innerReceiver.text || undefined };
      }
    } else if (FIELD_ACCESS_NODE_TYPES.has(current.type)) {
      // ── Field/member access: extract property name + inner object ─────────
      let propertyName: string | undefined;
      let innerObject: SyntaxNode | null = null;

      if (current.type === 'navigation_expression') {
        // Kotlin/Swift: property name is the first named child of the
        // navigation_suffix; the object is the first other named child.
        for (const child of current.children ?? []) {
          if (child.type === 'navigation_suffix') {
            for (const sc of child.children ?? []) {
              if (sc.isNamed && sc.type !== '.') {
                propertyName = sc.text;
                break;
              }
            }
          } else if (child.isNamed && !innerObject) {
            innerObject = child;
          }
        }
      } else if (current.type === 'attribute') {
        // Python attribute: fields 'object' and 'attribute'.
        innerObject = current.childForFieldName?.('object') ?? null;
        propertyName = current.childForFieldName?.('attribute')?.text;
      } else {
        // Remaining grammars: try each known object / property field name in turn.
        innerObject = current.childForFieldName?.('object')
          ?? current.childForFieldName?.('value')
          ?? current.childForFieldName?.('operand')
          ?? current.childForFieldName?.('argument') // C/C++ field_expression
          ?? current.childForFieldName?.('expression')
          ?? null;
        propertyName = (current.childForFieldName?.('property')
          ?? current.childForFieldName?.('field')
          ?? current.childForFieldName?.('name'))?.text;
      }

      if (!propertyName) break;
      chain.unshift({ kind: 'field', name: propertyName });

      // The field step is kept even when no inner object is found; the walk just ends.
      if (!innerObject) break;

      if (CALL_EXPRESSION_TYPES.has(innerObject.type) || FIELD_ACCESS_NODE_TYPES.has(innerObject.type)) {
        current = innerObject;
      } else {
        return { chain, baseReceiverName: innerObject.text || undefined };
      }
    } else {
      // Simple identifier — this is the base receiver
      return chain.length > 0
        ? { chain, baseReceiverName: current.text || undefined }
        : undefined;
    }
  }

  // Depth cap reached or the walk broke out early: base receiver is unknown.
  return chain.length > 0 ? { chain, baseReceiverName: undefined } : undefined;
}

================================================
FILE: gitnexus/src/core/ingestion/workers/parse-worker.ts
================================================
import { parentPort } from 'node:worker_threads';
import Parser from 'tree-sitter';
import JavaScript from 'tree-sitter-javascript';
import TypeScript from 'tree-sitter-typescript';
import Python from 'tree-sitter-python';
import Java from 'tree-sitter-java';
import C from 'tree-sitter-c';
import CPP from 'tree-sitter-cpp';
import CSharp from 'tree-sitter-c-sharp';
import Go from 'tree-sitter-go';
import Rust from 'tree-sitter-rust';
import PHP from 'tree-sitter-php';
import Ruby from 'tree-sitter-ruby';
import { createRequire } from 'node:module';
import { SupportedLanguages } from '../../../config/supported-languages.js';
import { LANGUAGE_QUERIES } from '../tree-sitter-queries.js';
import { getTreeSitterBufferSize, TREE_SITTER_MAX_BUFFER } from '../constants.js';

// tree-sitter-swift is an optionalDependency — may not be installed
const _require = createRequire(import.meta.url);
let Swift: any = null;
try { Swift = _require('tree-sitter-swift'); } catch {}

// tree-sitter-kotlin is an optionalDependency — may not be installed
let Kotlin: any = null;
try { Kotlin = _require('tree-sitter-kotlin'); } catch {}

import {
  getLanguageFromFilename,
  FUNCTION_NODE_TYPES,
  extractFunctionName,
  isBuiltInOrNoise,
  getDefinitionNodeFromCaptures,
findEnclosingClassId, extractMethodSignature, countCallArguments, inferCallForm, extractReceiverName, extractReceiverNode, extractMixedChain, type MixedChainStep,
} from '../utils.js';
import { buildTypeEnv } from '../type-env.js';
import type { ConstructorBinding } from '../type-env.js';
import { isNodeExported } from '../export-detection.js';
import { detectFrameworkFromAST } from '../framework-detection.js';
import { typeConfigs } from '../type-extractors/index.js';
import { generateId } from '../../../lib/utils.js';
import { extractNamedBindings } from '../named-binding-extraction.js';
import { appendKotlinWildcard } from '../resolvers/index.js';
import { callRouters } from '../call-routing.js';
import { extractPropertyDeclaredType } from '../type-extractors/shared.js';
import type { NodeLabel } from '../../graph/types.js';

// ============================================================================
// Types for serializable results
// ============================================================================

/** A graph node produced by the worker: an id, a label, and a flat property bag. */
interface ParsedNode {
  id: string;
  label: string;
  properties: {
    name: string;
    filePath: string;
    startLine: number;
    endLine: number;
    language: SupportedLanguages;
    isExported: boolean;
    // Optional AST-based framework hint (presumably filled from detectFrameworkFromAST — confirm).
    astFrameworkMultiplier?: number;
    astFrameworkReason?: string;
    description?: string;
    // Optional signature details for callable nodes.
    parameterCount?: number;
    requiredParameterCount?: number;
    returnType?: string;
  };
}

/** A structural edge between two node ids, limited to the three relationship types below. */
interface ParsedRelationship {
  id: string;
  sourceId: string;
  targetId: string;
  type: 'DEFINES' | 'HAS_METHOD' | 'HAS_PROPERTY';
  confidence: number;
  reason: string;
}

/** Symbol entry for a definition: name and file, the id of its node, and optional signature/type details. */
interface ParsedSymbol {
  filePath: string;
  name: string;
  nodeId: string;
  type: NodeLabel;
  parameterCount?: number;
  requiredParameterCount?: number;
  parameterTypes?: string[];
  returnType?: string;
  declaredType?: string;
  // NOTE(review): presumably the node id of the enclosing class/owner — confirm against the producer.
  ownerId?: string;
}

/** One import statement found in a file, with its raw (unresolved) path. */
export interface ExtractedImport {
  filePath: string;
  rawImportPath: string;
  language: SupportedLanguages;
  /** Named bindings from the import (e.g., import {User as U} → [{local:'U', exported:'User'}]) */
  namedBindings?: { local: string; exported: string }[];
}

/** One call site found in a file, with whatever receiver information could be extracted. */
export interface ExtractedCall {
  filePath: string;
  calledName: string;
  /** generateId of enclosing function, or generateId('File', filePath) for top-level */
  sourceId: string;
  argCount?: number;
  /** Discriminates free function calls from member/constructor calls */
  callForm?: 'free' | 'member' | 'constructor';
  /** Simple identifier of the receiver for member calls (e.g., 'user' in user.save()) */
  receiverName?: string;
  /** Resolved type name of the receiver (e.g., 'User' for user.save() when user: User) */
  receiverTypeName?: string;
  /**
   * Unified mixed chain when the receiver is a chain of field accesses and/or method calls.
   * Steps are ordered base-first (innermost to outermost). Examples:
   *   `svc.getUser().save()`        → chain=[{kind:'call',name:'getUser'}], receiverName='svc'
   *   `user.address.save()`         → chain=[{kind:'field',name:'address'}], receiverName='user'
   *   `svc.getUser().address.save()` → chain=[{kind:'call',name:'getUser'},{kind:'field',name:'address'}]
   * Length is capped at MAX_CHAIN_DEPTH (3).
   */
  receiverMixedChain?: MixedChainStep[];
}

/** One property write (receiver.property = value) found in a file. */
export interface ExtractedAssignment {
  filePath: string;
  /** generateId of enclosing function, or generateId('File', filePath) for top-level */
  sourceId: string;
  /** Receiver text (e.g., 'user' from user.address = value) */
  receiverText: string;
  /** Property name being written (e.g., 'address') */
  propertyName: string;
  /** Resolved type name of the receiver if available from TypeEnv */
  receiverTypeName?: string;
}

/** One inheritance-style relation between a class and a parent, by name. */
export interface ExtractedHeritage {
  filePath: string;
  className: string;
  parentName: string;
  /** 'extends' | 'implements' | 'trait-impl' | 'include' | 'extend' | 'prepend' */
  kind: string;
}

/** One HTTP route declaration found in a file; parts that could not be determined are null. */
export interface ExtractedRoute {
  filePath: string;
  httpMethod: string;
  routePath: string | null;
  controllerName: string | null;
  methodName: string | null;
  middleware: string[];
  prefix: string | null;
  lineNumber: number;
}

/** Constructor bindings keyed by filePath for cross-file type resolution */
export interface FileConstructorBindings {
  filePath: string;
  bindings: ConstructorBinding[];
}

/** Everything a worker returns for one batch of files. */
export interface ParseWorkerResult {
  nodes: ParsedNode[];
  relationships: ParsedRelationship[];
  symbols: ParsedSymbol[];
  imports: ExtractedImport[];
  calls: ExtractedCall[];
  assignments: ExtractedAssignment[];
  heritage: ExtractedHeritage[];
  routes: ExtractedRoute[];
  constructorBindings: FileConstructorBindings[];
  /** Number of files skipped per language because no grammar was available. */
  skippedLanguages: Record<string, number>;
  fileCount: number;
}

/** One file handed to the worker: its path and full text content. */
export interface ParseWorkerInput {
  path: string;
  content: string;
}

// ============================================================================
// Worker-local parser + language map
// ============================================================================

// Single parser instance for this worker; its language is switched per file group.
const parser = new Parser();

// Grammar lookup keyed by SupportedLanguages value; TSX gets its own `<typescript>:tsx` key.
const languageMap: Record<string, any> = {
  [SupportedLanguages.JavaScript]: JavaScript,
  [SupportedLanguages.TypeScript]: TypeScript.typescript,
  [`${SupportedLanguages.TypeScript}:tsx`]: TypeScript.tsx,
  [SupportedLanguages.Python]: Python,
  [SupportedLanguages.Java]: Java,
[SupportedLanguages.C]: C,
  [SupportedLanguages.CPlusPlus]: CPP,
  [SupportedLanguages.CSharp]: CSharp,
  [SupportedLanguages.Go]: Go,
  [SupportedLanguages.Rust]: Rust,
  ...(Kotlin ? { [SupportedLanguages.Kotlin]: Kotlin } : {}),
  [SupportedLanguages.PHP]: PHP.php_only,
  [SupportedLanguages.Ruby]: Ruby,
  ...(Swift ? { [SupportedLanguages.Swift]: Swift } : {}),
};

/**
 * Key under which the grammar for a file lives in languageMap.
 * `.tsx` files use a different grammar than `.ts` files although both share
 * SupportedLanguages.TypeScript, so they get the `<language>:tsx` key.
 */
const grammarKeyFor = (language: SupportedLanguages, filePath: string): string =>
  language === SupportedLanguages.TypeScript && filePath.endsWith('.tsx')
    ? `${language}:tsx`
    : language;

/**
 * Check if a language grammar is available in this worker.
 * Duplicated from parser-loader.ts because workers can't import from the main thread.
 * Extra filePath parameter needed to distinguish .tsx from .ts (different grammars
 * under the same SupportedLanguages.TypeScript key).
 */
const isLanguageAvailable = (language: SupportedLanguages, filePath: string): boolean => {
  const key = grammarKeyFor(language, filePath);
  return key in languageMap && languageMap[key] != null;
};

/**
 * Point the worker-local parser at the grammar for the given language/file.
 * @throws Error when no grammar is registered for the language.
 */
const setLanguage = (language: SupportedLanguages, filePath: string): void => {
  const lang = languageMap[grammarKeyFor(language, filePath)];
  if (!lang) throw new Error(`Unsupported language: ${language}`);
  parser.setLanguage(lang);
};

// isNodeExported imported from ../export-detection.js (shared module)

// ============================================================================
// Enclosing function detection (for call extraction)
// ============================================================================

/** Walk up AST to find enclosing function, return its generateId or null for top-level */
const findEnclosingFunctionId = (node: any, filePath: string): string | null => {
  for (let current = node.parent; current; current = current.parent) {
    if (!FUNCTION_NODE_TYPES.has(current.type)) continue;
    const { funcName, label } = extractFunctionName(current);
    // Function-like ancestors without a name are skipped; keep climbing.
    if (funcName) {
      return generateId(label, `${filePath}:${funcName}`);
    }
  }
  return null;
};

// ============================================================================
// Label detection from capture map
// ============================================================================

/**
 * Definition capture name → node label, in priority order.
 * The first capture present in the capture map decides the label.
 */
const DEFINITION_CAPTURE_LABELS: ReadonlyArray<readonly [string, NodeLabel]> = [
  ['definition.function', 'Function'],
  ['definition.class', 'Class'],
  ['definition.interface', 'Interface'],
  ['definition.method', 'Method'],
  ['definition.struct', 'Struct'],
  ['definition.enum', 'Enum'],
  ['definition.namespace', 'Namespace'],
  ['definition.module', 'Module'],
  ['definition.trait', 'Trait'],
  ['definition.impl', 'Impl'],
  ['definition.type', 'TypeAlias'],
  ['definition.const', 'Const'],
  ['definition.static', 'Static'],
  ['definition.typedef', 'Typedef'],
  ['definition.macro', 'Macro'],
  ['definition.union', 'Union'],
  ['definition.property', 'Property'],
  ['definition.record', 'Record'],
  ['definition.delegate', 'Delegate'],
  ['definition.annotation', 'Annotation'],
  ['definition.constructor', 'Constructor'],
  ['definition.template', 'Template'],
];

/**
 * Derive the node label for a query match from its capture map.
 *
 * @returns null for import/call matches and for matches without a `name` capture;
 *          the label of the first known `definition.*` capture otherwise;
 *          'CodeElement' when a name is present but no known definition capture is.
 */
const getLabelFromCaptures = (captureMap: Record<string, any>): NodeLabel | null => {
  // Skip imports (handled separately) and calls
  if (captureMap['import'] || captureMap['call']) return null;
  if (!captureMap['name']) return null;

  for (const [captureName, label] of DEFINITION_CAPTURE_LABELS) {
    if (captureMap[captureName]) return label;
  }
  return 'CodeElement';
};

// DEFINITION_CAPTURE_KEYS and getDefinitionNodeFromCaptures imported from ../utils.js

// ============================================================================
// Process a batch of files
// ============================================================================

/**
 * Parse a batch of files and collect all extracted results.
 *
 * Files are grouped by language (and TypeScript further into .ts / .tsx) so the
 * parser grammar is switched once per group. Files whose language cannot be
 * determined, or whose language has no query string, are ignored. Groups whose
 * grammar is missing or cannot be loaded are counted in `skippedLanguages`.
 *
 * @param files - Files (path + content) to process.
 * @param onProgress - Optional callback, invoked with the running total each time
 *                     at least 100 more files have been processed.
 * @returns The accumulated worker result for the batch.
 */
const processBatch = (files: ParseWorkerInput[], onProgress?: (filesProcessed: number) => void): ParseWorkerResult => {
  const result: ParseWorkerResult = {
    nodes: [],
    relationships: [],
    symbols: [],
    imports: [],
    calls: [],
    assignments: [],
    heritage: [],
    routes: [],
    constructorBindings: [],
    skippedLanguages: {},
    fileCount: 0,
  };

  // Group by language to minimize setLanguage calls
  const byLanguage = new Map<SupportedLanguages, ParseWorkerInput[]>();
  for (const file of files) {
    const lang = getLanguageFromFilename(file.path);
    if (!lang) continue;
    const list = byLanguage.get(lang);
    if (list) {
      list.push(file);
    } else {
      byLanguage.set(lang, [file]);
    }
  }

  let totalProcessed = 0;
  let lastReported = 0;
  const PROGRESS_INTERVAL = 100; // report every 100 files

  const onFileProcessed = onProgress
    ? () => {
        totalProcessed++;
        if (totalProcessed - lastReported >= PROGRESS_INTERVAL) {
          lastReported = totalProcessed;
          onProgress(totalProcessed);
        }
      }
    : undefined;

  const recordSkipped = (language: SupportedLanguages, count: number): void => {
    result.skippedLanguages[language] = (result.skippedLanguages[language] || 0) + count;
  };

  // Handles one group of files that all share a single grammar.
  const processGroup = (group: ParseWorkerInput[], language: SupportedLanguages, queryString: string): void => {
    if (group.length === 0) return;
    const samplePath = group[0].path;

    if (!isLanguageAvailable(language, samplePath)) {
      recordSkipped(language, group.length);
      return;
    }

    try {
      setLanguage(language, samplePath);
    } catch {
      // Grammar could not be loaded into the parser: nothing in this group was
      // parsed, so report the files as skipped instead of dropping them silently.
      recordSkipped(language, group.length);
      return;
    }

    try {
      processFileGroup(group, language, queryString, result, onFileProcessed);
    } catch {
      // Best-effort: a failure while processing the group must not abort the batch.
      // Results already appended for earlier files of the group are kept.
    }
  };

  for (const [language, langFiles] of byLanguage) {
    const queryString = LANGUAGE_QUERIES[language];
    if (!queryString) continue;

    // .tsx needs a different grammar than .ts, so TypeScript is split in two groups.
    const isTypeScript = language === SupportedLanguages.TypeScript;
    const regularFiles = isTypeScript ? langFiles.filter((f) => !f.path.endsWith('.tsx')) : langFiles;
    const tsxFiles = isTypeScript ? langFiles.filter((f) => f.path.endsWith('.tsx')) : [];

    processGroup(regularFiles, language, queryString);
    processGroup(tsxFiles, language, queryString);
  }

  return result;
};

// ============================================================================
// PHP Eloquent metadata extraction
// ============================================================================

/** Eloquent model properties whose array values are worth indexing */
const ELOQUENT_ARRAY_PROPS = new Set(['fillable', 'casts', 'hidden', 'guarded', 'with', 'appends']);

/** Eloquent relationship method names */
const ELOQUENT_RELATIONS = new Set([
  'hasMany', 'hasOne', 'belongsTo',
'belongsToMany',
  'morphTo',
  'morphMany',
  'morphOne',
  'morphToMany',
  'morphedByMany',
  'hasManyThrough',
  'hasOneThrough',
]);

/**
 * Pre-order search for the first node of the given type, starting with `node`
 * itself. Returns null when no node in the subtree matches.
 */
function findDescendant(node: any, type: string): any {
  // Explicit stack; children are pushed in reverse so they pop in source order.
  const pending: any[] = [node];
  while (pending.length > 0) {
    const current = pending.pop();
    if (current.type === type) return current;
    const kids = current.children ?? [];
    for (let i = kids.length - 1; i >= 0; i--) {
      pending.push(kids[i]);
    }
  }
  return null;
}

/**
 * Return the text of a node's first `string_content` child, or the node's own
 * text when it is itself a `string_content` node. Returns null otherwise.
 */
function extractStringContent(node: any): string | null {
  if (!node) return null;
  const inner = node.children?.find((c: any) => c.type === 'string_content');
  if (inner) return inner.text;
  return node.type === 'string_content' ? node.text : null;
}

/**
 * For a PHP property_declaration node, extract array values as a description string.
 * Entries with `=>` are rendered as `key:value`; plain entries as the value alone.
 * Returns null if not an Eloquent model property or no array values found.
 */
function extractPhpPropertyDescription(propName: string, propDeclNode: any): string | null {
  if (!ELOQUENT_ARRAY_PROPS.has(propName)) return null;

  const arrayNode = findDescendant(propDeclNode, 'array_creation_expression');
  if (!arrayNode) return null;

  const entries: string[] = [];
  for (const element of (arrayNode.children ?? [])) {
    if (element.type !== 'array_element_initializer') continue;

    const parts = element.children ?? [];
    const arrowAt = parts.findIndex((c: any) => c.type === '=>');

    if (arrowAt === -1) {
      // Simple value (used in $fillable, $hidden, etc.)
      const value = extractStringContent(parts[0]);
      if (value) entries.push(value);
      continue;
    }

    // key => value pair (used in $casts)
    const key = extractStringContent(parts[arrowAt - 1]);
    const value = extractStringContent(parts[arrowAt + 1]);
    if (key && value) entries.push(`${key}:${value}`);
  }

  return entries.length > 0 ? entries.join(', ') : null;
}

/**
 * For a PHP method_declaration node, detect if it defines an Eloquent relationship.
 * Returns description like "hasMany(Post)" or null.
*/
function extractEloquentRelationDescription(methodNode: any): string | null {
  // True for `$this->hasMany(...)`-style calls whose method is a known relation.
  const isRelationCall = (node: any): boolean => {
    if (node.type !== 'member_call_expression') return false;
    const parts = node.children ?? [];
    const receiver = parts.find((c: any) => c.type === 'variable_name' && c.text === '$this');
    const methodName = parts.find((c: any) => c.type === 'name');
    return Boolean(receiver && methodName && ELOQUENT_RELATIONS.has(methodName.text));
  };

  // Depth-first, pre-order: the first matching call in source order wins.
  function findRelationCall(node: any): any {
    if (isRelationCall(node)) return node;
    for (const child of (node.children ?? [])) {
      const hit = findRelationCall(child);
      if (hit) return hit;
    }
    return null;
  }

  const callNode = findRelationCall(methodNode);
  if (!callNode) return null;

  const relType = callNode.children?.find((c: any) => c.type === 'name')?.text;
  const argsNode = callNode.children?.find((c: any) => c.type === 'arguments');

  // Only a `Model::class` first argument yields a target model name.
  const firstArg = argsNode?.children?.find((c: any) => c.type === 'argument');
  const classConstant = firstArg?.children?.find((c: any) =>
    c.type === 'class_constant_access_expression'
  );
  const targetModel: string | null = classConstant
    ? (classConstant.children?.find((c: any) => c.type === 'name')?.text ?? null)
    : null;

  if (!relType) return null;
  return targetModel ? `${relType}(${targetModel})` : relType;
}

// ============================================================================
// Laravel Route Extraction (procedural AST walk)
// ============================================================================

/** Attributes inherited by every route declared inside a route group. */
interface RouteGroupContext {
  middleware: string[];
  prefix: string | null;
  controller: string | null;
}

/** `Route::` methods that declare a single route. */
const ROUTE_HTTP_METHODS = new Set([
  'get',
  'post',
  'put',
  'patch',
  'delete',
  'options',
  'any',
  'match',
]);

/** `Route::` methods that expand into one route per controller action. */
const ROUTE_RESOURCE_METHODS = new Set(['resource', 'apiResource']);

const RESOURCE_ACTIONS = ['index', 'create', 'store', 'show', 'edit', 'update', 'destroy'];

const API_RESOURCE_ACTIONS = ['index', 'store', 'show', 'update', 'destroy'];

/** Check if node is a scoped_call_expression with object 'Route' */
function isRouteStaticCall(node: any): boolean {
  if (node.type !== 'scoped_call_expression') return false;
  const receiver = node.childForFieldName?.('object') ?? node.children?.[0];
  return receiver?.text === 'Route';
}

/** Get the method name from a scoped_call_expression or member_call_expression */
function getCallMethodName(node: any): string | null {
  const viaField = node.childForFieldName?.('name');
  const nameNode = viaField ?? node.children?.find((c: any) => c.type === 'name');
  return nameNode?.text ?? null;
}

/** Get the arguments node from a call expression */
function getArguments(node: any): any {
  const argsNode = node.children?.find((c: any) => c.type === 'arguments');
  return argsNode ?? null;
}

/** Find the closure body inside arguments */
function findClosureBody(argsNode: any): any | null {
  if (!argsNode) return null;
  for (const child of argsNode.children ?? []) {
    if (child.type === 'argument') {
      for (const inner of child.children ?? []) {
        if (inner.type === 'anonymous_function' || inner.type === 'arrow_function') {
          return inner.childForFieldName?.('body') ??
inner.children?.find((c: any) => c.type === 'compound_statement');
        }
      }
    }
    if (child.type === 'anonymous_function' || child.type === 'arrow_function') {
      return child.childForFieldName?.('body') ?? child.children?.find((c: any) => c.type === 'compound_statement');
    }
  }
  return null;
}

/**
 * Extract first string argument from arguments node.
 * Each child is unwrapped when it is an `argument` node; the first `string` or
 * `encapsed_string` found decides the result.
 */
function extractFirstStringArg(argsNode: any): string | null {
  if (!argsNode) return null;
  const entries: any[] = argsNode.children ?? [];
  for (let i = 0; i < entries.length; i++) {
    const entry = entries[i];
    const value = entry.type === 'argument' ? entry.children?.[0] : entry;
    if (value && (value.type === 'string' || value.type === 'encapsed_string')) {
      return extractStringContent(value);
    }
  }
  return null;
}

/**
 * Extract middleware from arguments — handles string or array.
 * The first string or array argument decides the result; later arguments are
 * not inspected.
 */
function extractMiddlewareArg(argsNode: any): string[] {
  if (!argsNode) return [];
  for (const entry of argsNode.children ?? []) {
    const value = entry.type === 'argument' ? entry.children?.[0] : entry;
    if (!value) continue;

    switch (value.type) {
      case 'string':
      case 'encapsed_string': {
        const single = extractStringContent(value);
        return single ? [single] : [];
      }
      case 'array_creation_expression': {
        const names: string[] = [];
        for (const element of value.children ?? []) {
          if (element.type !== 'array_element_initializer') continue;
          const literal = element.children?.find((c: any) => c.type === 'string' || c.type === 'encapsed_string');
          const name = literal ? extractStringContent(literal) : null;
          if (name) names.push(name);
        }
        return names;
      }
      default:
        break;
    }
  }
  return [];
}

/** Extract Controller::class from arguments */
function extractClassArg(argsNode: any): string | null {
  if (!argsNode) return null;
  for (const child of argsNode.children ?? []) {
    const target = child.type === 'argument' ? child.children?.[0] : child;
    if (target?.type === 'class_constant_access_expression') {
      return target.children?.find((c: any) => c.type === 'name')?.text ??
null;
    }
  }
  return null;
}

/**
 * Extract controller class name from arguments: [Controller::class, 'method'] or 'Controller@method'.
 *
 * The handler is normally the second argument (`Route::get(uri, handler)`).
 * When the first argument is an array literal — the `Route::match([...verbs], uri, handler)`
 * form — the handler is read from the third argument instead.
 *
 * @param argsNode - The `arguments` node of the route call (may be null).
 * @returns The controller and method names; either is null when it cannot be determined.
 *   An invokable `Controller::class` handler yields method `'__invoke'`.
 */
function extractControllerTarget(argsNode: any): { controller: string | null; method: string | null } {
  if (!argsNode) return { controller: null, method: null };

  // Collect positional arguments, skipping punctuation tokens.
  const args: any[] = [];
  for (const child of argsNode.children ?? []) {
    if (child.type === 'argument') args.push(child.children?.[0]);
    else if (child.type !== '(' && child.type !== ')' && child.type !== ',') args.push(child);
  }

  // Route::match(['get', 'post'], '/uri', handler) puts the verb list first,
  // which shifts the handler from position 1 to position 2.
  const handlerIndex = args[0]?.type === 'array_creation_expression' ? 2 : 1;
  const handlerNode = args[handlerIndex];
  if (!handlerNode) return { controller: null, method: null };

  // Array syntax: [UserController::class, 'index']
  if (handlerNode.type === 'array_creation_expression') {
    let controller: string | null = null;
    let method: string | null = null;
    const elements: any[] = [];
    for (const el of handlerNode.children ?? []) {
      if (el.type === 'array_element_initializer') elements.push(el);
    }
    if (elements[0]) {
      const classAccess = findDescendant(elements[0], 'class_constant_access_expression');
      if (classAccess) {
        controller = classAccess.children?.find((c: any) => c.type === 'name')?.text ?? null;
      }
    }
    if (elements[1]) {
      const str = findDescendant(elements[1], 'string');
      method = str ? extractStringContent(str) : null;
    }
    return { controller, method };
  }

  // String syntax: 'UserController@index'
  if (handlerNode.type === 'string' || handlerNode.type === 'encapsed_string') {
    const text = extractStringContent(handlerNode);
    if (text?.includes('@')) {
      const [controller, method] = text.split('@');
      return { controller, method };
    }
  }

  // Class reference: UserController::class (invokable controller)
  if (handlerNode.type === 'class_constant_access_expression') {
    const controller = handlerNode.children?.find((c: any) => c.type === 'name')?.text ?? null;
    return { controller, method: '__invoke' };
  }

  return { controller: null, method: null };
}

/** A fluent `Route::a(...)->b(...)->terminal(...)` chain, flattened. */
interface ChainedRouteCall {
  isRouteFacade: boolean;
  /** Name of the outermost (last) call in the chain. */
  terminalMethod: string;
  /** Calls preceding the terminal one, in source order. */
  attributes: { method: string; argsNode: any }[];
  terminalArgs: any;
  node: any;
}

/**
 * Unwrap a chained call like Route::middleware('auth')->prefix('api')->group(fn)
 *
 * Walks from the outermost member call down through each receiver (first child).
 * Returns null unless the chain bottoms out in a static call on `Route`.
 */
function unwrapRouteChain(node: any): ChainedRouteCall | null {
  if (node.type !== 'member_call_expression') return null;

  const terminalMethod = getCallMethodName(node);
  if (!terminalMethod) return null;

  const terminalArgs = getArguments(node);
  const attributes: { method: string; argsNode: any }[] = [];

  let current = node.children?.[0];
  while (current) {
    if (current.type === 'member_call_expression') {
      const method = getCallMethodName(current);
      const args = getArguments(current);
      // unshift keeps attributes in source order while walking inward
      if (method) attributes.unshift({ method, argsNode: args });
      current = current.children?.[0];
    } else if (current.type === 'scoped_call_expression') {
      const obj = current.childForFieldName?.('object') ?? current.children?.[0];
      if (obj?.text !== 'Route') return null;

      const method = getCallMethodName(current);
      const args = getArguments(current);
      if (method) attributes.unshift({ method, argsNode: args });

      return { isRouteFacade: true, terminalMethod, attributes, terminalArgs, node };
    } else {
      break;
    }
  }

  return null;
}

/** Parse Route::group(['middleware' => ..., 'prefix' => ...], fn) array syntax */
function parseArrayGroupArgs(argsNode: any): RouteGroupContext {
  const ctx: RouteGroupContext = { middleware: [], prefix: null, controller: null };
  if (!argsNode) return ctx;

  for (const child of argsNode.children ?? []) {
    const target = child.type === 'argument' ? child.children?.[0] : child;
    if (target?.type === 'array_creation_expression') {
      for (const el of target.children ?? []) {
        if (el.type !== 'array_element_initializer') continue;
        const children = el.children ??
[];
        const arrowIdx = children.findIndex((c: any) => c.type === '=>');
        if (arrowIdx === -1) continue;
        const key = extractStringContent(children[arrowIdx - 1]);
        const val = children[arrowIdx + 1];
        if (key === 'middleware') {
          if (val?.type === 'string') {
            const s = extractStringContent(val);
            if (s) ctx.middleware.push(s);
          } else if (val?.type === 'array_creation_expression') {
            for (const item of val.children ?? []) {
              if (item.type === 'array_element_initializer') {
                const str = item.children?.find((c: any) => c.type === 'string');
                const s = str ? extractStringContent(str) : null;
                if (s) ctx.middleware.push(s);
              }
            }
          }
        } else if (key === 'prefix') {
          ctx.prefix = extractStringContent(val) ?? null;
        } else if (key === 'controller') {
          if (val?.type === 'class_constant_access_expression') {
            ctx.controller = val.children?.find((c: any) => c.type === 'name')?.text ?? null;
          }
        }
      }
    }
  }

  return ctx;
}

/**
 * Walk a parsed PHP route file and collect every route it declares.
 *
 * Handles plain `Route::get(...)`-style calls, `Route::group([...], fn)` with
 * array options, and fluent chains such as
 * `Route::middleware(...)->prefix(...)->group(fn)` or `...->get(...)`.
 * Middleware, prefix and controller set by enclosing groups are inherited by
 * the routes declared inside them. `resource` / `apiResource` calls expand to
 * one route per action.
 *
 * @param tree - Parsed syntax tree; only `tree.rootNode` is read.
 * @param filePath - Path recorded on every emitted route.
 * @returns Extracted routes in source order. `lineNumber` is the call node's
 *   `startPosition.row`.
 */
function extractLaravelRoutes(tree: any, filePath: string): ExtractedRoute[] {
  const routes: ExtractedRoute[] = [];

  // Join two prefix segments, collapsing any run of slashes into one so that
  // 'api/' + '/users' becomes 'api/users'.
  const joinPrefix = (base: string | null, segment: string): string =>
    base ? `${base}/${segment}`.replace(/\/+/g, '/') : segment;

  // Fold the group stack (outermost first) into the attributes in effect.
  function resolveStack(stack: RouteGroupContext[]): { middleware: string[]; prefix: string | null; controller: string | null } {
    const middleware: string[] = [];
    let prefix: string | null = null;
    let controller: string | null = null;
    for (const ctx of stack) {
      middleware.push(...ctx.middleware);
      if (ctx.prefix) prefix = joinPrefix(prefix, ctx.prefix);
      if (ctx.controller) controller = ctx.controller; // innermost group wins
    }
    return { middleware, prefix, controller };
  }

  function emitRoute(
    httpMethod: string,
    argsNode: any,
    lineNumber: number,
    groupStack: RouteGroupContext[],
    chainAttrs: { method: string; argsNode: any }[],
  ) {
    const effective = resolveStack(groupStack);

    // Attributes set on the route's own fluent chain extend the group context.
    for (const attr of chainAttrs) {
      if (attr.method === 'middleware') effective.middleware.push(...extractMiddlewareArg(attr.argsNode));
      if (attr.method === 'prefix') {
        const p = extractFirstStringArg(attr.argsNode);
        // Normalized the same way as group prefixes in resolveStack.
        if (p) effective.prefix = joinPrefix(effective.prefix, p);
      }
      if (attr.method === 'controller') {
        const cls = extractClassArg(attr.argsNode);
        if (cls) effective.controller = cls;
      }
    }

    const routePath = extractFirstStringArg(argsNode);
    const target = extractControllerTarget(argsNode);
    const controllerName = target.controller ?? effective.controller;

    if (ROUTE_RESOURCE_METHODS.has(httpMethod)) {
      const actions = httpMethod === 'apiResource' ? API_RESOURCE_ACTIONS : RESOURCE_ACTIONS;
      for (const action of actions) {
        routes.push({
          filePath,
          httpMethod,
          routePath,
          controllerName,
          methodName: action,
          middleware: [...effective.middleware],
          prefix: effective.prefix,
          lineNumber,
        });
      }
    } else {
      routes.push({
        filePath,
        httpMethod,
        routePath,
        controllerName,
        methodName: target.method,
        middleware: [...effective.middleware],
        prefix: effective.prefix,
        lineNumber,
      });
    }
  }

  function walk(node: any, groupStack: RouteGroupContext[]) {
    // Case 1: Simple Route::get(...), Route::post(...), etc.
    if (isRouteStaticCall(node)) {
      const method = getCallMethodName(node);
      if (method && (ROUTE_HTTP_METHODS.has(method) || ROUTE_RESOURCE_METHODS.has(method))) {
        emitRoute(method, getArguments(node), node.startPosition.row, groupStack, []);
        return;
      }
      if (method === 'group') {
        const argsNode = getArguments(node);
        const groupCtx = parseArrayGroupArgs(argsNode);
        const body = findClosureBody(argsNode);
        if (body) {
          groupStack.push(groupCtx);
          walkChildren(body, groupStack);
          groupStack.pop();
        }
        return;
      }
    }

    // Case 2: Fluent chain — Route::middleware(...)->group(...) or Route::middleware(...)->get(...)
    const chain = unwrapRouteChain(node);
    if (chain) {
      if (chain.terminalMethod === 'group') {
        const groupCtx: RouteGroupContext = { middleware: [], prefix: null, controller: null };
        for (const attr of chain.attributes) {
          if (attr.method === 'middleware') groupCtx.middleware.push(...extractMiddlewareArg(attr.argsNode));
          if (attr.method === 'prefix') groupCtx.prefix = extractFirstStringArg(attr.argsNode);
          if (attr.method === 'controller') groupCtx.controller = extractClassArg(attr.argsNode);
        }
        const body = findClosureBody(chain.terminalArgs);
        if (body) {
          groupStack.push(groupCtx);
          walkChildren(body, groupStack);
          groupStack.pop();
        }
        return;
      }
      if (ROUTE_HTTP_METHODS.has(chain.terminalMethod) || ROUTE_RESOURCE_METHODS.has(chain.terminalMethod)) {
        emitRoute(chain.terminalMethod, chain.terminalArgs, node.startPosition.row, groupStack, chain.attributes);
        return;
      }
    }

    // Default: recurse into children
    walkChildren(node, groupStack);
  }

  function walkChildren(node: any, groupStack: RouteGroupContext[]) {
    for (const child of node.children ?? []) {
      walk(child, groupStack);
    }
  }

  walk(tree.rootNode, []);
  return routes;
}

/**
 * Parse every file of one language group with the currently configured parser
 * and append everything extracted to `result`.
 *
 * @param files - Files sharing one language and grammar.
 * @param language - Language of the group.
 * @param queryString - Query source compiled once for the whole group.
 * @param result - Accumulator that is mutated in place.
 * @param onFileProcessed - Optional hook called once per successfully parsed file.
 */
const processFileGroup = (
  files: ParseWorkerInput[],
  language: SupportedLanguages,
  queryString: string,
  result: ParseWorkerResult,
  onFileProcessed?: () => void,
): void => {
  let query: any;
  try {
    const lang = parser.getLanguage();
    query = new Parser.Query(lang, queryString);
  } catch (err) {
    const message = `Query compilation failed for ${language}: ${err instanceof Error ?
err.message : String(err)}`;
    // Report through the parent port when one exists, otherwise on the console.
    if (parentPort) {
      parentPort.postMessage({ type: 'warning', message });
    } else {
      console.warn(message);
    }
    // Without a compiled query nothing in this group can be extracted.
    return;
  }

  for (const file of files) {
    // Skip files larger than the max tree-sitter buffer (32 MB)
    // (skipped files are not counted in fileCount and trigger no progress callback)
    if (file.content.length > TREE_SITTER_MAX_BUFFER) continue;

    let tree;
    try {
      tree = parser.parse(file.content, undefined, { bufferSize: getTreeSitterBufferSize(file.content.length) });
    } catch (err) {
      console.warn(`Failed to parse file ${file.path}: ${err instanceof Error ? err.message : String(err)}`);
      continue;
    }

    // Counted as processed once parsing succeeded, even if the query below fails.
    result.fileCount++;
    onFileProcessed?.();

    let matches;
    try {
      matches = query.matches(tree.rootNode);
    } catch (err) {
      console.warn(`Query execution failed for ${file.path}: ${err instanceof Error ? err.message : String(err)}`);
      continue;
    }

    // Pre-pass: extract heritage from query matches to build parentMap for buildTypeEnv.
    // Heritage edges (EXTENDS/IMPLEMENTS) are created by heritage-processor which runs
    // in PARALLEL with call-processor, so the graph edges don't exist when buildTypeEnv
    // runs. This pre-pass makes parent class information available for type resolution.
    const fileParentMap = new Map<string, string[]>();
    for (const match of matches) {
      // Index this match's captures by capture name.
      const captureMap: Record<string, any> = {};
      for (const c of match.captures) {
        captureMap[c.name] = c.node;
      }
      if (captureMap['heritage.class'] && captureMap['heritage.extends']) {
        const className: string = captureMap['heritage.class'].text;
        const parentName: string = captureMap['heritage.extends'].text;
        // Skip Go named fields (only anonymous fields are struct embedding)
        const extendsNode = captureMap['heritage.extends'];
        const fieldDecl = extendsNode.parent;
        if (fieldDecl?.type === 'field_declaration' && fieldDecl.childForFieldName('name')) continue;
        let parents = fileParentMap.get(className);
        if (!parents) {
          parents = [];
          fileParentMap.set(className, parents);
        }
        // Keep each parent name once per class.
        if (!parents.includes(parentName)) parents.push(parentName);
      }
    }

    // Build per-file type environment + constructor bindings in a single AST walk.
    // Constructor bindings are verified against the SymbolTable in processCallsFromExtracted.
    const parentMap: ReadonlyMap<string, readonly string[]> = fileParentMap;
    const typeEnv = buildTypeEnv(tree, language, { parentMap });
    const callRouter = callRouters[language];

    if (typeEnv.constructorBindings.length > 0) {
      result.constructorBindings.push({ filePath: file.path, bindings: [...typeEnv.constructorBindings] });
    }

    // Main pass: each match is classified as import, assignment, call, heritage,
    // or definition, in that order; most branches end with `continue`.
    for (const match of matches) {
      const captureMap: Record<string, any> = {};
      for (const c of match.captures) {
        captureMap[c.name] = c.node;
      }

      // Extract import paths before skipping
      if (captureMap['import'] && captureMap['import.source']) {
        // Quotes and angle brackets are stripped from the raw source text.
        const rawImportPath = language === SupportedLanguages.Kotlin
          ? appendKotlinWildcard(captureMap['import.source'].text.replace(/['"<>]/g, ''), captureMap['import'])
          : captureMap['import.source'].text.replace(/['"<>]/g, '');
        const namedBindings = extractNamedBindings(captureMap['import'], language);
        result.imports.push({
          filePath: file.path,
          rawImportPath,
          language: language,
          ...(namedBindings ? { namedBindings } : {}),
        });
        continue;
      }

      // Extract assignment sites (field write access)
      if (captureMap['assignment'] && captureMap['assignment.receiver'] && captureMap['assignment.property']) {
        const receiverText = captureMap['assignment.receiver'].text;
        const propertyName = captureMap['assignment.property'].text;
        if (receiverText && propertyName) {
          // Attribute the write to the enclosing function, or to the file itself.
          const srcId = findEnclosingFunctionId(captureMap['assignment'], file.path) || generateId('File', file.path);
          let receiverTypeName: string | undefined;
          if (typeEnv) {
            receiverTypeName = typeEnv.lookup(receiverText, captureMap['assignment']) ?? undefined;
          }
          result.assignments.push({
            filePath: file.path,
            sourceId: srcId,
            receiverText,
            propertyName,
            ...(receiverTypeName ? { receiverTypeName } : {}),
          });
        }
        // A match may carry both an assignment and a call; only stop if there is no call.
        if (!captureMap['call']) continue;
      }

      // Extract call sites
      if (captureMap['call']) {
        const callNameNode = captureMap['call.name'];
        if (callNameNode) {
          const calledName = callNameNode.text;

          // Dispatch: route language-specific calls (heritage, properties, imports)
          const routed = callRouter(calledName, captureMap['call']);
          if (routed) {
            if (routed.kind === 'skip') continue;

            if (routed.kind === 'import') {
              result.imports.push({
                filePath: file.path,
                rawImportPath: routed.importPath,
                language,
              });
              continue;
            }

            if (routed.kind === 'heritage') {
              for (const item of routed.items) {
                result.heritage.push({
                  filePath: file.path,
                  className: item.enclosingClass,
                  parentName: item.mixinName,
                  kind: item.heritageKind,
                });
              }
              continue;
            }

            if (routed.kind === 'properties') {
              // Each routed property becomes a Property node, a symbol, a DEFINES
              // edge from the file, and (inside a class) a HAS_PROPERTY edge.
              const propEnclosingClassId = findEnclosingClassId(captureMap['call'], file.path);
              for (const item of routed.items) {
                const nodeId = generateId('Property', `${file.path}:${item.propName}`);
                result.nodes.push({
                  id: nodeId,
                  label: 'Property',
                  properties: {
                    name: item.propName,
                    filePath: file.path,
                    startLine: item.startLine,
                    endLine: item.endLine,
                    language,
                    isExported: true,
                    description: item.accessorType,
                  },
                });
                result.symbols.push({
                  filePath: file.path,
                  name: item.propName,
                  nodeId,
                  type: 'Property',
                  ...(propEnclosingClassId ? { ownerId: propEnclosingClassId } : {}),
                  ...(item.declaredType ? { declaredType: item.declaredType } : {}),
                });
                const fileId = generateId('File', file.path);
                const relId = generateId('DEFINES', `${fileId}->${nodeId}`);
                result.relationships.push({
                  id: relId,
                  sourceId: fileId,
                  targetId: nodeId,
                  type: 'DEFINES',
                  confidence: 1.0,
                  reason: '',
                });
                if (propEnclosingClassId) {
                  result.relationships.push({
                    id: generateId('HAS_PROPERTY', `${propEnclosingClassId}->${nodeId}`),
                    sourceId: propEnclosingClassId,
                    targetId: nodeId,
                    type: 'HAS_PROPERTY',
                    confidence: 1.0,
                    reason: '',
                  });
                }
              }
              continue;
            }

            // kind === 'call' — fall through to normal call processing below
          }

          if (!isBuiltInOrNoise(calledName)) {
            const callNode = captureMap['call'];
            const sourceId = findEnclosingFunctionId(callNode, file.path) || generateId('File', file.path);
            const callForm = inferCallForm(callNode, callNameNode);
            let receiverName = callForm === 'member' ? extractReceiverName(callNameNode) : undefined;
            let receiverTypeName = receiverName ? typeEnv.lookup(receiverName, callNode) : undefined;
            let receiverMixedChain: MixedChainStep[] | undefined;

            // When the receiver is a complex expression (call chain, field chain, or mixed),
            // extractReceiverName returns undefined. Walk the receiver node to build a unified
            // mixed chain for deferred resolution in processCallsFromExtracted.
            if (callForm === 'member' && receiverName === undefined && !receiverTypeName) {
              const receiverNode = extractReceiverNode(callNameNode);
              if (receiverNode) {
                const extracted = extractMixedChain(receiverNode);
                if (extracted && extracted.chain.length > 0) {
                  receiverMixedChain = extracted.chain;
                  receiverName = extracted.baseReceiverName;
                  // Try the type environment immediately for the base receiver
                  // (covers explicitly-typed locals and annotated parameters).
                  if (receiverName) {
                    receiverTypeName = typeEnv.lookup(receiverName, callNode);
                  }
                }
              }
            }

            // Optional fields are only included when they have a value.
            result.calls.push({
              filePath: file.path,
              calledName,
              sourceId,
              argCount: countCallArguments(callNode),
              ...(callForm !== undefined ? { callForm } : {}),
              ...(receiverName !== undefined ? { receiverName } : {}),
              ...(receiverTypeName !== undefined ? { receiverTypeName } : {}),
              ...(receiverMixedChain !== undefined ? { receiverMixedChain } : {}),
            });
          }
        }
        continue;
      }

      // Extract heritage (extends/implements)
      if (captureMap['heritage.class']) {
        if (captureMap['heritage.extends']) {
          // Go struct embedding: the query matches ALL field_declarations with
          // type_identifier, but only anonymous fields (no name) are embedded.
          // Named fields like `Breed string` also match — skip them.
          const extendsNode = captureMap['heritage.extends'];
          const fieldDecl = extendsNode.parent;
          const isNamedField = fieldDecl?.type === 'field_declaration'
            && fieldDecl.childForFieldName('name');
          if (!isNamedField) {
            result.heritage.push({
              filePath: file.path,
              className: captureMap['heritage.class'].text,
              parentName: captureMap['heritage.extends'].text,
              kind: 'extends',
            });
          }
        }
        if (captureMap['heritage.implements']) {
          result.heritage.push({
            filePath: file.path,
            className: captureMap['heritage.class'].text,
            parentName: captureMap['heritage.implements'].text,
            kind: 'implements',
          });
        }
        if (captureMap['heritage.trait']) {
          result.heritage.push({
            filePath: file.path,
            className: captureMap['heritage.class'].text,
            parentName: captureMap['heritage.trait'].text,
            kind: 'trait-impl',
          });
        }
        // A heritage.class capture with none of the three companions falls
        // through to definition handling below.
        if (captureMap['heritage.extends'] || captureMap['heritage.implements'] || captureMap['heritage.trait']) {
          continue;
        }
      }

      const nodeLabel = getLabelFromCaptures(captureMap);
      if (!nodeLabel) continue;

      // C/C++: @definition.function is broad and also matches inline class methods (inside
      // a class/struct body). Those are already captured by @definition.method, so skip
      // the duplicate Function entry to prevent double-indexing in globalIndex.
      if (
        (language === SupportedLanguages.CPlusPlus || language === SupportedLanguages.C) &&
        nodeLabel === 'Function'
      ) {
        let ancestor = captureMap['definition.function']?.parent;
        while (ancestor) {
          if (ancestor.type === 'class_specifier' || ancestor.type === 'struct_specifier') {
            break; // inside a class body — duplicate of @definition.method
          }
          ancestor = ancestor.parent;
        }
        if (ancestor) continue; // found a class/struct ancestor → skip
      }

      const nameNode = captureMap['name'];
      // Synthesize name for constructors without explicit @name capture (e.g. Swift init)
      if (!nameNode && nodeLabel !== 'Constructor') continue;
      const nodeName = nameNode ? nameNode.text : 'init';

      const definitionNode = getDefinitionNodeFromCaptures(captureMap);
      // Fallback order for the start line: definition node, then name node, then 0.
      const startLine = definitionNode ? definitionNode.startPosition.row : (nameNode ? nameNode.startPosition.row : 0);
      // The ID input is built from label, file path and name only.
      const nodeId = generateId(nodeLabel, `${file.path}:${nodeName}`);

      let description: string | undefined;
      if (language === SupportedLanguages.PHP) {
        if (nodeLabel === 'Property' && captureMap['definition.property']) {
          description = extractPhpPropertyDescription(nodeName, captureMap['definition.property']) ?? undefined;
        } else if (nodeLabel === 'Method' && captureMap['definition.method']) {
          description = extractEloquentRelationDescription(captureMap['definition.method']) ?? undefined;
        }
      }

      // Only the first 300 characters of the definition text are inspected.
      const frameworkHint = definitionNode
        ? detectFrameworkFromAST(language, (definitionNode.text || '').slice(0, 300))
        : null;

      let parameterCount: number | undefined;
      let requiredParameterCount: number | undefined;
      let parameterTypes: string[] | undefined;
      let returnType: string | undefined;
      let declaredType: string | undefined;
      if (nodeLabel === 'Function' || nodeLabel === 'Method' || nodeLabel === 'Constructor') {
        const sig = extractMethodSignature(definitionNode);
        parameterCount = sig.parameterCount;
        requiredParameterCount = sig.requiredParameterCount;
        parameterTypes = sig.parameterTypes;
        returnType = sig.returnType;

        // Language-specific return type fallback (e.g. Ruby YARD @return [Type])
        // Also upgrades uninformative AST types like PHP `array` with PHPDoc `@return User[]`
        if ((!returnType || returnType === 'array' || returnType === 'iterable') && definitionNode) {
          const tc = typeConfigs[language as keyof typeof typeConfigs];
          if (tc?.extractReturnType) {
            const docReturn = tc.extractReturnType(definitionNode);
            if (docReturn) returnType = docReturn;
          }
        }
      } else if (nodeLabel === 'Property' && definitionNode) {
        // Extract the declared type for property/field nodes.
        // Walk the definition node for type annotation children.
        declaredType = extractPropertyDeclaredType(definitionNode);
      }

      // Note: declaredType is recorded on the symbol entry below, not on the node.
      result.nodes.push({
        id: nodeId,
        label: nodeLabel,
        properties: {
          name: nodeName,
          filePath: file.path,
          startLine: definitionNode ? definitionNode.startPosition.row : startLine,
          endLine: definitionNode ? definitionNode.endPosition.row : startLine,
          language: language,
          isExported: isNodeExported(nameNode || definitionNode, nodeName, language),
          ...(frameworkHint ? {
            astFrameworkMultiplier: frameworkHint.entryPointMultiplier,
            astFrameworkReason: frameworkHint.reason,
          } : {}),
          ...(description !== undefined ? { description } : {}),
          ...(parameterCount !== undefined ? { parameterCount } : {}),
          ...(requiredParameterCount !== undefined ? { requiredParameterCount } : {}),
          ...(parameterTypes !== undefined ? { parameterTypes } : {}),
          ...(returnType !== undefined ? { returnType } : {}),
        },
      });

      // Compute enclosing class for Method/Constructor/Property/Function — used for both ownerId and HAS_METHOD
      // Function is included because Kotlin/Rust/Python capture class methods as Function nodes
      const needsOwner = nodeLabel === 'Method' || nodeLabel === 'Constructor' || nodeLabel === 'Property' || nodeLabel === 'Function';
      const enclosingClassId = needsOwner ? findEnclosingClassId(nameNode || definitionNode, file.path) : null;

      result.symbols.push({
        filePath: file.path,
        name: nodeName,
        nodeId,
        type: nodeLabel,
        ...(parameterCount !== undefined ? { parameterCount } : {}),
        ...(requiredParameterCount !== undefined ? { requiredParameterCount } : {}),
        ...(parameterTypes !== undefined ? { parameterTypes } : {}),
        ...(returnType !== undefined ? { returnType } : {}),
        ...(declaredType !== undefined ? { declaredType } : {}),
        ...(enclosingClassId ? { ownerId: enclosingClassId } : {}),
      });

      // Every definition is linked to its file with a DEFINES edge.
      const fileId = generateId('File', file.path);
      const relId = generateId('DEFINES', `${fileId}->${nodeId}`);
      result.relationships.push({
        id: relId,
        sourceId: fileId,
        targetId: nodeId,
        type: 'DEFINES',
        confidence: 1.0,
        reason: '',
      });

      // ── HAS_METHOD / HAS_PROPERTY: link member to enclosing class ──
      if (enclosingClassId) {
        const memberEdgeType = nodeLabel === 'Property' ?
'HAS_PROPERTY' : 'HAS_METHOD';
        result.relationships.push({
          id: generateId(memberEdgeType, `${enclosingClassId}->${nodeId}`),
          sourceId: enclosingClassId,
          targetId: nodeId,
          type: memberEdgeType,
          confidence: 1.0,
          reason: '',
        });
      }
    }

    // Extract Laravel routes from route files via procedural AST walk
    if (language === SupportedLanguages.PHP && (file.path.includes('/routes/') || file.path.startsWith('routes/')) && file.path.endsWith('.php')) {
      const extractedRoutes = extractLaravelRoutes(tree, file.path);
      result.routes.push(...extractedRoutes);
    }
  }
};

// ============================================================================
// Worker message handler — supports sub-batch streaming
// ============================================================================

/** Build a fresh, empty result container. */
const createEmptyResult = (): ParseWorkerResult => ({
  nodes: [],
  relationships: [],
  symbols: [],
  imports: [],
  calls: [],
  assignments: [],
  heritage: [],
  routes: [],
  constructorBindings: [],
  skippedLanguages: {},
  fileCount: 0,
});

/** Accumulated result across sub-batches */
let accumulated: ParseWorkerResult = createEmptyResult();
let cumulativeProcessed = 0;

/** Append everything in `src` to `target`, summing the per-language skip counts and file counts. */
const mergeResult = (target: ParseWorkerResult, src: ParseWorkerResult) => {
  target.nodes.push(...src.nodes);
  target.relationships.push(...src.relationships);
  target.symbols.push(...src.symbols);
  target.imports.push(...src.imports);
  target.calls.push(...src.calls);
  target.assignments.push(...src.assignments);
  target.heritage.push(...src.heritage);
  target.routes.push(...src.routes);
  target.constructorBindings.push(...src.constructorBindings);
  Object.entries(src.skippedLanguages).forEach(([lang, count]) => {
    target.skippedLanguages[lang] = (target.skippedLanguages[lang] || 0) + count;
  });
  target.fileCount += src.fileCount;
};

/**
 * Worker entry point. Three message shapes are understood:
 *   { type: 'sub-batch', files }  parse and accumulate, then reply 'sub-batch-done'
 *   { type: 'flush' }             reply with the accumulated result and reset state
 *   ParseWorkerInput[]            legacy single-shot mode, replies with the result directly
 * Any exception is reported back as { type: 'error', error }.
 */
parentPort!.on('message', (msg: any) => {
  try {
    const kind = msg?.type;

    if (kind === 'sub-batch') {
      // Sub-batch mode: { type: 'sub-batch', files: [...] }
      const batchResult = processBatch(msg.files, (filesProcessed) => {
        // Progress is reported relative to all sub-batches seen since the last flush.
        parentPort!.postMessage({ type: 'progress', filesProcessed: cumulativeProcessed + filesProcessed });
      });
      cumulativeProcessed += batchResult.fileCount;
      mergeResult(accumulated, batchResult);
      // Signal ready for next sub-batch
      parentPort!.postMessage({ type: 'sub-batch-done' });
    } else if (kind === 'flush') {
      // Flush: send accumulated results
      parentPort!.postMessage({ type: 'result', data: accumulated });
      // Reset for potential reuse
      accumulated = createEmptyResult();
      cumulativeProcessed = 0;
    } else if (Array.isArray(msg)) {
      // Legacy single-message mode (backward compat): array of files
      const legacyResult = processBatch(msg, (filesProcessed) => {
        parentPort!.postMessage({ type: 'progress', filesProcessed });
      });
      parentPort!.postMessage({ type: 'result', data: legacyResult });
    }
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    parentPort!.postMessage({ type: 'error', error: message });
  }
});

================================================
FILE: gitnexus/src/core/ingestion/workers/worker-pool.ts
================================================
import { Worker } from 'node:worker_threads';
import os from 'node:os';
import fs from 'node:fs';
import { fileURLToPath } from 'node:url';

export interface WorkerPool {
  /**
   * Dispatch items across workers. Items are split into chunks (one per worker),
   * each worker processes its chunk via sub-batches to limit peak memory,
   * and the promise resolves with one result per chunk, in chunk order.
   */
  dispatch<TInput, TResult>(items: TInput[], onProgress?: (filesProcessed: number) => void): Promise<TResult[]>;

  /** Terminate all workers. Must be called when done.
/**
 * Max files to send to a worker in a single postMessage.
 * Keeps structured-clone memory bounded per sub-batch.
 */
const SUB_BATCH_SIZE = 1500;

/** Per-message timeout. If a single sub-batch (or the final flush) takes longer
 *  than this, likely a pathological file (e.g. minified 50MB JS). Fail fast. */
const SUB_BATCH_TIMEOUT_MS = 30_000;

/**
 * Create a pool of worker threads.
 *
 * @param workerUrl - File URL of the worker script; it must exist on disk.
 * @param poolSize - Number of workers to spawn. Defaults to one less than the
 *   CPU count, clamped to the range 1..8.
 * @returns A pool exposing `dispatch`, `terminate` and `size`.
 * @throws Error when the worker script does not exist.
 * @throws RangeError when the resolved pool size is not a positive integer.
 *
 * Protocol spoken with each worker (as implemented by `dispatch` below):
 *   -> `{ type: 'sub-batch', files }`   answered by `{ type: 'sub-batch-done' }`
 *   -> `{ type: 'flush' }`              answered by `{ type: 'result', data }`
 *   <- `{ type: 'progress', filesProcessed }` at any time
 *   <- `{ type: 'error', error }` rejects the chunk
 * Any other message is treated as the chunk's result as-is.
 *
 * NOTE(review): a timed-out or failed chunk only rejects its promise; the
 * worker itself is not terminated here and may still be busy.
 */
export const createWorkerPool = (workerUrl: URL, poolSize?: number): WorkerPool => {
  // Validate worker script exists before spawning to prevent uncaught
  // MODULE_NOT_FOUND crashes in worker threads (e.g. when running from src/ via vitest)
  const workerPath = fileURLToPath(workerUrl);
  if (!fs.existsSync(workerPath)) {
    throw new Error(`Worker script not found: ${workerPath}`);
  }

  const size = poolSize ?? Math.min(8, Math.max(1, os.cpus().length - 1));
  // A zero/negative/fractional size would yield a pool with no usable workers
  // (and an infinite chunk size in dispatch), so reject it up front.
  if (!Number.isInteger(size) || size < 1) {
    throw new RangeError(`Worker pool size must be a positive integer (got ${size})`);
  }

  const workers: Worker[] = [];
  for (let i = 0; i < size; i++) {
    workers.push(new Worker(workerUrl));
  }

  const dispatch = <TInput, TResult>(
    items: TInput[],
    onProgress?: (filesProcessed: number) => void,
  ): Promise<TResult[]> => {
    if (items.length === 0) return Promise.resolve([]);
    if (workers.length === 0) {
      // terminate() empties the worker list; fail with a clear message
      // instead of a TypeError on an undefined worker.
      return Promise.reject(new Error('Worker pool has been terminated; create a new pool before dispatching.'));
    }

    // One contiguous chunk per worker; the last chunk may be shorter.
    const chunkSize = Math.ceil(items.length / size);
    const chunks: TInput[][] = [];
    for (let i = 0; i < items.length; i += chunkSize) {
      chunks.push(items.slice(i, i + chunkSize));
    }

    // Latest cumulative progress reported by each worker.
    const workerProgress = new Array<number>(chunks.length).fill(0);

    const promises = chunks.map((chunk, i) => {
      const worker = workers[i];

      return new Promise<TResult>((resolve, reject) => {
        let settled = false;
        let messageTimer: ReturnType<typeof setTimeout> | null = null;

        const cleanup = () => {
          if (messageTimer) clearTimeout(messageTimer);
          worker.removeListener('message', handler);
          worker.removeListener('error', errorHandler);
          worker.removeListener('exit', exitHandler);
        };

        // Settle the chunk promise at most once and always detach listeners.
        const settle = (finish: () => void) => {
          if (settled) return;
          settled = true;
          cleanup();
          finish();
        };

        const resetMessageTimer = () => {
          if (messageTimer) clearTimeout(messageTimer);
          messageTimer = setTimeout(() => {
            settle(() => reject(new Error(`Worker ${i} sub-batch timed out after ${SUB_BATCH_TIMEOUT_MS / 1000}s (chunk: ${chunk.length} items).`)));
          }, SUB_BATCH_TIMEOUT_MS);
        };

        let subBatchIdx = 0;

        const sendNextSubBatch = () => {
          const start = subBatchIdx * SUB_BATCH_SIZE;
          // Restart the timer for every outgoing message, including the flush,
          // so the flush gets its own full window instead of whatever is left
          // of the previous sub-batch's timer.
          resetMessageTimer();
          if (start >= chunk.length) {
            worker.postMessage({ type: 'flush' });
            return;
          }
          const subBatch = chunk.slice(start, start + SUB_BATCH_SIZE);
          subBatchIdx++;
          worker.postMessage({ type: 'sub-batch', files: subBatch });
        };

        const handler = (msg: any) => {
          if (settled) return;
          if (msg && msg.type === 'progress') {
            workerProgress[i] = msg.filesProcessed;
            if (onProgress) {
              onProgress(workerProgress.reduce((a, b) => a + b, 0));
            }
          } else if (msg && msg.type === 'sub-batch-done') {
            sendNextSubBatch();
          } else if (msg && msg.type === 'error') {
            settle(() => reject(new Error(`Worker ${i} error: ${msg.error}`)));
          } else if (msg && msg.type === 'result') {
            settle(() => resolve(msg.data));
          } else {
            // Unrecognised message shape: hand it back as the result unchanged.
            settle(() => resolve(msg));
          }
        };

        const errorHandler = (err: Error) => {
          settle(() => reject(err));
        };

        const exitHandler = (code: number) => {
          settle(() => reject(new Error(`Worker ${i} exited with code ${code}. Likely OOM or native addon failure.`)));
        };

        worker.on('message', handler);
        worker.once('error', errorHandler);
        worker.once('exit', exitHandler);
        sendNextSubBatch();
      });
    });

    return Promise.all(promises);
  };

  const terminate = async (): Promise<void> => {
    await Promise.all(workers.map(w => w.terminate()));
    workers.length = 0;
  };

  return { dispatch, terminate, size };
};
/**
 * Normalise text so it can be written as well-formed UTF-8 CSV content.
 *
 * - CRLF and lone CR become LF.
 * - C0 control characters other than TAB and LF, plus DEL, are removed.
 * - Unpaired UTF-16 surrogates are removed; valid surrogate pairs (emoji and
 *   other characters outside the BMP) are kept intact.
 * - The noncharacters U+FFFE and U+FFFF are removed.
 *
 * @param str - Raw text.
 * @returns The sanitised text.
 */
export const sanitizeUTF8 = (str: string): string => {
  return str
    .replace(/\r\n/g, '\n')
    .replace(/\r/g, '\n')
    .replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '')
    // The first alternative consumes well-formed pairs (kept as-is); whatever
    // surrogate is matched on its own by the second alternative is unpaired.
    .replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDFFF]/g, (match) => (match.length === 2 ? match : ''))
    .replace(/[\uFFFE\uFFFF]/g, '');
};

/**
 * Render a value as an always-quoted RFC 4180 CSV field.
 *
 * `undefined`/`null` become an empty quoted field. Everything else is
 * stringified, sanitised with {@link sanitizeUTF8}, and has embedded double
 * quotes doubled.
 *
 * @param value - Value to encode.
 * @returns The quoted field, including the surrounding double quotes.
 */
export const escapeCSVField = (value: string | number | undefined | null): string => {
  if (value === undefined || value === null) return '""';
  const sanitized = sanitizeUTF8(String(value));
  return `"${sanitized.replace(/"/g, '""')}"`;
};

/**
 * Render a number as an unquoted CSV field.
 *
 * Falls back to `defaultValue` when the value is missing or is a non-finite
 * number (NaN / ±Infinity), since those would otherwise be written verbatim
 * into a numeric column.
 *
 * @param value - Number to encode.
 * @param defaultValue - Substitute for missing/non-finite values (default -1).
 * @returns The decimal string form.
 */
export const escapeCSVNumber = (value: number | undefined | null, defaultValue: number = -1): string => {
  if (value === undefined || value === null) return String(defaultValue);
  if (typeof value === 'number' && !Number.isFinite(value)) return String(defaultValue);
  return String(value);
};
/**
 * LRU content cache — avoids re-reading the same source file for every
 * symbol defined in it. Sized generously so most files stay cached during
 * the single-pass node iteration.
 *
 * Recency is tracked through the Map's own insertion order: the first key is
 * the least recently used one, and a hit re-inserts its key at the end. This
 * keeps hits O(1) and leaves no second structure to fall out of sync.
 */
class FileContentCache {
  private cache = new Map<string, string>();
  private maxSize: number;
  private repoPath: string;

  /**
   * @param repoPath - Directory that relative paths are resolved against.
   * @param maxSize - Maximum number of files kept in memory (default 3000).
   */
  constructor(repoPath: string, maxSize: number = 3000) {
    this.repoPath = repoPath;
    this.maxSize = maxSize;
  }

  /**
   * Return the UTF-8 content of a file, reading it from disk on a miss.
   *
   * @param relativePath - Path relative to the repo root; an empty path yields ''.
   * @returns The file content, or '' when the file cannot be read. The empty
   *   result is cached too, so an unreadable file is not retried while cached.
   */
  async get(relativePath: string): Promise<string> {
    if (!relativePath) return '';

    const cached = this.cache.get(relativePath);
    if (cached !== undefined) {
      // LRU promotion: re-insert so this key becomes the most recent one.
      this.cache.delete(relativePath);
      this.cache.set(relativePath, cached);
      return cached;
    }

    let content = '';
    try {
      content = await fs.readFile(path.join(this.repoPath, relativePath), 'utf-8');
    } catch {
      // Missing/unreadable file: fall through and cache the empty string.
    }
    this.set(relativePath, content);
    return content;
  }

  /** Store an entry as most recent, evicting the least recent one when full. */
  private set(key: string, value: string) {
    if (this.cache.has(key)) {
      // Refreshing an existing key must not evict an unrelated entry.
      this.cache.delete(key);
    } else if (this.cache.size >= this.maxSize) {
      const oldest = this.cache.keys().next().value;
      if (oldest !== undefined) this.cache.delete(oldest);
    }
    this.cache.set(key, value);
  }
}
/**
 * Line-buffered writer for one CSV file.
 *
 * Rows are collected in memory and written to the underlying stream in
 * batches. The header is queued as the first line and is not counted in
 * `rows`.
 *
 * Stream errors are captured by a listener installed for the lifetime of the
 * writer: the first error is remembered and reported by the next `flush()` /
 * `finish()` call instead of surfacing as an unhandled 'error' event.
 */
class BufferedCSVWriter {
  private ws: WriteStream;
  private buffer: string[] = [];
  private failure: Error | null = null;
  private flushEvery: number;
  /** Number of data rows added so far (header excluded). */
  rows = 0;

  /**
   * @param filePath - Destination file; opened for writing immediately.
   * @param header - Header line, written before any row.
   * @param flushEvery - Buffered line count that triggers a flush
   *   (defaults to the module-wide FLUSH_EVERY).
   */
  constructor(filePath: string, header: string, flushEvery: number = FLUSH_EVERY) {
    this.flushEvery = flushEvery;
    this.ws = createWriteStream(filePath, 'utf-8');
    // Large repos flush many times — raise listener cap to avoid MaxListenersExceededWarning
    this.ws.setMaxListeners(50);
    // Remember the first stream error (open failure, disk full, ...) so it is
    // reported to the caller rather than thrown as an uncaught event.
    this.ws.on('error', (err: Error) => {
      if (!this.failure) this.failure = err;
    });
    this.buffer.push(header);
  }

  /**
   * Queue one row; flushes once the buffer reaches the threshold.
   * @returns A promise that settles when any triggered flush has completed.
   */
  addRow(row: string): Promise<void> {
    this.buffer.push(row);
    this.rows++;
    if (this.buffer.length >= this.flushEvery) {
      return this.flush();
    }
    return Promise.resolve();
  }

  /**
   * Hand all buffered lines to the stream.
   * Resolves immediately when the stream accepts the chunk without
   * back-pressure, otherwise once 'drain' fires. Rejects when the stream has
   * failed, now or earlier.
   */
  flush(): Promise<void> {
    if (this.failure) return Promise.reject(this.failure);
    if (this.buffer.length === 0) return Promise.resolve();

    const chunk = this.buffer.join('\n') + '\n';
    this.buffer.length = 0;

    if (this.ws.write(chunk)) return Promise.resolve();

    // Back-pressure: wait for drain, but do not hang if the stream errors.
    return new Promise<void>((resolve, reject) => {
      const onDrain = () => {
        this.ws.removeListener('error', onError);
        resolve();
      };
      const onError = (err: Error) => {
        this.ws.removeListener('drain', onDrain);
        reject(err);
      };
      this.ws.once('drain', onDrain);
      this.ws.once('error', onError);
    });
  }

  /**
   * Flush what is left and close the file.
   * Resolves on the stream's 'finish' event; rejects if the stream failed.
   */
  async finish(): Promise<void> {
    await this.flush();
    await new Promise<void>((resolve, reject) => {
      if (this.failure) {
        reject(this.failure);
        return;
      }
      // Listeners are attached before end() so no outcome can be missed.
      this.ws.once('finish', () => resolve());
      this.ws.once('error', reject);
      this.ws.end();
    });
  }
}
/**
 * Stream all CSV data directly to disk files.
 * Iterates graph nodes exactly ONCE — routes each node to the right writer.
 * File contents are lazy-read from disk with a generous LRU cache.
 *
 * Steps:
 *  1. Wipe and recreate `csvDir`.
 *  2. Open one buffered writer per node table and route every node to it by
 *     label (nodes with a label that has no writer are dropped).
 *  3. Close the node writers, then write all relationships to `relations.csv`.
 *  4. Report only the node tables that received at least one row.
 *
 * The process-wide listener limit is raised while the streams are open and is
 * restored in a `finally`, so a failure part-way through does not leave the
 * raised limit behind.
 *
 * @param graph - Knowledge graph providing `iterNodes()` / `iterRelationships()`.
 * @param repoPath - Repository root used to read source content for nodes.
 * @param csvDir - Output directory; any existing content is removed first.
 * @returns Paths and row counts of the generated node and relationship CSVs.
 */
export const streamAllCSVsToDisk = async (
  graph: KnowledgeGraph,
  repoPath: string,
  csvDir: string,
): Promise<StreamedCSVResult> => {
  // Remove stale CSVs from previous crashed runs, then recreate
  try { await fs.rm(csvDir, { recursive: true, force: true }); } catch {}
  await fs.mkdir(csvDir, { recursive: true });

  // We open ~30 concurrent write-streams; raise process limit to suppress
  // MaxListenersExceededWarning (restored after all streams finish).
  const prevMax = process.getMaxListeners();
  process.setMaxListeners(prevMax + 40);

  try {
    const contentCache = new FileContentCache(repoPath);

    // Create writers for every node type up-front
    const fileWriter = new BufferedCSVWriter(path.join(csvDir, 'file.csv'), 'id,name,filePath,content');
    const folderWriter = new BufferedCSVWriter(path.join(csvDir, 'folder.csv'), 'id,name,filePath');
    const codeElementHeader = 'id,name,filePath,startLine,endLine,isExported,content,description';
    const functionWriter = new BufferedCSVWriter(path.join(csvDir, 'function.csv'), codeElementHeader);
    const classWriter = new BufferedCSVWriter(path.join(csvDir, 'class.csv'), codeElementHeader);
    const interfaceWriter = new BufferedCSVWriter(path.join(csvDir, 'interface.csv'), codeElementHeader);
    const methodHeader = 'id,name,filePath,startLine,endLine,isExported,content,description,parameterCount,returnType';
    const methodWriter = new BufferedCSVWriter(path.join(csvDir, 'method.csv'), methodHeader);
    const codeElemWriter = new BufferedCSVWriter(path.join(csvDir, 'codeelement.csv'), codeElementHeader);
    const communityWriter = new BufferedCSVWriter(path.join(csvDir, 'community.csv'), 'id,label,heuristicLabel,keywords,description,enrichedBy,cohesion,symbolCount');
    const processWriter = new BufferedCSVWriter(path.join(csvDir, 'process.csv'), 'id,label,heuristicLabel,processType,stepCount,communities,entryPointId,terminalId');

    // Multi-language node types share the same CSV shape (no isExported column)
    const multiLangHeader = 'id,name,filePath,startLine,endLine,content,description';
    const MULTI_LANG_TYPES = ['Struct', 'Enum', 'Macro', 'Typedef', 'Union', 'Namespace', 'Trait', 'Impl',
      'TypeAlias', 'Const', 'Static', 'Property', 'Record', 'Delegate', 'Annotation', 'Constructor', 'Template', 'Module'] as const;
    const multiLangWriters = new Map<string, BufferedCSVWriter>();
    for (const t of MULTI_LANG_TYPES) {
      multiLangWriters.set(t, new BufferedCSVWriter(path.join(csvDir, `${t.toLowerCase()}.csv`), multiLangHeader));
    }

    const codeWriterMap: Record<string, BufferedCSVWriter> = {
      'Function': functionWriter,
      'Class': classWriter,
      'Interface': interfaceWriter,
      'CodeElement': codeElemWriter,
    };

    // File nodes are de-duplicated by id; only the first occurrence is written.
    const seenFileIds = new Set<string>();

    // --- SINGLE PASS over all nodes ---
    for (const node of graph.iterNodes()) {
      switch (node.label) {
        case 'File': {
          if (seenFileIds.has(node.id)) break;
          seenFileIds.add(node.id);
          const content = await extractContent(node, contentCache);
          await fileWriter.addRow([
            escapeCSVField(node.id),
            escapeCSVField(node.properties.name || ''),
            escapeCSVField(node.properties.filePath || ''),
            escapeCSVField(content),
          ].join(','));
          break;
        }
        case 'Folder':
          await folderWriter.addRow([
            escapeCSVField(node.id),
            escapeCSVField(node.properties.name || ''),
            escapeCSVField(node.properties.filePath || ''),
          ].join(','));
          break;
        case 'Community': {
          // Keywords are emitted as a bracketed list literal, written unquoted.
          const keywords = (node.properties as any).keywords || [];
          const keywordsStr = `[${keywords.map((k: string) => `'${k.replace(/\\/g, '\\\\').replace(/'/g, "''").replace(/,/g, '\\,')}'`).join(',')}]`;
          await communityWriter.addRow([
            escapeCSVField(node.id),
            escapeCSVField(node.properties.name || ''),
            escapeCSVField(node.properties.heuristicLabel || ''),
            keywordsStr,
            escapeCSVField((node.properties as any).description || ''),
            escapeCSVField((node.properties as any).enrichedBy || 'heuristic'),
            escapeCSVNumber(node.properties.cohesion, 0),
            escapeCSVNumber(node.properties.symbolCount, 0),
          ].join(','));
          break;
        }
        case 'Process': {
          // Unlike Community keywords, the communities list literal is quoted as a CSV field.
          const communities = (node.properties as any).communities || [];
          const communitiesStr = `[${communities.map((c: string) => `'${c.replace(/'/g, "''")}'`).join(',')}]`;
          await processWriter.addRow([
            escapeCSVField(node.id),
            escapeCSVField(node.properties.name || ''),
            escapeCSVField((node.properties as any).heuristicLabel || ''),
            escapeCSVField((node.properties as any).processType || ''),
            escapeCSVNumber((node.properties as any).stepCount, 0),
            escapeCSVField(communitiesStr),
            escapeCSVField((node.properties as any).entryPointId || ''),
            escapeCSVField((node.properties as any).terminalId || ''),
          ].join(','));
          break;
        }
        case 'Method': {
          const content = await extractContent(node, contentCache);
          await methodWriter.addRow([
            escapeCSVField(node.id),
            escapeCSVField(node.properties.name || ''),
            escapeCSVField(node.properties.filePath || ''),
            escapeCSVNumber(node.properties.startLine, -1),
            escapeCSVNumber(node.properties.endLine, -1),
            node.properties.isExported ? 'true' : 'false',
            escapeCSVField(content),
            escapeCSVField((node.properties as any).description || ''),
            escapeCSVNumber(node.properties.parameterCount, 0),
            escapeCSVField(node.properties.returnType || ''),
          ].join(','));
          break;
        }
        default: {
          // Code element nodes (Function, Class, Interface, CodeElement)
          const writer = codeWriterMap[node.label];
          if (writer) {
            const content = await extractContent(node, contentCache);
            await writer.addRow([
              escapeCSVField(node.id),
              escapeCSVField(node.properties.name || ''),
              escapeCSVField(node.properties.filePath || ''),
              escapeCSVNumber(node.properties.startLine, -1),
              escapeCSVNumber(node.properties.endLine, -1),
              node.properties.isExported ? 'true' : 'false',
              escapeCSVField(content),
              escapeCSVField((node.properties as any).description || ''),
            ].join(','));
          } else {
            // Multi-language node types (Struct, Impl, Trait, Macro, etc.)
            const mlWriter = multiLangWriters.get(node.label);
            if (mlWriter) {
              const content = await extractContent(node, contentCache);
              await mlWriter.addRow([
                escapeCSVField(node.id),
                escapeCSVField(node.properties.name || ''),
                escapeCSVField(node.properties.filePath || ''),
                escapeCSVNumber(node.properties.startLine, -1),
                escapeCSVNumber(node.properties.endLine, -1),
                escapeCSVField(content),
                escapeCSVField((node.properties as any).description || ''),
              ].join(','));
            }
          }
          break;
        }
      }
    }

    // Finish all node writers
    const allWriters = [fileWriter, folderWriter, functionWriter, classWriter, interfaceWriter, methodWriter, codeElemWriter, communityWriter, processWriter, ...multiLangWriters.values()];
    await Promise.all(allWriters.map(w => w.finish()));

    // --- Stream relationship CSV ---
    const relCsvPath = path.join(csvDir, 'relations.csv');
    const relWriter = new BufferedCSVWriter(relCsvPath, 'from,to,type,confidence,reason,step');
    for (const rel of graph.iterRelationships()) {
      await relWriter.addRow([
        escapeCSVField(rel.sourceId),
        escapeCSVField(rel.targetId),
        escapeCSVField(rel.type),
        escapeCSVNumber(rel.confidence, 1.0),
        escapeCSVField(rel.reason),
        escapeCSVNumber((rel as any).step, 0),
      ].join(','));
    }
    await relWriter.finish();

    // Build result map — only include tables that have rows
    const nodeFiles = new Map<NodeTableName, { csvPath: string; rows: number }>();
    const tableMap: [NodeTableName, BufferedCSVWriter][] = [
      ['File', fileWriter],
      ['Folder', folderWriter],
      ['Function', functionWriter],
      ['Class', classWriter],
      ['Interface', interfaceWriter],
      ['Method', methodWriter],
      ['CodeElement', codeElemWriter],
      ['Community', communityWriter],
      ['Process', processWriter],
      ...Array.from(multiLangWriters.entries()).map(([name, w]) => [name as NodeTableName, w] as [NodeTableName, BufferedCSVWriter]),
    ];
    for (const [name, writer] of tableMap) {
      if (writer.rows > 0) {
        // Every writer above was opened at `<lower-cased table name>.csv`.
        nodeFiles.set(name, { csvPath: path.join(csvDir, `${name.toLowerCase()}.csv`), rows: writer.rows });
      }
    }

    return { nodeFiles, relCsvPath, relRows: relWriter.rows };
  } finally {
    // Restore original process listener limit, on success and on failure alike
    process.setMaxListeners(prevMax);
  }
};
/**
 * (Re)open the module-level LadybugDB database and connection at `dbPath`.
 *
 * Sequence:
 *  1. Close any currently open connection/database and reset module state.
 *  2. Clear leftovers at `dbPath`: a symlink is unlinked (never followed), a
 *     directory is removed after checking that it resolves inside its parent.
 *     An existing regular file is left alone and opened as the database.
 *  3. Ensure the parent directory exists, open the database, run the schema
 *     queries, and record `dbPath` as the active database.
 *
 * Only "path does not exist" is treated as a normal outcome of the probe in
 * step 2. Any other failure — including the containment refusal — propagates
 * to the caller instead of being silently discarded.
 *
 * @param dbPath - Path of the database file.
 * @returns The opened `{ db, conn }` pair.
 * @throws Error when the leftover at `dbPath` cannot be inspected/removed or
 *   resolves outside its parent directory.
 */
const doInitLbug = async (dbPath: string) => {
  // Different database requested — close the old one first
  if (conn || db) {
    try { if (conn) await conn.close(); } catch {}
    try { if (db) await db.close(); } catch {}
    conn = null;
    db = null;
    currentDbPath = null;
    ftsLoaded = false;
  }

  // LadybugDB stores the database as a single file (not a directory).
  // If the path already exists, it must be a valid LadybugDB database file.
  // Remove stale empty directories or files from older versions.
  const stat = await fs.lstat(dbPath).catch((err: unknown) => {
    // Path doesn't exist, which is what LadybugDB wants for a new database
    if ((err as NodeJS.ErrnoException | null)?.code === 'ENOENT') return null;
    throw err;
  });

  if (stat?.isSymbolicLink()) {
    // Never follow symlinks — just remove the link itself
    await fs.unlink(dbPath);
  } else if (stat?.isDirectory()) {
    // Verify path is within expected storage directory before deleting
    const realPath = await fs.realpath(dbPath);
    const realParent = await fs.realpath(path.dirname(dbPath));
    if (!realPath.startsWith(realParent + path.sep) && realPath !== realParent) {
      throw new Error(`Refusing to delete ${dbPath}: resolved path ${realPath} is outside storage directory`);
    }
    // Old-style directory database or empty leftover - remove it
    await fs.rm(dbPath, { recursive: true, force: true });
  }
  // If it's a file, assume it's an existing LadybugDB database - LadybugDB will open it

  // Ensure parent directory exists
  const parentDir = path.dirname(dbPath);
  await fs.mkdir(parentDir, { recursive: true });

  db = new lbug.Database(dbPath);
  conn = new lbug.Connection(db);

  for (const schemaQuery of SCHEMA_QUERIES) {
    try {
      await conn.query(schemaQuery);
    } catch (err) {
      // Only ignore "already exists" errors - log everything else
      const msg = err instanceof Error ? err.message : String(err);
      if (!msg.includes('already exists')) {
        console.warn(`⚠️ Schema creation warning: ${msg.slice(0, 120)}`);
      }
    }
  }

  currentDbPath = dbPath;
  return { db, conn };
};
retryErr.message : String(retryErr); throw new Error(`COPY failed for ${table}: ${retryMsg.slice(0, 200)}`); } } } // Bulk COPY relationships — split by FROM→TO label pair (LadybugDB requires it) // Stream-read the relation CSV line by line to avoid exceeding V8 max string length let relHeader = ''; const relsByPair = new Map<string, string[]>(); let skippedRels = 0; let totalValidRels = 0; await new Promise<void>((resolve, reject) => { const rl = createInterface({ input: createReadStream(csvResult.relCsvPath, 'utf-8'), crlfDelay: Infinity }); let isFirst = true; rl.on('line', (line) => { if (isFirst) { relHeader = line; isFirst = false; return; } if (!line.trim()) return; const match = line.match(/"([^"]*)","([^"]*)"/); if (!match) { skippedRels++; return; } const fromLabel = getNodeLabel(match[1]); const toLabel = getNodeLabel(match[2]); if (!validTables.has(fromLabel) || !validTables.has(toLabel)) { skippedRels++; return; } const pairKey = `${fromLabel}|${toLabel}`; let list = relsByPair.get(pairKey); if (!list) { list = []; relsByPair.set(pairKey, list); } list.push(line); totalValidRels++; }); rl.on('close', resolve); rl.on('error', reject); }); const insertedRels = totalValidRels; const warnings: string[] = []; if (insertedRels > 0) { log(`Loading edges: ${insertedRels.toLocaleString()} across ${relsByPair.size} types`); let pairIdx = 0; let failedPairEdges = 0; const failedPairLines: string[] = []; for (const [pairKey, lines] of relsByPair) { pairIdx++; const [fromLabel, toLabel] = pairKey.split('|'); const pairCsvPath = path.join(csvDir, `rel_${fromLabel}_${toLabel}.csv`); await fs.writeFile(pairCsvPath, relHeader + '\n' + lines.join('\n'), 'utf-8'); const normalizedPath = normalizeCopyPath(pairCsvPath); const copyQuery = `COPY ${REL_TABLE_NAME} FROM "${normalizedPath}" (from="${fromLabel}", to="${toLabel}", HEADER=true, ESCAPE='"', DELIM=',', QUOTE='"', PARALLEL=false, auto_detect=false)`; if (pairIdx % 5 === 0 || lines.length > 1000) { log(`Loading edges: 
${pairIdx}/${relsByPair.size} types (${fromLabel} -> ${toLabel})`); } try { await conn.query(copyQuery); } catch (err) { try { const retryQuery = copyQuery.replace('auto_detect=false)', 'auto_detect=false, IGNORE_ERRORS=true)'); await conn.query(retryQuery); } catch (retryErr) { const retryMsg = retryErr instanceof Error ? retryErr.message : String(retryErr); warnings.push(`${fromLabel}->${toLabel} (${lines.length} edges): ${retryMsg.slice(0, 80)}`); failedPairEdges += lines.length; failedPairLines.push(...lines); } } try { await fs.unlink(pairCsvPath); } catch {} } if (failedPairLines.length > 0) { log(`Inserting ${failedPairEdges} edges individually (missing schema pairs)`); await fallbackRelationshipInserts([relHeader, ...failedPairLines], validTables, getNodeLabel); } } // Cleanup all CSVs try { await fs.unlink(csvResult.relCsvPath); } catch {} for (const [, { csvPath }] of csvResult.nodeFiles) { try { await fs.unlink(csvPath); } catch {} } try { const remaining = await fs.readdir(csvDir); for (const f of remaining) { try { await fs.unlink(path.join(csvDir, f)); } catch {} } } catch {} try { await fs.rmdir(csvDir); } catch {} return { success: true, insertedRels, skippedRels, warnings }; }; // LadybugDB default ESCAPE is '\' (backslash), but our CSV uses RFC 4180 escaping ("" for literal quotes). // Source code content is full of backslashes which confuse the auto-detection. // We MUST explicitly set ESCAPE='"' to use RFC 4180 escaping, and disable auto_detect to prevent // LadybugDB from overriding our settings based on sample rows. 
const COPY_CSV_OPTS = `(HEADER=true, ESCAPE='"', DELIM=',', QUOTE='"', PARALLEL=false, auto_detect=false)`;

// Multi-language table names that were created with backticks in CODE_ELEMENT_BASE
// and must always be referenced with backticks in queries
const BACKTICK_TABLES = new Set([
  'Struct', 'Enum', 'Macro', 'Typedef', 'Union', 'Namespace', 'Trait', 'Impl',
  'TypeAlias', 'Const', 'Static', 'Property', 'Record', 'Delegate', 'Annotation',
  'Constructor', 'Template', 'Module',
]);

/** Wrap a table/label name in backticks when it is listed in BACKTICK_TABLES. */
const escapeTableName = (table: string): string =>
  BACKTICK_TABLES.has(table) ? `\`${table}\`` : table;

/**
 * Escape text for use inside a single-quoted Cypher string literal
 * (the surrounding quotes are NOT added).
 * Backslashes are escaped first (Windows paths), then single quotes, then CR/LF.
 */
const escapeCypherString = (s: string): string =>
  s.replace(/\\/g, '\\\\').replace(/'/g, "''").replace(/\n/g, '\\n').replace(/\r/g, '\\r');

/** Render a JS value as a Cypher literal: NULL, a bare number, or a quoted escaped string. */
const toCypherLiteral = (v: unknown): string => {
  if (v === null || v === undefined) return 'NULL';
  if (typeof v === 'number') return String(v);
  return `'${escapeCypherString(String(v))}'`;
};

/**
 * Fallback: insert relationships one-by-one if COPY fails.
 *
 * @param validRelLines - CSV lines; index 0 is the header and is skipped
 * @param validTables - Labels that exist as node tables; rows touching other labels are skipped
 * @param getNodeLabel - Maps a node id to its table label
 */
const fallbackRelationshipInserts = async (
  validRelLines: string[],
  validTables: Set<string>,
  getNodeLabel: (id: string) => string
) => {
  if (!conn) return;

  // Columns: "from","to","type",confidence,"reason",step
  const relLinePattern = /"([^"]*)","([^"]*)","([^"]*)",([0-9.]+),"([^"]*)",([0-9-]+)/;

  for (let i = 1; i < validRelLines.length; i++) {
    const line = validRelLines[i];
    try {
      const match = line.match(relLinePattern);
      if (!match) continue;
      const [, fromId, toId, relType, confidenceStr, reason, stepStr] = match;

      const fromLabel = getNodeLabel(fromId);
      const toLabel = getNodeLabel(toId);
      if (!validTables.has(fromLabel) || !validTables.has(toLabel)) continue;

      // Only fall back to 1.0 when the value is unparseable; a real confidence of 0 must survive.
      const parsedConfidence = Number.parseFloat(confidenceStr);
      const confidence = Number.isNaN(parsedConfidence) ? 1.0 : parsedConfidence;
      const step = Number.parseInt(stepStr, 10) || 0;

      await conn.query(`
        MATCH (a:${escapeTableName(fromLabel)} {id: '${escapeCypherString(fromId)}' }),
              (b:${escapeTableName(toLabel)} {id: '${escapeCypherString(toId)}' })
        CREATE (a)-[:${REL_TABLE_NAME} {type: '${escapeCypherString(relType)}', confidence: ${confidence}, reason: '${escapeCypherString(reason)}', step: ${step}}]->(b)
      `);
    } catch {
      // Best-effort path: a row that cannot be inserted is skipped
    }
  }
};

/** Tables with isExported column (TypeScript/JS-native types) */
const TABLES_WITH_EXPORTED = new Set<string>(['Function', 'Class', 'Interface', 'Method', 'CodeElement']);

/**
 * Build the COPY statement that bulk-loads one node table from a CSV file.
 * The column list depends on the table's schema.
 *
 * @param table - Target node table
 * @param filePath - CSV path, interpolated as-is between double quotes
 */
const getCopyQuery = (table: NodeTableName, filePath: string): string => {
  let columns: string;
  switch (table) {
    case 'File':
      columns = 'id, name, filePath, content';
      break;
    case 'Folder':
      columns = 'id, name, filePath';
      break;
    case 'Community':
      columns = 'id, label, heuristicLabel, keywords, description, enrichedBy, cohesion, symbolCount';
      break;
    case 'Process':
      columns = 'id, label, heuristicLabel, processType, stepCount, communities, entryPointId, terminalId';
      break;
    case 'Method':
      columns = 'id, name, filePath, startLine, endLine, isExported, content, description, parameterCount, returnType';
      break;
    default:
      // TypeScript/JS code element tables have isExported; multi-language tables
      // (Struct, Impl, Trait, Macro, etc.) do not
      columns = TABLES_WITH_EXPORTED.has(table)
        ? 'id, name, filePath, startLine, endLine, isExported, content, description'
        : 'id, name, filePath, startLine, endLine, content, description';
  }
  return `COPY ${escapeTableName(table)}(${columns}) FROM "${filePath}" ${COPY_CSV_OPTS}`;
};

/**
 * Ordered [property, Cypher literal] pairs for a single-node insert.
 * The first pair is always `id`; `description` is only emitted when truthy.
 */
const nodePropertyLiterals = (
  label: string,
  properties: Record<string, any>
): Array<[string, string]> => {
  const pairs: Array<[string, string]> = [
    ['id', toCypherLiteral(properties.id)],
    ['name', toCypherLiteral(properties.name)],
    ['filePath', toCypherLiteral(properties.filePath)],
  ];
  if (label === 'Folder') return pairs;
  if (label === 'File') {
    pairs.push(['content', toCypherLiteral(properties.content || '')]);
    return pairs;
  }
  pairs.push(['startLine', String(properties.startLine || 0)]);
  pairs.push(['endLine', String(properties.endLine || 0)]);
  // Multi-language tables (Struct, Impl, Trait, Macro, etc.) have no isExported column
  if (TABLES_WITH_EXPORTED.has(label)) {
    pairs.push(['isExported', String(!!properties.isExported)]);
  }
  pairs.push(['content', toCypherLiteral(properties.content || '')]);
  if (properties.description) {
    pairs.push(['description', toCypherLiteral(properties.description)]);
  }
  return pairs;
};

/**
 * Insert a single node to LadybugDB
 * @param label - Node type (File, Function, Class, etc.)
 * @param properties - Node properties
 * @param dbPath - Path to LadybugDB database (optional if already initialized)
 * @returns true when the CREATE succeeded, false when it failed or no connection was usable
 * @throws Error when neither dbPath nor an initialized module-level database is available
 */
export const insertNodeToLbug = async (
  label: string,
  properties: Record<string, any>,
  dbPath?: string
): Promise<boolean> => {
  if (!dbPath && !db) {
    throw new Error('LadybugDB not initialized. Provide dbPath or call initLbug first.');
  }

  try {
    const body = nodePropertyLiterals(label, properties)
      .map(([key, literal]) => `${key}: ${literal}`)
      .join(', ');
    const query = `CREATE (n:${escapeTableName(label)} {${body}})`;

    // Use per-query connection if dbPath provided (avoids lock conflicts)
    if (dbPath) {
      const tempDb = new lbug.Database(dbPath);
      try {
        const tempConn = new lbug.Connection(tempDb);
        try {
          await tempConn.query(query);
          return true;
        } finally {
          try { await tempConn.close(); } catch {}
        }
      } finally {
        try { await tempDb.close(); } catch {}
      }
    } else if (conn) {
      // Use existing persistent connection (when called from analyze)
      await conn.query(query);
      return true;
    }
    return false;
  } catch (e: any) {
    // Node may already exist or other error
    console.error(`Failed to insert ${label} node:`, e.message);
    return false;
  }
};

/**
 * Batch insert multiple nodes to LadybugDB using a single connection
 * @param nodes - Array of {label, properties} to insert
 * @param dbPath - Path to LadybugDB database
 * @returns Object with success count and error count
 */
export const batchInsertNodesToLbug = async (
  nodes: Array<{ label: string; properties: Record<string, any> }>,
  dbPath: string
): Promise<{ inserted: number; failed: number }> => {
  if (nodes.length === 0) return { inserted: 0, failed: 0 };

  let tempDb: lbug.Database | null = null;
  let tempConn: lbug.Connection | null = null;
  let inserted = 0;
  let failed = 0;

  try {
    // Open a single connection for all inserts; opened inside try so a failing
    // Connection constructor cannot leak the Database handle
    tempDb = new lbug.Database(dbPath);
    tempConn = new lbug.Connection(tempDb);

    for (const { label, properties } of nodes) {
      try {
        // Use MERGE instead of CREATE for upsert behavior (handles duplicates gracefully)
        const [[, idLiteral], ...rest] = nodePropertyLiterals(label, properties);
        const assignments = rest.map(([key, literal]) => `n.${key} = ${literal}`).join(', ');
        const query = `MERGE (n:${escapeTableName(label)} {id: ${idLiteral}}) SET ${assignments}`;

        await tempConn.query(query);
        inserted++;
      } catch {
        // Don't console.error here - it corrupts MCP JSON-RPC on stderr
        failed++;
      }
    }
  } finally {
    if (tempConn) {
      try { await tempConn.close(); } catch {}
    }
    if (tempDb) {
      try { await tempDb.close(); } catch {}
    }
  }

  return { inserted, failed };
};

/**
 * Run a Cypher query on the module-level connection and return all rows.
 * For multi-statement queries only the first statement's rows are returned.
 * @throws Error when the connection has not been initialized
 */
export const executeQuery = async (cypher: string): Promise<any[]> => {
  if (!conn) {
    throw new Error('LadybugDB not initialized. Call initLbug first.');
  }

  const queryResult = await conn.query(cypher);
  // LadybugDB uses getAll() instead of hasNext()/getNext()
  // Query returns QueryResult for single queries, QueryResult[] for multi-statement
  const result = Array.isArray(queryResult) ? queryResult[0] : queryResult;
  const rows = await result.getAll();
  return rows;
};

/**
 * Execute one Cypher statement once per parameter set.
 * The statement is re-prepared for every sub-batch of SUB_BATCH_SIZE parameter sets.
 * An execution error is logged and abandons the rest of that sub-batch only.
 *
 * @throws Error when the connection is not initialized or the statement fails to prepare
 */
export const executeWithReusedStatement = async (
  cypher: string,
  paramsList: Array<Record<string, any>>
): Promise<void> => {
  if (!conn) {
    throw new Error('LadybugDB not initialized. Call initLbug first.');
  }
  if (paramsList.length === 0) return;

  const SUB_BATCH_SIZE = 4;
  for (let i = 0; i < paramsList.length; i += SUB_BATCH_SIZE) {
    const subBatch = paramsList.slice(i, i + SUB_BATCH_SIZE);

    const stmt = await conn.prepare(cypher);
    if (!stmt.isSuccess()) {
      const errMsg = await stmt.getErrorMessage();
      throw new Error(`Prepare failed: ${errMsg}`);
    }

    try {
      for (const params of subBatch) {
        await conn.execute(stmt, params);
      }
    } catch (e) {
      // Log the error and continue with next batch
      console.warn('Batch execution error:', e);
    }
    // Note: LadybugDB PreparedStatement doesn't require explicit close()
  }
};

/**
 * Count nodes (summed over every table in NODE_TABLES) and relationships.
 * A table whose count query fails contributes 0; returns zeros when not connected.
 */
export const getLbugStats = async (): Promise<{ nodes: number; edges: number }> => {
  if (!conn) return { nodes: 0, edges: 0 };

  let totalNodes = 0;
  for (const tableName of NODE_TABLES) {
    try {
      const queryResult = await conn.query(`MATCH (n:${escapeTableName(tableName)}) RETURN count(n) AS cnt`);
      const nodeResult = Array.isArray(queryResult) ? queryResult[0] : queryResult;
      const nodeRows = await nodeResult.getAll();
      if (nodeRows.length > 0) {
        totalNodes += Number(nodeRows[0]?.cnt ?? nodeRows[0]?.[0] ?? 0);
      }
    } catch {
      // ignore
    }
  }

  let totalEdges = 0;
  try {
    const queryResult = await conn.query(`MATCH ()-[r:${REL_TABLE_NAME}]->() RETURN count(r) AS cnt`);
    const edgeResult = Array.isArray(queryResult) ? queryResult[0] : queryResult;
    const edgeRows = await edgeResult.getAll();
    if (edgeRows.length > 0) {
      totalEdges = Number(edgeRows[0]?.cnt ?? edgeRows[0]?.[0] ?? 0);
    }
  } catch {
    // ignore
  }

  return { nodes: totalNodes, edges: totalEdges };
};

/**
 * Load cached embeddings from LadybugDB before a rebuild.
 * Returns all embedding vectors so they can be re-inserted after the graph is reloaded,
 * avoiding expensive re-embedding of unchanged nodes.
 */
export const loadCachedEmbeddings = async (): Promise<{
  embeddingNodeIds: Set<string>;
  embeddings: Array<{ nodeId: string; embedding: number[] }>;
}> => {
  if (!conn) {
    return { embeddingNodeIds: new Set(), embeddings: [] };
  }

  const embeddingNodeIds = new Set<string>();
  const embeddings: Array<{ nodeId: string; embedding: number[] }> = [];
  try {
    const rows = await conn.query(`MATCH (e:${EMBEDDING_TABLE_NAME}) RETURN e.nodeId AS nodeId, e.embedding AS embedding`);
    const result = Array.isArray(rows) ? rows[0] : rows;
    for (const row of await result.getAll()) {
      const nodeId = String(row.nodeId ?? row[0] ?? '');
      if (!nodeId) continue;
      embeddingNodeIds.add(nodeId);
      const embedding = row.embedding ?? row[1];
      if (embedding) {
        embeddings.push({
          nodeId,
          embedding: Array.isArray(embedding) ? embedding.map(Number) : Array.from(embedding as any).map(Number),
        });
      }
    }
  } catch { /* embedding table may not exist */ }

  return { embeddingNodeIds, embeddings };
};

/** Close the module-level connection and database (errors ignored) and reset module state. */
export const closeLbug = async (): Promise<void> => {
  if (conn) {
    try { await conn.close(); } catch {}
    conn = null;
  }
  if (db) {
    try { await db.close(); } catch {}
    db = null;
  }
  currentDbPath = null;
  ftsLoaded = false;
};

/** True when both the module-level connection and database are set. */
export const isLbugReady = (): boolean => conn !== null && db !== null;

/**
 * Delete all nodes (and their relationships) for a specific file from LadybugDB
 * @param filePath - The file path to delete nodes for
 * @param dbPath - Optional path to LadybugDB for per-query connection
 * @returns Object with counts of deleted nodes
 * @throws Error when no dbPath is given and the module-level connection is not initialized
 */
export const deleteNodesForFile = async (filePath: string, dbPath?: string): Promise<{ deletedNodes: number }> => {
  let tempDb: lbug.Database | null = null;
  let tempConn: lbug.Connection | null = null;

  try {
    // Set up connection (either use existing or create per-query). Done inside the
    // try so the Database handle is released even if the Connection cannot be created.
    let targetConn: lbug.Connection;
    if (dbPath) {
      tempDb = new lbug.Database(dbPath);
      tempConn = new lbug.Connection(tempDb);
      targetConn = tempConn;
    } else if (conn) {
      targetConn = conn;
    } else {
      throw new Error('LadybugDB not initialized. Provide dbPath or call initLbug first.');
    }

    let deletedNodes = 0;
    // Same escaping as the insert paths, so paths containing backslashes or quotes
    // match what was stored and cannot terminate the literal early
    const escapedPath = escapeCypherString(filePath);

    // Delete nodes from each table that has filePath
    // DETACH DELETE removes the node and all its relationships
    for (const tableName of NODE_TABLES) {
      // Skip tables that don't have filePath (Community, Process)
      if (tableName === 'Community' || tableName === 'Process') continue;

      try {
        // First count how many we'll delete
        const tn = escapeTableName(tableName);
        const countResult = await targetConn.query(
          `MATCH (n:${tn}) WHERE n.filePath = '${escapedPath}' RETURN count(n) AS cnt`
        );
        const result = Array.isArray(countResult) ? countResult[0] : countResult;
        const rows = await result.getAll();
        const count = Number(rows[0]?.cnt ?? rows[0]?.[0] ?? 0);

        if (count > 0) {
          // Delete nodes (and implicitly their relationships via DETACH)
          await targetConn.query(
            `MATCH (n:${tn}) WHERE n.filePath = '${escapedPath}' DETACH DELETE n`
          );
          deletedNodes += count;
        }
      } catch {
        // Some tables may not support this query, skip
      }
    }

    // Also delete any embeddings for nodes in this file
    try {
      await targetConn.query(
        `MATCH (e:${EMBEDDING_TABLE_NAME}) WHERE e.nodeId STARTS WITH '${escapedPath}' DELETE e`
      );
    } catch {
      // Embedding table may not exist or nodeId format may differ
    }

    return { deletedNodes };
  } finally {
    // Close per-query connection if used
    if (tempConn) {
      try { await tempConn.close(); } catch {}
    }
    if (tempDb) {
      try { await tempDb.close(); } catch {}
    }
  }
};

/** Name of the node table that stores embedding vectors. */
export const getEmbeddingTableName = (): string => EMBEDDING_TABLE_NAME;

// ============================================================================
// Full-Text Search (FTS) Functions
// ============================================================================

/**
 * Load the FTS extension (required before using FTS functions).
 * Safe to call multiple times — tracks loaded state via module-level ftsLoaded.
 * Any other load failure is logged, not thrown, and leaves ftsLoaded false.
 * @throws Error when the connection has not been initialized
 */
export const loadFTSExtension = async (): Promise<void> => {
  if (ftsLoaded) return;
  if (!conn) {
    throw new Error('LadybugDB not initialized. Call initLbug first.');
  }
  try {
    await conn.query('INSTALL fts');
    await conn.query('LOAD EXTENSION fts');
    ftsLoaded = true;
  } catch (err: any) {
    const msg = err?.message || '';
    if (msg.includes('already loaded') || msg.includes('already installed') || msg.includes('already exists')) {
      ftsLoaded = true;
    } else {
      console.error('GitNexus: FTS extension load failed:', msg);
    }
  }
};

/**
 * Create a full-text search index on a table
 * @param tableName - The node table name (e.g., 'File', 'CodeSymbol')
 * @param indexName - Name for the FTS index
 * @param properties - List of properties to index (e.g., ['name', 'code'])
 * @param stemmer - Stemming algorithm (default: 'porter')
 * @throws Error when not initialized, or on any failure other than "already exists"
 */
export const createFTSIndex = async (
  tableName: string,
  indexName: string,
  properties: string[],
  stemmer: string = 'porter'
): Promise<void> => {
  if (!conn) {
    throw new Error('LadybugDB not initialized. Call initLbug first.');
  }

  await loadFTSExtension();

  const propList = properties.map(p => `'${p}'`).join(', ');
  const query = `CALL CREATE_FTS_INDEX('${tableName}', '${indexName}', [${propList}], stemmer := '${stemmer}')`;

  try {
    await conn.query(query);
  } catch (e: any) {
    // Re-creating an existing index is treated as success
    if (!e.message?.includes('already exists')) {
      throw e;
    }
  }
};

/**
 * Query a full-text search index
 * @param tableName - The node table name
 * @param indexName - FTS index name
 * @param query - Search query string
 * @param limit - Maximum results
 * @param conjunctive - If true, all terms must match (AND); if false, any term matches (OR)
 * @returns Array of { node properties, score }; empty when the index does not exist
 */
export const queryFTS = async (
  tableName: string,
  indexName: string,
  query: string,
  limit: number = 20,
  conjunctive: boolean = false
): Promise<Array<{ nodeId: string; name: string; filePath: string; score: number; [key: string]: any }>> => {
  if (!conn) {
    throw new Error('LadybugDB not initialized. Call initLbug first.');
  }

  // Escape backslashes and single quotes to prevent Cypher injection
  const escapedQuery = query.replace(/\\/g, '\\\\').replace(/'/g, "''");
  // LIMIT must be a non-negative integer; fall back to the default for NaN/Infinity
  const safeLimit = Number.isFinite(limit) ? Math.max(0, Math.floor(limit)) : 20;

  const cypher = `
    CALL QUERY_FTS_INDEX('${tableName}', '${indexName}', '${escapedQuery}', conjunctive := ${conjunctive})
    RETURN node, score
    ORDER BY score DESC
    LIMIT ${safeLimit}
  `;

  try {
    const queryResult = await conn.query(cypher);
    const result = Array.isArray(queryResult) ? queryResult[0] : queryResult;
    const rows = await result.getAll();
    return rows.map((row: any) => {
      const node = row.node || row[0] || {};
      const score = row.score ?? row[1] ?? 0;
      return {
        // Spread first so the normalized fields below always win over null/missing node values
        ...node,
        nodeId: node.nodeId || node.id || '',
        name: node.name || '',
        filePath: node.filePath || '',
        score: typeof score === 'number' ? score : parseFloat(score) || 0,
      };
    });
  } catch (e: any) {
    // Return empty if index doesn't exist yet
    if (e.message?.includes('does not exist')) {
      return [];
    }
    throw e;
  }
};

/**
 * Drop an FTS index. Failures (e.g. the index does not exist) are ignored.
 * @throws Error when the connection has not been initialized
 */
export const dropFTSIndex = async (tableName: string, indexName: string): Promise<void> => {
  if (!conn) {
    throw new Error('LadybugDB not initialized. Call initLbug first.');
  }

  try {
    await conn.query(`CALL DROP_FTS_INDEX('${tableName}', '${indexName}')`);
  } catch {
    // Index may not exist
  }
};

================================================
FILE: gitnexus/src/core/lbug/schema.ts
================================================
/**
 * LadybugDB Schema Definitions
 *
 * Hybrid Schema:
 * - Separate node tables for each code element type (File, Function, Class, etc.)
* - Single CodeRelation table with 'type' property for all relationships
 *
 * This allows LLMs to write natural Cypher queries like:
 * MATCH (f:Function)-[r:CodeRelation {type: 'CALLS'}]->(g:Function) RETURN f, g
 */

// ============================================================================
// NODE TABLE NAMES
// ============================================================================

/** Names of every node table; the `as const` tuple also defines the NodeTableName union. */
export const NODE_TABLES = [
  'File', 'Folder', 'Function', 'Class', 'Interface', 'Method', 'CodeElement', 'Community', 'Process',
  // Multi-language support
  'Struct', 'Enum', 'Macro', 'Typedef', 'Union', 'Namespace', 'Trait', 'Impl',
  'TypeAlias', 'Const', 'Static', 'Property', 'Record', 'Delegate', 'Annotation', 'Constructor', 'Template', 'Module'
] as const;

/** Union of the string literals listed in NODE_TABLES. */
export type NodeTableName = typeof NODE_TABLES[number];

// ============================================================================
// RELATION TABLE
// ============================================================================

/** Name of the single relationship table shared by all relationship kinds. */
export const REL_TABLE_NAME = 'CodeRelation';

// Valid relation types
export const REL_TYPES = ['CONTAINS', 'DEFINES', 'IMPORTS', 'CALLS', 'EXTENDS', 'IMPLEMENTS', 'HAS_METHOD', 'HAS_PROPERTY', 'ACCESSES', 'OVERRIDES', 'MEMBER_OF', 'STEP_IN_PROCESS'] as const;

/** Union of the string literals listed in REL_TYPES. */
export type RelType = typeof REL_TYPES[number];

// ============================================================================
// EMBEDDING TABLE
// ============================================================================

/** Name of the node table that holds embedding vectors. */
export const EMBEDDING_TABLE_NAME = 'CodeEmbedding';

// ============================================================================
// NODE TABLE SCHEMAS
// ============================================================================

/** DDL for the File table: id (primary key), name, filePath, content. */
export const FILE_SCHEMA = ` CREATE NODE TABLE File ( id STRING, name STRING, filePath STRING, content STRING, PRIMARY KEY (id) )`;

/** DDL for the Folder table: id (primary key), name, filePath. */
export const FOLDER_SCHEMA = ` CREATE NODE TABLE Folder ( id STRING, name STRING, filePath STRING, PRIMARY KEY (id) )`;

/** DDL for the Function table; includes line range, isExported, content and description. */
export const FUNCTION_SCHEMA = ` 
CREATE NODE TABLE Function ( id STRING, name STRING, filePath STRING, startLine INT64, endLine INT64, isExported BOOLEAN, content STRING, description STRING, PRIMARY KEY (id) )`;

/** DDL for the Class table; same columns as Function. */
export const CLASS_SCHEMA = ` CREATE NODE TABLE Class ( id STRING, name STRING, filePath STRING, startLine INT64, endLine INT64, isExported BOOLEAN, content STRING, description STRING, PRIMARY KEY (id) )`;

/** DDL for the Interface table; same columns as Function. */
export const INTERFACE_SCHEMA = ` CREATE NODE TABLE Interface ( id STRING, name STRING, filePath STRING, startLine INT64, endLine INT64, isExported BOOLEAN, content STRING, description STRING, PRIMARY KEY (id) )`;

/** DDL for the Method table; Function's columns plus parameterCount and returnType. */
export const METHOD_SCHEMA = ` CREATE NODE TABLE Method ( id STRING, name STRING, filePath STRING, startLine INT64, endLine INT64, isExported BOOLEAN, content STRING, description STRING, parameterCount INT32, returnType STRING, PRIMARY KEY (id) )`;

/** DDL for the CodeElement table; same columns as Function. */
export const CODE_ELEMENT_SCHEMA = ` CREATE NODE TABLE CodeElement ( id STRING, name STRING, filePath STRING, startLine INT64, endLine INT64, isExported BOOLEAN, content STRING, description STRING, PRIMARY KEY (id) )`;

// ============================================================================
// COMMUNITY NODE TABLE (for Leiden algorithm clusters)
// ============================================================================

/** DDL for the Community table; `keywords` is a STRING[] column, and there is no filePath column. */
export const COMMUNITY_SCHEMA = ` CREATE NODE TABLE Community ( id STRING, label STRING, heuristicLabel STRING, keywords STRING[], description STRING, enrichedBy STRING, cohesion DOUBLE, symbolCount INT32, PRIMARY KEY (id) )`;

// ============================================================================
// PROCESS NODE TABLE (for execution flow detection)
// ============================================================================

/** DDL for the Process table; `communities` is a STRING[] column, and there is no filePath column. */
export const PROCESS_SCHEMA = ` CREATE NODE TABLE Process ( id STRING, label STRING, heuristicLabel STRING, processType STRING, stepCount INT32, communities STRING[], entryPointId STRING, terminalId STRING, PRIMARY KEY (id) )`;

// 
// ============================================================================
// MULTI-LANGUAGE NODE TABLE SCHEMAS
// ============================================================================

// Generic code element with startLine/endLine for C, C++, Rust, Go, Java, C#
// description: optional metadata (e.g. Eloquent $fillable fields, relationship targets)
// The table name is wrapped in backticks in the generated DDL, and these tables
// have no isExported column.
const CODE_ELEMENT_BASE = (name: string) => ` CREATE NODE TABLE \`${name}\` ( id STRING, name STRING, filePath STRING, startLine INT64, endLine INT64, content STRING, description STRING, PRIMARY KEY (id) )`;

// One DDL statement per multi-language table, all sharing the CODE_ELEMENT_BASE columns
export const STRUCT_SCHEMA = CODE_ELEMENT_BASE('Struct');
export const ENUM_SCHEMA = CODE_ELEMENT_BASE('Enum');
export const MACRO_SCHEMA = CODE_ELEMENT_BASE('Macro');
export const TYPEDEF_SCHEMA = CODE_ELEMENT_BASE('Typedef');
export const UNION_SCHEMA = CODE_ELEMENT_BASE('Union');
export const NAMESPACE_SCHEMA = CODE_ELEMENT_BASE('Namespace');
export const TRAIT_SCHEMA = CODE_ELEMENT_BASE('Trait');
export const IMPL_SCHEMA = CODE_ELEMENT_BASE('Impl');
export const TYPE_ALIAS_SCHEMA = CODE_ELEMENT_BASE('TypeAlias');
export const CONST_SCHEMA = CODE_ELEMENT_BASE('Const');
export const STATIC_SCHEMA = CODE_ELEMENT_BASE('Static');
export const PROPERTY_SCHEMA = CODE_ELEMENT_BASE('Property');
export const RECORD_SCHEMA = CODE_ELEMENT_BASE('Record');
export const DELEGATE_SCHEMA = CODE_ELEMENT_BASE('Delegate');
export const ANNOTATION_SCHEMA = CODE_ELEMENT_BASE('Annotation');
export const CONSTRUCTOR_SCHEMA = CODE_ELEMENT_BASE('Constructor');
export const TEMPLATE_SCHEMA = CODE_ELEMENT_BASE('Template');
export const MODULE_SCHEMA = CODE_ELEMENT_BASE('Module');

// ============================================================================
// RELATION TABLE SCHEMA
// Single table with 'type' property - connects all node tables
// ============================================================================

// Every allowed FROM→TO label pair is listed explicitly below, followed by the
// relationship properties (type, confidence, reason, step).
export const RELATION_SCHEMA = ` CREATE REL TABLE ${REL_TABLE_NAME} ( FROM File TO File, FROM File TO 
Folder, FROM File TO Function, FROM File TO Class, FROM File TO Interface, FROM File TO Method, FROM File TO CodeElement, FROM File TO \`Struct\`, FROM File TO \`Enum\`, FROM File TO \`Macro\`, FROM File TO \`Typedef\`, FROM File TO \`Union\`, FROM File TO \`Namespace\`, FROM File TO \`Trait\`, FROM File TO \`Impl\`, FROM File TO \`TypeAlias\`, FROM File TO \`Const\`, FROM File TO \`Static\`, FROM File TO \`Property\`, FROM File TO \`Record\`, FROM File TO \`Delegate\`, FROM File TO \`Annotation\`, FROM File TO \`Constructor\`, FROM File TO \`Template\`, FROM File TO \`Module\`, FROM Folder TO Folder, FROM Folder TO File, FROM Function TO Function, FROM Function TO Method, FROM Function TO Class, FROM Function TO Community, FROM Function TO \`Macro\`, FROM Function TO \`Struct\`, FROM Function TO \`Template\`, FROM Function TO \`Enum\`, FROM Function TO \`Namespace\`, FROM Function TO \`TypeAlias\`, FROM Function TO \`Module\`, FROM Function TO \`Impl\`, FROM Function TO Interface, FROM Function TO \`Constructor\`, FROM Function TO \`Const\`, FROM Function TO \`Typedef\`, FROM Function TO \`Union\`, FROM Function TO \`Property\`, FROM Class TO Method, FROM Class TO Function, FROM Class TO Class, FROM Class TO Interface, FROM Class TO Community, FROM Class TO \`Template\`, FROM Class TO \`TypeAlias\`, FROM Class TO \`Struct\`, FROM Class TO \`Enum\`, FROM Class TO \`Annotation\`, FROM Class TO \`Constructor\`, FROM Class TO \`Trait\`, FROM Class TO \`Macro\`, FROM Class TO \`Impl\`, FROM Class TO \`Union\`, FROM Class TO \`Namespace\`, FROM Class TO \`Typedef\`, FROM Class TO \`Property\`, FROM Method TO Function, FROM Method TO Method, FROM Method TO Class, FROM Method TO Community, FROM Method TO \`Template\`, FROM Method TO \`Struct\`, FROM Method TO \`TypeAlias\`, FROM Method TO \`Enum\`, FROM Method TO \`Macro\`, FROM Method TO \`Namespace\`, FROM Method TO \`Module\`, FROM Method TO \`Impl\`, FROM Method TO Interface, FROM Method TO \`Constructor\`, FROM 
Method TO \`Property\`, FROM \`Template\` TO \`Template\`, FROM \`Template\` TO Function, FROM \`Template\` TO Method, FROM \`Template\` TO Class, FROM \`Template\` TO \`Struct\`, FROM \`Template\` TO \`TypeAlias\`, FROM \`Template\` TO \`Enum\`, FROM \`Template\` TO \`Macro\`, FROM \`Template\` TO Interface, FROM \`Template\` TO \`Constructor\`, FROM \`Module\` TO \`Module\`, FROM CodeElement TO Community, FROM Interface TO Community, FROM Interface TO Function, FROM Interface TO Method, FROM Interface TO Class, FROM Interface TO Interface, FROM Interface TO \`TypeAlias\`, FROM Interface TO \`Struct\`, FROM Interface TO \`Constructor\`, FROM Interface TO \`Property\`, FROM \`Struct\` TO Community, FROM \`Struct\` TO \`Trait\`, FROM \`Struct\` TO \`Struct\`, FROM \`Struct\` TO Class, FROM \`Struct\` TO \`Enum\`, FROM \`Struct\` TO Function, FROM \`Struct\` TO Method, FROM \`Struct\` TO Interface, FROM \`Struct\` TO \`Constructor\`, FROM \`Struct\` TO \`Property\`, FROM \`Enum\` TO \`Enum\`, FROM \`Enum\` TO Community, FROM \`Enum\` TO Class, FROM \`Enum\` TO Interface, FROM \`Macro\` TO Community, FROM \`Macro\` TO Function, FROM \`Macro\` TO Method, FROM \`Module\` TO Function, FROM \`Module\` TO Method, FROM \`Typedef\` TO Community, FROM \`Union\` TO Community, FROM \`Namespace\` TO Community, FROM \`Namespace\` TO \`Struct\`, FROM \`Trait\` TO Method, FROM \`Trait\` TO \`Constructor\`, FROM \`Trait\` TO \`Property\`, FROM \`Trait\` TO Community, FROM \`Impl\` TO Method, FROM \`Impl\` TO \`Constructor\`, FROM \`Impl\` TO \`Property\`, FROM \`Impl\` TO Community, FROM \`Impl\` TO \`Trait\`, FROM \`Impl\` TO \`Struct\`, FROM \`Impl\` TO \`Impl\`, FROM \`TypeAlias\` TO Community, FROM \`TypeAlias\` TO \`Trait\`, FROM \`TypeAlias\` TO Class, FROM \`Const\` TO Community, FROM \`Static\` TO Community, FROM \`Property\` TO Community, FROM \`Record\` TO Method, FROM \`Record\` TO \`Constructor\`, FROM \`Record\` TO \`Property\`, FROM \`Record\` TO Community, FROM 
\`Delegate\` TO Community, FROM \`Annotation\` TO Community, FROM \`Constructor\` TO Community, FROM \`Constructor\` TO Interface, FROM \`Constructor\` TO Class, FROM \`Constructor\` TO Method, FROM \`Constructor\` TO Function, FROM \`Constructor\` TO \`Constructor\`, FROM \`Constructor\` TO \`Struct\`, FROM \`Constructor\` TO \`Macro\`, FROM \`Constructor\` TO \`Template\`, FROM \`Constructor\` TO \`TypeAlias\`, FROM \`Constructor\` TO \`Enum\`, FROM \`Constructor\` TO \`Annotation\`, FROM \`Constructor\` TO \`Impl\`, FROM \`Constructor\` TO \`Namespace\`, FROM \`Constructor\` TO \`Module\`, FROM \`Constructor\` TO \`Property\`, FROM \`Constructor\` TO \`Typedef\`, FROM \`Template\` TO Community, FROM \`Module\` TO Community, FROM Function TO Process, FROM Method TO Process, FROM Class TO Process, FROM Interface TO Process, FROM \`Struct\` TO Process, FROM \`Constructor\` TO Process, FROM \`Module\` TO Process, FROM \`Macro\` TO Process, FROM \`Impl\` TO Process, FROM \`Typedef\` TO Process, FROM \`TypeAlias\` TO Process, FROM \`Enum\` TO Process, FROM \`Union\` TO Process, FROM \`Namespace\` TO Process, FROM \`Trait\` TO Process, FROM \`Const\` TO Process, FROM \`Static\` TO Process, FROM \`Property\` TO Process, FROM \`Record\` TO Process, FROM \`Delegate\` TO Process, FROM \`Annotation\` TO Process, FROM \`Template\` TO Process, FROM CodeElement TO Process, type STRING, confidence DOUBLE, reason STRING, step INT32 )`;

// ============================================================================
// EMBEDDING TABLE SCHEMA
// Separate table for vector storage to avoid copy-on-write overhead
// ============================================================================

// Keyed by nodeId; vectors are fixed at 384 floats (FLOAT[384])
export const EMBEDDING_SCHEMA = ` CREATE NODE TABLE ${EMBEDDING_TABLE_NAME} ( nodeId STRING, embedding FLOAT[384], PRIMARY KEY (nodeId) )`;

/**
 * Create vector index for semantic search
 * Uses HNSW (Hierarchical Navigable Small World) algorithm with cosine similarity
 */
export const CREATE_VECTOR_INDEX_QUERY = ` CALL CREATE_VECTOR_INDEX('${EMBEDDING_TABLE_NAME}', 'code_embedding_idx', 'embedding', metric := 'cosine') `;

// ============================================================================
// ALL SCHEMA QUERIES IN ORDER
// Node tables must be created before relationship tables that reference them
// ============================================================================

/** DDL statements for every node table except the embedding table. */
export const NODE_SCHEMA_QUERIES = [
  FILE_SCHEMA,
  FOLDER_SCHEMA,
  FUNCTION_SCHEMA,
  CLASS_SCHEMA,
  INTERFACE_SCHEMA,
  METHOD_SCHEMA,
  CODE_ELEMENT_SCHEMA,
  COMMUNITY_SCHEMA,
  PROCESS_SCHEMA,
  // Multi-language support
  STRUCT_SCHEMA,
  ENUM_SCHEMA,
  MACRO_SCHEMA,
  TYPEDEF_SCHEMA,
  UNION_SCHEMA,
  NAMESPACE_SCHEMA,
  TRAIT_SCHEMA,
  IMPL_SCHEMA,
  TYPE_ALIAS_SCHEMA,
  CONST_SCHEMA,
  STATIC_SCHEMA,
  PROPERTY_SCHEMA,
  RECORD_SCHEMA,
  DELEGATE_SCHEMA,
  ANNOTATION_SCHEMA,
  CONSTRUCTOR_SCHEMA,
  TEMPLATE_SCHEMA,
  MODULE_SCHEMA,
];

/** DDL statements for relationship tables (currently only RELATION_SCHEMA). */
export const REL_SCHEMA_QUERIES = [
  RELATION_SCHEMA,
];

/** Full schema in creation order: node tables, then the relationship table, then the embedding table. */
export const SCHEMA_QUERIES = [
  ...NODE_SCHEMA_QUERIES,
  ...REL_SCHEMA_QUERIES,
  EMBEDDING_SCHEMA,
];

================================================
FILE: gitnexus/src/core/search/bm25-index.ts
================================================
/**
 * Full-Text Search via LadybugDB FTS
 *
 * Uses LadybugDB's built-in full-text search indexes for keyword-based search.
 * Always reads from the database (no cached state to drift).
 */

import { queryFTS } from '../lbug/lbug-adapter.js';

/** One ranked search hit: a file path, its score, and its 1-based rank. */
export interface BM25SearchResult {
  filePath: string;
  score: number;
  rank: number;
}

/**
 * Execute a single FTS query via a custom executor (for MCP connection pool).
 * Returns only { filePath, score } per row (a subset of what core queryFTS returns).
*/
async function queryFTSViaExecutor(
  executor: (cypher: string) => Promise<any[]>,
  tableName: string,
  indexName: string,
  query: string,
  limit: number,
): Promise<Array<{ filePath: string; score: number }>> {
  // Escape single quotes and backslashes to prevent Cypher injection
  const escapedQuery = query.replace(/\\/g, '\\\\').replace(/'/g, "''");
  const cypher = `
    CALL QUERY_FTS_INDEX('${tableName}', '${indexName}', '${escapedQuery}', conjunctive := false)
    RETURN node, score
    ORDER BY score DESC
    LIMIT ${limit}
  `;
  try {
    const rows = await executor(cypher);
    return rows.map((row: any) => {
      const node = row.node || row[0] || {};
      const score = row.score ?? row[1] ?? 0;
      return {
        filePath: node.filePath || '',
        score: typeof score === 'number' ? score : parseFloat(score) || 0,
      };
    });
  } catch {
    // Best-effort: any executor failure (e.g. a missing index) yields no results
    return [];
  }
}

/** Node tables searched by searchFTSFromLbug, paired with their FTS index names (query order). */
const FTS_SEARCH_TARGETS = [
  { table: 'File', index: 'file_fts' },
  { table: 'Function', index: 'function_fts' },
  { table: 'Class', index: 'class_fts' },
  { table: 'Method', index: 'method_fts' },
  { table: 'Interface', index: 'interface_fts' },
] as const;

/**
 * Search using LadybugDB's built-in FTS (always fresh, reads from disk)
 *
 * Queries the File, Function, Class, Method and Interface FTS indexes one after
 * another (sequentially, to avoid connection contention) and merges results by
 * filePath, summing scores for the same file. A table whose query fails
 * contributes no results. Rows without a filePath are ignored.
 *
 * @param query - Search query string
 * @param limit - Maximum results (applied per table query and to the merged list)
 * @param repoId - If provided, queries will be routed via the MCP connection pool
 * @returns Ranked search results from FTS indexes (rank starts at 1)
 */
export const searchFTSFromLbug = async (query: string, limit: number = 20, repoId?: string): Promise<BM25SearchResult[]> => {
  let runQuery: (table: string, index: string) => Promise<Array<{ filePath: string; score: number }>>;

  if (repoId) {
    // Use MCP connection pool via dynamic import
    const { executeQuery } = await import('../../mcp/core/lbug-adapter.js');
    const executor = (cypher: string) => executeQuery(repoId, cypher);
    runQuery = (table, index) => queryFTSViaExecutor(executor, table, index, query, limit);
  } else {
    // Use core lbug adapter (CLI / pipeline context)
    runQuery = (table, index) => queryFTS(table, index, query, limit, false).catch(() => []);
  }

  // Merge results by filePath, summing scores for same file.
  // IMPORTANT: FTS queries run sequentially to avoid connection contention.
  const scoreByFile = new Map<string, number>();
  for (const { table, index } of FTS_SEARCH_TARGETS) {
    const results = await runQuery(table, index);
    for (const r of results) {
      // A hit without a path cannot be reported; merging them would create one bogus '' entry
      if (!r.filePath) continue;
      scoreByFile.set(r.filePath, (scoreByFile.get(r.filePath) ?? 0) + r.score);
    }
  }

  // Sort by score descending and add rank
  return Array.from(scoreByFile, ([filePath, score]) => ({ filePath, score }))
    .sort((a, b) => b.score - a.score)
    .slice(0, limit)
    .map((r, index) => ({
      filePath: r.filePath,
      score: r.score,
      rank: index + 1,
    }));
};

================================================
FILE: gitnexus/src/core/search/hybrid-search.ts
================================================
/**
 * Hybrid Search with Reciprocal Rank Fusion (RRF)
 *
 * Combines BM25 (keyword) and semantic (embedding) search results.
 * Uses RRF to merge rankings without needing score normalization.
 *
 * This is the same approach used by Elasticsearch, Pinecone, and other
 * production search systems.
 */

import { searchFTSFromLbug, type BM25SearchResult } from './bm25-index.js';
import type { SemanticSearchResult } from '../embeddings/types.js';

/**
 * RRF constant - standard value used in the literature
 * Higher values give more weight to lower-ranked results
 */
const RRF_K = 60;

export interface HybridSearchResult {
  filePath: string;
  score: number; // RRF score
  rank: number; // Final rank
  sources: ('bm25' | 'semantic')[]; // Which methods found this

  // Metadata from semantic search (if available)
  nodeId?: string;
  name?: string;
  label?: string;
  startLine?: number;
  endLine?: number;

  // Original scores for debugging
  bm25Score?: number;
  semanticScore?: number;
}

/**
 * Perform hybrid search combining BM25 and semantic results
 *
 * Each hit contributes 1 / (RRF_K + rank) to its file's score, where rank is
 * its 1-based position in its own list. Results are keyed by filePath:
 * - a file found by both methods gets the sum of its contributions;
 * - several semantic hits in the same file all add to the score, but the
 *   file lists 'semantic' once and keeps the metadata of its best-ranked hit;
 * - if a path repeats in the BM25 list, the best-ranked occurrence wins.
 *
 * @param bm25Results - Results from BM25 keyword search
 * @param semanticResults - Results from semantic/embedding search
 * @param limit - Maximum results to return
 * @returns Merged and re-ranked results
 */
export const mergeWithRRF = (
  bm25Results: BM25SearchResult[],
  semanticResults: SemanticSearchResult[],
  limit: number = 10
): HybridSearchResult[] => {
  const merged = new Map<string, HybridSearchResult>();

  // Process BM25 results
  for (let i = 0; i < bm25Results.length; i++) {
    const r = bm25Results[i];
    // Keep the best-ranked entry if the same path shows up again.
    if (merged.has(r.filePath)) continue;
    const rrfScore = 1 / (RRF_K + i + 1); // i+1 because rank starts at 1
    merged.set(r.filePath, {
      filePath: r.filePath,
      score: rrfScore,
      rank: 0, // Will be set after sorting
      sources: ['bm25'],
      bm25Score: r.score,
    });
  }

  // Process semantic results and merge
  for (let i = 0; i < semanticResults.length; i++) {
    const r = semanticResults[i];
    const rrfScore = 1 / (RRF_K + i + 1);
    const existing = merged.get(r.filePath);

    if (!existing) {
      // Only found by semantic (so far)
      merged.set(r.filePath, {
        filePath: r.filePath,
        score: rrfScore,
        rank: 0,
        sources: ['semantic'],
        semanticScore: 1 - r.distance,
        nodeId: r.nodeId,
        name: r.name,
        label: r.label,
        startLine: r.startLine,
        endLine: r.endLine,
      });
      continue;
    }

    existing.score += rrfScore;
    if (!existing.sources.includes('semantic')) {
      // First semantic hit for this file is also its best-ranked one:
      // record the source once and take the metadata from it.
      existing.sources.push('semantic');
      existing.semanticScore = 1 - r.distance;
      existing.nodeId = r.nodeId;
      existing.name = r.name;
      existing.label = r.label;
      existing.startLine = r.startLine;
      existing.endLine = r.endLine;
    }
  }

  // Sort by RRF score descending
  const sorted = Array.from(merged.values())
    .sort((a, b) => b.score - a.score)
    .slice(0, limit);

  // Assign final ranks
  sorted.forEach((r, i) => {
    r.rank = i + 1;
  });

  return sorted;
};

/**
 * Check if hybrid search is available
 * LadybugDB FTS is always available once the database is initialized.
 * Semantic search is optional - hybrid works with just FTS if embeddings aren't ready.
 */
export const isHybridSearchReady = (): boolean => {
  return true; // FTS is always available via LadybugDB when DB is open
};

/**
 * Format hybrid results for LLM consumption
 *
 * @param results - Ranked results, typically from mergeWithRRF
 * @returns A numbered listing, or 'No results found.' for an empty list
 */
export const formatHybridResults = (results: HybridSearchResult[]): string => {
  if (results.length === 0) {
    return 'No results found.';
  }

  const formatted = results.map((r, i) => {
    const sources = r.sources.join(' + ');
    const location = r.startLine ? ` (lines ${r.startLine}-${r.endLine})` : '';
    const label = r.label ? `${r.label}: ` : 'File: ';
    // Fall back to the file's base name when no symbol name is known.
    const name = r.name || r.filePath.split('/').pop() || r.filePath;
    return `[${i + 1}] ${label}${name} File: ${r.filePath}${location} Found by: ${sources} Relevance: ${r.score.toFixed(4)}`;
  });

  return `Found ${results.length} results:\n\n${formatted.join('\n\n')}`;
};

/**
 * Execute BM25 + semantic search and merge with RRF.
 * Uses LadybugDB FTS for always-fresh BM25 results (no cached data).
* The semanticSearch function is injected to keep this module environment-agnostic. */ export const hybridSearch = async ( query: string, limit: number, executeQuery: (cypher: string) => Promise<any[]>, semanticSearch: (executeQuery: (cypher: string) => Promise<any[]>, query: string, k?: number) => Promise<SemanticSearchResult[]> ): Promise<HybridSearchResult[]> => { // Use LadybugDB FTS for always-fresh BM25 results const bm25Results = await searchFTSFromLbug(query, limit); const semanticResults = await semanticSearch(executeQuery, query, limit); return mergeWithRRF(bm25Results, semanticResults, limit); }; ================================================ FILE: gitnexus/src/core/tree-sitter/parser-loader.ts ================================================ import Parser from 'tree-sitter'; import JavaScript from 'tree-sitter-javascript'; import TypeScript from 'tree-sitter-typescript'; import Python from 'tree-sitter-python'; import Java from 'tree-sitter-java'; import C from 'tree-sitter-c'; import CPP from 'tree-sitter-cpp'; import CSharp from 'tree-sitter-c-sharp'; import Go from 'tree-sitter-go'; import Rust from 'tree-sitter-rust'; import PHP from 'tree-sitter-php'; import Ruby from 'tree-sitter-ruby'; import { createRequire } from 'node:module'; import { SupportedLanguages } from '../../config/supported-languages.js'; // tree-sitter-swift is an optionalDependency — may not be installed const _require = createRequire(import.meta.url); let Swift: any = null; try { Swift = _require('tree-sitter-swift'); } catch {} // tree-sitter-kotlin is an optionalDependency — may not be installed let Kotlin: any = null; try { Kotlin = _require('tree-sitter-kotlin'); } catch {} let parser: Parser | null = null; const languageMap: Record<string, any> = { [SupportedLanguages.JavaScript]: JavaScript, [SupportedLanguages.TypeScript]: TypeScript.typescript, [`${SupportedLanguages.TypeScript}:tsx`]: TypeScript.tsx, [SupportedLanguages.Python]: Python, [SupportedLanguages.Java]: Java, 
[SupportedLanguages.C]: C, [SupportedLanguages.CPlusPlus]: CPP, [SupportedLanguages.CSharp]: CSharp, [SupportedLanguages.Go]: Go, [SupportedLanguages.Rust]: Rust, ...(Kotlin ? { [SupportedLanguages.Kotlin]: Kotlin } : {}), [SupportedLanguages.PHP]: PHP.php_only, [SupportedLanguages.Ruby]: Ruby, ...(Swift ? { [SupportedLanguages.Swift]: Swift } : {}), }; export const isLanguageAvailable = (language: SupportedLanguages): boolean => language in languageMap; export const loadParser = async (): Promise<Parser> => { if (parser) return parser; parser = new Parser(); return parser; }; export const loadLanguage = async (language: SupportedLanguages, filePath?: string): Promise<void> => { if (!parser) await loadParser(); const key = language === SupportedLanguages.TypeScript && filePath?.endsWith('.tsx') ? `${language}:tsx` : language; const lang = languageMap[key]; if (!lang) { throw new Error(`Unsupported language: ${language}`); } parser!.setLanguage(lang); }; ================================================ FILE: gitnexus/src/core/wiki/generator.ts ================================================ /** * Wiki Generator * * Orchestrates the full wiki generation pipeline: * Phase 0: Validate prerequisites + gather graph structure * Phase 1: Build module tree (one LLM call) * Phase 2: Generate module pages (one LLM call per module, bottom-up) * Phase 3: Generate overview page * * Supports incremental updates via git diff + module-file mapping. 
*/ import fs from 'fs/promises'; import path from 'path'; import { execSync, execFileSync } from 'child_process'; import { initWikiDb, closeWikiDb, getFilesWithExports, getAllFiles, getInterFileCallEdges, getIntraModuleCallEdges, getInterModuleCallEdges, getProcessesForFiles, getAllProcesses, getInterModuleEdgesForOverview, type FileWithExports, } from './graph-queries.js'; import { generateHTMLViewer } from './html-viewer.js'; import { callLLM, estimateTokens, type LLMConfig, type CallLLMOptions, } from './llm-client.js'; import { GROUPING_SYSTEM_PROMPT, GROUPING_USER_PROMPT, MODULE_SYSTEM_PROMPT, MODULE_USER_PROMPT, PARENT_SYSTEM_PROMPT, PARENT_USER_PROMPT, OVERVIEW_SYSTEM_PROMPT, OVERVIEW_USER_PROMPT, fillTemplate, formatFileListForGrouping, formatDirectoryTree, formatCallEdges, formatProcesses, } from './prompts.js'; import { shouldIgnorePath } from '../../config/ignore-service.js'; // ─── Types ──────────────────────────────────────────────────────────── export interface WikiOptions { force?: boolean; model?: string; baseUrl?: string; apiKey?: string; maxTokensPerModule?: number; concurrency?: number; } export interface WikiMeta { fromCommit: string; generatedAt: string; model: string; moduleFiles: Record<string, string[]>; moduleTree: ModuleTreeNode[]; } export interface ModuleTreeNode { name: string; slug: string; files: string[]; children?: ModuleTreeNode[]; } export type ProgressCallback = (phase: string, percent: number, detail?: string) => void; // ─── Constants ──────────────────────────────────────────────────────── const DEFAULT_MAX_TOKENS_PER_MODULE = 30_000; const WIKI_DIR = 'wiki'; // ─── Generator Class ────────────────────────────────────────────────── export class WikiGenerator { private repoPath: string; private storagePath: string; private wikiDir: string; private lbugPath: string; private llmConfig: LLMConfig; private maxTokensPerModule: number; private concurrency: number; private options: WikiOptions; private onProgress: ProgressCallback; 
private failedModules: string[] = []; constructor( repoPath: string, storagePath: string, lbugPath: string, llmConfig: LLMConfig, options: WikiOptions = {}, onProgress?: ProgressCallback, ) { this.repoPath = repoPath; this.storagePath = storagePath; this.wikiDir = path.join(storagePath, WIKI_DIR); this.lbugPath = lbugPath; this.options = options; this.llmConfig = llmConfig; this.maxTokensPerModule = options.maxTokensPerModule ?? DEFAULT_MAX_TOKENS_PER_MODULE; this.concurrency = options.concurrency ?? 3; const progressFn = onProgress || (() => {}); this.onProgress = (phase, percent, detail) => { if (percent > 0) this.lastPercent = percent; progressFn(phase, percent, detail); }; } private lastPercent = 0; /** * Create streaming options that report LLM progress to the progress bar. * Uses the last known percent so streaming doesn't reset the bar backwards. */ private streamOpts(label: string, fixedPercent?: number): CallLLMOptions { return { onChunk: (chars: number) => { const tokens = Math.round(chars / 4); const pct = fixedPercent ?? this.lastPercent; this.onProgress('stream', pct, `${label} (${tokens} tok)`); }, }; } /** * Main entry point. Runs the full pipeline or incremental update. 
*/ async run(): Promise<{ pagesGenerated: number; mode: 'full' | 'incremental' | 'up-to-date'; failedModules: string[] }> { await fs.mkdir(this.wikiDir, { recursive: true }); const existingMeta = await this.loadWikiMeta(); const currentCommit = this.getCurrentCommit(); const forceMode = this.options.force; // Up-to-date check (skip if --force) if (!forceMode && existingMeta && existingMeta.fromCommit === currentCommit) { // Still regenerate the HTML viewer in case it's missing await this.ensureHTMLViewer(); return { pagesGenerated: 0, mode: 'up-to-date', failedModules: [] }; } // Force mode: delete snapshot to force full re-grouping if (forceMode) { try { await fs.unlink(path.join(this.wikiDir, 'first_module_tree.json')); } catch {} // Delete existing module pages so they get regenerated const existingFiles = await fs.readdir(this.wikiDir).catch(() => [] as string[]); for (const f of existingFiles) { if (f.endsWith('.md')) { try { await fs.unlink(path.join(this.wikiDir, f)); } catch {} } } } // Init graph this.onProgress('init', 2, 'Connecting to knowledge graph...'); await initWikiDb(this.lbugPath); let result: { pagesGenerated: number; mode: 'full' | 'incremental' | 'up-to-date'; failedModules: string[] }; try { if (!forceMode && existingMeta && existingMeta.fromCommit) { result = await this.incrementalUpdate(existingMeta, currentCommit); } else { result = await this.fullGeneration(currentCommit); } } finally { await closeWikiDb(); } // Always generate the HTML viewer after wiki content changes await this.ensureHTMLViewer(); return result; } // ─── HTML Viewer ───────────────────────────────────────────────────── private async ensureHTMLViewer(): Promise<void> { // Only generate if there are markdown pages to bundle const dirEntries = await fs.readdir(this.wikiDir).catch(() => [] as string[]); const hasMd = dirEntries.some(f => f.endsWith('.md')); if (!hasMd) return; this.onProgress('html', 98, 'Building HTML viewer...'); const repoName = 
path.basename(this.repoPath); await generateHTMLViewer(this.wikiDir, repoName); } // ─── Full Generation ──────────────────────────────────────────────── private async fullGeneration(currentCommit: string): Promise<{ pagesGenerated: number; mode: 'full'; failedModules: string[] }> { let pagesGenerated = 0; // Phase 0: Gather structure this.onProgress('gather', 5, 'Querying graph for file structure...'); const filesWithExports = await getFilesWithExports(); const allFiles = await getAllFiles(); // Filter to source files only const sourceFiles = allFiles.filter(f => !shouldIgnorePath(f)); if (sourceFiles.length === 0) { throw new Error('No source files found in the knowledge graph. Nothing to document.'); } // Build enriched file list (merge exports into all source files) const exportMap = new Map(filesWithExports.map(f => [f.filePath, f])); const enrichedFiles: FileWithExports[] = sourceFiles.map(fp => { return exportMap.get(fp) || { filePath: fp, symbols: [] }; }); this.onProgress('gather', 10, `Found ${sourceFiles.length} source files`); // Phase 1: Build module tree const moduleTree = await this.buildModuleTree(enrichedFiles); pagesGenerated = 0; // Phase 2: Generate module pages (parallel with concurrency limit) const totalModules = this.countModules(moduleTree); let modulesProcessed = 0; const reportProgress = (moduleName?: string) => { modulesProcessed++; const percent = 30 + Math.round((modulesProcessed / totalModules) * 55); const detail = moduleName ? 
`${modulesProcessed}/${totalModules} — ${moduleName}` : `${modulesProcessed}/${totalModules} modules`; this.onProgress('modules', percent, detail); }; // Flatten tree into layers: leaves first, then parents // Leaves can run in parallel; parents must wait for their children const { leaves, parents } = this.flattenModuleTree(moduleTree); // Process all leaf modules in parallel pagesGenerated += await this.runParallel(leaves, async (node) => { const pagePath = path.join(this.wikiDir, `${node.slug}.md`); if (await this.fileExists(pagePath)) { reportProgress(node.name); return 0; } try { await this.generateLeafPage(node); reportProgress(node.name); return 1; } catch (err: any) { this.failedModules.push(node.name); reportProgress(`Failed: ${node.name}`); return 0; } }); // Process parent modules sequentially (they depend on child docs) for (const node of parents) { const pagePath = path.join(this.wikiDir, `${node.slug}.md`); if (await this.fileExists(pagePath)) { reportProgress(node.name); continue; } try { await this.generateParentPage(node); pagesGenerated++; reportProgress(node.name); } catch (err: any) { this.failedModules.push(node.name); reportProgress(`Failed: ${node.name}`); } } // Phase 3: Generate overview this.onProgress('overview', 88, 'Generating overview page...'); await this.generateOverview(moduleTree); pagesGenerated++; // Save metadata this.onProgress('finalize', 95, 'Saving metadata...'); const moduleFiles = this.extractModuleFiles(moduleTree); await this.saveModuleTree(moduleTree); await this.saveWikiMeta({ fromCommit: currentCommit, generatedAt: new Date().toISOString(), model: this.llmConfig.model, moduleFiles, moduleTree, }); this.onProgress('done', 100, 'Wiki generation complete'); return { pagesGenerated, mode: 'full', failedModules: [...this.failedModules] }; } // ─── Phase 1: Build Module Tree ──────────────────────────────────── private async buildModuleTree(files: FileWithExports[]): Promise<ModuleTreeNode[]> { // Check for existing 
immutable snapshot (resumability) const snapshotPath = path.join(this.wikiDir, 'first_module_tree.json'); try { const existing = await fs.readFile(snapshotPath, 'utf-8'); const parsed = JSON.parse(existing); if (Array.isArray(parsed) && parsed.length > 0) { this.onProgress('grouping', 25, 'Using existing module tree (resuming)'); return parsed; } } catch { // No snapshot, generate new } this.onProgress('grouping', 15, 'Grouping files into modules (LLM)...'); const fileList = formatFileListForGrouping(files); const dirTree = formatDirectoryTree(files.map(f => f.filePath)); const prompt = fillTemplate(GROUPING_USER_PROMPT, { FILE_LIST: fileList, DIRECTORY_TREE: dirTree, }); const response = await callLLM( prompt, this.llmConfig, GROUPING_SYSTEM_PROMPT, this.streamOpts('Grouping files', 15), ); const grouping = this.parseGroupingResponse(response.content, files); // Convert to tree nodes const tree: ModuleTreeNode[] = []; for (const [moduleName, modulePaths] of Object.entries(grouping)) { const slug = this.slugify(moduleName); const node: ModuleTreeNode = { name: moduleName, slug, files: modulePaths }; // Token budget check — split if too large const totalTokens = await this.estimateModuleTokens(modulePaths); if (totalTokens > this.maxTokensPerModule && modulePaths.length > 3) { node.children = this.splitBySubdirectory(moduleName, modulePaths); node.files = []; // Parent doesn't own files directly when split } tree.push(node); } // Save immutable snapshot for resumability await fs.writeFile(snapshotPath, JSON.stringify(tree, null, 2), 'utf-8'); this.onProgress('grouping', 28, `Created ${tree.length} modules`); return tree; } /** * Parse LLM grouping response. Validates all files are assigned. 
*/ private parseGroupingResponse( content: string, files: FileWithExports[], ): Record<string, string[]> { // Extract JSON from response (handle markdown fences) let jsonStr = content.trim(); const fenceMatch = jsonStr.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/); if (fenceMatch) { jsonStr = fenceMatch[1].trim(); } let parsed: Record<string, string[]>; try { parsed = JSON.parse(jsonStr); } catch { // Fallback: group by top-level directory return this.fallbackGrouping(files); } if (typeof parsed !== 'object' || Array.isArray(parsed)) { return this.fallbackGrouping(files); } // Validate — ensure all files are assigned const allFilePaths = new Set(files.map(f => f.filePath)); const assignedFiles = new Set<string>(); const validGrouping: Record<string, string[]> = {}; for (const [mod, paths] of Object.entries(parsed)) { if (!Array.isArray(paths)) continue; const validPaths = paths.filter(p => { if (allFilePaths.has(p) && !assignedFiles.has(p)) { assignedFiles.add(p); return true; } return false; }); if (validPaths.length > 0) { validGrouping[mod] = validPaths; } } // Assign unassigned files to a "Miscellaneous" module const unassigned = files .map(f => f.filePath) .filter(fp => !assignedFiles.has(fp)); if (unassigned.length > 0) { validGrouping['Other'] = unassigned; } return Object.keys(validGrouping).length > 0 ? validGrouping : this.fallbackGrouping(files); } /** * Fallback grouping by top-level directory when LLM parsing fails. */ private fallbackGrouping(files: FileWithExports[]): Record<string, string[]> { const groups = new Map<string, string[]>(); for (const f of files) { const parts = f.filePath.replace(/\\/g, '/').split('/'); const topDir = parts.length > 1 ? parts[0] : 'Root'; let group = groups.get(topDir); if (!group) { group = []; groups.set(topDir, group); } group.push(f.filePath); } return Object.fromEntries(groups); } /** * Split a large module into sub-modules by subdirectory. 
*/ private splitBySubdirectory(moduleName: string, files: string[]): ModuleTreeNode[] { const subGroups = new Map<string, string[]>(); for (const fp of files) { const parts = fp.replace(/\\/g, '/').split('/'); // Use the deepest common-ish directory const subDir = parts.length > 2 ? parts.slice(0, 2).join('/') : parts[0]; let group = subGroups.get(subDir); if (!group) { group = []; subGroups.set(subDir, group); } group.push(fp); } return Array.from(subGroups.entries()).map(([subDir, subFiles]) => ({ name: `${moduleName} — ${path.basename(subDir)}`, slug: this.slugify(`${moduleName}-${path.basename(subDir)}`), files: subFiles, })); } // ─── Phase 2: Generate Module Pages ───────────────────────────────── /** * Generate a leaf module page from source code + graph data. */ private async generateLeafPage(node: ModuleTreeNode): Promise<void> { const filePaths = node.files; // Read source files from disk const sourceCode = await this.readSourceFiles(filePaths); // Token budget check — if too large, summarize in batches const totalTokens = estimateTokens(sourceCode); let finalSourceCode = sourceCode; if (totalTokens > this.maxTokensPerModule) { finalSourceCode = this.truncateSource(sourceCode, this.maxTokensPerModule); } // Get graph data const [intraCalls, interCalls, processes] = await Promise.all([ getIntraModuleCallEdges(filePaths), getInterModuleCallEdges(filePaths), getProcessesForFiles(filePaths, 5), ]); const prompt = fillTemplate(MODULE_USER_PROMPT, { MODULE_NAME: node.name, SOURCE_CODE: finalSourceCode, INTRA_CALLS: formatCallEdges(intraCalls), OUTGOING_CALLS: formatCallEdges(interCalls.outgoing), INCOMING_CALLS: formatCallEdges(interCalls.incoming), PROCESSES: formatProcesses(processes), }); const response = await callLLM( prompt, this.llmConfig, MODULE_SYSTEM_PROMPT, this.streamOpts(node.name), ); // Write page with front matter const pageContent = `# ${node.name}\n\n${response.content}`; await fs.writeFile(path.join(this.wikiDir, `${node.slug}.md`), 
pageContent, 'utf-8'); } /** * Generate a parent module page from children's documentation. */ private async generateParentPage(node: ModuleTreeNode): Promise<void> { if (!node.children || node.children.length === 0) return; // Read children's overview sections const childDocs: string[] = []; for (const child of node.children) { const childPage = path.join(this.wikiDir, `${child.slug}.md`); try { const content = await fs.readFile(childPage, 'utf-8'); // Extract overview section (first ~500 chars or up to "### Architecture") const overviewEnd = content.indexOf('### Architecture'); const overview = overviewEnd > 0 ? content.slice(0, overviewEnd).trim() : content.slice(0, 800).trim(); childDocs.push(`#### ${child.name}\n${overview}`); } catch { childDocs.push(`#### ${child.name}\n(Documentation not yet generated)`); } } // Get cross-child call edges const allChildFiles = node.children.flatMap(c => c.files); const crossCalls = await getIntraModuleCallEdges(allChildFiles); const processes = await getProcessesForFiles(allChildFiles, 3); const prompt = fillTemplate(PARENT_USER_PROMPT, { MODULE_NAME: node.name, CHILDREN_DOCS: childDocs.join('\n\n'), CROSS_MODULE_CALLS: formatCallEdges(crossCalls), CROSS_PROCESSES: formatProcesses(processes), }); const response = await callLLM( prompt, this.llmConfig, PARENT_SYSTEM_PROMPT, this.streamOpts(node.name), ); const pageContent = `# ${node.name}\n\n${response.content}`; await fs.writeFile(path.join(this.wikiDir, `${node.slug}.md`), pageContent, 'utf-8'); } // ─── Phase 3: Generate Overview ───────────────────────────────────── private async generateOverview(moduleTree: ModuleTreeNode[]): Promise<void> { // Read module overview sections const moduleSummaries: string[] = []; for (const node of moduleTree) { const pagePath = path.join(this.wikiDir, `${node.slug}.md`); try { const content = await fs.readFile(pagePath, 'utf-8'); const overviewEnd = content.indexOf('### Architecture'); const overview = overviewEnd > 0 ? 
content.slice(0, overviewEnd).trim() : content.slice(0, 600).trim(); moduleSummaries.push(`#### ${node.name}\n${overview}`); } catch { moduleSummaries.push(`#### ${node.name}\n(Documentation pending)`); } } // Get inter-module edges for architecture diagram const moduleFiles = this.extractModuleFiles(moduleTree); const moduleEdges = await getInterModuleEdgesForOverview(moduleFiles); // Get top processes for key workflows const topProcesses = await getAllProcesses(5); // Read project config const projectInfo = await this.readProjectInfo(); const edgesText = moduleEdges.length > 0 ? moduleEdges.map(e => `${e.from} → ${e.to} (${e.count} calls)`).join('\n') : 'No inter-module call edges detected'; const prompt = fillTemplate(OVERVIEW_USER_PROMPT, { PROJECT_INFO: projectInfo, MODULE_SUMMARIES: moduleSummaries.join('\n\n'), MODULE_EDGES: edgesText, TOP_PROCESSES: formatProcesses(topProcesses), }); const response = await callLLM( prompt, this.llmConfig, OVERVIEW_SYSTEM_PROMPT, this.streamOpts('Generating overview', 88), ); const pageContent = `# ${path.basename(this.repoPath)} — Wiki\n\n${response.content}`; await fs.writeFile(path.join(this.wikiDir, 'overview.md'), pageContent, 'utf-8'); } // ─── Incremental Updates ──────────────────────────────────────────── private async incrementalUpdate( existingMeta: WikiMeta, currentCommit: string, ): Promise<{ pagesGenerated: number; mode: 'incremental'; failedModules: string[] }> { this.onProgress('incremental', 5, 'Detecting changes...'); // Get changed files since last generation const changedFiles = this.getChangedFiles(existingMeta.fromCommit, currentCommit); if (changedFiles.length === 0) { // No file changes but commit differs (e.g. 
merge commit) await this.saveWikiMeta({ ...existingMeta, fromCommit: currentCommit, generatedAt: new Date().toISOString(), }); return { pagesGenerated: 0, mode: 'incremental', failedModules: [] }; } this.onProgress('incremental', 10, `${changedFiles.length} files changed`); // Determine affected modules const affectedModules = new Set<string>(); const newFiles: string[] = []; for (const fp of changedFiles) { let found = false; for (const [mod, files] of Object.entries(existingMeta.moduleFiles)) { if (files.includes(fp)) { affectedModules.add(mod); found = true; break; } } if (!found && !shouldIgnorePath(fp)) { newFiles.push(fp); } } // If significant new files exist, re-run full grouping if (newFiles.length > 5) { this.onProgress('incremental', 15, 'Significant new files detected, running full generation...'); // Delete old snapshot to force re-grouping try { await fs.unlink(path.join(this.wikiDir, 'first_module_tree.json')); } catch {} const fullResult = await this.fullGeneration(currentCommit); return { ...fullResult, mode: 'incremental' }; } // Add new files to nearest module or "Other" if (newFiles.length > 0) { if (!existingMeta.moduleFiles['Other']) { existingMeta.moduleFiles['Other'] = []; } existingMeta.moduleFiles['Other'].push(...newFiles); affectedModules.add('Other'); } // Regenerate affected module pages (parallel) let pagesGenerated = 0; const moduleTree = existingMeta.moduleTree; const affectedArray = Array.from(affectedModules); this.onProgress('incremental', 20, `Regenerating ${affectedArray.length} module(s)...`); const affectedNodes: ModuleTreeNode[] = []; for (const mod of affectedArray) { const modSlug = this.slugify(mod); const node = this.findNodeBySlug(moduleTree, modSlug); if (node) { try { await fs.unlink(path.join(this.wikiDir, `${node.slug}.md`)); } catch {} affectedNodes.push(node); } } let incProcessed = 0; pagesGenerated += await this.runParallel(affectedNodes, async (node) => { try { if (node.children && node.children.length > 0) { 
await this.generateParentPage(node); } else { await this.generateLeafPage(node); } incProcessed++; const percent = 20 + Math.round((incProcessed / affectedNodes.length) * 60); this.onProgress('incremental', percent, `${incProcessed}/${affectedNodes.length} — ${node.name}`); return 1; } catch (err: any) { this.failedModules.push(node.name); incProcessed++; return 0; } }); // Regenerate overview if any pages changed if (pagesGenerated > 0) { this.onProgress('incremental', 85, 'Updating overview...'); await this.generateOverview(moduleTree); pagesGenerated++; } // Save updated metadata this.onProgress('incremental', 95, 'Saving metadata...'); await this.saveWikiMeta({ ...existingMeta, fromCommit: currentCommit, generatedAt: new Date().toISOString(), model: this.llmConfig.model, }); this.onProgress('done', 100, 'Incremental update complete'); return { pagesGenerated, mode: 'incremental', failedModules: [...this.failedModules] }; } // ─── Helpers ──────────────────────────────────────────────────────── private getCurrentCommit(): string { try { return execSync('git rev-parse HEAD', { cwd: this.repoPath }).toString().trim(); } catch { return ''; } } private getChangedFiles(fromCommit: string, toCommit: string): string[] { try { const output = execFileSync( 'git', ['diff', `${fromCommit}..${toCommit}`, '--name-only'], { cwd: this.repoPath }, ).toString().trim(); return output ? 
output.split('\n').filter(Boolean) : []; } catch { return []; } } private async readSourceFiles(filePaths: string[]): Promise<string> { const parts: string[] = []; for (const fp of filePaths) { const fullPath = path.join(this.repoPath, fp); try { const content = await fs.readFile(fullPath, 'utf-8'); parts.push(`\n--- ${fp} ---\n${content}`); } catch { parts.push(`\n--- ${fp} ---\n(file not readable)`); } } return parts.join('\n'); } private truncateSource(source: string, maxTokens: number): string { // Rough truncation: keep first maxTokens*4 chars and add notice const maxChars = maxTokens * 4; if (source.length <= maxChars) return source; return source.slice(0, maxChars) + '\n\n... (source truncated for context window limits)'; } private async estimateModuleTokens(filePaths: string[]): Promise<number> { let total = 0; for (const fp of filePaths) { try { const content = await fs.readFile(path.join(this.repoPath, fp), 'utf-8'); total += estimateTokens(content); } catch { // File not readable, skip } } return total; } private async readProjectInfo(): Promise<string> { const candidates = ['package.json', 'Cargo.toml', 'pyproject.toml', 'go.mod', 'pom.xml', 'build.gradle']; const lines: string[] = [`Project: ${path.basename(this.repoPath)}`]; for (const file of candidates) { const fullPath = path.join(this.repoPath, file); try { const content = await fs.readFile(fullPath, 'utf-8'); if (file === 'package.json') { const pkg = JSON.parse(content); if (pkg.name) lines.push(`Name: ${pkg.name}`); if (pkg.description) lines.push(`Description: ${pkg.description}`); if (pkg.scripts) lines.push(`Scripts: ${Object.keys(pkg.scripts).join(', ')}`); } else { // Include first 500 chars of other config files lines.push(`\n${file}:\n${content.slice(0, 500)}`); } break; // Use first config found } catch { continue; } } // Read README excerpt for (const readme of ['README.md', 'readme.md', 'README.txt']) { try { const content = await fs.readFile(path.join(this.repoPath, readme), 
'utf-8');
        lines.push(`\nREADME excerpt:\n${content.slice(0, 1000)}`);
        break;
      } catch {
        continue;
      }
    }

    return lines.join('\n');
  }

  /**
   * Map every module name in the tree to its file list.
   *
   * A node with children maps to the concatenation of its children's files,
   * and each child is also listed under its own name. A node without children
   * maps to its own `files`. Only top-level nodes and their direct children
   * are visited.
   */
  private extractModuleFiles(tree: ModuleTreeNode[]): Record<string, string[]> {
    const filesByModule: Record<string, string[]> = {};
    for (const mod of tree) {
      const kids = mod.children ?? [];
      if (kids.length === 0) {
        filesByModule[mod.name] = mod.files;
        continue;
      }
      // Parent entry first, then the children (a child sharing the parent's name wins).
      filesByModule[mod.name] = kids.flatMap((kid) => kid.files);
      kids.forEach((kid) => {
        filesByModule[kid.name] = kid.files;
      });
    }
    return filesByModule;
  }

  /**
   * Count the top-level nodes plus their direct children.
   */
  private countModules(tree: ModuleTreeNode[]): number {
    return tree.reduce(
      (sum, mod) => sum + 1 + (mod.children ? mod.children.length : 0),
      0,
    );
  }

  /**
   * Split the module tree into `leaves` and `parents`.
   *
   * A top-level node with at least one child is a parent and its direct
   * children are the leaves; a top-level node without children is itself a
   * leaf. Input order is preserved in both lists.
   */
  private flattenModuleTree(tree: ModuleTreeNode[]): { leaves: ModuleTreeNode[]; parents: ModuleTreeNode[] } {
    const leaves: ModuleTreeNode[] = [];
    const parents: ModuleTreeNode[] = [];

    for (const mod of tree) {
      const kids = mod.children ?? [];
      if (kids.length > 0) {
        kids.forEach((kid) => leaves.push(kid));
        parents.push(mod);
      } else {
        leaves.push(mod);
      }
    }

    return { leaves, parents };
  }

  /**
   * Run async tasks in parallel with a concurrency limit and adaptive rate limiting.
   * If a 429 rate limit is hit, concurrency is temporarily reduced.
*/
  private async runParallel<T>(
    items: T[],
    fn: (item: T) => Promise<number>,
  ): Promise<number> {
    // @param items - Work items, dispatched in order.
    // @param fn    - Task run per item; its resolved number is added to the total.
    // @returns The sum of all resolved counts. Never rejects: a non-rate-limit
    //          rejection from `fn` is dropped and contributes nothing.
    if (items.length === 0) return 0;

    const RATE_LIMIT_BACKOFF_MS = 5000;

    let total = 0;
    // A zero/NaN/negative setting would dispatch nothing and never resolve.
    let activeConcurrency = Math.max(1, this.concurrency || 1);
    let running = 0;
    let idx = 0;
    // Items whose task was rejected with a rate-limit error and must run again.
    // The failed item itself is re-queued; rewinding `idx` would re-run whatever
    // item happened to be dispatched last and lose the one that failed.
    const retryQueue: T[] = [];

    // Duck-typed on purpose: rejections are not guaranteed to be Error instances.
    const isRateLimited = (err: unknown): boolean => {
      const message = (err as { message?: unknown } | null | undefined)?.message;
      return typeof message === 'string' && message.includes('429');
    };

    return new Promise<number>((resolve) => {
      const hasPending = (): boolean => retryQueue.length > 0 || idx < items.length;

      // Called after a task settles: finish when nothing is left, else keep pumping.
      const advance = (): void => {
        if (!hasPending() && running === 0) {
          resolve(total);
        } else {
          next();
        }
      };

      const next = (): void => {
        while (running < activeConcurrency && hasPending()) {
          const item = retryQueue.length > 0 ? (retryQueue.shift() as T) : items[idx++];
          running++;
          fn(item).then(
            (count) => {
              running--;
              total += count;
              advance();
            },
            (err: unknown) => {
              running--;
              // On rate limit, reduce concurrency temporarily
              if (isRateLimited(err)) {
                activeConcurrency = Math.max(1, activeConcurrency - 1);
                this.onProgress('modules', this.lastPercent, `Rate limited — concurrency → ${activeConcurrency}`);
                // Re-queue the item that actually failed
                retryQueue.push(item);
                // NOTE(review): retries are unbounded; a task that always
                // reports 429 is retried forever.
                setTimeout(next, RATE_LIMIT_BACKOFF_MS);
              } else {
                advance();
              }
            },
          );
        }
      };

      next();
    });
  }

  /**
   * Depth-first search of the module tree for the node with the given slug.
   *
   * @returns The first matching node, or `null` when none matches.
   */
  private findNodeBySlug(tree: ModuleTreeNode[], slug: string): ModuleTreeNode | null {
    for (const node of tree) {
      if (node.slug === slug) return node;
      if (node.children) {
        const found = this.findNodeBySlug(node.children, slug);
        if (found) return found;
      }
    }
    return null;
  }

  /**
   * Turn a name into a slug: lower-case it, collapse every run of characters
   * outside `a-z0-9` into a single `-`, strip leading/trailing dashes, and cap
   * the result at 60 characters. A name with no ASCII letters or digits
   * yields an empty string.
   */
  private slugify(name: string): string {
    return name
      .toLowerCase()
      .replace(/[^a-z0-9]+/g, '-')
      .replace(/^-+|-+$/g, '')
      .slice(0, 60);
  }

  /**
   * Report whether `fs.access` succeeds for the given path.
   */
  private async fileExists(fp: string): Promise<boolean> {
    try {
      await fs.access(fp);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Load and parse `meta.json` from the wiki directory.
   *
   * @returns The parsed value, or `null` when the file cannot be read or is
   *          not valid JSON. The parsed shape is not validated.
   */
  private async loadWikiMeta(): Promise<WikiMeta | null> {
    try {
      const raw = await fs.readFile(path.join(this.wikiDir, 'meta.json'), 'utf-8');
      return JSON.parse(raw) as WikiMeta;
    } catch {
      return null;
    }
  }

  /**
   * Write `meta` to `meta.json` in the wiki directory as 2-space-indented JSON.
   */
  private async saveWikiMeta(meta: WikiMeta): Promise<void> {
    await fs.writeFile(
      path.join(this.wikiDir, 'meta.json'),
      JSON.stringify(meta, null, 2),
      'utf-8',
    );
  }

  /**
   * Write the module tree to `module_tree.json` in the wiki directory as
   * 2-space-indented JSON.
   */
  private async saveModuleTree(tree: ModuleTreeNode[]): Promise<void> {
    await fs.writeFile(
      path.join(this.wikiDir,
'module_tree.json'),
      JSON.stringify(tree, null, 2),
      'utf-8',
    );
  }
}

================================================
FILE: gitnexus/src/core/wiki/graph-queries.ts
================================================
/**
 * Graph Queries for Wiki Generation
 *
 * Encapsulated Cypher queries against the GitNexus knowledge graph.
 * Uses the MCP-style pooled lbug-adapter for connection management.
 */

import { initLbug, executeQuery, closeLbug } from '../../mcp/core/lbug-adapter.js';

const REPO_ID = '__wiki__';

export interface FileWithExports {
  filePath: string;
  symbols: Array<{ name: string; type: string }>;
}

export interface CallEdge {
  fromFile: string;
  fromName: string;
  toFile: string;
  toName: string;
}

export interface ProcessInfo {
  id: string;
  label: string;
  type: string;
  stepCount: number;
  steps: Array<{
    step: number;
    name: string;
    filePath: string;
    type: string;
  }>;
}

/**
 * Open the graph database at `lbugPath` under the wiki's fixed repo id.
 */
export async function initWikiDb(lbugPath: string): Promise<void> {
  await initLbug(REPO_ID, lbugPath);
}

/**
 * Close the connection registered under the wiki's fixed repo id.
 */
export async function closeWikiDb(): Promise<void> {
  await closeLbug(REPO_ID);
}

/**
 * List every file that DEFINES at least one exported symbol, together with
 * the name and first label of each such symbol.
 *
 * Rows are grouped by file path in the order the query returns them. Each
 * field is read by its alias, falling back to its column position.
 */
export async function getFilesWithExports(): Promise<FileWithExports[]> {
  const rows = await executeQuery(REPO_ID, ` MATCH (f:File)-[:CodeRelation {type: 'DEFINES'}]->(n) WHERE n.isExported = true RETURN f.filePath AS filePath, n.name AS name, labels(n)[0] AS type ORDER BY f.filePath `);

  const byPath = new Map<string, FileWithExports>();
  for (const row of rows) {
    const filePath = row.filePath || row[0];
    const symbol = { name: row.name || row[1], type: row.type || row[2] };

    const existing = byPath.get(filePath);
    if (existing) {
      existing.symbols.push(symbol);
    } else {
      byPath.set(filePath, { filePath, symbols: [symbol] });
    }
  }

  return [...byPath.values()];
}

/**
 * Get all files tracked in the graph (including those with no exports).
*/
export async function getAllFiles(): Promise<string[]> {
  const rows = await executeQuery(REPO_ID, ` MATCH (f:File) RETURN f.filePath AS filePath ORDER BY f.filePath `);

  // Each path is read by alias, falling back to the first column.
  const paths: string[] = [];
  for (const row of rows) {
    paths.push(row.filePath || row[0]);
  }
  return paths;
}

/**
 * Return the distinct CALLS edges whose caller and callee have different
 * `filePath` values.
 */
export async function getInterFileCallEdges(): Promise<CallEdge[]> {
  const rows = await executeQuery(REPO_ID, ` MATCH (a)-[:CodeRelation {type: 'CALLS'}]->(b) WHERE a.filePath <> b.filePath RETURN DISTINCT a.filePath AS fromFile, a.name AS fromName, b.filePath AS toFile, b.name AS toName `);

  const edges: CallEdge[] = [];
  for (const row of rows) {
    edges.push({
      fromFile: row.fromFile || row[0],
      fromName: row.fromName || row[1],
      toFile: row.toFile || row[2],
      toName: row.toName || row[3],
    });
  }
  return edges;
}

/**
 * Return the distinct CALLS edges whose caller and callee both live in one of
 * the given files.
 *
 * @param filePaths - File paths to restrict both ends of the edge to. An empty
 *                    list short-circuits to an empty result without querying.
 */
export async function getIntraModuleCallEdges(filePaths: string[]): Promise<CallEdge[]> {
  if (filePaths.length === 0) return [];

  // Paths are inlined as quoted literals; embedded single quotes are doubled.
  // NOTE(review): confirm that quote-doubling is the escape form the query
  // engine expects for string literals.
  const quotedPaths = filePaths.map(f => `'${f.replace(/'/g, "''")}'`);
  const fileList = quotedPaths.join(', ');

  const rows = await executeQuery(REPO_ID, ` MATCH (a)-[:CodeRelation {type: 'CALLS'}]->(b) WHERE a.filePath IN [${fileList}] AND b.filePath IN [${fileList}] RETURN DISTINCT a.filePath AS fromFile, a.name AS fromName, b.filePath AS toFile, b.name AS toName `);

  const edges: CallEdge[] = [];
  for (const row of rows) {
    edges.push({
      fromFile: row.fromFile || row[0],
      fromName: row.fromName || row[1],
      toFile: row.toFile || row[2],
      toName: row.toName || row[3],
    });
  }
  return edges;
}

/**
 * Get call edges crossing module boundaries (external calls from/to module files).
*/
export async function getInterModuleCallEdges(filePaths: string[]): Promise<{
  outgoing: CallEdge[];
  incoming: CallEdge[];
}> {
  // An empty module has no boundary to cross; skip both queries.
  if (filePaths.length === 0) {
    return { outgoing: [], incoming: [] };
  }

  // Paths are inlined as quoted literals; embedded single quotes are doubled.
  const quotedPaths = filePaths.map(f => `'${f.replace(/'/g, "''")}'`);
  const fileList = quotedPaths.join(', ');

  // Caller inside the set, callee outside it (at most 30 rows).
  const outRows = await executeQuery(REPO_ID, ` MATCH (a)-[:CodeRelation {type: 'CALLS'}]->(b) WHERE a.filePath IN [${fileList}] AND NOT b.filePath IN [${fileList}] RETURN DISTINCT a.filePath AS fromFile, a.name AS fromName, b.filePath AS toFile, b.name AS toName LIMIT 30 `);

  // Caller outside the set, callee inside it (at most 30 rows).
  const inRows = await executeQuery(REPO_ID, ` MATCH (a)-[:CodeRelation {type: 'CALLS'}]->(b) WHERE NOT a.filePath IN [${fileList}] AND b.filePath IN [${fileList}] RETURN DISTINCT a.filePath AS fromFile, a.name AS fromName, b.filePath AS toFile, b.name AS toName LIMIT 30 `);

  // Each field is read by alias, falling back to its column position.
  const asEdges = (rows: typeof outRows): CallEdge[] =>
    rows.map(r => ({
      fromFile: r.fromFile || r[0],
      fromName: r.fromName || r[1],
      toFile: r.toFile || r[2],
      toName: r.toName || r[3],
    }));

  const outgoing = asEdges(outRows);
  const incoming = asEdges(inRows);
  return { outgoing, incoming };
}

/**
 * Get processes (execution flows) that pass through a set of files.
 * Returns top N by step count.
*/
export async function getProcessesForFiles(filePaths: string[], limit = 5): Promise<ProcessInfo[]> {
  if (filePaths.length === 0) return [];

  // Paths are inlined as quoted literals; embedded single quotes are doubled.
  const quotedPaths = filePaths.map(f => `'${f.replace(/'/g, "''")}'`);
  const fileList = quotedPaths.join(', ');

  // Find processes that have steps in the given files
  const procRows = await executeQuery(REPO_ID, ` MATCH (s)-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process) WHERE s.filePath IN [${fileList}] RETURN DISTINCT p.id AS id, p.heuristicLabel AS label, p.processType AS type, p.stepCount AS stepCount ORDER BY stepCount DESC LIMIT ${limit} `);

  const result: ProcessInfo[] = [];
  for (const proc of procRows) {
    // Fields are read by alias with a positional fallback; a missing label
    // falls back to the id, a missing type to 'unknown', a missing count to 0.
    const id = proc.id || proc[0];
    const label = proc.label || proc[1] || id;
    const type = proc.type || proc[2] || 'unknown';
    const stepCount = proc.stepCount || proc[3] || 0;

    // Get the full step trace for this process (one extra query per process)
    const escapedId = id.replace(/'/g, "''");
    const traceRows = await executeQuery(REPO_ID, ` MATCH (s)-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process {id: '${escapedId}'}) RETURN s.name AS name, s.filePath AS filePath, labels(s)[0] AS type, r.step AS step ORDER BY r.step `);

    const steps: ProcessInfo['steps'] = [];
    for (const s of traceRows) {
      steps.push({
        step: s.step || s[3] || 0,
        name: s.name || s[0],
        filePath: s.filePath || s[1],
        type: s.type || s[2],
      });
    }

    result.push({ id, label, type, stepCount, steps });
  }

  return result;
}

/**
 * Get all processes in the graph (for overview page).
*/
export async function getAllProcesses(limit = 20): Promise<ProcessInfo[]> {
  // Top `limit` processes, largest step count first.
  const procRows = await executeQuery(REPO_ID, ` MATCH (p:Process) RETURN p.id AS id, p.heuristicLabel AS label, p.processType AS type, p.stepCount AS stepCount ORDER BY stepCount DESC LIMIT ${limit} `);

  const result: ProcessInfo[] = [];
  for (const proc of procRows) {
    // Fields are read by alias with a positional fallback; a missing label
    // falls back to the id, a missing type to 'unknown', a missing count to 0.
    const id = proc.id || proc[0];
    const label = proc.label || proc[1] || id;
    const type = proc.type || proc[2] || 'unknown';
    const stepCount = proc.stepCount || proc[3] || 0;

    // Fetch this process's ordered step trace (one extra query per process).
    const escapedId = id.replace(/'/g, "''");
    const traceRows = await executeQuery(REPO_ID, ` MATCH (s)-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process {id: '${escapedId}'}) RETURN s.name AS name, s.filePath AS filePath, labels(s)[0] AS type, r.step AS step ORDER BY r.step `);

    const steps: ProcessInfo['steps'] = [];
    for (const s of traceRows) {
      steps.push({
        step: s.step || s[3] || 0,
        name: s.name || s[0],
        filePath: s.filePath || s[1],
        type: s.type || s[2],
      });
    }

    result.push({ id, label, type, stepCount, steps });
  }

  return result;
}

/**
 * Get inter-module edges for overview architecture diagram.
 * Groups call edges by source/target module.
*/
export async function getInterModuleEdgesForOverview(
  moduleFiles: Record<string, string[]>
): Promise<Array<{ from: string; to: string; count: number }>> {
  // @param moduleFiles - Module name → files belonging to that module.
  // @returns One entry per ordered (from, to) module pair with at least one
  //          cross-module call, sorted by call count, highest first.

  // Build file-to-module lookup (a file listed under several modules keeps the last one)
  const fileToModule = new Map<string, string>();
  for (const [mod, files] of Object.entries(moduleFiles)) {
    for (const f of files) {
      fileToModule.set(f, mod);
    }
  }

  const allEdges = await getInterFileCallEdges();

  // Aggregate per module pair. The entry carries `from`/`to` itself, so the
  // map key never has to be parsed back; JSON-encoding the pair keeps the key
  // unambiguous whatever characters a module name contains.
  const edgesByPair = new Map<string, { from: string; to: string; count: number }>();
  for (const edge of allEdges) {
    const fromMod = fileToModule.get(edge.fromFile);
    const toMod = fileToModule.get(edge.toFile);
    // Skip edges touching files outside every module, and calls within one module.
    if (!fromMod || !toMod || fromMod === toMod) continue;

    const key = JSON.stringify([fromMod, toMod]);
    const existing = edgesByPair.get(key);
    if (existing) {
      existing.count += 1;
    } else {
      edgesByPair.set(key, { from: fromMod, to: toMod, count: 1 });
    }
  }

  // Map iteration is insertion-ordered and sort is stable, so pairs with equal
  // counts stay in first-seen order.
  return Array.from(edgesByPair.values()).sort((a, b) => b.count - a.count);
}

================================================
FILE: gitnexus/src/core/wiki/html-viewer.ts
================================================
/**
 * HTML Viewer Generator for Wiki
 *
 * Produces a self-contained index.html that embeds all markdown pages,
 * module tree, and metadata — viewable offline in any browser.
 */

import fs from 'fs/promises';
import path from 'path';

interface ModuleTreeNode {
  name: string;
  slug: string;
  files: string[];
  children?: ModuleTreeNode[];
}

/**
 * Generate the wiki HTML viewer (index.html) from existing markdown pages.
*/
export async function generateHTMLViewer(
  wikiDir: string,
  projectName: string,
): Promise<string> {
  // @param wikiDir     - Directory holding the generated wiki (markdown pages,
  //                      module_tree.json, meta.json); index.html is written here.
  // @param projectName - Passed through to the HTML builder.
  // @returns The path of the written index.html.

  // Load module tree
  let moduleTree: ModuleTreeNode[] = [];
  try {
    const raw = await fs.readFile(path.join(wikiDir, 'module_tree.json'), 'utf-8');
    moduleTree = JSON.parse(raw);
  } catch { /* will show empty nav */ }

  // Load meta
  let meta: Record<string, unknown> | null = null;
  try {
    const raw = await fs.readFile(path.join(wikiDir, 'meta.json'), 'utf-8');
    meta = JSON.parse(raw);
  } catch { /* no meta */ }

  // Read all markdown files into a { slug: content } map
  const pages: Record<string, string> = {};
  const dirEntries = await fs.readdir(wikiDir);
  for (const f of dirEntries.filter(f => f.endsWith('.md'))) {
    const content = await fs.readFile(path.join(wikiDir, f), 'utf-8');
    pages[f.replace(/\.md$/, '')] = content;
  }

  const html = buildHTML(projectName, moduleTree, pages, meta);
  const outputPath = path.join(wikiDir, 'index.html');
  await fs.writeFile(outputPath, html, 'utf-8');
  return outputPath;
}

// ─── HTML Builder ───────────────────────────────────────────────────────

/**
 * Escape text for safe insertion into HTML element content or a quoted
 * attribute value.
 *
 * `&` is replaced first so the entities produced by the later replacements
 * are not escaped a second time.
 *
 * @param text - Raw, untrusted text.
 * @returns The text with `&`, `<`, `>`, `"` and `'` replaced by HTML entities.
 */
function esc(text: string): string {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

function buildHTML(
  projectName: string,
  moduleTree: ModuleTreeNode[],
  pages: Record<string, string>,
  meta: Record<string, unknown> | null,
): string {
  // Embed data as JSON inside the HTML
  const pagesJSON = JSON.stringify(pages);
  const treeJSON = JSON.stringify(moduleTree);
  const metaJSON = JSON.stringify(meta);

  const parts: string[] = [];

  // ── Head ──
  parts.push('<!DOCTYPE html>');
  parts.push('<html lang="en">');
  parts.push('<head>');
  parts.push('<meta charset="UTF-8">');
  parts.push('<meta name="viewport" content="width=device-width, initial-scale=1.0">');
  parts.push('<title>' + esc(projectName) + ' — Wiki');
  parts.push(' ================================================ FILE: gitnexus-web/package.json ================================================ { "name": "gitnexus",
"private": true, "version": "0.0.0", "type": "module", "scripts": { "dev": "vite", "build": "tsc -b && vite build", "preview": "vite preview" }, "dependencies": { "@huggingface/transformers": "^3.0.0", "@isomorphic-git/lightning-fs": "^4.6.2", "@langchain/anthropic": "^1.3.10", "@langchain/core": "^1.1.15", "@langchain/google-genai": "^2.1.10", "@langchain/langgraph": "^1.1.0", "@langchain/ollama": "^1.2.0", "@langchain/openai": "^1.2.2", "@sigma/edge-curve": "^3.1.0", "@tailwindcss/vite": "^4.1.18", "axios": "^1.13.2", "buffer": "^6.0.3", "comlink": "^4.4.2", "d3": "^7.9.0", "graphology": "^0.26.0", "graphology-indices": "^0.17.0", "graphology-utils": "^2.3.0", "mnemonist": "^0.39.0", "pandemonium": "^2.4.0", "graphology-layout-force": "^0.2.4", "graphology-layout-forceatlas2": "^0.10.1", "graphology-layout-noverlap": "^0.4.2", "isomorphic-git": "^1.36.1", "jszip": "^3.10.1", "@ladybugdb/wasm-core": "^0.15.2", "langchain": "^1.2.10", "lru-cache": "^11.2.4", "lucide-react": "^0.562.0", "mermaid": "^11.12.2", "minisearch": "^7.2.0", "react": "^18.3.1", "react-dom": "^18.3.1", "react-markdown": "^10.1.0", "react-syntax-highlighter": "^16.1.0", "react-zoom-pan-pinch": "^3.7.0", "remark-gfm": "^4.0.1", "sigma": "^3.0.2", "tailwindcss": "^4.1.18", "uuid": "^13.0.0", "vite-plugin-top-level-await": "^1.6.0", "vite-plugin-wasm": "^3.5.0", "web-tree-sitter": "^0.20.8", "zod": "^3.25.76" }, "devDependencies": { "@babel/types": "^7.28.5", "@types/jszip": "^3.4.0", "@types/node": "^24.10.1", "@types/react": "^18.3.5", "@types/react-dom": "^18.3.0", "@types/react-syntax-highlighter": "^15.5.13", "@vercel/node": "^5.5.16", "@vitejs/plugin-react": "^5.1.0", "tree-sitter-wasms": "^0.1.13", "typescript": "^5.4.5", "vite": "^5.2.0", "vite-plugin-static-copy": "^3.1.4" } } ================================================ FILE: gitnexus-web/src/App.tsx ================================================ import { useCallback, useEffect, useRef } from 'react'; import { AppStateProvider, 
useAppState } from './hooks/useAppState'; import { DropZone } from './components/DropZone'; import { LoadingOverlay } from './components/LoadingOverlay'; import { Header } from './components/Header'; import { GraphCanvas, GraphCanvasHandle } from './components/GraphCanvas'; import { RightPanel } from './components/RightPanel'; import { SettingsPanel } from './components/SettingsPanel'; import { StatusBar } from './components/StatusBar'; import { FileTreePanel } from './components/FileTreePanel'; import { CodeReferencesPanel } from './components/CodeReferencesPanel'; import { FileEntry } from './services/zip'; import { getActiveProviderConfig } from './core/llm/settings-service'; import { createKnowledgeGraph } from './core/graph/graph'; import { connectToServer, fetchRepos, normalizeServerUrl, type ConnectToServerResult } from './services/server-connection'; const AppContent = () => { const { viewMode, setViewMode, setGraph, setFileContents, setProgress, setProjectName, progress, isRightPanelOpen, runPipeline, runPipelineFromFiles, isSettingsPanelOpen, setSettingsPanelOpen, refreshLLMSettings, initializeAgent, startEmbeddings, embeddingStatus, codeReferences, selectedNode, isCodePanelOpen, serverBaseUrl, setServerBaseUrl, availableRepos, setAvailableRepos, switchRepo, } = useAppState(); const graphCanvasRef = useRef(null); const handleFileSelect = useCallback(async (file: File) => { const projectName = file.name.replace('.zip', ''); setProjectName(projectName); setProgress({ phase: 'extracting', percent: 0, message: 'Starting...', detail: 'Preparing to extract files' }); setViewMode('loading'); try { const result = await runPipeline(file, (progress) => { setProgress(progress); }); setGraph(result.graph); setFileContents(result.fileContents); setViewMode('exploring'); // Initialize (or re-initialize) the agent AFTER a repo loads so it captures // the current codebase context (file contents + graph tools) in the worker. 
if (getActiveProviderConfig()) { initializeAgent(projectName); } // Auto-start embeddings pipeline in background // Uses WebGPU if available, falls back to WASM startEmbeddings().catch((err) => { if (err?.name === 'WebGPUNotAvailableError' || err?.message?.includes('WebGPU')) { startEmbeddings('wasm').catch(console.warn); } else { console.warn('Embeddings auto-start failed:', err); } }); } catch (error) { console.error('Pipeline error:', error); setProgress({ phase: 'error', percent: 0, message: 'Error processing file', detail: error instanceof Error ? error.message : 'Unknown error', }); setTimeout(() => { setViewMode('onboarding'); setProgress(null); }, 3000); } }, [setViewMode, setGraph, setFileContents, setProgress, setProjectName, runPipeline, startEmbeddings, initializeAgent]); const handleGitClone = useCallback(async (files: FileEntry[]) => { const firstPath = files[0]?.path || 'repository'; const projectName = firstPath.split('/')[0].replace(/-\d+$/, '') || 'repository'; setProjectName(projectName); setProgress({ phase: 'extracting', percent: 0, message: 'Starting...', detail: 'Preparing to process files' }); setViewMode('loading'); try { const result = await runPipelineFromFiles(files, (progress) => { setProgress(progress); }); setGraph(result.graph); setFileContents(result.fileContents); setViewMode('exploring'); if (getActiveProviderConfig()) { initializeAgent(projectName); } startEmbeddings().catch((err) => { if (err?.name === 'WebGPUNotAvailableError' || err?.message?.includes('WebGPU')) { startEmbeddings('wasm').catch(console.warn); } else { console.warn('Embeddings auto-start failed:', err); } }); } catch (error) { console.error('Pipeline error:', error); setProgress({ phase: 'error', percent: 0, message: 'Error processing repository', detail: error instanceof Error ? 
error.message : 'Unknown error', }); setTimeout(() => { setViewMode('onboarding'); setProgress(null); }, 3000); } }, [setViewMode, setGraph, setFileContents, setProgress, setProjectName, runPipelineFromFiles, startEmbeddings, initializeAgent]); const handleServerConnect = useCallback((result: ConnectToServerResult) => { // Extract project name from repoPath const repoPath = result.repoInfo.repoPath; const projectName = repoPath.split('/').pop() || 'server-project'; setProjectName(projectName); // Build KnowledgeGraph from server data (bypasses WASM pipeline entirely) const graph = createKnowledgeGraph(); for (const node of result.nodes) { graph.addNode(node); } for (const rel of result.relationships) { graph.addRelationship(rel); } setGraph(graph); // Set file contents from extracted File node content const fileMap = new Map(); for (const [path, content] of Object.entries(result.fileContents)) { fileMap.set(path, content); } setFileContents(fileMap); // Transition directly to exploring view setViewMode('exploring'); // Initialize agent if LLM is configured if (getActiveProviderConfig()) { initializeAgent(projectName); } // Auto-start embeddings startEmbeddings().catch((err) => { if (err?.name === 'WebGPUNotAvailableError' || err?.message?.includes('WebGPU')) { startEmbeddings('wasm').catch(console.warn); } else { console.warn('Embeddings auto-start failed:', err); } }); }, [setViewMode, setGraph, setFileContents, setProjectName, initializeAgent, startEmbeddings]); // Auto-connect when ?server query param is present (bookmarkable shortcut) const autoConnectRan = useRef(false); useEffect(() => { if (autoConnectRan.current) return; const params = new URLSearchParams(window.location.search); if (!params.has('server')) return; autoConnectRan.current = true; // Clean the URL so a refresh won't re-trigger const cleanUrl = window.location.pathname + window.location.hash; window.history.replaceState(null, '', cleanUrl); setProgress({ phase: 'extracting', percent: 0, 
message: 'Connecting to server...', detail: 'Validating server' }); setViewMode('loading'); const serverUrl = params.get('server') || window.location.origin; const baseUrl = normalizeServerUrl(serverUrl); connectToServer(serverUrl, (phase, downloaded, total) => { if (phase === 'validating') { setProgress({ phase: 'extracting', percent: 5, message: 'Connecting to server...', detail: 'Validating server' }); } else if (phase === 'downloading') { const pct = total ? Math.round((downloaded / total) * 90) + 5 : 50; const mb = (downloaded / (1024 * 1024)).toFixed(1); setProgress({ phase: 'extracting', percent: pct, message: 'Downloading graph...', detail: `${mb} MB downloaded` }); } else if (phase === 'extracting') { setProgress({ phase: 'extracting', percent: 97, message: 'Processing...', detail: 'Extracting file contents' }); } }).then(async (result) => { handleServerConnect(result); // Store server URL and fetch available repos for the repo switcher setServerBaseUrl(baseUrl); try { const repos = await fetchRepos(baseUrl); setAvailableRepos(repos); } catch (e) { console.warn('Failed to fetch repo list:', e); } }).catch((err) => { console.error('Auto-connect failed:', err); setProgress({ phase: 'error', percent: 0, message: 'Failed to connect to server', detail: err instanceof Error ? 
err.message : 'Unknown error', }); setTimeout(() => { setViewMode('onboarding'); setProgress(null); }, 3000); }); }, [handleServerConnect, setProgress, setViewMode, setServerBaseUrl, setAvailableRepos]); const handleFocusNode = useCallback((nodeId: string) => { graphCanvasRef.current?.focusNode(nodeId); }, []); // Handle settings saved - refresh and reinitialize agent // NOTE: Must be defined BEFORE any conditional returns (React hooks rule) const handleSettingsSaved = useCallback(() => { refreshLLMSettings(); initializeAgent(); }, [refreshLLMSettings, initializeAgent]); // Render based on view mode if (viewMode === 'onboarding') { return ( { handleServerConnect(result); if (serverUrl) { const baseUrl = normalizeServerUrl(serverUrl); setServerBaseUrl(baseUrl); try { const repos = await fetchRepos(baseUrl); setAvailableRepos(repos); } catch (e) { console.warn('Failed to fetch repo list:', e); } } }} /> ); } if (viewMode === 'loading' && progress) { return ; } // Exploring view return (
{/* Left Panel - File Tree */} {/* Graph area - takes remaining space */}
{/* Code References Panel (overlay) - does NOT resize the graph, it overlaps on top */} {isCodePanelOpen && (codeReferences.length > 0 || !!selectedNode) && (
)}
{/* Right Panel - Code & Chat (tabbed) */} {isRightPanelOpen && }
{/* Settings Panel (modal) */} setSettingsPanelOpen(false)} onSettingsSaved={handleSettingsSaved} />
); }; function App() { return ( ); } export default App; ================================================ FILE: gitnexus-web/src/components/BackendRepoSelector.tsx ================================================ import { Server, ArrowRight } from 'lucide-react'; import { BackendRepo } from '../services/backend'; interface BackendRepoSelectorProps { repos: BackendRepo[]; onSelectRepo: (repoName: string) => void; backendUrl: string; isConnected: boolean; } export const BackendRepoSelector = ({ repos, onSelectRepo, backendUrl, isConnected, }: BackendRepoSelectorProps) => { return (
{/* Icon */}
{/* Title */}

Local Repositories

Select an indexed repository from your local GitNexus server

{/* Connected status badge */} {isConnected && (
Connected to {backendUrl}
)} {/* Repo list or empty state */} {repos.length > 0 ? (
{repos.map((repo) => ( ))}
) : (

No indexed repositories found

Run{' '} gitnexus analyze{' '} in a repository

)} {/* Bottom hints */}
{repos.length} {repos.length === 1 ? 'repo' : 'repos'} Pre-indexed
); }; ================================================ FILE: gitnexus-web/src/components/CodeReferencesPanel.tsx ================================================ import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; import { Code, PanelLeftClose, PanelLeft, Trash2, X, Target, FileCode, Sparkles, MousePointerClick } from 'lucide-react'; import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter'; import { vscDarkPlus } from 'react-syntax-highlighter/dist/esm/styles/prism'; import { useAppState } from '../hooks/useAppState'; import { NODE_COLORS } from '../lib/constants'; /** Map file extension to Prism syntax highlighter language identifier */ const getSyntaxLanguage = (filePath: string | undefined): string => { if (!filePath) return 'text'; const ext = filePath.split('.').pop()?.toLowerCase(); switch (ext) { case 'js': case 'jsx': case 'mjs': case 'cjs': return 'javascript'; case 'ts': case 'tsx': case 'mts': case 'cts': return 'typescript'; case 'py': case 'pyw': return 'python'; case 'rb': case 'rake': case 'gemspec': return 'ruby'; case 'java': return 'java'; case 'go': return 'go'; case 'rs': return 'rust'; case 'c': case 'h': return 'c'; case 'cpp': case 'cc': case 'cxx': case 'hpp': case 'hxx': case 'hh': return 'cpp'; case 'cs': return 'csharp'; case 'php': return 'php'; case 'kt': case 'kts': return 'kotlin'; case 'swift': return 'swift'; case 'json': return 'json'; case 'yaml': case 'yml': return 'yaml'; case 'md': case 'mdx': return 'markdown'; case 'html': case 'htm': case 'erb': return 'markup'; case 'css': case 'scss': case 'sass': return 'css'; case 'sh': case 'bash': case 'zsh': return 'bash'; case 'sql': return 'sql'; case 'xml': return 'xml'; default: break; } // Handle extensionless Ruby files const basename = filePath.split('/').pop() || ''; if (['Rakefile', 'Gemfile', 'Guardfile', 'Vagrantfile', 'Brewfile'].includes(basename)) return 'ruby'; if (['Makefile'].includes(basename)) return 'makefile'; if 
(['Dockerfile'].includes(basename)) return 'docker'; return 'text'; }; // Match the code theme used elsewhere in the app const customTheme = { ...vscDarkPlus, 'pre[class*="language-"]': { ...vscDarkPlus['pre[class*="language-"]'], background: '#0a0a10', margin: 0, padding: '12px 0', fontSize: '13px', lineHeight: '1.6', }, 'code[class*="language-"]': { ...vscDarkPlus['code[class*="language-"]'], background: 'transparent', fontFamily: '"JetBrains Mono", "Fira Code", monospace', }, }; export interface CodeReferencesPanelProps { onFocusNode: (nodeId: string) => void; } export const CodeReferencesPanel = ({ onFocusNode }: CodeReferencesPanelProps) => { const { graph, fileContents, selectedNode, codeReferences, removeCodeReference, clearCodeReferences, setSelectedNode, codeReferenceFocus, } = useAppState(); const [isCollapsed, setIsCollapsed] = useState(false); const [glowRefId, setGlowRefId] = useState(null); const panelRef = useRef(null); const resizeRef = useRef<{ startX: number; startWidth: number } | null>(null); const refCardEls = useRef>(new Map()); const glowTimerRef = useRef(null); useEffect(() => { return () => { if (glowTimerRef.current) { window.clearTimeout(glowTimerRef.current); glowTimerRef.current = null; } }; }, []); const [panelWidth, setPanelWidth] = useState(() => { try { const saved = window.localStorage.getItem('gitnexus.codePanelWidth'); const parsed = saved ? 
parseInt(saved, 10) : NaN; if (!Number.isFinite(parsed)) return 560; // increased default return Math.max(420, Math.min(parsed, 900)); } catch { return 560; } }); useEffect(() => { try { window.localStorage.setItem('gitnexus.codePanelWidth', String(panelWidth)); } catch { // ignore } }, [panelWidth]); const startResize = useCallback((e: React.MouseEvent) => { e.preventDefault(); e.stopPropagation(); resizeRef.current = { startX: e.clientX, startWidth: panelWidth }; document.body.style.cursor = 'col-resize'; document.body.style.userSelect = 'none'; const onMove = (ev: MouseEvent) => { const state = resizeRef.current; if (!state) return; const delta = ev.clientX - state.startX; const next = Math.max(420, Math.min(state.startWidth + delta, 900)); setPanelWidth(next); }; const onUp = () => { resizeRef.current = null; document.body.style.cursor = ''; document.body.style.userSelect = ''; window.removeEventListener('mousemove', onMove); window.removeEventListener('mouseup', onUp); }; window.addEventListener('mousemove', onMove); window.addEventListener('mouseup', onUp); }, [panelWidth]); const aiReferences = useMemo(() => codeReferences.filter(r => r.source === 'ai'), [codeReferences]); // When the user clicks a citation badge in chat, focus the corresponding snippet card: // - expand the panel if collapsed // - smooth-scroll the card into view // - briefly glow it for discoverability useEffect(() => { if (!codeReferenceFocus) return; // Ensure panel is expanded setIsCollapsed(false); const { filePath, startLine, endLine } = codeReferenceFocus; const target = aiReferences.find(r => r.filePath === filePath && r.startLine === startLine && r.endLine === endLine ) ?? aiReferences.find(r => r.filePath === filePath); if (!target) return; // Double rAF: wait for collapse state + list DOM to render. 
requestAnimationFrame(() => { requestAnimationFrame(() => { const el = refCardEls.current.get(target.id); if (!el) return; el.scrollIntoView({ behavior: 'smooth', block: 'center' }); setGlowRefId(target.id); if (glowTimerRef.current) { window.clearTimeout(glowTimerRef.current); } glowTimerRef.current = window.setTimeout(() => { setGlowRefId((prev) => (prev === target.id ? null : prev)); glowTimerRef.current = null; }, 1200); }); }); }, [codeReferenceFocus?.ts, aiReferences]); const refsWithSnippets = useMemo(() => { return aiReferences.map((ref) => { const content = fileContents.get(ref.filePath); if (!content) { return { ref, content: null as string | null, start: 0, end: 0, highlightStart: 0, highlightEnd: 0, totalLines: 0 }; } const lines = content.split('\n'); const totalLines = lines.length; const startLine = ref.startLine ?? 0; const endLine = ref.endLine ?? startLine; const contextBefore = 3; const contextAfter = 20; const start = Math.max(0, startLine - contextBefore); const end = Math.min(totalLines - 1, endLine + contextAfter); return { ref, content: lines.slice(start, end + 1).join('\n'), start, end, highlightStart: Math.max(0, startLine - start), highlightEnd: Math.max(0, endLine - start), totalLines, }; }); }, [aiReferences, fileContents]); const selectedFilePath = selectedNode?.properties?.filePath; const selectedFileContent = selectedFilePath ? 
fileContents.get(selectedFilePath) : undefined;
  const selectedIsFile = selectedNode?.label === 'File' && !!selectedFilePath;
  const showSelectedViewer = !!selectedNode && !!selectedFilePath;
  const showCitations = aiReferences.length > 0;

  if (isCollapsed) {
    return ( );
  }

  return ( );
};

================================================
FILE: gitnexus-web/src/components/DropZone.tsx
================================================
import { useState, useCallback, useRef, DragEvent } from 'react';
import { Upload, FileArchive, Github, Loader2, ArrowRight, Key, Eye, EyeOff, Globe, X } from 'lucide-react';
import { cloneRepository, parseGitHubUrl } from '../services/git-clone';
import { connectToServer, type ConnectToServerResult } from '../services/server-connection';
import { FileEntry } from '../services/zip';

/** localStorage key under which the last-entered server URL is remembered. */
const SERVER_URL_STORAGE_KEY = 'gitnexus-server-url';

interface DropZoneProps {
  /** Called with the chosen/dropped `.zip` file. */
  onFileSelect: (file: File) => void;
  /** Called with the cloned files once a GitHub clone succeeds. */
  onGitClone?: (files: FileEntry[]) => void;
  /** Called with the server result and the URL that was actually contacted. */
  onServerConnect?: (result: ConnectToServerResult, serverUrl?: string) => void;
}

/** Formats a byte count as `B`, `KB` or `MB` (one decimal for KB/MB). */
function formatBytes(bytes: number): string {
  if (bytes < 1024) return `${bytes} B`;
  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
}

/** True when the file name ends in `.zip`, ignoring case (`Repo.ZIP` is accepted). */
const isZipFile = (file: File): boolean => file.name.toLowerCase().endsWith('.zip');

/** True when `err` carries the `AbortError` name; safe for null/primitive rejections. */
const isAbortError = (err: unknown): boolean =>
  typeof err === 'object' && err !== null && (err as { name?: unknown }).name === 'AbortError';

/** Reads the remembered server URL; storage failures fall back to an empty string. */
const readStoredServerUrl = (): string => {
  try {
    return localStorage.getItem(SERVER_URL_STORAGE_KEY) || '';
  } catch {
    return '';
  }
};

/**
 * Landing component with three ways to load a codebase: ZIP upload,
 * GitHub clone, or connecting to a running server.
 */
export const DropZone = ({ onFileSelect, onGitClone, onServerConnect }: DropZoneProps) => {
  const [isDragging, setIsDragging] = useState(false);
  const [activeTab, setActiveTab] = useState<'zip' | 'github' | 'server'>('zip');
  const [githubUrl, setGithubUrl] = useState('');
  const [githubToken, setGithubToken] = useState('');
  const [showToken, setShowToken] = useState(false);
  const [isCloning, setIsCloning] = useState(false);
  const [cloneProgress, setCloneProgress] = useState({ phase: '', percent: 0 });
  // Explicit type argument: without it the state is inferred as `null` and setError('…') cannot type-check.
  const [error, setError] = useState<string | null>(null);

  // Server tab state
  const [serverUrl, setServerUrl] = useState(readStoredServerUrl);
  const [isConnecting, setIsConnecting] = useState(false);
  const [serverProgress, setServerProgress] = useState<{
    phase: string;
    downloaded: number;
    total: number | null;
  }>({ phase: '', downloaded: 0, total: null });
  const abortControllerRef = useRef<AbortController | null>(null);

  const handleDragOver = useCallback((e: DragEvent) => {
    e.preventDefault();
    e.stopPropagation();
    setIsDragging(true);
  }, []);

  const handleDragLeave = useCallback((e: DragEvent) => {
    e.preventDefault();
    e.stopPropagation();
    setIsDragging(false);
  }, []);

  const handleDrop = useCallback((e: DragEvent) => {
    e.preventDefault();
    e.stopPropagation();
    setIsDragging(false);

    // Only the first dropped file is considered.
    const files = e.dataTransfer.files;
    if (files.length > 0) {
      const file = files[0];
      if (isZipFile(file)) {
        onFileSelect(file);
      } else {
        setError('Please drop a .zip file');
      }
    }
  }, [onFileSelect]);

  const handleFileInput = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
    const files = e.target.files;
    if (files && files.length > 0) {
      const file = files[0];
      if (isZipFile(file)) {
        onFileSelect(file);
      } else {
        setError('Please select a .zip file');
      }
    }
  }, [onFileSelect]);

  /** Validates the GitHub URL, clones the repository and maps failures to user-facing messages. */
  const handleGitClone = async () => {
    if (!githubUrl.trim()) {
      setError('Please enter a GitHub URL');
      return;
    }

    const parsed = parseGitHubUrl(githubUrl);
    if (!parsed) {
      setError('Invalid GitHub URL. Use format: https://github.com/owner/repo');
      return;
    }

    setError(null);
    setIsCloning(true);
    setCloneProgress({ phase: 'starting', percent: 0 });

    try {
      const files = await cloneRepository(
        githubUrl,
        (phase, percent) => setCloneProgress({ phase, percent }),
        githubToken || undefined
      );

      // Drop the token from state as soon as it is no longer needed.
      setGithubToken('');

      if (onGitClone) {
        onGitClone(files);
      }
    } catch (err) {
      console.error('Clone failed:', err);
      const message = err instanceof Error ? err.message : 'Failed to clone repository';
      if (message.includes('401') || message.includes('403') || message.includes('Authentication')) {
        if (!githubToken) {
          setError('This looks like a private repo. Add a GitHub PAT (Personal Access Token) to access it.');
        } else {
          setError('Authentication failed. Check your token permissions (needs repo access).');
        }
      } else if (message.includes('404') || message.includes('not found')) {
        setError('Repository not found. Check the URL or it might be private (needs PAT).');
      } else {
        setError(message);
      }
    } finally {
      setIsCloning(false);
    }
  };

  /** Connects to the entered server URL (or the page origin when empty), reporting download progress. */
  const handleServerConnect = async () => {
    const urlToUse = serverUrl.trim() || window.location.origin;
    if (!urlToUse) {
      setError('Please enter a server URL');
      return;
    }

    // Persist URL to localStorage (best effort: a storage failure must not block connecting)
    try {
      localStorage.setItem(SERVER_URL_STORAGE_KEY, serverUrl.trim());
    } catch {
      // ignore
    }

    setError(null);
    setIsConnecting(true);
    setServerProgress({ phase: 'validating', downloaded: 0, total: null });

    const abortController = new AbortController();
    abortControllerRef.current = abortController;

    try {
      const result = await connectToServer(
        urlToUse,
        (phase, downloaded, total) => {
          setServerProgress({ phase, downloaded, total });
        },
        abortController.signal
      );

      if (onServerConnect) {
        onServerConnect(result, urlToUse);
      }
    } catch (err) {
      if (isAbortError(err)) {
        // User cancelled
        return;
      }
      console.error('Server connect failed:', err);
      const message = err instanceof Error ? err.message : 'Failed to connect to server';
      if (message.includes('Failed to fetch') || message.includes('NetworkError')) {
        setError('Cannot reach server. Check the URL and ensure the server is running.');
      } else {
        setError(message);
      }
    } finally {
      setIsConnecting(false);
      abortControllerRef.current = null;
    }
  };

  /** Aborts an in-flight server connection, if any. */
  const handleCancelConnect = () => {
    abortControllerRef.current?.abort();
    setIsConnecting(false);
  };

  // null while the total size is unknown (or zero), otherwise a rounded 0-100 percentage.
  const serverProgressPercent = serverProgress.total
    ? Math.round((serverProgress.downloaded / serverProgress.total) * 100)
    : null;

  return (
{/* Background gradient effects */}
{/* Tab Switcher */}
{/* Error Message */} {error && (
{error}
)} {/* ZIP Upload Tab */} {activeTab === 'zip' && ( <>
document.getElementById('file-input')?.click()} > {/* Icon */}
{isDragging ? ( ) : ( )}
{/* Text */}

{isDragging ? 'Drop it here!' : 'Drop your codebase'}

Drag & drop a .zip file to generate a knowledge graph

{/* Hints */}
.zip
)} {/* GitHub URL Tab */} {activeTab === 'github' && (
{/* Icon */}
{/* Text */}

Clone from GitHub

Enter a repository URL to clone directly

{/* Inputs - wrapped in div to prevent form autofill */}
setGithubUrl(e.target.value)} onKeyDown={(e) => e.key === 'Enter' && !isCloning && handleGitClone()} placeholder="https://github.com/owner/repo" disabled={isCloning} autoComplete="off" data-lpignore="true" data-1p-ignore="true" data-form-type="other" className=" w-full px-4 py-3 bg-elevated border border-border-default rounded-xl text-text-primary placeholder-text-muted focus:outline-none focus:border-accent focus:ring-1 focus:ring-accent disabled:opacity-50 disabled:cursor-not-allowed transition-all duration-200 " /> {/* Token input for private repos */}
setGithubToken(e.target.value)} placeholder="GitHub PAT (optional, for private repos)" disabled={isCloning} autoComplete="new-password" data-lpignore="true" data-1p-ignore="true" data-form-type="other" className=" w-full pl-10 pr-10 py-3 bg-elevated border border-border-default rounded-xl text-text-primary placeholder-text-muted focus:outline-none focus:border-accent focus:ring-1 focus:ring-accent disabled:opacity-50 disabled:cursor-not-allowed transition-all duration-200 " />
{/* Progress bar */} {isCloning && (
)} {/* Security note */} {githubToken && (

Token stays in your browser only, never sent to any server

)} {/* Hints */}
{githubToken ? 'Private + Public' : 'Public repos'} Shallow clone
)} {/* Server Tab */} {activeTab === 'server' && (
{/* Icon */}
{/* Text */}

Connect to Server

Load a pre-built knowledge graph from a running GitNexus server

{/* Inputs */}
setServerUrl(e.target.value)} onKeyDown={(e) => e.key === 'Enter' && !isConnecting && handleServerConnect()} placeholder={window.location.origin} disabled={isConnecting} autoComplete="off" data-lpignore="true" data-1p-ignore="true" data-form-type="other" className=" w-full px-4 py-3 bg-elevated border border-border-default rounded-xl text-text-primary placeholder-text-muted focus:outline-none focus:border-accent focus:ring-1 focus:ring-accent disabled:opacity-50 disabled:cursor-not-allowed transition-all duration-200 " />
{isConnecting && ( )}
{/* Progress bar */} {isConnecting && serverProgress.phase === 'downloading' && (
{serverProgress.total && (

{formatBytes(serverProgress.downloaded)} / {formatBytes(serverProgress.total)}

)}
)} {/* Hints */}
Pre-indexed No WASM needed
)}
);
};

================================================
FILE: gitnexus-web/src/components/EmbeddingStatus.tsx
================================================
import { Brain, Loader2, Check, AlertCircle, Zap, FlaskConical } from 'lucide-react';
import { useAppState } from '../hooks/useAppState';
import { useState } from 'react';
import { WebGPUFallbackDialog } from './WebGPUFallbackDialog';

/**
 * Embedding status indicator and trigger button
 * Shows in header when graph is loaded
 */
export const EmbeddingStatus = () => {
  const {
    embeddingStatus,
    embeddingProgress,
    startEmbeddings,
    graph,
    viewMode,
    serverBaseUrl,
    testArrayParams,
  } = useAppState();

  // Explicit type argument: without it the state is inferred as `null` and string updates cannot type-check.
  const [testResult, setTestResult] = useState<string | null>(null);
  const [showFallbackDialog, setShowFallbackDialog] = useState(false);

  // Only show when exploring a loaded graph; hide in backend mode (no WASM DB)
  if (viewMode !== 'exploring' || !graph || serverBaseUrl) return null;

  const nodeCount = graph.nodes.length;

  /** Starts embedding; a "WebGPU not available" failure opens the fallback dialog instead of only logging. */
  const handleStartEmbeddings = async (forceDevice?: 'webgpu' | 'wasm') => {
    try {
      await startEmbeddings(forceDevice);
    } catch (error: unknown) {
      // Check if it's a WebGPU not available error (read defensively: anything can be thrown)
      const { name, message } = (error ?? {}) as { name?: unknown; message?: unknown };
      const isWebGpuUnavailable =
        name === 'WebGPUNotAvailableError' ||
        (typeof message === 'string' && message.includes('WebGPU not available'));

      if (isWebGpuUnavailable) {
        setShowFallbackDialog(true);
      } else {
        console.error('Embedding failed:', error);
      }
    }
  };

  const handleUseCPU = () => {
    setShowFallbackDialog(false);
    // Fire-and-forget: handleStartEmbeddings handles its own errors.
    void handleStartEmbeddings('wasm');
  };

  const handleSkipEmbeddings = () => {
    setShowFallbackDialog(false);
    // Just close - user can try again later if they want
  };

  /** Runs the array-params self test and shows the outcome; a rejected call is reported, not left at "Testing...". */
  const handleTestArrayParams = async () => {
    setTestResult('Testing...');
    try {
      const result = await testArrayParams();
      if (result.success) {
        setTestResult('✅ Array params WORK!');
        console.log('✅ Array params test passed!');
      } else {
        setTestResult(`❌ ${result.error}`);
        console.error('❌ Array params test failed:', result.error);
      }
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      setTestResult(`❌ ${message}`);
      console.error('❌ Array params test failed:', error);
    }
  };

  // WebGPU fallback dialog - rendered independently of state
  const fallbackDialog = ( setShowFallbackDialog(false)}
onUseCPU={handleUseCPU} onSkip={handleSkipEmbeddings} nodeCount={nodeCount} /> ); // Idle state - show button to start if (embeddingStatus === 'idle') { return ( <>
{/* Test button (dev only) */} {import.meta.env.DEV && ( )}
{fallbackDialog} ); } // Loading model if (embeddingStatus === 'loading') { const downloadPercent = embeddingProgress?.modelDownloadPercent ?? 0; return ( <>
Loading AI model...
{fallbackDialog} ); } // Embedding in progress if (embeddingStatus === 'embedding') { const processed = embeddingProgress?.nodesProcessed ?? 0; const total = embeddingProgress?.totalNodes ?? 0; const percent = embeddingProgress?.percent ?? 0; return (
Embedding {processed}/{total} nodes
); } // Indexing if (embeddingStatus === 'indexing') { return (
Creating vector index...
); } // Ready if (embeddingStatus === 'ready') { return (
Semantic Ready
);
  }

  // Error
  if (embeddingStatus === 'error') {
    return ( <> {fallbackDialog} );
  }

  return null;
};

================================================
FILE: gitnexus-web/src/components/FileTreePanel.tsx
================================================
import { useState, useMemo, useCallback, useEffect } from 'react';
import {
  ChevronRight,
  ChevronDown,
  Folder,
  FolderOpen,
  FileCode,
  Search,
  Filter,
  PanelLeftClose,
  PanelLeft,
  Box,
  Braces,
  Variable,
  Hash,
  Target,
} from 'lucide-react';
import { useAppState } from '../hooks/useAppState';
import { FILTERABLE_LABELS, NODE_COLORS, ALL_EDGE_TYPES, EDGE_INFO, type EdgeType } from '../lib/constants';
import { GraphNode, NodeLabel } from '../core/graph/types';

// Tree node structure
interface TreeNode {
  id: string;
  name: string;
  type: 'folder' | 'file';
  path: string;
  children: TreeNode[];
  graphNode?: GraphNode;
}

/**
 * Build tree from graph nodes.
 *
 * Only `Folder` and `File` nodes are used. Every path segment becomes a tree
 * node; segments with no graph node of their own are created as folders whose
 * id is their path.
 */
const buildFileTree = (nodes: GraphNode[]): TreeNode[] => {
  const root: TreeNode[] = [];
  const pathMap = new Map<string, TreeNode>();

  // Filter to only folders and files
  const fileNodes = nodes.filter(n => n.label === 'Folder' || n.label === 'File');

  // Sort by path to ensure parents come before children
  fileNodes.sort((a, b) => a.properties.filePath.localeCompare(b.properties.filePath));

  fileNodes.forEach(node => {
    const parts = node.properties.filePath.split('/').filter(Boolean);
    let currentPath = '';
    let currentLevel = root;

    parts.forEach((part, index) => {
      currentPath = currentPath ? `${currentPath}/${part}` : part;

      let existing = pathMap.get(currentPath);
      if (!existing) {
        const isLastPart = index === parts.length - 1;
        const isFile = isLastPart && node.label === 'File';

        existing = {
          id: isLastPart ? node.id : currentPath,
          name: part,
          type: isFile ? 'file' : 'folder',
          path: currentPath,
          children: [],
          graphNode: isLastPart ? node : undefined,
        };
        pathMap.set(currentPath, existing);
        currentLevel.push(existing);
      }

      // Descend: the next segment is attached under this node.
      currentLevel = existing.children;
    });
  });

  return root;
};

/** True when `node` or any of its descendants has a name containing `lowerQuery` (already lower-cased). */
const subtreeMatchesQuery = (node: TreeNode, lowerQuery: string): boolean =>
  node.name.toLowerCase().includes(lowerQuery) ||
  node.children.some(child => subtreeMatchesQuery(child, lowerQuery));

// Tree item component
interface TreeItemProps {
  node: TreeNode;
  depth: number;
  searchQuery: string;
  onNodeClick: (node: TreeNode) => void;
  expandedPaths: Set<string>;
  toggleExpanded: (path: string) => void;
  selectedPath: string | null;
}

const TreeItem = ({
  node,
  depth,
  searchQuery,
  onNodeClick,
  expandedPaths,
  toggleExpanded,
  selectedPath,
}: TreeItemProps) => {
  const isExpanded = expandedPaths.has(node.path);
  const isSelected = selectedPath === node.path;
  const hasChildren = node.children.length > 0;

  // Filter children based on search: keep a child when it, or anything at any
  // depth below it, matches, so deeply nested hits are not hidden by their ancestors.
  const filteredChildren = useMemo(() => {
    if (!searchQuery) return node.children;
    const lowerQuery = searchQuery.toLowerCase();
    return node.children.filter(child => subtreeMatchesQuery(child, lowerQuery));
  }, [node.children, searchQuery]);

  // Check if this node matches search
  const matchesSearch = searchQuery && node.name.toLowerCase().includes(searchQuery.toLowerCase());

  // Folders toggle open/closed; every click is also reported to the parent.
  const handleClick = () => {
    if (hasChildren) {
      toggleExpanded(node.path);
    }
    onNodeClick(node);
  };

  return (
{/* Children */} {isExpanded && filteredChildren.length > 0 && (
{filteredChildren.map(child => ( ))}
)}
);
};

// Icon for node types
const getNodeTypeIcon = (label: NodeLabel) => {
  switch (label) {
    case 'Folder':
      return Folder;
    case 'File':
      return FileCode;
    case 'Class':
      return Box;
    case 'Function':
      return Braces;
    case 'Method':
      return Braces;
    case 'Interface':
      return Hash;
    case 'Import':
      return FileCode;
    default:
      return Variable;
  }
};

interface FileTreePanelProps {
  /** Asks the graph view to focus the node with the given id. */
  onFocusNode: (nodeId: string) => void;
}

/**
 * Left-hand panel with a "files" tab (searchable file tree) and a "filters"
 * tab (node/edge visibility and depth filter).
 */
export const FileTreePanel = ({ onFocusNode }: FileTreePanelProps) => {
  const {
    graph,
    visibleLabels,
    toggleLabelVisibility,
    visibleEdgeTypes,
    toggleEdgeVisibility,
    selectedNode,
    setSelectedNode,
    openCodePanel,
    depthFilter,
    setDepthFilter,
  } = useAppState();

  const [isCollapsed, setIsCollapsed] = useState(false);
  const [searchQuery, setSearchQuery] = useState('');
  // Explicit type argument is required: without it this is not a generic call at all.
  const [expandedPaths, setExpandedPaths] = useState<Set<string>>(new Set());
  const [activeTab, setActiveTab] = useState<'files' | 'filters'>('files');

  // Build file tree from graph
  const fileTree = useMemo(() => {
    if (!graph) return [];
    return buildFileTree(graph.nodes);
  }, [graph]);

  // Auto-expand first level on initial load
  useEffect(() => {
    if (fileTree.length > 0 && expandedPaths.size === 0) {
      const firstLevel = new Set(fileTree.map(n => n.path));
      setExpandedPaths(firstLevel);
    }
  }, [fileTree.length]); // Only run when tree first loads

  // Auto-expand to selected file when selectedNode changes (e.g., from graph click)
  useEffect(() => {
    const path = selectedNode?.properties?.filePath;
    if (!path) return;

    // Expand all parent folders leading to this file
    const parts = path.split('/').filter(Boolean);
    const pathsToExpand: string[] = [];
    let currentPath = '';

    // Build all parent paths (the last segment itself is never expanded)
    for (let i = 0; i < parts.length - 1; i++) {
      currentPath = currentPath ? `${currentPath}/${parts[i]}` : parts[i];
      pathsToExpand.push(currentPath);
    }

    if (pathsToExpand.length > 0) {
      setExpandedPaths(prev => {
        // Returning the same Set when nothing is new lets React skip the re-render.
        if (pathsToExpand.every(p => prev.has(p))) return prev;
        const next = new Set(prev);
        pathsToExpand.forEach(p => next.add(p));
        return next;
      });
    }
  }, [selectedNode?.id]); // Trigger when selected node changes

  const toggleExpanded = useCallback((path: string) => {
    setExpandedPaths(prev => {
      const next = new Set(prev);
      if (next.has(path)) {
        next.delete(path);
      } else {
        next.add(path);
      }
      return next;
    });
  }, []);

  const handleNodeClick = useCallback((treeNode: TreeNode) => {
    // Tree entries without a backing graph node cannot be selected.
    if (treeNode.graphNode) {
      // Only focus if selecting a different node
      const isSameNode = selectedNode?.id === treeNode.graphNode.id;
      setSelectedNode(treeNode.graphNode);
      openCodePanel();
      if (!isSameNode) {
        onFocusNode(treeNode.graphNode.id);
      }
    }
  }, [setSelectedNode, openCodePanel, onFocusNode, selectedNode]);

  const selectedPath = selectedNode?.properties?.filePath || null;

  if (isCollapsed) {
    return (
); } return (
{/* Header */}
{activeTab === 'files' && ( <> {/* Search */}
setSearchQuery(e.target.value)} className="w-full pl-8 pr-3 py-1.5 bg-elevated border border-border-subtle rounded text-xs text-text-primary placeholder:text-text-muted focus:outline-none focus:border-accent" />
{/* File tree */}
{fileTree.length === 0 ? (
No files loaded
) : ( fileTree.map(node => ( )) )}
)} {activeTab === 'filters' && (

Node Types

Toggle visibility of node types in the graph

{FILTERABLE_LABELS.map((label) => { const Icon = getNodeTypeIcon(label); const isVisible = visibleLabels.includes(label); return ( ); })}
{/* Edge Type Toggles */}

Edge Types

Toggle visibility of relationship types

{ALL_EDGE_TYPES.map((edgeType) => { const info = EDGE_INFO[edgeType]; const isVisible = visibleEdgeTypes.includes(edgeType); return ( ); })}
{/* Depth Filter */}

Focus Depth

Show nodes within N hops of selection

{[ { value: null, label: 'All' }, { value: 1, label: '1 hop' }, { value: 2, label: '2 hops' }, { value: 3, label: '3 hops' }, { value: 5, label: '5 hops' }, ].map(({ value, label }) => ( ))}
{depthFilter !== null && !selectedNode && (

Select a node to apply depth filter

)}
{/* Legend */}

Color Legend

{(['Folder', 'File', 'Class', 'Function', 'Interface', 'Method'] as NodeLabel[]).map(label => (
{label}
))}
)} {/* Stats footer */} {graph && (
{graph.nodes.length} nodes {graph.relationships.length} edges
)}
);
};

================================================
FILE: gitnexus-web/src/components/GraphCanvas.tsx
================================================
import { useEffect, useCallback, useMemo, useState, forwardRef, useImperativeHandle } from 'react';
import { ZoomIn, ZoomOut, Maximize2, Focus, RotateCcw, Play, Pause, Lightbulb, LightbulbOff } from 'lucide-react';
import { useSigma } from '../hooks/useSigma';
import { useAppState } from '../hooks/useAppState';
import { knowledgeGraphToGraphology, filterGraphByDepth, SigmaNodeAttributes, SigmaEdgeAttributes } from '../lib/graph-adapter';
import { QueryFAB } from './QueryFAB';
import Graph from 'graphology';

/** Imperative API exposed to the parent through the forwarded ref. */
export interface GraphCanvasHandle {
  focusNode: (nodeId: string) => void;
}

/**
 * Graph view: feeds the knowledge graph into the Sigma hook, keeps selection
 * and filters in sync with app state, and exposes `focusNode` via ref.
 */
export const GraphCanvas = forwardRef<GraphCanvasHandle>((_, ref) => {
  const {
    graph,
    setSelectedNode,
    selectedNode: appSelectedNode,
    visibleLabels,
    visibleEdgeTypes,
    openCodePanel,
    depthFilter,
    highlightedNodeIds,
    setHighlightedNodeIds,
    aiCitationHighlightedNodeIds,
    aiToolHighlightedNodeIds,
    blastRadiusNodeIds,
    isAIHighlightsEnabled,
    toggleAIHighlights,
    animatedNodes,
  } = useAppState();
  const [hoveredNodeName, setHoveredNodeName] = useState<string | null>(null);

  // Manual highlights, plus AI citation/tool highlights when the AI toggle is on.
  const effectiveHighlightedNodeIds = useMemo(() => {
    if (!isAIHighlightsEnabled) return highlightedNodeIds;
    const next = new Set(highlightedNodeIds);
    for (const id of aiCitationHighlightedNodeIds) next.add(id);
    for (const id of aiToolHighlightedNodeIds) next.add(id);
    // Note: blast radius nodes are handled separately with red color
    return next;
  }, [highlightedNodeIds, aiCitationHighlightedNodeIds, aiToolHighlightedNodeIds, isAIHighlightsEnabled]);

  // Blast radius nodes (only when AI highlights enabled)
  const effectiveBlastRadiusNodeIds = useMemo(() => {
    if (!isAIHighlightsEnabled) return new Set<string>();
    return blastRadiusNodeIds;
  }, [blastRadiusNodeIds, isAIHighlightsEnabled]);

  // Animated nodes (only when AI highlights enabled)
  const effectiveAnimatedNodes = useMemo(() => {
    if (!isAIHighlightsEnabled) return new Map();
    return animatedNodes;
  }, [animatedNodes, isAIHighlightsEnabled]);

  const handleNodeClick = useCallback((nodeId: string) => {
    if (!graph) return;
    const node = graph.nodes.find(n => n.id === nodeId);
    if (node) {
      setSelectedNode(node);
      openCodePanel();
    }
  }, [graph, setSelectedNode, openCodePanel]);

  const handleNodeHover = useCallback((nodeId: string | null) => {
    if (!nodeId || !graph) {
      setHoveredNodeName(null);
      return;
    }
    const node = graph.nodes.find(n => n.id === nodeId);
    if (node) {
      setHoveredNodeName(node.properties.name);
    }
  }, [graph]);

  const handleStageClick = useCallback(() => {
    setSelectedNode(null);
  }, [setSelectedNode]);

  const {
    containerRef,
    sigmaRef,
    setGraph: setSigmaGraph,
    zoomIn,
    zoomOut,
    resetZoom,
    focusNode,
    isLayoutRunning,
    startLayout,
    stopLayout,
    selectedNode: sigmaSelectedNode,
    setSelectedNode: setSigmaSelectedNode,
  } = useSigma({
    onNodeClick: handleNodeClick,
    onNodeHover: handleNodeHover,
    onStageClick: handleStageClick,
    highlightedNodeIds: effectiveHighlightedNodeIds,
    blastRadiusNodeIds: effectiveBlastRadiusNodeIds,
    animatedNodes: effectiveAnimatedNodes,
    visibleEdgeTypes,
  });

  // Expose focusNode to parent via ref
  useImperativeHandle(ref, () => ({
    focusNode: (nodeId: string) => {
      // Also update app state so the selection syncs properly
      if (graph) {
        const node = graph.nodes.find(n => n.id === nodeId);
        if (node) {
          setSelectedNode(node);
          openCodePanel();
        }
      }
      focusNode(nodeId);
    }
  }), [focusNode, graph, setSelectedNode, openCodePanel]);

  // Update Sigma graph when KnowledgeGraph changes
  useEffect(() => {
    if (!graph) return;

    // Build communityMemberships map from MEMBER_OF relationships
    // MEMBER_OF edges: nodeId -> communityId (stored as targetId)
    const communityMemberships = new Map<string, number>();

    // Index community node ids once so each relationship is a Set lookup
    // instead of a scan over every node.
    const communityNodeIds = new Set<string>();
    for (const node of graph.nodes) {
      if (node.label === 'Community') communityNodeIds.add(node.id);
    }

    for (const rel of graph.relationships) {
      if (rel.type !== 'MEMBER_OF' || !communityNodeIds.has(rel.targetId)) continue;
      // Extract community index from id (e.g., "comm_5" -> 5); unparsable ids fall back to 0
      const communityIdx = parseInt(rel.targetId.replace('comm_', ''), 10) || 0;
      communityMemberships.set(rel.sourceId, communityIdx);
    }

    const sigmaGraph = knowledgeGraphToGraphology(graph, communityMemberships);
    setSigmaGraph(sigmaGraph);
  }, [graph, setSigmaGraph]);

  // Update node visibility when filters change
  useEffect(() => {
    const sigma = sigmaRef.current;
    if (!sigma) return;

    const sigmaGraph = sigma.getGraph() as Graph<SigmaNodeAttributes, SigmaEdgeAttributes>;
    if (sigmaGraph.order === 0) return; // Don't filter empty graph

    filterGraphByDepth(sigmaGraph, appSelectedNode?.id || null, depthFilter, visibleLabels);
    sigma.refresh();
  }, [visibleLabels, depthFilter, appSelectedNode, sigmaRef]);

  // Sync app selected node with sigma
  useEffect(() => {
    if (appSelectedNode) {
      setSigmaSelectedNode(appSelectedNode.id);
    } else {
      setSigmaSelectedNode(null);
    }
  }, [appSelectedNode, setSigmaSelectedNode]);

  // Focus on selected node
  const handleFocusSelected = useCallback(() => {
    if (appSelectedNode) {
      focusNode(appSelectedNode.id);
    }
  }, [appSelectedNode, focusNode]);

  // Clear selection
  const handleClearSelection = useCallback(() => {
    setSelectedNode(null);
    setSigmaSelectedNode(null);
    resetZoom();
  }, [setSelectedNode, setSigmaSelectedNode, resetZoom]);

  return (
{/* Background gradient */}
{/* Sigma container */}
{/* Hovered node tooltip - only show when NOT selected */} {hoveredNodeName && !sigmaSelectedNode && (
{hoveredNodeName}
)} {/* Selection info bar */} {sigmaSelectedNode && appSelectedNode && (
{appSelectedNode.properties.name} ({appSelectedNode.label})
)} {/* Graph Controls - Bottom Right */}
{/* Divider */}
{/* Focus on selected */} {appSelectedNode && ( )} {/* Clear selection */} {sigmaSelectedNode && ( )} {/* Divider */}
{/* Layout control */}
{/* Layout running indicator */} {isLayoutRunning && (
Layout optimizing...
)} {/* Query FAB */} {/* AI Highlights toggle - Top Right */}
);
});

GraphCanvas.displayName = 'GraphCanvas';

================================================
FILE: gitnexus-web/src/components/Header.tsx
================================================
import { Search, Settings, HelpCircle, Sparkles, Github, Star, ChevronDown } from 'lucide-react';
import { useAppState } from '../hooks/useAppState';
import type { RepoSummary } from '../services/server-connection';
import { useState, useMemo, useRef, useEffect, useCallback } from 'react';
import { GraphNode } from '../core/graph/types';
import { EmbeddingStatus } from './EmbeddingStatus';

// Color mapping for node types in search results
const NODE_TYPE_COLORS: Record<string, string> = {
  Folder: '#6366f1',
  File: '#3b82f6',
  Function: '#10b981',
  Class: '#f59e0b',
  Method: '#14b8a6',
  Interface: '#ec4899',
  Variable: '#64748b',
  Import: '#475569',
  Type: '#a78bfa',
};

/** Maximum number of entries shown in the search dropdown. */
const MAX_SEARCH_RESULTS = 10;

interface HeaderProps {
  /** Focuses (and selects) the node with the given id in the graph view. */
  onFocusNode?: (nodeId: string) => void;
  /** Repositories offered in the repo dropdown. */
  availableRepos?: RepoSummary[];
  /** Called with the chosen repository name. */
  onSwitchRepo?: (repoName: string) => void;
}

/**
 * Top bar: project/repo selector, node search (Cmd/Ctrl+K), stats and
 * panel buttons.
 */
export const Header = ({ onFocusNode, availableRepos = [], onSwitchRepo }: HeaderProps) => {
  const {
    projectName,
    graph,
    openChatPanel,
    isRightPanelOpen,
    rightPanelTab,
    setSettingsPanelOpen,
  } = useAppState();
  const [isRepoDropdownOpen, setIsRepoDropdownOpen] = useState(false);
  // Element type arguments are required: a bare useRef(null) types `.current` as `null`.
  const repoDropdownRef = useRef<HTMLDivElement>(null);
  const [searchQuery, setSearchQuery] = useState('');
  const [isSearchOpen, setIsSearchOpen] = useState(false);
  const [selectedIndex, setSelectedIndex] = useState(0);
  const searchRef = useRef<HTMLDivElement>(null);
  const inputRef = useRef<HTMLInputElement>(null);

  const nodeCount = graph?.nodes.length ?? 0;
  const edgeCount = graph?.relationships.length ?? 0;

  // Search results - filter nodes by name (case-insensitive substring)
  const searchResults = useMemo(() => {
    if (!graph || !searchQuery.trim()) return [];
    const query = searchQuery.toLowerCase();

    // Stop scanning once the dropdown is full instead of filtering every node.
    const matches: GraphNode[] = [];
    for (const node of graph.nodes) {
      if (node.properties.name.toLowerCase().includes(query)) {
        matches.push(node);
        if (matches.length === MAX_SEARCH_RESULTS) break;
      }
    }
    return matches;
  }, [graph, searchQuery]);

  // Handle clicking outside to close dropdowns
  useEffect(() => {
    const handleClickOutside = (e: MouseEvent) => {
      if (searchRef.current && !searchRef.current.contains(e.target as Node)) {
        setIsSearchOpen(false);
      }
      if (repoDropdownRef.current && !repoDropdownRef.current.contains(e.target as Node)) {
        setIsRepoDropdownOpen(false);
      }
    };
    document.addEventListener('mousedown', handleClickOutside);
    return () => document.removeEventListener('mousedown', handleClickOutside);
  }, []);

  // Keyboard shortcut (Cmd+K / Ctrl+K)
  useEffect(() => {
    const handleKeyDown = (e: KeyboardEvent) => {
      if ((e.metaKey || e.ctrlKey) && e.key === 'k') {
        e.preventDefault();
        inputRef.current?.focus();
        setIsSearchOpen(true);
      }
      if (e.key === 'Escape') {
        setIsSearchOpen(false);
        inputRef.current?.blur();
      }
    };
    document.addEventListener('keydown', handleKeyDown);
    return () => document.removeEventListener('keydown', handleKeyDown);
  }, []);

  /** Focuses the chosen node and resets the search box. */
  const handleSelectNode = (node: GraphNode) => {
    // onFocusNode handles both camera focus AND selection in useSigma
    onFocusNode?.(node.id);
    setSearchQuery('');
    setIsSearchOpen(false);
    setSelectedIndex(0);
  };

  // Handle keyboard navigation in results
  const handleKeyDown = (e: React.KeyboardEvent) => {
    if (!isSearchOpen || searchResults.length === 0) return;

    if (e.key === 'ArrowDown') {
      e.preventDefault();
      setSelectedIndex(i => Math.min(i + 1, searchResults.length - 1));
    } else if (e.key === 'ArrowUp') {
      e.preventDefault();
      setSelectedIndex(i => Math.max(i - 1, 0));
    } else if (e.key === 'Enter') {
      e.preventDefault();
      const selected = searchResults[selectedIndex];
      if (selected) {
        handleSelectNode(selected);
      }
    }
  };

  return (
{/* Left section */}
{/* Logo */}
GitNexus
{/* Project badge / Repo selector dropdown */} {projectName && (
{/* Repo dropdown */} {isRepoDropdownOpen && availableRepos.length >= 2 && (
{availableRepos.map((repo) => { const isCurrent = repo.name === projectName; return ( ); })}
)}
)}
{/* Center - Search */}
{ setSearchQuery(e.target.value); setIsSearchOpen(true); setSelectedIndex(0); }} onFocus={() => setIsSearchOpen(true)} onKeyDown={handleKeyDown} className="flex-1 bg-transparent border-none outline-none text-sm text-text-primary placeholder:text-text-muted" /> ⌘K
{/* Search Results Dropdown */} {isSearchOpen && searchQuery.trim() && (
{searchResults.length === 0 ? (
No nodes found for "{searchQuery}"
) : (
{searchResults.map((node, index) => ( ))}
)}
)}
{/* Right section */}
{/* GitHub Star Button */} Star if cool {/* Stats */} {graph && (
{nodeCount} nodes {edgeCount} edges
)} {/* Embedding Status */} {/* Icon buttons */} {/* AI Button */}
);
};

================================================
FILE: gitnexus-web/src/components/LoadingOverlay.tsx
================================================
import { PipelineProgress } from '../types/pipeline';

/** Props for the loading overlay. */
interface LoadingOverlayProps {
  // Progress snapshot to display; the markup reads its message, detail, stats and percent fields.
  progress: PipelineProgress;
}

/**
 * Displays pipeline progress: the status message, an optional detail line,
 * optional file/node counters, and the completion percentage.
 * Purely presentational: it holds no state of its own.
 */
export const LoadingOverlay = ({ progress }: LoadingOverlayProps) => {
  return (
{/* Background gradient effects */}
{/* Pulsing orb */}
{/* Progress bar */}
{/* Status text */}

{progress.message} |

{progress.detail && (

{progress.detail}

)}
{/* Stats */} {progress.stats && (
{progress.stats.filesProcessed} / {progress.stats.totalFiles} files
{progress.stats.nodesCreated} nodes
)} {/* Percent */}

{progress.percent}%

); }; ================================================ FILE: gitnexus-web/src/components/MarkdownRenderer.tsx ================================================ import React, { useState } from 'react'; import ReactMarkdown from 'react-markdown'; import remarkGfm from 'remark-gfm'; import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter'; import { vscDarkPlus } from 'react-syntax-highlighter/dist/esm/styles/prism'; import { MermaidDiagram } from './MermaidDiagram'; import { ToolCallCard } from './ToolCallCard'; import { Copy, Check } from 'lucide-react'; // Custom syntax theme const customTheme = { ...vscDarkPlus, 'pre[class*="language-"]': { ...vscDarkPlus['pre[class*="language-"]'], background: '#0a0a10', margin: 0, padding: '16px 0', fontSize: '13px', lineHeight: '1.6', }, 'code[class*="language-"]': { ...vscDarkPlus['code[class*="language-"]'], background: 'transparent', fontFamily: '"JetBrains Mono", "Fira Code", monospace', }, }; interface MarkdownRendererProps { content: string; onLinkClick?: (href: string) => void; toolCalls?: any[]; // Keep flexible for now showCopyButton?: boolean; } export const MarkdownRenderer: React.FC = ({ content, onLinkClick, toolCalls, showCopyButton = false }) => { const [copied, setCopied] = useState(false); const handleCopy = async () => { try { await navigator.clipboard.writeText(content); setCopied(true); setTimeout(() => setCopied(false), 2000); } catch (err) { console.error('Failed to copy:', err); } }; // Helper to format text for display (convert [[links]] to markdown links) const formatMarkdownForDisplay = (md: string) => { // Avoid rewriting inside fenced code blocks. 
const parts = md.split('```');
// Splitting on the fence marker leaves text outside fences at even indexes and
// fence bodies at odd indexes, so stepping by 2 rewrites only non-code text.
for (let i = 0; i < parts.length; i += 2) {
  // Pattern 1: File grounding - [[file.ext]], optionally with a :line or :line-line suffix
  parts[i] = parts[i].replace(
    /\[\[([a-zA-Z0-9_\-./\\]+\.[a-zA-Z0-9]+(?::\d+(?:[-–]\d+)?)?)\]\]/g,
    (_m, inner: string) => {
      const trimmed = inner.trim();
      const href = `code-ref:${encodeURIComponent(trimmed)}`;
      return `[${trimmed}](${href})`;
    }
  );
  // Pattern 2: Node grounding - [[Type:Name]] (an optional "graph:" prefix is dropped)
  parts[i] = parts[i].replace(
    /\[\[(?:graph:)?(Class|Function|Method|Interface|File|Folder|Variable|Enum|Type|CodeElement):([^\]]+)\]\]/g,
    (_m, nodeType: string, nodeName: string) => {
      const trimmed = `${nodeType}:${nodeName.trim()}`;
      const href = `node-ref:${encodeURIComponent(trimmed)}`;
      return `[${trimmed}](${href})`;
    }
  );
}
return parts.join('```');
};

/**
 * Click handler for rendered links: grounding links (code-ref: / node-ref:) are
 * intercepted and forwarded to onLinkClick; every other link keeps its default behavior.
 */
const handleLinkClick = (e: React.MouseEvent, href: string) => {
  if (href.startsWith('code-ref:') || href.startsWith('node-ref:')) {
    e.preventDefault();
    onLinkClick?.(href);
  }
  // External links open in new tab (default behavior)
};

// Re-run the [[...]] rewrite only when the markdown text itself changes.
const formattedContent = React.useMemo(() => formatMarkdownForDisplay(content), [content]);

const markdownComponents = React.useMemo(() => ({
  a: ({ href, children, ...props }: any) => {
    const hrefStr = href || '';
    // Grounding links (Code refs & Node refs)
    if (hrefStr.startsWith('code-ref:') || hrefStr.startsWith('node-ref:')) {
      const isNodeRef = hrefStr.startsWith('node-ref:');
      // 'code-ref:' and 'node-ref:' have the same length, so one offset strips either prefix.
      const encodedRef = hrefStr.slice('code-ref:'.length);
      // An href written directly in the markdown (rather than produced by
      // formatMarkdownForDisplay) may hold a malformed %-escape, which makes
      // decodeURIComponent throw URIError; fall back to the raw text so one bad
      // link cannot abort rendering of the whole message.
      let inner = encodedRef;
      try {
        inner = decodeURIComponent(encodedRef);
      } catch {
        // keep the undecoded reference
      }
      // Styles
      const baseParams = "code-ref-btn inline-flex items-center px-2 py-0.5 rounded-md font-mono text-[12px] !no-underline hover:!no-underline transition-colors";
      const colorParams = isNodeRef ?
"border border-amber-300/55 bg-amber-400/10 !text-amber-200 visited:!text-amber-200 hover:bg-amber-400/15 hover:border-amber-200/70" : "border border-cyan-300/55 bg-cyan-400/10 !text-cyan-200 visited:!text-cyan-200 hover:bg-cyan-400/15 hover:border-cyan-200/70"; return ( handleLinkClick(e, hrefStr)} className={`${baseParams} ${colorParams}`} title={isNodeRef ? `View ${inner} in Code panel` : `Open in Code panel • ${inner}`} {...props} > {children} ); } // External links return ( {children} ); }, code: ({ className, children, ...props }: any) => { const match = /language-(\w+)/.exec(className || ''); const isInline = !className && !match; const codeContent = String(children).replace(/\n$/, ''); if (isInline) { return {children}; } const language = match ? match[1] : 'text'; // Render Mermaid diagrams if (language === 'mermaid') { return ; } return ( {codeContent} ); }, pre: ({ children }: any) => <>{children}, }), [onLinkClick]); // Removed handleLinkClick dependency as it is defined inside component but depends on onLinkClick return (
{ if (url.startsWith('code-ref:') || url.startsWith('node-ref:')) return url; // Default behavior for http/https/etc return url; }} components={markdownComponents} > {formattedContent} {/* Copy Button */} {showCopyButton && (
)} {/* Tool Call Cards appended at the bottom if provided */} {toolCalls && toolCalls.length > 0 && (
{toolCalls.map(tc => ( ))}
)}
); }; ================================================ FILE: gitnexus-web/src/components/MermaidDiagram.tsx ================================================ import { useEffect, useRef, useState } from 'react'; import mermaid from 'mermaid'; import { AlertTriangle, Maximize2 } from 'lucide-react'; import { ProcessFlowModal } from './ProcessFlowModal'; import type { ProcessData } from '../lib/mermaid-generator'; // Initialize mermaid with cyan theme matching ProcessFlowModal mermaid.initialize({ startOnLoad: false, maxTextSize: 900000, theme: 'base', themeVariables: { primaryColor: '#1e293b', // node bg - slate primaryTextColor: '#f1f5f9', primaryBorderColor: '#22d3ee', // cyan lineColor: '#94a3b8', secondaryColor: '#1e293b', tertiaryColor: '#0f172a', mainBkg: '#1e293b', nodeBorder: '#22d3ee', // cyan clusterBkg: '#1e293b', clusterBorder: '#475569', titleColor: '#f1f5f9', edgeLabelBackground: '#0f172a', }, flowchart: { curve: 'basis', padding: 15, nodeSpacing: 50, rankSpacing: 50, htmlLabels: true, }, sequence: { actorMargin: 50, boxMargin: 10, boxTextMargin: 5, noteMargin: 10, messageMargin: 35, }, fontFamily: '"JetBrains Mono", "Fira Code", monospace', fontSize: 13, suppressErrorRendering: true, }); // Override the default error handler to prevent it from logging to UI mermaid.parseError = (_err) => { // Silent catch }; interface MermaidDiagramProps { code: string; } export const MermaidDiagram = ({ code }: MermaidDiagramProps) => { const containerRef = useRef(null); const [error, setError] = useState(null); const [showModal, setShowModal] = useState(false); const [svg, setSvg] = useState(''); useEffect(() => { const renderDiagram = async () => { if (!containerRef.current) return; try { // Generate unique ID for this diagram const id = `mermaid-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; // Render the diagram const { svg: renderedSvg } = await mermaid.render(id, code.trim()); setSvg(renderedSvg); setError(null); } catch (err) { // Silent catch for 
streaming: // If render fails (common during partial streaming), we: // 1. Log to console for debugging // 2. Do NOT set error state (avoids flashing red box) // 3. Do NOT clear existing SVG (keeps last valid state visible) console.debug('Mermaid render skipped (incomplete):', err); } }; // Debounce rendering to prevent "jerking" during high-speed streaming const timeoutId = setTimeout(() => { renderDiagram(); }, 300); return () => clearTimeout(timeoutId); }, [code]); // Create a pseudo ProcessData for the modal (with custom rawMermaid property) const processData: any = showModal ? { id: 'ai-generated', label: 'AI Generated Diagram', processType: 'intra_community', steps: [], // Empty - we'll render raw mermaid edges: [], clusters: [], rawMermaid: code, // Pass raw mermaid code } : null; if (error) { return (
Diagram Error
{error}
Show source
            {code}
          
); } return ( <>
{/* Header */}
Diagram
{/* Diagram container */}
{/* Use ProcessFlowModal for expansion */} {showModal && processData && ( setShowModal(false)} /> )} ); }; ================================================ FILE: gitnexus-web/src/components/ProcessFlowModal.tsx ================================================ /** * Process Flow Modal * * Displays a Mermaid flowchart for a process in a centered modal popup. */ import { useEffect, useRef, useCallback, useState } from 'react'; import { X, GitBranch, Copy, Focus, Layers, ZoomIn, ZoomOut } from 'lucide-react'; import mermaid from 'mermaid'; import { ProcessData, generateProcessMermaid } from '../lib/mermaid-generator'; interface ProcessFlowModalProps { process: ProcessData | null; onClose: () => void; onFocusInGraph?: (nodeIds: string[], processId: string) => void; isFullScreen?: boolean; } // Initialize mermaid with cyan/purple theme matching GitNexus // Initialize mermaid with cyan/purple theme matching GitNexus mermaid.initialize({ startOnLoad: false, suppressErrorRendering: true, // Try to suppress if supported maxTextSize: 900000, // Increase from default 50000 to handle large combined diagrams theme: 'base', themeVariables: { primaryColor: '#1e293b', // node bg primaryTextColor: '#f1f5f9', primaryBorderColor: '#22d3ee', lineColor: '#94a3b8', secondaryColor: '#1e293b', tertiaryColor: '#0f172a', mainBkg: '#1e293b', // background nodeBorder: '#22d3ee', clusterBkg: '#1e293b', clusterBorder: '#475569', titleColor: '#f1f5f9', edgeLabelBackground: '#0f172a', }, flowchart: { curve: 'basis', padding: 50, nodeSpacing: 120, rankSpacing: 140, htmlLabels: true, }, }); // Suppress distinct syntax error overlay mermaid.parseError = (err) => { // Suppress visual error - we handle errors in the render try/catch console.debug('Mermaid parse error (suppressed):', err); }; export const ProcessFlowModal = ({ process, onClose, onFocusInGraph, isFullScreen = false }: ProcessFlowModalProps) => { const containerRef = useRef(null); const diagramRef = useRef(null); const scrollContainerRef 
= useRef(null);

// Full process map gets higher default zoom (667%) and max zoom (3000%)
const defaultZoom = isFullScreen ? 6.67 : 1;
const maxZoom = isFullScreen ? 30 : 10;

// View state: zoom factor, pan offset, whether a drag is in progress, and the
// drag anchor (pointer position minus the pan offset at the moment the drag began).
const [zoom, setZoom] = useState(defaultZoom);
const [pan, setPan] = useState({ x: 0, y: 0 });
const [isPanning, setIsPanning] = useState(false);
const [panStart, setPanStart] = useState({ x: 0, y: 0 });

// Reset zoom when switching between full screen and regular mode
useEffect(() => {
  setZoom(defaultZoom);
  setPan({ x: 0, y: 0 });
}, [isFullScreen, defaultZoom]);

// Handle zoom with scroll wheel
useEffect(() => {
  const handleWheel = (e: WheelEvent) => {
    // preventDefault is called here, so the listener below is registered with passive: false.
    e.preventDefault();
    // Negative factor: scrolling up (deltaY < 0) increases the zoom.
    const delta = e.deltaY * -0.001;
    // Clamp to [0.1, maxZoom].
    setZoom(prev => Math.min(Math.max(0.1, prev + delta), maxZoom));
  };
  const container = scrollContainerRef.current;
  if (container) {
    container.addEventListener('wheel', handleWheel, { passive: false });
    return () => container.removeEventListener('wheel', handleWheel);
  }
}, [process, maxZoom]); // Re-attach when process or maxZoom changes

// Handle keyboard zoom
useEffect(() => {
  // Registered on window: '+' / '=' zoom in, '-' / '_' zoom out, in steps of 0.2,
  // clamped to [0.1, maxZoom].
  const handleKeyDown = (e: KeyboardEvent) => {
    if (e.key === '+' || e.key === '=') {
      setZoom(prev => Math.min(prev + 0.2, maxZoom));
    } else if (e.key === '-' || e.key === '_') {
      setZoom(prev => Math.max(prev - 0.2, 0.1));
    }
  };
  window.addEventListener('keydown', handleKeyDown);
  return () => window.removeEventListener('keydown', handleKeyDown);
}, [maxZoom]);

// Zoom in/out handlers (steps of 0.25, same clamping as above)
const handleZoomIn = useCallback(() => {
  setZoom(prev => Math.min(prev + 0.25, maxZoom));
}, [maxZoom]);

const handleZoomOut = useCallback(() => {
  setZoom(prev => Math.max(prev - 0.25, 0.1));
}, []);

// Handle pan with mouse drag
const handleMouseDown = useCallback((e: React.MouseEvent) => {
  setIsPanning(true);
  // Store the pointer position relative to the current pan so that
  // handleMouseMove can derive the new pan directly from the pointer position.
  setPanStart({ x: e.clientX - pan.x, y: e.clientY - pan.y });
}, [pan]);

const handleMouseMove = useCallback((e: React.MouseEvent) => {
  // Ignore pointer movement unless a drag is in progress.
  if (!isPanning) return;
  setPan({ x: e.clientX - panStart.x, y: e.clientY - panStart.y });
},
[isPanning, panStart]);

// End the drag started by handleMouseDown.
const handleMouseUp = useCallback(() => {
  setIsPanning(false);
}, []);

// Restore the default zoom for the current mode and re-center the diagram.
const resetView = useCallback(() => {
  setZoom(defaultZoom);
  setPan({ x: 0, y: 0 });
}, [defaultZoom]);

// Render mermaid diagram
useEffect(() => {
  // Capture the host element once. mermaid.render is awaited below, and by the
  // time it settles the ref may already be null (modal closed); dereferencing
  // the ref again would throw inside both the try and the catch. Writing to the
  // captured element is harmless even if it has been detached.
  const target = diagramRef.current;
  if (!process || !target) return;

  const renderDiagram = async () => {
    try {
      // Check if we have raw mermaid code (from AI chat) or need to generate it
      const mermaidCode = (process as any).rawMermaid
        ? (process as any).rawMermaid
        : generateProcessMermaid(process);
      const id = `mermaid-${Date.now()}`;
      // Clear previous content
      target.innerHTML = '';
      const { svg } = await mermaid.render(id, mermaidCode);
      target.innerHTML = svg;
    } catch (error) {
      console.error('Mermaid render error:', error);
      const errorMessage = error instanceof Error ? error.message : String(error);
      // Messages containing "Maximum" or "exceeded" are treated as size-limit failures.
      const isSizeError = errorMessage.includes('Maximum') || errorMessage.includes('exceeded');
      target.innerHTML = `
${isSizeError ? '📊 Diagram Too Large' : '⚠️ Render Error'}
${isSizeError ? `This diagram has ${process.steps?.length || 0} steps and is too complex to render. Try viewing individual processes instead of "All Processes".` : `Unable to render diagram. Steps: ${process.steps?.length || 0}` }
`;
    }
  };
  renderDiagram();
}, [process]);

// Close on escape
useEffect(() => {
  const handleEscape = (e: KeyboardEvent) => {
    if (e.key === 'Escape') onClose();
  };
  window.addEventListener('keydown', handleEscape);
  return () => window.removeEventListener('keydown', handleEscape);
}, [onClose]);

// Close on backdrop click
const handleBackdropClick = useCallback((e: React.MouseEvent) => {
  // Only a click whose target is the backdrop element itself closes the modal.
  if (e.target === containerRef.current) {
    onClose();
  }
}, [onClose]);

// Copy mermaid code to clipboard
const handleCopyMermaid = useCallback(async () => {
  if (!process) return;
  // Use the same source the render effect uses: a process carrying `rawMermaid`
  // is drawn from that text, so copying must return it rather than a diagram
  // regenerated from its steps.
  const rawMermaid = (process as any).rawMermaid;
  const mermaidCode = rawMermaid ? rawMermaid : generateProcessMermaid(process);
  try {
    await navigator.clipboard.writeText(mermaidCode);
  } catch (err) {
    // writeText rejects when clipboard access is denied; log instead of
    // leaving an unhandled promise rejection.
    console.error('Failed to copy:', err);
  }
}, [process]);

// Focus in graph
const handleFocusInGraph = useCallback(() => {
  if (!process || !onFocusInGraph) return;
  // Hand every step id to the caller, then close the modal.
  const nodeIds = process.steps.map(s => s.id);
  onFocusInGraph(nodeIds, process.id);
  onClose();
}, [process, onFocusInGraph, onClose]);

if (!process) return null;

return (
{/* Glassmorphism Modal */}
{/* Subtle gradient overlay for extra glass feel */}
{/* Header */}

Process: {process.label}

{/* Diagram */}
{/* Footer Actions */}
{/* Zoom controls */}
{Math.round(zoom * 100)}%
{onFocusInGraph && ( )}
); }; ================================================ FILE: gitnexus-web/src/components/ProcessesPanel.tsx ================================================ /** * Processes Panel * * Lists all detected processes grouped by type (cross-community / intra-community). * Clicking a process opens the ProcessFlowModal with a flowchart. */ import { useState, useMemo, useCallback, useEffect } from 'react'; import { GitBranch, Search, Eye, Zap, Home, ChevronDown, ChevronRight, Sparkles, Lightbulb, Layers } from 'lucide-react'; import { useAppState } from '../hooks/useAppState'; import { ProcessFlowModal } from './ProcessFlowModal'; import type { ProcessData, ProcessStep } from '../lib/mermaid-generator'; export const ProcessesPanel = () => { const { graph, runQuery, setHighlightedNodeIds, highlightedNodeIds } = useAppState(); const [searchQuery, setSearchQuery] = useState(''); const [selectedProcess, setSelectedProcess] = useState(null); const [expandedSections, setExpandedSections] = useState>(new Set(['cross', 'intra'])); const [loadingProcess, setLoadingProcess] = useState(null); const [focusedProcessId, setFocusedProcessId] = useState(null); // Extract processes from graph const processes = useMemo(() => { if (!graph) return { cross: [], intra: [] }; const processNodes = graph.nodes.filter(n => n.label === 'Process'); const cross: Array<{ id: string; label: string; stepCount: number; clusters: string[] }> = []; const intra: Array<{ id: string; label: string; stepCount: number; clusters: string[] }> = []; for (const node of processNodes) { const item = { id: node.id, label: node.properties.heuristicLabel || node.properties.name || node.id, stepCount: node.properties.stepCount || 0, clusters: node.properties.communities || [], }; if (node.properties.processType === 'cross_community') { cross.push(item); } else { intra.push(item); } } // Sort by step count (most complex first) cross.sort((a, b) => b.stepCount - a.stepCount); intra.sort((a, b) => b.stepCount - a.stepCount); 
return { cross, intra };
}, [graph]);

// Filter by search (case-insensitive substring match on the label)
const filteredProcesses = useMemo(() => {
  if (!searchQuery.trim()) return processes;
  const query = searchQuery.toLowerCase();
  return {
    cross: processes.cross.filter(p => p.label.toLowerCase().includes(query)),
    intra: processes.intra.filter(p => p.label.toLowerCase().includes(query)),
  };
}, [processes, searchQuery]);

// Toggle section expansion
const toggleSection = useCallback((section: string) => {
  setExpandedSections(prev => {
    // Copy the set so React sees a new reference.
    const next = new Set(prev);
    if (next.has(section)) {
      next.delete(section);
    } else {
      next.add(section);
    }
    return next;
  });
}, []);

// Load ALL processes and combine into one mega-diagram
const handleViewAllProcesses = useCallback(async () => {
  setLoadingProcess('all');
  try {
    const allProcessIds = [...processes.cross, ...processes.intra].map(p => p.id);
    if (allProcessIds.length === 0) return;

    // Collect all steps from all processes, de-duplicated by step id
    const allStepsMap = new Map<string, ProcessStep>();
    const allEdges: Array<{ from: string; to: string; type: string }> = [];

    // Fetch the steps of every process in a single query
    const quotedProcessIds = allProcessIds.map(id => `'${id.replace(/'/g, "''")}'`).join(',');
    const allStepsQuery = `
      MATCH (s)-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process)
      WHERE p.id IN [${quotedProcessIds}]
      RETURN s.id AS id, s.name AS name, s.filePath AS filePath, r.step AS stepNumber
    `;
    const stepsResult = await runQuery(allStepsQuery);
    for (const row of stepsResult) {
      // Rows are read by column name first, then by position.
      const stepId = row.id || row[0];
      // A step shared by several processes is kept once (first occurrence wins).
      if (!allStepsMap.has(stepId)) {
        allStepsMap.set(stepId, {
          id: stepId,
          name: row.name || row[1] || 'Unknown',
          filePath: row.filePath || row[2],
          stepNumber: row.stepNumber || row.step || row[3] || 0,
        });
      }
    }

    const allSteps = Array.from(allStepsMap.values());
    const stepIds = allSteps.map(s => s.id);

    // Query for all CALLS edges between the combined steps
    if (stepIds.length > 0) {
      // Build the quoted id list once; it is used for both ends of the edge.
      const quotedStepIds = stepIds.map(id => `'${id.replace(/'/g, "''")}'`).join(',');
      const edgesQuery = `
        MATCH (from)-[r:CodeRelation {type: 'CALLS'}]->(to)
        WHERE from.id IN [${quotedStepIds}] AND to.id IN [${quotedStepIds}]
        RETURN from.id AS fromId, to.id AS toId, r.type AS type
      `;
      try {
        const edgesResult = await runQuery(edgesQuery);
        // Append one edge at a time: spreading the whole result into push()
        // passes every element as a call argument, which can raise RangeError
        // for very large results and would discard all edges via the catch below.
        for (const row of edgesResult) {
          const edge = {
            from: row.fromId || row[0],
            to: row.toId || row[1],
            type: row.type || row[2] || 'CALLS',
          };
          // Remove self-loops
          if (edge.from !== edge.to) {
            allEdges.push(edge);
          }
        }
      } catch (err) {
        console.warn('Could not fetch combined edges:', err);
      }
    }

    const combinedProcessData: ProcessData = {
      id: 'combined-all',
      label: `All Processes (${allProcessIds.length} combined)`,
      processType: 'cross_community', // Treat as cross-community for styling
      steps: allSteps,
      edges: allEdges,
      clusters: [],
    };
    setSelectedProcess(combinedProcessData);
  } catch (error) {
    console.error('Failed to load combined processes:', error);
  } finally {
    // Runs on the early return above as well, so the spinner is always cleared.
    setLoadingProcess(null);
  }
}, [processes, runQuery]);

// Load process steps and open modal
const handleViewProcess = useCallback(async (processId: string, label: string, processType: string) => {
  setLoadingProcess(processId);
  try {
    // Query for process steps
    const stepsQuery = `
      MATCH (s)-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process {id: '${processId.replace(/'/g, "''")}'})
      RETURN s.id AS id, s.name AS name, s.filePath AS filePath, r.step AS stepNumber
      ORDER BY r.step
    `;
    const stepsResult = await runQuery(stepsQuery);
    const steps: ProcessStep[] = stepsResult.map((row: any) => ({
      id: row.id || row[0],
      name: row.name || row[1] || 'Unknown',
      filePath: row.filePath || row[2],
      stepNumber: row.stepNumber || row.step || row[3] || 0,
    }));

    // Get step IDs for edge query
    const stepIds = steps.map(s => s.id);

    // Query for CALLS edges between the steps in this process
    let edges: Array<{ from: string; to: string; type: string }> = [];
    if (stepIds.length > 0) {
      // Build the quoted id list once; it is used for both ends of the edge.
      const quotedStepIds = stepIds.map(id => `'${id.replace(/'/g, "''")}'`).join(',');
      const edgesQuery = `
        MATCH (from)-[r:CodeRelation {type: 'CALLS'}]->(to)
        WHERE from.id IN [${quotedStepIds}] AND to.id IN [${quotedStepIds}]
        RETURN from.id AS fromId, to.id AS toId, r.type AS type
      `;
      try {
        const edgesResult = await runQuery(edgesQuery);
        edges = edgesResult
          .map((row: any) => ({
            from: row.fromId || row[0],
            to: row.toId || row[1],
            type: row.type || row[2] || 'CALLS',
          }))
          .filter(edge => edge.from !== edge.to); // Remove self-loops
      } catch (err) {
        console.warn('Could not fetch edges:', err);
        // Continue with empty edges - will fallback to linear
      }
    }

    // Get clusters for this process
    const processNode = graph?.nodes.find(n => n.id === processId);
    const clusters = processNode?.properties.communities || [];

    const processData: ProcessData = {
      id: processId,
      label,
      processType: processType as 'cross_community' | 'intra_community',
      steps,
      edges,
      clusters,
    };
    setSelectedProcess(processData);
  } catch (error) {
    console.error('Failed to load process steps:', error);
  } finally {
    setLoadingProcess(null);
  }
}, [runQuery, graph]);

// Cache for process steps (so we don't re-query when toggling focus): process id -> step ids
const [processStepsCache, setProcessStepsCache] = useState<Map<string, string[]>>(new Map());

// Toggle focus for any process - loads steps on demand
const handleToggleFocusForProcess = useCallback(async (processId: string) => {
  // If already focused on this process, turn off
  if (focusedProcessId === processId) {
    setHighlightedNodeIds(new Set());
    setFocusedProcessId(null);
    return;
  }

  // Check if we have cached steps
  if (processStepsCache.has(processId)) {
    const stepIds = processStepsCache.get(processId)!;
    setHighlightedNodeIds(new Set(stepIds));
    setFocusedProcessId(processId);
    return;
  }

  // Load steps for this process
  setLoadingProcess(processId);
  try {
    const stepsQuery = `
      MATCH (s)-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process {id: '${processId.replace(/'/g, "''")}'})
      RETURN s.id AS id
    `;
    const stepsResult = await runQuery(stepsQuery);
    const stepIds = stepsResult.map((row: any) => row.id || row[0]);

    // Cache the result
    setProcessStepsCache(prev => new Map(prev).set(processId, stepIds));

    // Set focus
    setHighlightedNodeIds(new Set(stepIds));
    setFocusedProcessId(processId);
  } catch (error) {
    console.error('Failed to load process steps for focus:', error);
  } finally {
    setLoadingProcess(null);
  }
}, [focusedProcessId, processStepsCache, runQuery, setHighlightedNodeIds]);

// Focus in graph callback - toggles highlight (used by modal)
const handleFocusInGraph = useCallback((nodeIds: string[], processId: string) => {
  // Check if this process is already focused
  if (focusedProcessId === processId) {
    // Clear focus
    setHighlightedNodeIds(new Set());
    setFocusedProcessId(null);
  } else {
    // Set focus and cache
    setHighlightedNodeIds(new Set(nodeIds));
    setFocusedProcessId(processId);
    setProcessStepsCache(prev => new Map(prev).set(processId, nodeIds));
  }
}, [focusedProcessId, setHighlightedNodeIds]);

// Clear focused process when highlights are cleared externally
useEffect(() => {
  if (highlightedNodeIds.size === 0 && focusedProcessId !== null) {
    setFocusedProcessId(null);
  }
}, [highlightedNodeIds, focusedProcessId]);

const totalCount = processes.cross.length + processes.intra.length;

if (totalCount === 0) {
  return (

No Processes Detected

Processes are execution flows traced from entry points. Load a codebase to see detected processes.

); } return (
{/* Header with search */}
setSearchQuery(e.target.value)} placeholder="Filter processes..." className="flex-1 bg-transparent border-none outline-none text-sm text-text-primary placeholder:text-text-muted" />
{totalCount} processes detected
{/* Process list */}
{/* View All Processes Card */}
{/* Cross-Community Section */} {filteredProcesses.cross.length > 0 && (
{expandedSections.has('cross') && (
{filteredProcesses.cross.map((process) => ( handleViewProcess(process.id, process.label, 'cross_community')} onToggleFocus={() => handleToggleFocusForProcess(process.id)} /> ))}
)}
)} {/* Intra-Community Section */} {filteredProcesses.intra.length > 0 && (
{expandedSections.has('intra') && (
{filteredProcesses.intra.map((process) => ( handleViewProcess(process.id, process.label, 'intra_community')} onToggleFocus={() => handleToggleFocusForProcess(process.id)} /> ))}
)}
)}
{/* Modal */} setSelectedProcess(null)} onFocusInGraph={handleFocusInGraph} isFullScreen={selectedProcess?.id === 'combined-all'} />
); }; // Individual process item interface ProcessItemProps { process: { id: string; label: string; stepCount: number; clusters: string[] }; isLoading: boolean; isSelected: boolean; isFocused: boolean; onView: () => void; onToggleFocus: () => void; } const ProcessItem = ({ process, isLoading, isSelected, isFocused, onView, onToggleFocus }: ProcessItemProps) => { // Determine row styling - focused gets special highlight const rowClass = isFocused ? 'bg-amber-950/40 border border-amber-500/50 ring-1 ring-amber-400/30' : isSelected ? 'bg-cyan-950/40 border border-cyan-500/50 ring-1 ring-cyan-400/30' : ''; return (
{process.label}
{process.stepCount} steps {process.clusters.length > 0 && ( <> {process.clusters.length} clusters )}
{/* Lightbulb icon - appears on hover, always visible when focused */}
); }; ================================================ FILE: gitnexus-web/src/components/QueryFAB.tsx ================================================ import { useState, useRef, useEffect, useCallback } from 'react'; import { Terminal, Play, X, ChevronDown, ChevronUp, Loader2, Sparkles, Table } from 'lucide-react'; import { useAppState } from '../hooks/useAppState'; const EXAMPLE_QUERIES = [ { label: 'All Functions', query: `MATCH (n:Function) RETURN n.id AS id, n.name AS name, n.filePath AS path LIMIT 50`, }, { label: 'All Classes', query: `MATCH (n:Class) RETURN n.id AS id, n.name AS name, n.filePath AS path LIMIT 50`, }, { label: 'All Interfaces', query: `MATCH (n:Interface) RETURN n.id AS id, n.name AS name, n.filePath AS path LIMIT 50`, }, { label: 'Function Calls', query: `MATCH (a:File)-[r:CodeRelation {type: 'CALLS'}]->(b:Function) RETURN a.id AS id, a.name AS caller, b.name AS callee LIMIT 50`, }, { label: 'Import Dependencies', query: `MATCH (a:File)-[r:CodeRelation {type: 'IMPORTS'}]->(b:File) RETURN a.id AS id, a.name AS from, b.name AS imports LIMIT 50`, }, ]; export const QueryFAB = () => { const { setHighlightedNodeIds, setQueryResult, queryResult, clearQueryHighlights, graph, runQuery, isDatabaseReady } = useAppState(); const [isExpanded, setIsExpanded] = useState(false); const [query, setQuery] = useState(''); const [isRunning, setIsRunning] = useState(false); const [error, setError] = useState(null); const [showExamples, setShowExamples] = useState(false); const [showResults, setShowResults] = useState(true); const textareaRef = useRef(null); const panelRef = useRef(null); useEffect(() => { if (isExpanded && textareaRef.current) { textareaRef.current.focus(); } }, [isExpanded]); useEffect(() => { const handleClickOutside = (e: MouseEvent) => { if (panelRef.current && !panelRef.current.contains(e.target as Node)) { setShowExamples(false); } }; document.addEventListener('mousedown', handleClickOutside); return () => 
document.removeEventListener('mousedown', handleClickOutside); }, []); useEffect(() => { const handleKeyDown = (e: KeyboardEvent) => { if (e.key === 'Escape' && isExpanded) { setIsExpanded(false); setShowExamples(false); } }; document.addEventListener('keydown', handleKeyDown); return () => document.removeEventListener('keydown', handleKeyDown); }, [isExpanded]); const handleRunQuery = useCallback(async () => { if (!query.trim() || isRunning) return; if (!graph) { setError('No project loaded. Load a project first.'); return; } const ready = await isDatabaseReady(); if (!ready) { setError('Database not ready. Please wait for loading to complete.'); return; } setIsRunning(true); setError(null); const startTime = performance.now(); try { const rows = await runQuery(query); const executionTime = performance.now() - startTime; // Extract node IDs from results - handles various formats // 1. Array format: first element if it looks like a node ID // 2. Object format: any field ending with 'id' (case-insensitive) // 3. 
Values matching node ID pattern: Label:path:name const nodeIdPattern = /^(File|Function|Class|Method|Interface|Folder|CodeElement):/; const nodeIds = rows .flatMap(row => { const ids: string[] = []; if (Array.isArray(row)) { // Array format - check all elements for node ID patterns row.forEach(val => { if (typeof val === 'string' && (nodeIdPattern.test(val) || val.includes(':'))) { ids.push(val); } }); } else if (typeof row === 'object' && row !== null) { // Object format - check fields ending with 'id' and values matching patterns Object.entries(row).forEach(([key, val]) => { const keyLower = key.toLowerCase(); if (typeof val === 'string') { // Field name contains 'id' if (keyLower.includes('id') || keyLower === 'id') { ids.push(val); } // Value matches node ID pattern else if (nodeIdPattern.test(val)) { ids.push(val); } } }); } return ids; }) .filter(Boolean) .filter((id, index, arr) => arr.indexOf(id) === index); setQueryResult({ rows, nodeIds, executionTime }); setHighlightedNodeIds(new Set(nodeIds)); } catch (err) { setError(err instanceof Error ? err.message : 'Query execution failed'); setQueryResult(null); setHighlightedNodeIds(new Set()); } finally { setIsRunning(false); } }, [query, isRunning, graph, isDatabaseReady, runQuery, setHighlightedNodeIds, setQueryResult]); const handleKeyDown = (e: React.KeyboardEvent) => { if (e.key === 'Enter' && (e.ctrlKey || e.metaKey)) { e.preventDefault(); handleRunQuery(); } }; const handleSelectExample = (exampleQuery: string) => { setQuery(exampleQuery); setShowExamples(false); textareaRef.current?.focus(); }; const handleClose = () => { setIsExpanded(false); setShowExamples(false); clearQueryHighlights(); setError(null); }; const handleClear = () => { setQuery(''); clearQueryHighlights(); setError(null); textareaRef.current?.focus(); }; if (!isExpanded) { return ( ); } return (
Cypher Query