Showing preview only (2,080K chars total). Download the full file or copy to clipboard to get everything.
Repository: stephenslab/susieR
Branch: master
Commit: 00f8ea82d576
Files: 222
Total size: 1.9 MB
Directory structure:
gitextract_f5azyrwd/
├── .Rbuildignore
├── .github/
│ ├── dependabot.yml
│ ├── rattler-build_container.df
│ ├── recipe/
│ │ ├── recipe.yaml
│ │ ├── variant_r44.yaml
│ │ └── variant_r45.yaml
│ └── workflows/
│ ├── ci.yml
│ ├── conda_build.yml
│ ├── dispatch_pkgdown_build.yml
│ └── release.yml
├── .gitignore
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── Makefile
├── NAMESPACE
├── R/
│ ├── cpp11.R
│ ├── diagnosis_reports.R
│ ├── example_dataset.R
│ ├── generic_methods.R
│ ├── individual_data_methods.R
│ ├── iterative_bayesian_stepwise_selection.R
│ ├── mixture_prior.R
│ ├── model_methods.R
│ ├── mr.ash.R
│ ├── mr.ash.rss.R
│ ├── predict.susie.R
│ ├── refinement.R
│ ├── rss_lambda_methods.R
│ ├── rss_mismatch.R
│ ├── single_effect_regression.R
│ ├── slot_prior.R
│ ├── sparse_multiplication.R
│ ├── ss_mixture_methods.R
│ ├── sufficient_stats_methods.R
│ ├── summary.susie.R
│ ├── susie.R
│ ├── susieR-package.R
│ ├── susie_auto.R
│ ├── susie_constructors.R
│ ├── susie_get_functions.R
│ ├── susie_plot.R
│ ├── susie_post_outcome_configuration.R
│ ├── susie_rss_utils.R
│ ├── susie_trendfilter.R
│ ├── susie_trendfilter_utils.R
│ ├── susie_utils.R
│ ├── susie_workhorse.R
│ └── univariate_regression.R
├── README.md
├── _pkgdown.yml
├── data/
│ ├── FinemappingConvergence.RData
│ ├── N2finemapping.RData
│ ├── N3finemapping.RData
│ ├── SummaryConsistency.RData
│ ├── data_small.RData
│ └── unmappable_data.RData
├── inst/
│ ├── CITATION
│ ├── analysis/
│ │ ├── optimize.Rmd
│ │ ├── test_susie_auto.Rmd
│ │ └── testing.Rmd
│ ├── code/
│ │ ├── caviar.R
│ │ ├── compute_ss_memory.R
│ │ ├── dap-g.py
│ │ ├── finemap.R
│ │ ├── finemap_1p4.R
│ │ ├── gen_original_results.R
│ │ ├── monitor_memory.py
│ │ ├── python_example/
│ │ │ ├── N3finemapping_python.ipynb
│ │ │ └── environment.yml
│ │ ├── simulate_lambda_pop_ld_bias.R
│ │ ├── small_sim.R
│ │ ├── sparse_matrix_strategy.Rmd
│ │ ├── summarize_small_sim.R
│ │ ├── susie_memory.R
│ │ └── susie_rss_memory.R
│ ├── datafiles/
│ │ ├── FinemappingConvergence1k.RData
│ │ ├── N3finemapping.CAVIAR.RData
│ │ ├── N3finemapping.DAP.RData
│ │ ├── N3finemapping.FINEMAP.RData
│ │ ├── SummaryConsistency1k.RData
│ │ └── small_sim_out_v0.14.48.RData
│ ├── misc/
│ │ ├── README_susie_v2.md
│ │ ├── format_r_code.sh
│ │ ├── post-commit.sh
│ │ ├── pre-commit.sh
│ │ └── uncrustify_default.cfg
│ └── notebooks/
│ ├── benchmark_mix_vs_sp.R
│ ├── small_sample_benchmark.ipynb
│ └── stochastic_ld_benchmark.ipynb
├── man/
│ ├── FinemappingConvergence.Rd
│ ├── N2finemapping.Rd
│ ├── N3finemapping.Rd
│ ├── SummaryConsistency.Rd
│ ├── absolute.order.Rd
│ ├── add_delta_features.Rd
│ ├── block_coordinate_ascent.Rd
│ ├── calculate_posterior_moments_mixture_common.Rd
│ ├── check_alpha_pip_cycle_convergence.Rd
│ ├── cleanup_extra_fields.Rd
│ ├── coef.mr.ash.Rd
│ ├── coef.susie.Rd
│ ├── collect_ash_diag.Rd
│ ├── compare_ash_methods.Rd
│ ├── compute_marginal_bhat_shat.Rd
│ ├── compute_suff_stat.Rd
│ ├── data_small.Rd
│ ├── diagnose_ash_filter_archived_iter.Rd
│ ├── diagnose_bb_ash_iter.Rd
│ ├── estimate_s_rss.Rd
│ ├── extract_bb_ash_features.Rd
│ ├── format_extra_diag.Rd
│ ├── format_sigma2_summary.Rd
│ ├── get.full.posterior.Rd
│ ├── get_alpha_l.Rd
│ ├── get_cs_correlation.Rd
│ ├── get_objective.Rd
│ ├── get_posterior_mean_l.Rd
│ ├── get_posterior_mean_sum.Rd
│ ├── get_posterior_moments_l.Rd
│ ├── get_prior_variance_l.Rd
│ ├── get_slot_weight.Rd
│ ├── ibss_finalize.Rd
│ ├── ibss_initialize.Rd
│ ├── is_symmetric_matrix.Rd
│ ├── kriging_rss.Rd
│ ├── label_diag_truth.Rd
│ ├── loglik_mixture_common.Rd
│ ├── mr.ash.Rd
│ ├── mr.ash.rss.Rd
│ ├── path.order.Rd
│ ├── post_loglik_prior_hook.Rd
│ ├── pre_loglik_prior_hook.Rd
│ ├── predict.mr.ash.Rd
│ ├── predict.susie.Rd
│ ├── print.summary.susie_post_outcome_configuration.Rd
│ ├── resolve_mixture_prior.Rd
│ ├── safe_cor.Rd
│ ├── safe_cov2cor.Rd
│ ├── scale_design_matrix.Rd
│ ├── set_prior_variance_l.Rd
│ ├── slot_prior_betabinom.Rd
│ ├── summary.susie.Rd
│ ├── summary.susie_post_outcome_configuration.Rd
│ ├── susie.Rd
│ ├── susieR-package.Rd
│ ├── susie_auto.Rd
│ ├── susie_get_methods.Rd
│ ├── susie_init_coef.Rd
│ ├── susie_plot_changepoint.Rd
│ ├── susie_plots.Rd
│ ├── susie_post_outcome_configuration.Rd
│ ├── susie_rss.Rd
│ ├── susie_rss_lambda.Rd
│ ├── susie_ss.Rd
│ ├── susie_trendfilter.Rd
│ ├── susie_workhorse.Rd
│ ├── univar.order.Rd
│ ├── univariate_regression.Rd
│ └── unmappable_data.Rd
├── pixi.toml
├── src/
│ ├── Makevars
│ ├── Makevars.win
│ ├── caisa.cpp
│ ├── cpp11.cpp
│ ├── mr_ash.h
│ ├── mr_ash_rss.cpp
│ └── mr_ash_rss.h
├── tests/
│ ├── README.md
│ ├── testthat/
│ │ ├── helper_nig_reference.R
│ │ ├── helper_reference.R
│ │ ├── helper_testthat.R
│ │ ├── reference/
│ │ │ ├── test_susie_auto_reference.R
│ │ │ ├── test_susie_nig_reference.R
│ │ │ ├── test_susie_reference.R
│ │ │ ├── test_susie_rss_lambda_reference.R
│ │ │ ├── test_susie_rss_reference.R
│ │ │ └── test_susie_ss_reference.R
│ │ ├── test_X_centering.R
│ │ ├── test_coef_predict.R
│ │ ├── test_compute_marginal_bhat_shat.R
│ │ ├── test_generic_methods.R
│ │ ├── test_ibss.R
│ │ ├── test_individual_data_methods.R
│ │ ├── test_l_greedy.R
│ │ ├── test_mixture_prior.R
│ │ ├── test_mr_ash_equivalence.R
│ │ ├── test_plotting.R
│ │ ├── test_post_outcome_configuration_summary.R
│ │ ├── test_refinement.R
│ │ ├── test_rss_lambda_methods.R
│ │ ├── test_rss_mismatch.R
│ │ ├── test_rss_utils.R
│ │ ├── test_single_effect_regression.R
│ │ ├── test_slot_prior.R
│ │ ├── test_slot_weights.R
│ │ ├── test_sparse_multiplication.R
│ │ ├── test_sufficient_stats_methods.R
│ │ ├── test_summary_print.R
│ │ ├── test_susie.R
│ │ ├── test_susie_ash_ss_equivalence.R
│ │ ├── test_susie_auto.R
│ │ ├── test_susie_constructors.R
│ │ ├── test_susie_get_functions.R
│ │ ├── test_susie_small.R
│ │ ├── test_susie_utils.R
│ │ ├── test_susie_workhorse.R
│ │ ├── test_trendfilter.R
│ │ └── test_univariate_regression.R
│ └── testthat.R
└── vignettes/
├── announcements.Rmd
├── finemapping.Rmd
├── finemapping_summary_statistics.Rmd
├── l0_initialization.Rmd
├── mwe.Rmd
├── small_sample.Rmd
├── sparse_susie_eval.Rmd
├── susie_refine.Rmd
├── susie_rss.Rmd
├── susie_unmappable_effects.Rmd
├── susierss_diagnostic.Rmd
└── trend_filtering.Rmd
================================================
FILE CONTENTS
================================================
================================================
FILE: .Rbuildignore
================================================
^docs$
^\.gitignore$
^appveyor\.yml$
^_pkgdown\.yml$
^Makefile$
^\.circleci$
^susieR\.Rproj$
^\.Rproj\.user$
^\.travis\.yml$
^.*\.Rproj$
^tests/testthat/full_data_1_sim_gaussian_null_1\.rds$
^inst/datafiles$
^inst/code/compute_ss_data\.RData$
^inst/code/susie_data\.RData$
^inst/code/susie_rss_data\.RData$
^LICENSE\.md$
^NOTES\.txt$
^\.Renviron$
^pixi\.toml$
^pixi\.lock$
^\.pixi/.*
^\.github$
================================================
FILE: .github/dependabot.yml
================================================
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
================================================
FILE: .github/rattler-build_container.df
================================================
FROM ghcr.io/prefix-dev/pixi:latest
SHELL ["/bin/bash", "-c"]
RUN apt-get update
RUN apt-get install -y libgl1 ca-certificates
RUN groupadd -g 118 github
RUN useradd -m -u 1001 -g 118 -s /bin/bash runner
USER runner
RUN pixi global install rattler-build git patch
ENV PATH=/home/runner/.pixi/bin:${PATH}
================================================
FILE: .github/recipe/recipe.yaml
================================================
context:
version: VERSION_PLACEHOLDER
package:
name: r-susier
version: ${{ version }}
source:
path: susieR-${{ version }}.tar.gz
sha256: SHA256SUM_PLACEHOLDER
build:
number: BUILD_PLACEHOLDER
dynamic_linking:
rpaths:
- lib/R/lib/
- lib/
script: R CMD INSTALL --build .
requirements:
build:
- ${{ compiler('c') }}
- ${{ stdlib('c') }}
- ${{ compiler('cxx') }}
host:
- r-base
- r-cowplot
- r-crayon
- r-curl
- r-ggplot2
- r-knitr
- r-l0learn
- r-matrix
- r-matrixstats
- r-microbenchmark
- r-mixsqp
- r-cpp11
- r-cpp11armadillo
- r-reshape
- r-rfast
- r-rmarkdown
- r-survival
- r-testthat
run:
- r-base
- r-cowplot
- r-crayon
- r-curl
- r-ggplot2
- r-knitr
- r-l0learn
- r-matrix
- r-matrixstats
- r-microbenchmark
- r-mixsqp
- r-cpp11
- r-cpp11armadillo
- r-reshape
- r-rfast
- r-rmarkdown
- r-survival
- r-testthat
tests:
- script:
- R -e "library('susieR')"
about:
license: BSD-3-Clause
license_file: LICENSE
summary: Implements methods for variable selection in linear regression based on the Sum of Single Effects (SuSiE) model.
homepage: https://github.com/stephenslab/susieR
extra:
recipe-maintainers:
- danielnachun
================================================
FILE: .github/recipe/variant_r44.yaml
================================================
MACOSX_DEPLOYMENT_TARGET:
- '11.0'
c_stdlib_version:
- if: linux
then: 2.17
- if: osx
then: 11.0
c_stdlib:
- if: linux
then: sysroot
- if: osx
then: macosx_deployment_target
r_base:
- 4.4
================================================
FILE: .github/recipe/variant_r45.yaml
================================================
MACOSX_DEPLOYMENT_TARGET:
- '11.0'
c_stdlib_version:
- if: linux
then: 2.17
- if: osx
then: 11.0
c_stdlib:
- if: linux
then: sysroot
- if: osx
then: macosx_deployment_target
r_base:
- 4.5
================================================
FILE: .github/workflows/ci.yml
================================================
name: Continuous Integration
on:
push:
branches: master
pull_request:
paths-ignore:
- .github/*
- .gitignore
- README.md
jobs:
ci_linux-64:
name: linux-64 CI
runs-on: ubuntu-latest
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
strategy:
fail-fast: false
matrix:
environment: ["r44", "r45"]
steps:
- name: Checkout pull request branch
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Copy TOML
run: |
mkdir /tmp/pixi
cp ${GITHUB_WORKSPACE}/pixi.toml /tmp/pixi
- name: Setup pixi
uses: prefix-dev/setup-pixi@v0.9.4
with:
run-install: false
- name: Run unit tests
run: pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml devtools_test
- name: Run R CMD CHECK
run: |
pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml build
pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml rcmdcheck
- name: Check unit test code coverage
if: ${{ matrix.environment == 'r44' }}
run: pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml codecov
ci_osx-arm64:
name: osx-arm64 CI
runs-on: macos-latest
strategy:
fail-fast: false
matrix:
environment: ["r44", "r45"]
steps:
- name: Checkout pull request branch
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Copy TOML
run: |
mkdir /tmp/pixi
cp ${GITHUB_WORKSPACE}/pixi.toml /tmp/pixi
- name: Setup pixi
uses: prefix-dev/setup-pixi@v0.9.4
with:
run-install: false
- name: Run unit tests
run: pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml devtools_test
- name: Run R CMD CHECK
run: |
pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml build
pixi run --environment ${{ matrix.environment }} --manifest-path /tmp/pixi/pixi.toml rcmdcheck
================================================
FILE: .github/workflows/conda_build.yml
================================================
name: Build conda package
on:
release:
types: [published]
workflow_dispatch:
inputs:
version:
description: Version to package
required: true
default: 'latest'
build:
description: "Build revision of package (default: 0)"
required: false
default: '0'
jobs:
build_package_linux-64:
name: Build conda package for linux-64
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
variant: ["r44", "r45"]
env:
ANACONDA_API_KEY: ${{ secrets.ANACONDA_API_TOKEN }}
ANACONDA_OWNER: ${{ vars.ANACONDA_OWNER }}
steps:
- name: Checkout repository
uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Get latest version
id: latest-version
if: github.event_name == 'workflow_dispatch' && github.event.inputs.version == 'latest'
uses: pozetroninc/github-action-get-latest-release@v0.8.0
with:
repository: ${{ github.repository }}
token: ${{ secrets.CI_TOKEN }}
- name: Set version
id: set-version
run: |
if [[ "${{ github.event_name }}" == "release" ]]; then
version="${{ github.event.release.tag_name }}"
elif [[ "${{ github.event.inputs.version }}" != "latest" ]]; then
version="${{ github.event.inputs.version }}"
else
version="${{ steps.latest-version.outputs.release }}"
fi
echo "version=${version}" >> "$GITHUB_OUTPUT"
- name: Set build number
id: set-build
run: |
if [[ "${{ github.event_name }}" == "release" ]]; then
echo "build=0" >> "$GITHUB_OUTPUT"
else
echo "build=${{ github.event.inputs.build }}" >> "$GITHUB_OUTPUT"
fi
- name: Download release
uses: robinraju/release-downloader@v1
with:
tag: ${{ steps.set-version.outputs.version }}
token: ${{ secrets.CI_TOKEN }}
out-file-path: /tmp/recipe
tarBall: true
- name: Setup pixi
uses: prefix-dev/setup-pixi@v0.9.4
with:
run-install: false
- name: Create recipe from template
shell: pixi exec --spec sed --spec coreutils --spec wget -- bash -e {0}
run: |
cp .github/recipe/recipe.yaml /tmp/recipe/recipe.yaml
cp .github/recipe/variant_${{ matrix.variant }}.yaml /tmp/recipe/variants.yaml
repository=${{ github.repository }}
build=${{ steps.set-build.outputs.build }}
version=${{ steps.set-version.outputs.version }}
sha256sum=$(sha256sum /tmp/recipe/${repository//*\//}-${version}.tar.gz | cut -d ' ' -f 1)
sed -i "s/VERSION_PLACEHOLDER/${version}/g" /tmp/recipe/recipe.yaml
sed -i "s/SHA256SUM_PLACEHOLDER/${sha256sum}/g" /tmp/recipe/recipe.yaml
sed -i "s/BUILD_PLACEHOLDER/${build}/g" /tmp/recipe/recipe.yaml
- name: Setup up docker buildx
uses: docker/setup-buildx-action@v4
- name: Build and export docker containers
uses: docker/build-push-action@v7
with:
load: true
file: .github/rattler-build_container.df
tags: rattler-build:latest
- name: Build conda packages
run: |
docker run --rm --volume /tmp:/tmp \
--volume /etc/passwd:/etc/passwd:ro \
--volume /etc/group:/etc/group:ro \
--volume $(pwd) --workdir $(pwd) \
--user $(id -u) rattler-build \
rattler-build build -c dnachun -c conda-forge -c bioconda \
--output-dir /tmp/rattler-build --recipe-dir /tmp/recipe
- name: Upload package
shell: pixi exec --spec rattler-build -- bash -e {0}
run: rattler-build upload anaconda --force /tmp/rattler-build/linux-64/*.conda
build_package_osx-arm64:
name: Build conda package for osx-arm64
runs-on: macos-14
strategy:
fail-fast: false
matrix:
variant: ["r44", "r45"]
env:
ANACONDA_API_KEY: ${{ secrets.ANACONDA_API_TOKEN }}
ANACONDA_OWNER: ${{ vars.ANACONDA_OWNER }}
steps:
- name: Checkout repository
uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Get latest version
id: latest-version
if: github.event_name == 'workflow_dispatch' && github.event.inputs.version == 'latest'
uses: pozetroninc/github-action-get-latest-release@v0.8.0
with:
repository: ${{ github.repository }}
token: ${{ secrets.CI_TOKEN }}
- name: Set version
id: set-version
run: |
if [[ "${{ github.event_name }}" == "release" ]]; then
version="${{ github.event.release.tag_name }}"
elif [[ "${{ github.event.inputs.version }}" != "latest" ]]; then
version="${{ github.event.inputs.version }}"
else
version="${{ steps.latest-version.outputs.release }}"
fi
echo "version=${version}" >> "$GITHUB_OUTPUT"
- name: Set build number
id: set-build
run: |
if [[ "${{ github.event_name }}" == "release" ]]; then
echo "build=0" >> "$GITHUB_OUTPUT"
else
echo "build=${{ github.event.inputs.build }}" >> "$GITHUB_OUTPUT"
fi
- name: Download release
uses: robinraju/release-downloader@v1
with:
tag: ${{ steps.set-version.outputs.version }}
token: ${{ secrets.CI_TOKEN }}
out-file-path: /tmp/recipe
tarBall: true
- name: Setup pixi
uses: prefix-dev/setup-pixi@v0.9.4
with:
run-install: false
- name: Create recipe from template
shell: pixi exec --spec sed --spec coreutils --spec wget -- bash -e {0}
run: |
cp .github/recipe/recipe.yaml /tmp/recipe/recipe.yaml
cp .github/recipe/variant_${{ matrix.variant }}.yaml /tmp/recipe/variants.yaml
repository=${{ github.repository }}
build=${{ steps.set-build.outputs.build }}
version=${{ steps.set-version.outputs.version }}
sha256sum=$(sha256sum /tmp/recipe/${repository//*\//}-${version}.tar.gz | cut -d ' ' -f 1)
sed -i "s/VERSION_PLACEHOLDER/${version}/g" /tmp/recipe/recipe.yaml
sed -i "s/SHA256SUM_PLACEHOLDER/${sha256sum}/g" /tmp/recipe/recipe.yaml
sed -i "s/BUILD_PLACEHOLDER/${build}/g" /tmp/recipe/recipe.yaml
- name: Build conda package
shell: pixi exec --spec rattler-build -- bash -e {0}
run: rattler-build build -c dnachun -c conda-forge -c bioconda --output-dir /tmp/rattler-build --recipe-dir /tmp/recipe
- name: Upload package
shell: pixi exec --spec rattler-build -- bash -e {0}
run: rattler-build upload anaconda --force /tmp/rattler-build/osx-arm64/*.conda
================================================
FILE: .github/workflows/dispatch_pkgdown_build.yml
================================================
name: Deploy website
on:
push:
branches: ["master"]
workflow_dispatch:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
permissions:
contents: read
pages: write
id-token: write
concurrency:
group: "pages"
cancel-in-progress: false
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout master
uses: actions/checkout@v5
with:
ref: master
- name: Setup pixi
uses: prefix-dev/setup-pixi@v0.9.4
- name: Update pkgdown site
run: pixi run -e r44 pkgdown_build
- name: Setup Pages
uses: actions/configure-pages@v5
- name: Upload artifact
uses: actions/upload-pages-artifact@v5
with:
path: ./docs
deploy:
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
needs: build
steps:
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4
================================================
FILE: .github/workflows/release.yml
================================================
name: Upload new release
on:
push:
branches: [master]
paths: [DESCRIPTION]
workflow_dispatch:
inputs:
tag:
description: Version to use for release tag
default: auto
required: true
commit:
description: Commit to use for tag
default: auto
required: true
increment_major_version:
description: Increment major version
default: false
required: true
increment_minor_version:
description: Increment minor version
default: false
required: true
increment_patch_version:
description: Increment patch version
default: true
required: true
# Prevent duplicate releases when manual dispatch pushes a version commit
concurrency:
group: release
cancel-in-progress: false
jobs:
# Only runs on manual dispatch to bump version in DESCRIPTION
update_version:
outputs:
commit: ${{ steps.commit-changes.outputs.commit_long_sha }}
runs-on: ubuntu-latest
if: github.event_name == 'workflow_dispatch' && github.event.inputs.commit == 'auto'
steps:
- name: Checkout repository
uses: actions/checkout@v6
with:
token: ${{ secrets.CI_TOKEN }}
fetch-depth: 0
repository: ${{ github.repository }}
ref: master
- name: Setup pixi
uses: prefix-dev/setup-pixi@v0.9.4
- name: Update version
run: |
if [[ "${{ github.event.inputs.tag }}" != "auto" ]]; then
sed -i 's/Version: .*$/Version: ${{ github.event.inputs.tag }}/' DESCRIPTION
elif [[ "${{ github.event.inputs.increment_major_version }}" == "true" ]]; then
pixi run use_major_version
elif [[ "${{ github.event.inputs.increment_minor_version }}" == "true" ]]; then
pixi run use_minor_version
elif [[ "${{ github.event.inputs.increment_patch_version }}" == "true" ]]; then
pixi run use_patch_version
fi
- name: Commit changes to version
id: commit-changes
uses: EndBug/add-and-commit@v10
with:
push: true
message: Update version
# Creates tag + GitHub release — runs for both push and manual triggers
create_release:
needs: update_version
# Run when update_version succeeds (manual) or is skipped (push trigger)
if: always() && (needs.update_version.result == 'success' || needs.update_version.result == 'skipped')
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Determine commit
id: determine-commit
run: |
if [[ "${{ github.event_name }}" == "push" ]]; then
echo "commit=${{ github.sha }}" >> "$GITHUB_OUTPUT"
elif [[ "${{ github.event.inputs.commit }}" != "auto" ]]; then
echo "commit=${{ github.event.inputs.commit }}" >> "$GITHUB_OUTPUT"
else
echo "commit=${{ needs.update_version.outputs.commit }}" >> "$GITHUB_OUTPUT"
fi
- name: Checkout
uses: actions/checkout@v6
with:
ref: ${{ steps.determine-commit.outputs.commit }}
fetch-depth: 0
- name: Set tag from DESCRIPTION
id: set-tag
run: |
if [[ "${{ github.event_name }}" == "workflow_dispatch" && "${{ github.event.inputs.tag }}" != "auto" ]]; then
tag="${{ github.event.inputs.tag }}"
else
tag=$(grep "^Version:" DESCRIPTION | sed 's/Version: *//')
fi
echo "tag=${tag}" >> "$GITHUB_OUTPUT"
- name: Check if tag already exists
id: check-tag
run: |
if git rev-parse "refs/tags/${{ steps.set-tag.outputs.tag }}" >/dev/null 2>&1; then
echo "exists=true" >> "$GITHUB_OUTPUT"
echo "::notice::Tag ${{ steps.set-tag.outputs.tag }} already exists — skipping release"
else
echo "exists=false" >> "$GITHUB_OUTPUT"
fi
- name: Check if major or minor version changed
id: check-version-change
run: |
# Strip optional v prefix so legacy v-prefixed tags compare correctly
new_version="${{ steps.set-tag.outputs.tag }}"
new_version="${new_version#v}"
new_major_minor=$(echo "$new_version" | cut -d. -f1,2)
# Normalize all tags (strip v) before version-sorting, otherwise
# git's v:refname sort ranks v-prefixed tags ahead of plain ones.
latest_tag=$(git tag | sed 's/^v//' | sort -V | tail -n1 || echo "")
if [[ -z "$latest_tag" ]]; then
echo "No existing tags found — treating as new release"
echo "is_major_minor=true" >> "$GITHUB_OUTPUT"
else
latest_major_minor=$(echo "$latest_tag" | cut -d. -f1,2)
if [[ "$new_major_minor" != "$latest_major_minor" ]]; then
echo "Major/minor version changed: $latest_major_minor -> $new_major_minor"
echo "is_major_minor=true" >> "$GITHUB_OUTPUT"
else
echo "::notice::Only patch version changed ($latest_tag -> $new_version) — skipping release"
echo "is_major_minor=false" >> "$GITHUB_OUTPUT"
fi
fi
- name: Create new tag
if: steps.check-tag.outputs.exists == 'false' && steps.check-version-change.outputs.is_major_minor == 'true'
id: tag-version
uses: mathieudutour/github-tag-action@v6.2
with:
default_bump: false
default_prerelease_bump: false
github_token: ${{ secrets.GITHUB_TOKEN }}
custom_tag: ${{ steps.set-tag.outputs.tag }}
commit_sha: ${{ steps.determine-commit.outputs.commit }}
tag_prefix: ""
- name: Create a GitHub release
if: steps.check-tag.outputs.exists == 'false' && steps.check-version-change.outputs.is_major_minor == 'true'
uses: ncipollo/release-action@v1
with:
tag: ${{ steps.set-tag.outputs.tag }}
name: Release ${{ steps.set-tag.outputs.tag }}
generateReleaseNotes: true
================================================
FILE: .gitignore
================================================
**/.Rhistory
**/.DS_Store
**/.Rapp.history
**/.ipynb_checkpoints
**/.virtual_documents
susieR.Rproj
.Rproj.user
.RData
.Ruserdata
.pixi/
Rplots.pdf
docs
pixi.lock
**/*.html
**/*.so
**/*.o
susieR.Rcheck
**/.*.swp
**/.*.swo
================================================
FILE: DESCRIPTION
================================================
Encoding: UTF-8
Type: Package
Package: susieR
Title: Sum of Single Effects Linear Regression
Description: Implements methods for variable selection in linear
regression based on the "Sum of Single Effects" (SuSiE) model, as
described in Wang et al (2020) <DOI:10.1101/501114> and Zou et al
(2021) <DOI:10.1101/2021.11.03.467167>. These methods provide
simple summaries, called "Credible Sets", for accurately
quantifying uncertainty in which variables should be selected.
The methods are motivated by genetic fine-mapping applications,
and are particularly well-suited to settings where variables are
highly correlated and detectable effects are sparse. The fitting
algorithm, a Bayesian analogue of stepwise selection methods
called "Iterative Bayesian Stepwise Selection" (IBSS), is simple
and fast, allowing the SuSiE model to be fit to large data sets
(thousands of samples and hundreds of thousands of variables).
Date: 2026-04-24
Version: 0.16.1
Authors@R: c(person("Gao","Wang",role="aut",email="wang.gao@columbia.edu"),
person("Yuxin","Zou",role="aut"),
person("Alexander","McCreight",role="aut"),
person("Kaiqian","Zhang",role="aut"),
person("William","R.P. Denault",role="aut"),
person("Peter","Carbonetto",role=c("aut","cre"),
email="peter.carbonetto@gmail.com"),
person("Matthew","Stephens",role="aut"))
URL: https://github.com/stephenslab/susieR
BugReports: https://github.com/stephenslab/susieR/issues
License: BSD_3_clause + file LICENSE
Depends: R (>= 3.0.2)
Imports:
methods,
graphics,
grDevices,
stats,
Matrix,
matrixStats,
mixsqp,
reshape,
crayon,
ggplot2
LinkingTo:
cpp11,
cpp11armadillo
Suggests:
curl,
pkgload,
rprojroot,
testthat,
microbenchmark,
knitr,
rmarkdown,
Rfast,
cowplot,
L0Learn
LazyData: yes
LazyDataCompression: xz
NeedsCompilation: yes
RoxygenNote: 7.3.3
VignetteBuilder: knitr
================================================
FILE: LICENSE
================================================
YEAR: 2017-2022
COPYRIGHT HOLDER: Gao Wang, Peter Carbonetto, Yuxin Zou, Kaiqian Zhang, Matthew Stephens
ORGANIZATION: Gao Wang, Peter Carbonetto, Yuxin Zou, Kaiqian Zhang, Matthew Stephens
================================================
FILE: LICENSE.md
================================================
Copyright (c) 2017-2022, Gao Wang, Peter Carbonetto, Yuxin Zou, Kaiqian Zhang,
Matthew Stephens.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Gao Wang, Peter Carbonetto, Yuxin Zou, Kaiqian
Zhang, Matthew Stephens, nor the names of its contributors may be used
to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: Makefile
================================================
# Makefile for susieR package
#
# NOTE: this package uses cpp11 (see DESCRIPTION "LinkingTo: cpp11,
# cpp11armadillo" and the generated files R/cpp11.R and src/cpp11.cpp),
# so bindings are regenerated with cpp11::cpp_register(), not
# Rcpp::compileAttributes().
.PHONY: all install document test test-coverage pkgdown pkgdown-lazy lint style clean deep-clean check check-cran quick load help
# Default target
all: document install
## Install the package
install: document
	@echo "Installing package..."
	@Rscript -e "devtools::install('.', quiet = TRUE, upgrade = FALSE, build = FALSE)"
## Document the package (regenerating cpp11 bindings before roxygen)
document:
	@echo "Ensuring DESCRIPTION has final newline..."
	@Rscript -e "lines <- readLines('DESCRIPTION'); writeLines(lines, 'DESCRIPTION')"
	@echo "Creating minimal NAMESPACE if missing..."
	@test -f NAMESPACE || ( echo "# Generated by roxygen2: do not edit by hand" > NAMESPACE && \
	echo "" >> NAMESPACE )
	@echo "Regenerating cpp11 exports..."
	@Rscript -e "cpp11::cpp_register('.')"
	@echo "Compiling shared library..."
	@Rscript -e "pkgbuild::compile_dll('.')"
	@echo "Documenting with roxygen2..."
	@Rscript -e "roxygen2::roxygenise('.', clean = TRUE)"
## Run tests
test:
	@echo "Running tests..."
	@Rscript -e "devtools::test('.')"
## Test with coverage
test-coverage:
	@echo "Running tests with coverage..."
	@Rscript -e "covr::package_coverage('.')"
## Build pkgdown site
pkgdown: document
	@echo "Building pkgdown site..."
	@Rscript -e "pkgdown::clean_site('.')"
	@Rscript -e "pkgdown::init_site('.')"
	@Rscript -e "pkgdown::build_site('.', lazy = FALSE)"
pkgdown-lazy: document
	@echo "Building pkgdown site (lazy mode - only changed pages)..."
	@Rscript -e "pkgdown::build_site('.', lazy = TRUE)"
## Run lintr
lint:
	@echo "Running lintr..."
	@Rscript -e "lintr::lint_package('.')"
## Format code with styler
style:
	@echo "Styling code..."
	@Rscript -e "styler::style_pkg('.')"
## Basic check
check: document
	@echo "Running basic checks..."
	@Rscript -e "devtools::check('.', document = FALSE)"
## Run CRAN check
check-cran: document
	@echo "Running CRAN checks..."
	@Rscript -e "devtools::check('.', cran = TRUE, document = FALSE)"
## Clean generated files (KEEP NAMESPACE to avoid issues)
clean:
	@echo "Cleaning generated files..."
	@rm -f src/*.o src/*.so src/*.dll
	@rm -f src/cpp11.cpp R/cpp11.R
	@rm -rf man
	@rm -f src/symbols.rds
	@rm -rf *.tar.gz *.Rcheck
	@echo "Clean complete (NAMESPACE preserved)"
## Deep clean (removes everything including NAMESPACE)
deep-clean:
	@echo "Deep cleaning all generated files..."
	@rm -f src/*.o src/*.so src/*.dll
	@rm -f src/cpp11.cpp R/cpp11.R
	@rm -rf man
	@rm -f NAMESPACE
	@rm -f src/symbols.rds
	@rm -rf *.tar.gz *.Rcheck
	@rm -rf .Rhistory .RData .Rproj.user
	@rm -rf docs
	@rm -rf inst/doc vignettes/*.html vignettes/*.R
	@echo "Deep clean complete"
## Quick development workflow (when NAMESPACE exists)
quick:
	@echo "Quick rebuild (assumes NAMESPACE exists)..."
	@Rscript -e "cpp11::cpp_register('.')"
	@Rscript -e "devtools::install('.', quick = TRUE, upgrade = FALSE)"
## Load package for interactive use
load:
	@echo "Loading package..."
	@Rscript -e "devtools::load_all('.')"
## Help
help:
	@echo "susieR Makefile"
	@echo ""
	@echo "Main targets:"
	@echo " make - Document and install (default)"
	@echo " make document - Generate documentation (creates NAMESPACE if needed)"
	@echo " make install - Install package"
	@echo " make test - Run tests"
	@echo " make test-coverage - Test coverage report"
	@echo " make pkgdown - Build pkgdown site"
	@echo " make lint - Run lintr"
	@echo " make style - Format code with styler"
	@echo " make check - Run R CMD check"
	@echo " make check-cran - Run CRAN check"
	@echo ""
	@echo "Maintenance:"
	@echo " make clean - Remove generated files (keeps NAMESPACE)"
	@echo " make deep-clean - Remove ALL generated files"
	@echo ""
	@echo "Quick targets:"
	@echo " make quick - Quick rebuild (when NAMESPACE exists)"
	@echo " make load - Load package for interactive use"
FILE: NAMESPACE
================================================
# Generated by roxygen2: do not edit by hand
S3method(coef,mr.ash)
S3method(coef,susie)
S3method(get_objective,default)
S3method(ibss_initialize,default)
S3method(post_loglik_prior_hook,default)
S3method(pre_loglik_prior_hook,default)
S3method(predict,mr.ash)
S3method(predict,susie)
S3method(print,slot_prior)
S3method(print,summary.susie)
S3method(print,summary.susie_post_outcome_configuration)
S3method(summary,susie)
S3method(summary,susie_post_outcome_configuration)
export(absolute.order)
export(block_coordinate_ascent)
export(calc_z)
export(coef.mr.ash)
export(coef.susie)
export(compute_marginal_bhat_shat)
export(compute_suff_stat)
export(estimate_s_rss)
export(get.full.posterior)
export(get_cs_correlation)
export(get_objective)
export(ibss_finalize)
export(ibss_initialize)
export(is_symmetric_matrix)
export(kriging_rss)
export(mr.ash)
export(mr.ash.rss)
export(path.order)
export(post_loglik_prior_hook)
export(pre_loglik_prior_hook)
export(predict.mr.ash)
export(predict.susie)
export(print.summary.susie)
export(print.summary.susie_post_outcome_configuration)
export(slot_prior_betabinom)
export(slot_prior_poisson)
export(summary.susie)
export(summary.susie_post_outcome_configuration)
export(susie)
export(susie_auto)
export(susie_get_cs)
export(susie_get_lfsr)
export(susie_get_niter)
export(susie_get_objective)
export(susie_get_pip)
export(susie_get_posterior_mean)
export(susie_get_posterior_samples)
export(susie_get_posterior_sd)
export(susie_get_prior_variance)
export(susie_get_residual_variance)
export(susie_init_coef)
export(susie_plot)
export(susie_plot_changepoint)
export(susie_plot_iteration)
export(susie_post_outcome_configuration)
export(susie_rss)
export(susie_rss_lambda)
export(susie_ss)
export(susie_trendfilter)
export(susie_workhorse)
export(univar.order)
export(univariate_regression)
importFrom(Matrix,colMeans)
importFrom(Matrix,colSums)
importFrom(Matrix,crossprod)
importFrom(Matrix,forceSymmetric)
importFrom(Matrix,rowSums)
importFrom(Matrix,sparseMatrix)
importFrom(Matrix,summary)
importFrom(Matrix,t)
importFrom(Matrix,tcrossprod)
importFrom(Rfast,colVars)
importFrom(crayon,blue)
importFrom(crayon,bold)
importFrom(crayon,combine_styles)
importFrom(crayon,cyan)
importFrom(crayon,green)
importFrom(crayon,has_color)
importFrom(crayon,magenta)
importFrom(crayon,silver)
importFrom(crayon,yellow)
importFrom(ggplot2,.data)
importFrom(ggplot2,aes)
importFrom(ggplot2,aes_string)
importFrom(ggplot2,annotate)
importFrom(ggplot2,geom_abline)
importFrom(ggplot2,geom_col)
importFrom(ggplot2,geom_line)
importFrom(ggplot2,geom_point)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggtitle)
importFrom(ggplot2,labs)
importFrom(ggplot2,theme_bw)
importFrom(ggplot2,theme_classic)
importFrom(grDevices,dev.off)
importFrom(grDevices,pdf)
importFrom(graphics,legend)
importFrom(graphics,par)
importFrom(graphics,plot)
importFrom(graphics,points)
importFrom(graphics,segments)
importFrom(matrixStats,colSds)
importFrom(methods,as)
importFrom(mixsqp,mixsqp)
importFrom(reshape,melt)
importFrom(stats,.lm.fit)
importFrom(stats,coef)
importFrom(stats,cor)
importFrom(stats,dnorm)
importFrom(stats,lm)
importFrom(stats,median)
importFrom(stats,optim)
importFrom(stats,optimize)
importFrom(stats,pnorm)
importFrom(stats,predict)
importFrom(stats,rmultinom)
importFrom(stats,rnorm)
importFrom(stats,sd)
importFrom(stats,summary.lm)
importFrom(stats,var)
importFrom(utils,head)
importFrom(utils,modifyList)
useDynLib(susieR, .registration = TRUE)
================================================
FILE: R/cpp11.R
================================================
# Generated by cpp11: do not edit by hand
#
# Thin R-to-C++ bridge: each wrapper forwards its arguments unchanged to a
# native routine registered with R via .Call.
# NOTE(review): this file is auto-generated; to change behavior, edit the
# C++ sources and regenerate rather than modifying these wrappers.

# Forwards p and numiter to the native `_susieR_random_order` routine
# (presumably produces a randomized update order — confirm in the C++ source).
random_order <- function(p, numiter) {
  .Call(`_susieR_random_order`, p, numiter)
}

# Forwards all inputs and tuning parameters verbatim to the native
# `_susieR_caisa_cpp` solver used by mr.ash on individual-level data.
caisa_cpp <- function(X, w, sa2, pi_init, beta_init, r_init, sigma2, o_r, maxiter, miniter, convtol, epstol, method_q, updatepi, updatesigma, verbose) {
  .Call(`_susieR_caisa_cpp`, X, w, sa2, pi_init, beta_init, r_init, sigma2, o_r, maxiter, miniter, convtol, epstol, method_q, updatepi, updatesigma, verbose)
}

# Forwards summary-statistic inputs verbatim to the native
# `_susieR_mr_ash_rss_cpp` solver used by mr.ash.rss.
mr_ash_rss_cpp <- function(bhat, shat, z, R, var_y, n, sigma2_e, s0, w0, mu1_init, tol, max_iter, update_w0, update_sigma, compute_ELBO, standardize) {
  .Call(`_susieR_mr_ash_rss_cpp`, bhat, shat, z, R, var_y, n, sigma2_e, s0, w0, mu1_init, tol, max_iter, update_w0, update_sigma, compute_ELBO, standardize)
}
================================================
FILE: R/diagnosis_reports.R
================================================
# Diagnostic functions for SuSiE-ash filter
#
# Per-iteration functions (called from susie_utils.R, return data.frame):
# diagnose_bb_ash_iter() - BB+ash code path
# diagnose_ash_filter_archived_iter() - V0 code path
#
# Post-run helpers (called via susieR:::):
# collect_ash_diag(fit) - rbind all iterations into ML table
# label_diag_truth(df, fit, causal) - add TP/FP labels
# add_delta_features(df) - add per-slot change-over-iteration features
# extract_bb_ash_features(fit, X, causal) - quick feature extraction from converged fit
# compare_ash_methods(df1, df2) - side-by-side comparison
#
# Usage example:
# fit <- susie(X, y, L=10, slot_prior=slot_prior_betabinom(),
# unmappable_effects="ash", max_iter=50)
# df <- susieR:::collect_ash_diag(fit)
# df <- susieR:::label_diag_truth(df, fit, causal)
# df <- susieR:::add_delta_features(df)
#
# # ML analysis: per-slot, per-iteration features with TP/FP labels
# # Key columns: iter, slot, c_hat, lbf, purity, V, max_alpha,
# # alpha_entropy, mask_tier, collision, ever_uncertain, ...
# # Delta columns: delta_c_hat, delta_V, delta_lbf, delta_max_alpha, ...
# # (change from previous iteration for the same slot)
# #
# # For 4-way comparison (BB+ash/V0 x mrash/no-mrash):
# # options(susie.skip_mrash = TRUE) # toggle mr.ash off
# # fit_nomrash <- susie(...)
# # options(susie.skip_mrash = FALSE) # restore
#
# Data.frames accumulated on fit$.diag_env$history during the run.
# Debug flag .ash_debug in susie_utils.R (TRUE = on, never turn off).
#' BB+ash per-iteration diagnostic
#'
#' Builds a one-row-per-slot data.frame of diagnostic features for the
#' BB+ash code path at the current iteration. Rows are accumulated by the
#' caller across iterations (see \code{collect_ash_diag}) into an
#' ML-ready table.
#'
#' @param model SuSiE model state; fields read here: \code{alpha},
#'   \code{mu}, \code{V}, \code{lbf}, \code{ash_iter},
#'   \code{ever_uncertain}, \code{slot_weights}, and optionally
#'   \code{prev_sentinel}, \code{was_low_chat}, \code{was_exposed}.
#' @param Xcorr Variable-by-variable correlation matrix.
#' @param mask Mask (logical or index) zeroing entries of
#'   \code{ash_result$beta}.
#' @param b_confident Numeric vector of confident effects; only its
#'   sum of squares and max absolute value are recorded.
#' @param sentinels Integer vector of per-slot sentinel indices (0 = none).
#' @param sentinel_collision Logical vector of per-slot collision flags.
#' @param is_confident_now,is_trusted Logical vectors of per-slot status.
#' @param emerging_slots Not used in this function body — kept for call
#'   signature compatibility (TODO confirm with caller).
#' @param active_slots Integer vector of active slot indices.
#' @param c_hat Numeric vector of per-slot inclusion estimates.
#' @param ash_result List with \code{beta}, \code{sigma2}, and
#'   optionally \code{pi}.
#' @param p Number of variables (denominator for \code{mask_frac}).
#' @param high_chat,low_chat Optional index vectors defining the c_hat
#'   mask tiers.
#' @param collision_threshold,purity_threshold,masking_threshold,nPIP_threshold,c_hat_excess_threshold,alpha_entropy_threshold
#'   Tunable parameters; recorded verbatim in the output for
#'   reproducibility, not used in any computation here.
#' @param slot_prior Optional slot prior; \code{a_beta}/\code{b_beta} are
#'   used to compute \code{c_hat_null} / \code{c_hat_excess}.
#' @param mask_smoothness,mask_amount,mask_concentration,mask_burnin,mask_spread_pip_at_sent,mask_pip_prot_at_sent
#'   Optional per-slot unified-mask diagnostics, copied into the output.
#'
#' @return data.frame with one row per slot, all features
#' @keywords internal
diagnose_bb_ash_iter <- function(model, Xcorr, mask, b_confident,
                                 sentinels, sentinel_collision,
                                 is_confident_now, is_trusted,
                                 emerging_slots, active_slots, c_hat,
                                 ash_result, p,
                                 high_chat = NULL, low_chat = NULL,
                                 # Tunable parameters (captured for reproducibility)
                                 collision_threshold = 0.9,
                                 purity_threshold = 0.5,
                                 masking_threshold = 0.5,
                                 nPIP_threshold = 0.05,
                                 c_hat_excess_threshold = 0.2,
                                 alpha_entropy_threshold = log(5),
                                 slot_prior = NULL,
                                 mask_smoothness = NULL,
                                 mask_amount = NULL,
                                 mask_concentration = NULL,
                                 mask_burnin = NULL,
                                 mask_spread_pip_at_sent = NULL,
                                 mask_pip_prot_at_sent = NULL) {
  L <- nrow(model$alpha)
  # mr.ash coefficients, raw and with the unmappable-region mask applied.
  theta_raw <- ash_result$beta
  theta_masked <- theta_raw
  theta_masked[mask] <- 0
  cs_coverage <- 0.9
  iter <- model$ash_iter
  rows <- list()
  for (l in seq_len(L)) {
    sent <- sentinels[l]
    alpha_l <- model$alpha[l, ]
    max_a <- max(alpha_l)
    # Purity and CS size: smallest top-alpha set reaching 90% coverage;
    # purity is the minimum |correlation| within that set.
    cs_order <- order(alpha_l, decreasing = TRUE)
    cs_size <- min(which(cumsum(alpha_l[cs_order]) >= cs_coverage))
    if (cs_size > 1 && l %in% active_slots) {
      cs_idx <- cs_order[1:cs_size]
      pur <- min(abs(Xcorr[cs_idx, cs_idx]))
    } else if (cs_size == 1) {
      pur <- 1.0
    } else {
      pur <- 0.0
    }
    # Status: first matching label wins (trusted > conf_unc > confident >
    # collide > null > emerging).
    status <- if (is_trusted[l]) "trusted"
      else if (is_confident_now[l] && model$ever_uncertain[l]) "conf_unc"
      else if (is_confident_now[l]) "confident"
      else if (sentinel_collision[l]) "collide"
      else if (model$V[l] == 0) "null"
      else "emerging"
    # Mask tier (key c_hat feature)
    mask_tier <- if (is_trusted[l]) "trusted"
      else if (!is.null(high_chat) && l %in% high_chat) "high_chat"
      else if (!is.null(low_chat) && l %in% low_chat) "low_chat"
      else "unknown"
    # Max cross-sentinel |r| (collision strength with other active slots)
    max_cross_r <- 0
    if (sent > 0 && length(active_slots) > 1) {
      other_sents <- sentinels[setdiff(active_slots, l)]
      other_sents <- other_sents[other_sents > 0]
      if (length(other_sents) > 0)
        max_cross_r <- max(abs(Xcorr[sent, other_sents]))
    }
    # Sentinel change relative to the previous iteration (if recorded)
    prev_sent_l <- if (!is.null(model$prev_sentinel)) model$prev_sentinel[l] else 0L
    sent_changed <- (prev_sent_l > 0) && (sent != prev_sent_l)
    # Theta (mr.ash coefficient) at the sentinel, masked and raw
    theta_at_sent <- if (sent > 0) theta_masked[sent] else 0
    theta_raw_at_sent <- if (sent > 0) theta_raw[sent] else 0
    # Alpha entropy (low = concentrated, possibly FP)
    alpha_nz <- alpha_l[alpha_l > 1e-10]
    alpha_entropy <- -sum(alpha_nz * log(alpha_nz))
    # Number of colliding partners (|r| > 0.9 with another active sentinel)
    n_colliding <- 0
    if (sent > 0 && length(active_slots) > 1) {
      other_sents <- sentinels[setdiff(active_slots, l)]
      other_sents <- other_sents[other_sents > 0]
      if (length(other_sents) > 0)
        n_colliding <- sum(abs(Xcorr[sent, other_sents]) > 0.9)
    }
    # Per-slot mu properties
    mu_l <- model$mu[l, ]
    mu_at_sent <- if (sent > 0) mu_l[sent] else 0
    max_abs_mu <- max(abs(mu_l))
    # c_hat relative to prior: how much evidence beyond the prior expectation
    # For BB: prior_log_odds = log(a + k_others) - log(b + L-1 - k_others)
    # c_hat_null = sigmoid(prior_log_odds), c_hat_excess = c_hat - c_hat_null
    # Prior params read from model$slot_prior (set during susie init)
    c_hat_null_l <- NA
    c_hat_excess_l <- NA
    if (!is.null(model$slot_weights) && !is.null(slot_prior) &&
        !is.null(slot_prior$a_beta)) {
      sw <- model$slot_weights
      k_others <- sum(sw[-l])
      prior_lo <- log(slot_prior$a_beta + k_others) -
        log(slot_prior$b_beta + L - 1 - k_others)
      c_hat_null_l <- 1 / (1 + exp(-prior_lo))
      c_hat_excess_l <- c_hat[l] - c_hat_null_l
    }
    # One feature row per slot; optional model fields fall back to FALSE/NA.
    rows[[l]] <- data.frame(
      method = "bb_ash", iter = iter, slot = l,
      sentinel = sent, purity = pur, V = model$V[l],
      c_hat = c_hat[l], c_hat_null = c_hat_null_l, c_hat_excess = c_hat_excess_l,
      lbf = if (!is.null(model$lbf)) model$lbf[l] else NA,
      max_alpha = max_a, cs_size = cs_size,
      alpha_entropy = alpha_entropy,
      is_confident = is_confident_now[l],
      is_trusted = is_trusted[l],
      status = status, mask_tier = mask_tier,
      was_low_chat = if (!is.null(model$was_low_chat)) model$was_low_chat[l] else FALSE,
      was_exposed = if (!is.null(model$was_exposed)) model$was_exposed[l] else FALSE,
      collision = sentinel_collision[l],
      ever_uncertain = model$ever_uncertain[l],
      n_colliding = n_colliding,
      max_cross_r = max_cross_r,
      sent_changed = sent_changed,
      prev_sentinel = prev_sent_l,
      mu_at_sent = mu_at_sent,
      max_abs_mu = max_abs_mu,
      theta_at_sent = theta_at_sent,
      theta_raw_at_sent = theta_raw_at_sent,
      mask_size = sum(mask), mask_frac = round(sum(mask) / p, 3),
      n_active = length(active_slots),
      n_trusted = sum(is_trusted),
      n_high_chat = if (!is.null(high_chat)) length(high_chat) else NA,
      n_low_chat = if (!is.null(low_chat)) length(low_chat) else NA,
      C_hat = round(sum(c_hat), 1),
      sigma2 = ash_result$sigma2,
      pi0 = if (!is.null(ash_result$pi)) ash_result$pi[1] else NA,
      theta_ss = sum(theta_masked^2),
      theta_raw_ss = sum(theta_raw^2),
      b_conf_ss = sum(b_confident^2),
      b_conf_max = max(abs(b_confident)),
      sent_masked = if (sent > 0) mask[sent] else FALSE,
      skip_mrash = getOption("susie.skip_mrash", FALSE),
      # Rule activation: which decision rules kicked in for this slot
      rule_active_gate = (l %in% active_slots),
      rule_collision = sentinel_collision[l],
      rule_jump = model$ever_uncertain[l] && !sentinel_collision[l],
      rule_trusted = is_trusted[l],
      rule_low_chat = if (!is.null(model$was_low_chat)) model$was_low_chat[l] else FALSE,
      rule_high_chat_pip = (!is.null(high_chat) && l %in% high_chat),
      rule_low_chat_sentinel = (!is.null(low_chat) && l %in% low_chat &&
        !(if (!is.null(model$was_exposed)) model$was_exposed[l] else FALSE)),
      rule_exposed = if (!is.null(model$was_exposed)) model$was_exposed[l] else FALSE,
      # Tunable parameter values
      param_collision_threshold = collision_threshold,
      param_purity_threshold = purity_threshold,
      param_masking_threshold = masking_threshold,
      param_nPIP_threshold = nPIP_threshold,
      param_c_hat_excess_threshold = c_hat_excess_threshold,
      param_alpha_entropy_threshold = alpha_entropy_threshold,
      # Unified mask diagnostics
      smoothness = if (!is.null(mask_smoothness)) mask_smoothness[l] else NA,
      amount = if (!is.null(mask_amount)) mask_amount[l] else NA,
      concentration = if (!is.null(mask_concentration)) mask_concentration[l] else NA,
      burnin = if (!is.null(mask_burnin)) mask_burnin[l] else NA,
      spread_pip_at_sent = if (!is.null(mask_spread_pip_at_sent) && sent > 0) mask_spread_pip_at_sent[l] else NA,
      pip_prot_at_sent = if (!is.null(mask_pip_prot_at_sent) && sent > 0) mask_pip_prot_at_sent[l] else NA,
      stringsAsFactors = FALSE
    )
  }
  do.call(rbind, rows)
}
#' V0 archived filter per-iteration diagnostic
#'
#' Builds a one-row-per-slot data.frame of diagnostic features for the
#' archived ("V0") ash-filter code path at the current iteration.
#'
#' @param model Model state; fields read: \code{alpha}, \code{mu},
#'   \code{V}, \code{lbf}, \code{ash_iter}, \code{ever_diffuse}, and
#'   optionally \code{prev_sentinel}, \code{diffuse_iter_count},
#'   \code{prev_case}, \code{second_chance_used}.
#' @param Xcorr Variable-by-variable correlation matrix.
#' @param masked Mask (logical or index) zeroing entries of
#'   \code{mrash_output$beta}.
#' @param b_confident Numeric vector of confident effects; only its sum
#'   of squares and max absolute value are recorded.
#' @param sentinels Integer vector of per-slot sentinel indices (0 = none).
#' @param effect_purity Numeric vector of per-slot purity values.
#' @param current_case Per-slot case labels; NA means inactive.
#' @param current_collision Logical vector of per-slot collision flags.
#' @param mrash_output List with \code{beta}, \code{sigma2}, and
#'   optionally \code{pi}.
#'
#' @return data.frame with one row per slot, all features
#' @keywords internal
diagnose_ash_filter_archived_iter <- function(model, Xcorr, masked,
                                              b_confident, sentinels,
                                              effect_purity, current_case,
                                              current_collision,
                                              mrash_output) {
  L <- nrow(model$alpha)
  p <- ncol(model$alpha)
  # mr.ash coefficients, raw and with the mask applied.
  theta_raw <- mrash_output$beta
  theta_masked <- theta_raw
  theta_masked[masked] <- 0
  cs_coverage <- 0.9
  iter <- model$ash_iter
  # A slot counts as "active" when its alpha distribution is not flat.
  is_active <- sapply(seq_len(L), function(l) {
    max(model$alpha[l, ]) - min(model$alpha[l, ]) >= 5e-5
  })
  rows <- list()
  for (l in seq_len(L)) {
    sent <- sentinels[l]
    alpha_l <- model$alpha[l, ]
    max_a <- max(alpha_l)
    # CS size: smallest top-alpha set reaching 90% coverage.
    cs_order <- order(alpha_l, decreasing = TRUE)
    cs_size <- min(which(cumsum(alpha_l[cs_order]) >= cs_coverage))
    case_str <- if (!is.na(current_case[l])) paste0("C", current_case[l]) else "inactive"
    # Max cross-sentinel |r| with other active slots
    max_cross_r <- 0
    active_idx <- which(is_active)
    if (sent > 0 && length(active_idx) > 1) {
      other_sents <- sentinels[setdiff(active_idx, l)]
      other_sents <- other_sents[other_sents > 0]
      if (length(other_sents) > 0)
        max_cross_r <- max(abs(Xcorr[sent, other_sents]))
    }
    # Sentinel change relative to the previous iteration (if recorded)
    prev_sent_l <- if (!is.null(model$prev_sentinel)) model$prev_sentinel[l] else 0L
    sent_changed <- (prev_sent_l > 0) && (sent != prev_sent_l)
    # Theta at sentinel, masked and raw
    theta_at_sent <- if (sent > 0) theta_masked[sent] else 0
    theta_raw_at_sent <- if (sent > 0) theta_raw[sent] else 0
    # Alpha entropy (low = concentrated)
    alpha_nz <- alpha_l[alpha_l > 1e-10]
    alpha_entropy <- -sum(alpha_nz * log(alpha_nz))
    # Number of colliding partners (|r| > 0.9 with another active sentinel)
    n_colliding <- 0
    if (sent > 0 && length(active_idx) > 1) {
      other_sents <- sentinels[setdiff(active_idx, l)]
      other_sents <- other_sents[other_sents > 0]
      if (length(other_sents) > 0)
        n_colliding <- sum(abs(Xcorr[sent, other_sents]) > 0.9)
    }
    # Per-slot mu properties
    mu_l <- model$mu[l, ]
    mu_at_sent <- if (sent > 0) mu_l[sent] else 0
    max_abs_mu <- max(abs(mu_l))
    rows[[l]] <- data.frame(
      method = "v0", iter = iter, slot = l,
      sentinel = sent, purity = effect_purity[l], V = model$V[l],
      lbf = model$lbf[l], max_alpha = max_a, cs_size = cs_size,
      alpha_entropy = alpha_entropy,
      status = case_str,
      current_collision = current_collision[l],
      ever_diffuse = model$ever_diffuse[l],
      diffuse_iter_count = if (!is.null(model$diffuse_iter_count)) model$diffuse_iter_count[l] else 0L,
      prev_case = if (!is.null(model$prev_case)) model$prev_case[l] else 0L,
      n_colliding = n_colliding,
      max_cross_r = max_cross_r,
      sent_changed = sent_changed,
      prev_sentinel = prev_sent_l,
      mu_at_sent = mu_at_sent,
      max_abs_mu = max_abs_mu,
      theta_at_sent = theta_at_sent,
      theta_raw_at_sent = theta_raw_at_sent,
      mask_size = sum(masked), mask_frac = round(sum(masked) / p, 3),
      n_active = sum(is_active),
      sigma2 = mrash_output$sigma2,
      pi0 = if (!is.null(mrash_output$pi)) mrash_output$pi[1] else NA,
      theta_ss = sum(theta_masked^2),
      theta_raw_ss = sum(theta_raw^2),
      b_conf_ss = sum(b_confident^2),
      b_conf_max = max(abs(b_confident)),
      sent_masked = if (sent > 0) masked[sent] else FALSE,
      skip_mrash = getOption("susie.skip_mrash", FALSE),
      # Rule activation: which V0 decision rules kicked in
      rule_collision = current_collision[l],
      rule_ever_diffuse = (model$ever_diffuse[l] > 0),
      rule_case1 = (!is.na(current_case[l]) && current_case[l] == 1),
      rule_case2 = (!is.na(current_case[l]) && current_case[l] == 2),
      rule_case3 = (!is.na(current_case[l]) && current_case[l] == 3),
      rule_exposure = (if (!is.null(model$diffuse_iter_count)) model$diffuse_iter_count[l] else 0) >= 2,
      # BUGFIX: guard sent > 0; indexing second_chance_used with sent == 0
      # returned logical(0) and made data.frame() fail with "arguments imply
      # differing number of rows" for sentinel-less slots.
      rule_second_chance = if (!is.null(model$second_chance_used) && sent > 0)
        model$second_chance_used[sent] else FALSE,
      stringsAsFactors = FALSE
    )
  }
  do.call(rbind, rows)
}
# ---- Helper functions for ML analysis ----
#' Collect diagnostic data.frames across iterations
#'
#' Call this after running susie() to rbind all per-iteration diagnostics
#' into a single ML-ready data.frame.
#'
#' @param fit SuSiE fit object (must have been run with .ash_debug = TRUE)
#' @return data.frame with nrow = L * n_ash_iters, or NULL if no diagnostics
#'
#' @examples
#' \dontrun{
#' # Full ML pipeline:
#' data(unmappable_data)
#' X <- unmappable_data$X; y <- as.vector(unmappable_data$y)
#' causal <- which(unmappable_data$beta != 0)
#'
#' fit <- susie(X, y, L=10, slot_prior=slot_prior_betabinom(),
#' unmappable_effects="ash", max_iter=50)
#'
#' df <- susieR:::collect_ash_diag(fit) # all iterations
#' df <- susieR:::label_diag_truth(df, fit, causal) # TP/FP labels
#' df <- susieR:::add_delta_features(df) # temporal features
#'
#' # Inspect FP slot across iterations:
#' subset(df, cs_label == "FP", select = c(iter, slot, sentinel,
#' c_hat, lbf, max_alpha, alpha_entropy, mask_tier, delta_c_hat))
#'
#' # 4-way comparison (BB+ash vs V0, with/without mr.ash):
#' options(susie.skip_mrash = TRUE)
#' fit_nomrash <- susie(X, y, L=10, slot_prior=slot_prior_betabinom(),
#' unmappable_effects="ash", max_iter=50)
#' options(susie.skip_mrash = FALSE)
#' df_nomrash <- susieR:::collect_ash_diag(fit_nomrash)
#'
#' # Decision tree analysis:
#' # library(rpart)
#' # last_iter <- df[df$iter == max(df$iter) & df$V > 0, ]
#' # tree <- rpart(cs_label ~ c_hat + lbf + max_alpha + alpha_entropy +
#' # purity + mask_tier + delta_c_hat, data = last_iter)
#' }
#'
#' @keywords internal
collect_ash_diag <- function(fit) {
  # Prefer the environment-based store; `$` on NULL yields NULL, so this is
  # safe when .diag_env is absent. Fall back to the legacy list slot.
  history <- fit$.diag_env$history
  if (is.null(history))
    history <- fit$.diag_history
  if (is.null(history))
    return(NULL)
  do.call(rbind, history)
}
#' Label diagnostic table with ground truth TP/FP
#'
#' For each slot at the final iteration, check if its CS (if any) contains
#' a causal variant.
#'
#' @param df Diagnostic data.frame (from collect_ash_diag or single iter)
#' @param fit SuSiE fit object
#' @param causal Integer vector of causal variant indices
#' @return df with added 'cs_label' column: "TP", "FP", or "-" (no CS)
#' @keywords internal
label_diag_truth <- function(df, fit, causal) {
  cs_list <- fit$sets$cs
  n_slots <- max(df$slot)
  # For each slot, record which CS it owns and whether that CS hits a
  # causal variant. A CS is assigned to the slot whose alpha peaks at the
  # CS's sentinel (highest-PIP member).
  slot_cs <- rep(NA, n_slots)
  slot_tp <- rep(NA, n_slots)
  if (length(cs_list) > 0) {
    for (k in seq_along(cs_list)) {
      members <- cs_list[[k]]
      sentinel <- members[which.max(fit$pip[members])]
      owner <- which.max(fit$alpha[, sentinel])
      slot_cs[owner] <- k
      slot_tp[owner] <- any(members %in% causal)
    }
  }
  df$cs_label <- sapply(df$slot, function(l) {
    if (is.na(slot_cs[l]))
      return("-")
    if (slot_tp[l]) "TP" else "FP"
  })
  df
}
#' Add per-slot delta features (change from previous iteration)
#'
#' Computes delta_c_hat, delta_V, delta_lbf, delta_max_alpha,
#' delta_alpha_entropy, delta_purity for each slot across iterations.
#' Also adds lag1 features (previous iteration values) and
#' cumulative features (max c_hat ever, min alpha_entropy ever).
#' These temporal features help ML models detect trajectories
#' (e.g., a slot that was strong then weakened = collapse signal).
#'
#' @param df data.frame from collect_ash_diag + label_diag_truth
#' @return df with added delta_, lag1_, and cum_ columns
#'
#' @examples
#' \dontrun{
#' df <- susieR:::collect_ash_diag(fit)
#' df <- susieR:::label_diag_truth(df, fit, causal)
#' df <- susieR:::add_delta_features(df)
#' # Now df has delta_c_hat, lag1_c_hat, cum_max_c_hat, etc.
#' # Use for decision tree: rpart::rpart(cs_label ~ ., data = df_last_iter)
#' }
#'
#' @keywords internal
add_delta_features <- function(df) {
  slots <- sort(unique(df$slot))
  # Features to compute deltas/lags for
  feat_cols <- c("c_hat", "V", "lbf", "max_alpha", "alpha_entropy", "purity",
                 "mask_size", "theta_ss")
  # Initialize new columns
  for (f in feat_cols) {
    df[[paste0("delta_", f)]] <- NA_real_
    df[[paste0("lag1_", f)]] <- NA_real_
  }
  df$cum_max_c_hat <- NA_real_
  df$cum_min_entropy <- NA_real_
  df$cum_max_lbf <- NA_real_
  for (s in slots) {
    idx <- which(df$slot == s)
    if (length(idx) < 2) next
    # BUGFIX: order this slot's rows by iteration before differencing, so
    # delta/lag/cumulative features are truly temporal even when the input
    # rows are not sorted by iter (identity permutation when already sorted,
    # hence backward-compatible).
    idx <- idx[order(df$iter[idx])]
    slot_df <- df[idx, ]
    for (f in feat_cols) {
      vals <- slot_df[[f]]
      if (all(is.na(vals))) next
      # Delta: current - previous
      df[[paste0("delta_", f)]][idx] <- c(NA, diff(vals))
      # Lag1: previous value
      df[[paste0("lag1_", f)]][idx] <- c(NA, vals[-length(vals)])
    }
    # Cumulative features (NAs treated as neutral: 0 for maxima, Inf for minima)
    if ("c_hat" %in% names(slot_df)) {
      df$cum_max_c_hat[idx] <- cummax(ifelse(is.na(slot_df$c_hat), 0, slot_df$c_hat))
    }
    if ("alpha_entropy" %in% names(slot_df)) {
      df$cum_min_entropy[idx] <- cummin(ifelse(is.na(slot_df$alpha_entropy), Inf, slot_df$alpha_entropy))
    }
    if ("lbf" %in% names(slot_df)) {
      df$cum_max_lbf[idx] <- cummax(ifelse(is.na(slot_df$lbf), 0, slot_df$lbf))
    }
  }
  df
}
#' Compare two diagnostic runs side by side
#'
#' Takes two data.frames (from diagnose_bb_ash_iter or
#' diagnose_ash_filter_archived_iter) at the same iteration
#' and prints a side-by-side comparison. Called for its printing
#' side effect only.
#'
#' @param df1 First diagnostic data.frame
#' @param df2 Second diagnostic data.frame
#' @param label1 Label for first run (e.g., "BB+ash")
#' @param label2 Label for second run (e.g., "V0")
#' @keywords internal
compare_ash_methods <- function(df1, df2, label1 = "Method1", label2 = "Method2") {
  # Header: iteration is taken from the first row of df1 — assumes both
  # data.frames come from the same iteration (TODO confirm with callers).
  cat(sprintf("\n===== %s vs %s (iter %d) =====\n", label1, label2,
              df1$iter[1]))
  cat(sprintf("  %-20s %12s %12s\n", "Feature", label1, label2))
  cat(sprintf("  %s\n", strrep("-", 46)))
  # Run-level scalars (identical on every row, so row 1 suffices).
  cat(sprintf("  %-20s %12d %12d\n", "mask_size", df1$mask_size[1], df2$mask_size[1]))
  cat(sprintf("  %-20s %12d %12d\n", "n_active", df1$n_active[1], df2$n_active[1]))
  cat(sprintf("  %-20s %12.4f %12.4f\n", "sigma2", df1$sigma2[1], df2$sigma2[1]))
  cat(sprintf("  %-20s %12.2e %12.2e\n", "theta_ss", df1$theta_ss[1], df2$theta_ss[1]))
  cat(sprintf("  %-20s %12.2e %12.2e\n", "b_conf_ss", df1$b_conf_ss[1], df2$b_conf_ss[1]))
  # Per-slot comparison for active slots. Note the two methods use
  # different "empty" status labels ("null" vs "inactive").
  active1 <- df1[df1$V > 0 | df1$status != "null", ]
  active2 <- df2[df2$V > 0 | df2$status != "inactive", ]
  n_show <- max(nrow(active1), nrow(active2))
  cat(sprintf("\n  Per-slot:\n"))
  cat(sprintf("  %2s | %-5s %5s %6s %5s | %-5s %5s %6s %5s\n",
              "L", "Sent1", "Pur1", "V1", "Sta1", "Sent2", "Pur2", "V2", "Sta2"))
  cat(sprintf("  %s\n", strrep("-", 60)))
  # Rows are matched positionally; a side with fewer active slots is
  # padded with "-" placeholders.
  for (i in seq_len(n_show)) {
    r1 <- if (i <= nrow(active1)) active1[i, ] else NULL
    r2 <- if (i <= nrow(active2)) active2[i, ] else NULL
    l <- if (!is.null(r1)) r1$slot else r2$slot
    cat(sprintf("  %2d |", l))
    if (!is.null(r1))
      cat(sprintf(" %5d %5.3f %6.4f %5s |", r1$sentinel, r1$purity, r1$V,
                  substr(r1$status, 1, 5)))
    else
      cat(sprintf(" %5s %5s %6s %5s |", "-", "-", "-", "-"))
    if (!is.null(r2))
      cat(sprintf(" %5d %5.3f %6.4f %5s", r2$sentinel, r2$purity, r2$V,
                  substr(r2$status, 1, 5)))
    else
      cat(sprintf(" %5s %5s %6s %5s", "-", "-", "-", "-"))
    cat("\n")
  }
  cat(sprintf("==========================================\n"))
}
#' Extract ML feature table from a completed BB+ash fit
#'
#' Computes per-slot features from the converged model. Call with
#' susieR:::extract_bb_ash_features(fit, X_or_Xcorr, causal).
#'
#' @param fit Completed susie fit (with slot_prior + ash); fields read:
#'   \code{alpha}, \code{V}, \code{sets$cs}, \code{pip}, and optionally
#'   \code{c_hat}, \code{slot_weights}, \code{lbf}, \code{theta}.
#' @param X Design matrix (used to compute Xcorr if needed).
#'   NOTE(review): \code{cor(X)} is always computed here, so passing a
#'   precomputed correlation matrix changes the result — confirm intent.
#' @param causal Integer vector of true causal indices (for labeling);
#'   when NULL, labels fall back to "CS<k>" / "-".
#' @return data.frame with one row per slot, all features + TP/FP label
#' @keywords internal
extract_bb_ash_features <- function(fit, X, causal = NULL) {
  L <- nrow(fit$alpha)
  p <- ncol(fit$alpha)
  Xcorr <- cor(X)
  cs_coverage <- 0.9
  # Per-slot inclusion estimate: prefer c_hat, then slot_weights, else 1.
  c_hat <- if (!is.null(fit$c_hat)) fit$c_hat else
    if (!is.null(fit$slot_weights)) fit$slot_weights else rep(1, L)
  # Map each reported CS to the slot whose alpha peaks at the CS sentinel.
  cs <- fit$sets$cs
  cs_owner <- rep(NA, L)
  cs_is_tp <- rep(NA, L)
  if (length(cs) > 0) {
    for (i in seq_along(cs)) {
      sent <- cs[[i]][which.max(fit$pip[cs[[i]]])]
      owner <- which.max(fit$alpha[, sent])
      cs_owner[owner] <- i
      if (!is.null(causal))
        cs_is_tp[owner] <- any(cs[[i]] %in% causal)
    }
  }
  rows <- list()
  for (l in seq_len(L)) {
    alpha_l <- fit$alpha[l, ]
    sent <- which.max(alpha_l)
    max_a <- max(alpha_l)
    # Purity: min |correlation| within the smallest 90%-coverage set.
    cs_order <- order(alpha_l, decreasing = TRUE)
    cs_size <- min(which(cumsum(alpha_l[cs_order]) >= cs_coverage))
    if (cs_size > 1 && fit$V[l] > 0) {
      cs_idx <- cs_order[1:cs_size]
      pur <- min(abs(Xcorr[cs_idx, cs_idx]))
    } else if (cs_size == 1) {
      pur <- 1.0
    } else {
      pur <- 0.0
    }
    # Alpha entropy (low = concentrated)
    alpha_nz <- alpha_l[alpha_l > 1e-10]
    alpha_entropy <- -sum(alpha_nz * log(alpha_nz))
    # Max cross-sentinel |r| and count of near-duplicate sentinels
    # among the other active slots (V > 0).
    max_cross_r <- 0
    n_colliding <- 0
    active <- which(fit$V > 0)
    if (sent > 0 && length(active) > 1) {
      other_sents <- sapply(setdiff(active, l), function(ll) which.max(fit$alpha[ll, ]))
      other_sents <- other_sents[other_sents > 0]
      if (length(other_sents) > 0) {
        cross_r <- abs(Xcorr[sent, other_sents])
        max_cross_r <- max(cross_r)
        n_colliding <- sum(cross_r > 0.9)
      }
    }
    # mr.ash coefficient at the sentinel (0 when fit$theta is absent)
    theta_at_sent <- if (!is.null(fit$theta) && sent > 0) fit$theta[sent] else 0
    # CS label: TP/FP when ground truth is supplied, else CS index or "-".
    cs_label <- if (is.na(cs_owner[l])) "-"
      else if (!is.null(causal) && cs_is_tp[l]) "TP"
      else if (!is.null(causal)) "FP"
      else paste0("CS", cs_owner[l])
    rows[[l]] <- data.frame(
      slot = l, sentinel = sent, purity = pur, V = fit$V[l],
      c_hat = c_hat[l],
      lbf = if (!is.null(fit$lbf)) fit$lbf[l] else NA,
      max_alpha = max_a, cs_size = cs_size,
      alpha_entropy = alpha_entropy,
      max_cross_r = max_cross_r, n_colliding = n_colliding,
      theta_at_sent = theta_at_sent,
      n_active = length(active),
      cs_label = cs_label,
      stringsAsFactors = FALSE
    )
  }
  do.call(rbind, rows)
}
================================================
FILE: R/example_dataset.R
================================================
#' @name N2finemapping
#'
#' @title Simulated Fine-mapping Data with Two Effect Variables
#'
#' @docType data
#'
#' @description This data set contains a genotype matrix for 574
#' individuals and 1,002 variables. The variables are genotypes after
#' centering and scaling, and therefore retain the correlation
#' structure of the original genotype data. Two of the variables have
#' non-zero effects on the multivariate response. The response data
#' are generated under a multivariate linear regression model. See
#' Wang \emph{et al} (2020) for details.
#'
#' @format \code{N2finemapping} is a list with the following elements:
#'
#' \describe{
#'
#' \item{X}{Centered and scaled genotype data.}
#'
#' \item{chrom}{Chromosome of the original data, in hg38 coordinates.}
#'
#' \item{pos}{Chromosomal position of the original data, in hg38
#' coordinates. The information can be used to compare impact of using
#' other genotype references of the same variables in \code{susie_rss}
#' application.}
#'
#' \item{true_coef}{Simulated effect sizes.}
#'
#' \item{residual_variance}{Simulated residual covariance matrix.}
#'
#' \item{Y}{Simulated multivariate response.}
#'
#' \item{allele_freq}{Allele frequencies based on the original
#' genotype data.}
#'
#' \item{V}{Suggested prior covariance matrix for effect sizes of
#' the two non-zero effect variables.}
#' }
#'
#' @keywords data
#'
#' @references
#' G. Wang, A. Sarkar, P. Carbonetto and M. Stephens (2020). A simple
#' new approach to variable selection in regression, with application
#' to genetic fine-mapping. \emph{Journal of the Royal Statistical
#' Society, Series B} \doi{10.1101/501114}.
#'
#' @examples
#' data(N2finemapping)
NULL
#' @name N3finemapping
#'
#' @title Simulated Fine-mapping Data with Three Effect Variables.
#'
#' @docType data
#'
#' @description This data set contains a matrix of 574
#' individuals and 1,001 variables. These variables are real-world
#' genotypes, centered and scaled, and therefore retain the
#' correlation structure of variables in the original genotype data.
#' Three of the variables have non-zero effects. The response data are
#' generated under a multivariate linear regression model. See Wang
#'
#' @format \code{N3finemapping} is a list with the following elements:
#'
#' \describe{
#'
#' \item{X}{N by P variable matrix of centered and scaled genotype
#' data.}
#'
#' \item{chrom}{Chromosome of the original data, in hg38 coordinates.}
#'
#' \item{pos}{Chromosomal position of the original data, in hg38
#' coordinate. The information can be used to compare impact of using
#' other genotype references of the same variables in susie_rss
#' application.}
#'
#' \item{true_coef}{The simulated effect sizes.}
#'
#' \item{residual_variance}{The simulated residual covariance matrix.}
#'
#' \item{Y}{The simulated response variables.}
#'
#' \item{allele_freq}{Allele frequency of the original genotype data.}
#'
#' \item{V}{Prior covariance matrix for effect size of the three
#' non-zero effect variables.} }
#'
#' @keywords data
#'
#' @references
#' G. Wang, A. Sarkar, P. Carbonetto and M. Stephens (2020). A simple
#' new approach to variable selection in regression, with application
#' to genetic fine-mapping. \emph{Journal of the Royal Statistical
#' Society, Series B} \doi{10.1101/501114}.
#'
#' @examples
#' data(N3finemapping)
NULL
#' @name FinemappingConvergence
#'
#' @title Simulated Fine-mapping Data with Convergence Problem.
#'
#' @description Data simulated using real genotypes from 50,000
#' individuals and 200 SNPs. Two of the SNPs have non-zero effects
#' on the multivariate response. The response data are generated under
#' a linear regression model. The simulated response and the columns
#' of the genotype matrix are centered.
#'
#' @format \code{FinemappingConvergence} is a list with the following
#' elements:
#'
#' \describe{
#'
#' \item{XtX}{Summary statistics computed using the centered and
#' scaled genotype matrix.}
#'
#' \item{Xty}{Summary statistics computed using the centered and
#' scaled genotype data, and the centered simulated response.}
#'
#' \item{yty}{yty is computed using the centered simulated response.}
#'
#' \item{n}{The sample size (50,000).}
#'
#' \item{true_coef}{The coefficients used to simulate the responses.}
#'
#' \item{z}{z-scores from a simple (single-SNP) linear regression.}}
#'
#' @docType data
#'
#' @keywords data
#'
#' @seealso A similar data set with more SNPs is used in the
#' \dQuote{Refine SuSiE model} vignette.
#'
#' @examples
#' data(FinemappingConvergence)
NULL
#' @name SummaryConsistency
#'
#' @title Simulated Fine-mapping Data with LD matrix From Reference Panel.
#'
#' @description Data simulated using real genotypes from 10,000
#' individuals and 200 SNPs. One SNP has a non-zero effect
#' on the multivariate response. The response data are generated under
#' a linear regression model. There is also one SNP with flipped allele
#' between summary statistics and the reference panel.
#'
#' @format \code{SummaryConsistency} is a list with the following
#' elements:
#'
#' \describe{
#'
#' \item{z}{z-scores computed by fitting univariate simple regression
#' variable-by-variable.}
#'
#' \item{ldref}{LD matrix estimated from the reference panel.}
#'
#' \item{flip_id}{The index of the SNP with the flipped allele.}
#'
#' \item{signal_id}{The index of the SNP with the non-zero effect.}}
#'
#' @seealso A similar data set with more samples is used in the
#' \dQuote{Diagnostic for fine-mapping with summary statistics}
#' vignette.
#'
#' @docType data
#'
#' @keywords data
#'
#' @examples
#' data(SummaryConsistency)
NULL
#' @name data_small
#'
#' @title Simulated Small-sample eQTL Data.
#'
#' @description A simulated eQTL data set with 47 individuals and 7,430
#' variables. The response is a simulated gene expression phenotype and
#' the variables are genotypes. This data set illustrates the small
#' sample-size setting considered in Denault \emph{et al} (2025).
#'
#' @format \code{data_small} is a list with the following elements:
#'
#' \describe{
#'
#' \item{y}{Simulated gene expression response.}
#'
#' \item{X}{Genotype matrix.}}
#'
#' @docType data
#'
#' @keywords data
#'
#' @seealso The \dQuote{Small data example} vignette.
#'
#' @references
#' W. R. P. Denault \emph{et al} (2025). Accounting for uncertainty in
#' residual variances improves fine-mapping in small sample studies.
#' \emph{bioRxiv} \doi{10.1101/2025.05.16.654543}.
#'
#' @examples
#' data(data_small)
NULL
#' @name unmappable_data
#'
#' @title Simulated Fine-mapping Data with Sparse, Oligogenic and Polygenic Effects.
#'
#' @description A simulated data set with 1,000 individuals and 5,000
#' variants, combining 3 sparse, 5 oligogenic and 15 polygenic
#' non-zero effects. The response is generated under a linear
#' regression model. This data set illustrates fine-mapping with
#' SuSiE-ash and SuSiE-inf.
#'
#' @format \code{unmappable_data} is a list with the following elements:
#'
#' \describe{
#'
#' \item{X}{Centered and scaled genotype matrix.}
#'
#' \item{y}{Simulated response.}
#'
#' \item{beta}{Simulated effect sizes.}
#'
#' \item{h2g}{Total proportion of variance in the response explained
#' by the simulated effects.}}
#'
#' @docType data
#'
#' @keywords data
#'
#' @seealso The \dQuote{Fine-mapping with SuSiE-ash and SuSiE-inf}
#' vignette.
#'
#' @examples
#' data(unmappable_data)
NULL
================================================
FILE: R/generic_methods.R
================================================
# =============================================================================
# DATA INITIALIZATION & CONFIGURATION
#
# S3 generics dispatched on data objects for setup, configuration, and preprocessing.
# These prepare data objects for model fitting and handle data-specific
# configurations like unmappable effects.
#
# Functions: configure_data, get_var_y
# =============================================================================
# Configure data object for specified method
# Generic: lets each data class adapt its representation to the requested
# fitting options (params) before the IBSS loop begins.
#' @keywords internal
configure_data <- function(data, params) {
  UseMethod("configure_data")
}
# Default: no configuration required; return the data unchanged.
#' @keywords internal
configure_data.default <- function(data, params) {
  return(data)
}
# Get variance of y
# Generic: scalar variance of the response for the given data class.
#' @keywords internal
get_var_y <- function(data, ...) {
  UseMethod("get_var_y")
}
# Default: no fallback — every supported data class must implement this.
#' @keywords internal
get_var_y.default <- function(data, ...) {
  stop("get_var_y: no method for class '", class(data)[1], "'")
}
# =============================================================================
# MODEL INITIALIZATION & SETUP
#
# Functions for initializing model objects and setting up initial states.
# These create model matrices, initialize fitted values, and prepare
# the SuSiE model for iterative fitting.
#
# Functions: initialize_susie_model, initialize_fitted, validate_prior, track_ibss_fit
# =============================================================================
# Initialize susie model object
# Generic: builds the model list (alpha, mu, mu2, KL, lbf, ...) for a data class.
#' @keywords internal
initialize_susie_model <- function(data, params, ...) {
  UseMethod("initialize_susie_model")
}
#' @keywords internal
initialize_susie_model.default <- function(data, params, ...) {
  stop("initialize_susie_model: no method for class '", class(data)[1], "'")
}
# Initialize fitted values
# Generic: computes the initial fitted-value slot (e.g. Xr) from mat_init.
#' @keywords internal
initialize_fitted <- function(data, mat_init) {
  UseMethod("initialize_fitted")
}
#' @keywords internal
initialize_fitted.default <- function(data, mat_init, ...) {
  stop("initialize_fitted: no method for class '", class(data)[1], "'")
}
# Validate prior variance
# Generic: class-specific sanity checks on the prior; default is a no-op
# that returns TRUE invisibly.
#' @keywords internal
validate_prior <- function(data, params, model, ...) {
  UseMethod("validate_prior")
}
#' @keywords internal
validate_prior.default <- function(data, params, model, ...) {
  invisible(TRUE)
}
# Track core parameters of a susie fit across iterations
# Generic: appends a per-iteration snapshot to `tracking` when enabled,
# returning the (possibly extended) tracking list.
#' @keywords internal
track_ibss_fit <- function(data, params, model, tracking, iter, ...) {
  UseMethod("track_ibss_fit")
}
#' @keywords internal
track_ibss_fit.default <- function(data, params, model, tracking, iter, elbo, ...) {
  # Record a snapshot of the fit for this iteration when track_fit is on.
  # `tracking` is an unnamed list indexed by iteration number.
  if (!isTRUE(params$track_fit))
    return(tracking)
  snapshot <- list(alpha  = model$alpha,
                   niter  = iter,
                   V      = model$V,
                   sigma2 = model$sigma2)
  # When slot weights are active, also keep their trajectory and the lbf values.
  if (!is.null(model$slot_weights)) {
    snapshot$slot_weights <- model$slot_weights
    snapshot$lbf <- model$lbf
  }
  tracking[[iter]] <- snapshot
  return(tracking)
}
# =============================================================================
# SINGLE EFFECT REGRESSION & ELBO
#
# Core functions for single effect regression computation and ELBO calculation.
# These handle the mathematical core of SuSiE including residual computation, SER
# statistics, posterior moments, and log-likelihood calculations for the ELBO.
#
# Functions: compute_residuals, compute_ser_statistics, SER_posterior_e_loglik,
# calculate_posterior_moments, compute_kl, get_ER2, Eloglik, loglik, neg_loglik
# =============================================================================
#' Slot weight accessor for effect l
#'
#' Looks up the multiplier applied to effect l's contribution to the
#' fitted values. With \code{model$slot_weights} unset (NULL, the
#' default), every effect carries weight 1 and standard SuSiE behavior
#' is recovered.
#'
#' Weights in [0,1] provide a mechanism for adaptively estimating the
#' number of effects: each slot l can carry the posterior probability
#' that the slot is active, which — with a suitable prior on the number
#' of active effects — generalizes SuSiE's fixed L to a data-driven
#' estimate.
#'
#' @param model SuSiE model object.
#' @param l Effect index.
#'
#' @return Scalar weight (default 1).
#'
#' @keywords internal
get_slot_weight <- function(model, l) {
  weights <- model$slot_weights
  if (is.null(weights))
    return(1)
  weights[l]
}
# Compute residuals for single effect regression
# Generic: removes effect l from the fit and stores residual quantities
# in the model object for the subsequent SER update.
#' @keywords internal
compute_residuals <- function(data, params, model, l, ...) {
  UseMethod("compute_residuals")
}
#' @keywords internal
compute_residuals.default <- function(data, params, model, l, ...) {
  stop("compute_residuals: no method for class '", class(data)[1], "'")
}
# Compute SER statistics (betahat, shat2)
# Generic: per-variable single-effect estimates plus settings for the
# prior-variance optimizer (init, bounds, scale).
#' @keywords internal
compute_ser_statistics <- function(data, params, model, l, ...) {
  UseMethod("compute_ser_statistics")
}
#' @keywords internal
compute_ser_statistics.default <- function(data, params, model, l, ...) {
  stop("compute_ser_statistics: no method for class '", class(data)[1], "'")
}
# Single effect regression posterior expected log-likelihood
# Generic: E_q[log p(residuals | b_l, sigma^2)] under effect l's posterior.
#' @keywords internal
SER_posterior_e_loglik <- function(data, params, model, l) {
  UseMethod("SER_posterior_e_loglik")
}
#' @keywords internal
SER_posterior_e_loglik.default <- function(data, params, model, l) {
  stop("SER_posterior_e_loglik: no method for class '", class(data)[1], "'")
}
# Calculate posterior moments for single effect regression
# Generic: fills model$mu[l, ] and model$mu2[l, ] given prior variance V.
#' @keywords internal
calculate_posterior_moments <- function(data, params, model, V, l, ...) {
  UseMethod("calculate_posterior_moments")
}
#' @keywords internal
calculate_posterior_moments.default <- function(data, params, model, V, l = NULL, ...) {
  stop("calculate_posterior_moments: no method for class '", class(data)[1], "'")
}
# Calculate KL divergence
# Generic: KL term for effect l's variational posterior, written into
# model$KL[l]; returns the updated model.
#' @keywords internal
compute_kl <- function(data, params, model, l) {
  UseMethod("compute_kl")
}
# Default: KL[l] = -log BF of effect l plus its posterior expected
# log-likelihood (the standard SuSiE identity for the ELBO's KL term).
#' @keywords internal
compute_kl.default <- function(data, params, model, l) {
  model$KL[l] <- -model$lbf[l] + SER_posterior_e_loglik(data, params, model, l)
  return(model)
}
# Expected squared residuals
# Generic: E_q[||y - Xb||^2] under the variational posterior.
#' @keywords internal
get_ER2 <- function(data, model) {
  UseMethod("get_ER2")
}
#' @keywords internal
get_ER2.default <- function(data, model) {
  stop("get_ER2: no method for class '", class(data)[1], "'")
}
# Expected log-likelihood
# Generic: E_q[log p(y | b, sigma^2)], used when assembling the ELBO.
#' @keywords internal
Eloglik <- function(data, model) {
  UseMethod("Eloglik")
}
#' @keywords internal
Eloglik.default <- function(data, model) {
  stop("Eloglik: no method for class '", class(data)[1], "'")
}
# Variational E_q[log p(y|b, sigma^2)] under SuSiE-NIG. Non-S3 helper called
# from get_objective so we don't break the Eloglik(data, model) signature
# that downstream packages override (mvsusieR, mfsusieR).
# Decomposition: E_q[||y-Xb||^2 | sigma^2] = A + sigma^2 B,
# B = sum_l sum_j alpha^(l)_j * r0^(l)_j * tau_j,
# A = get_ER2 - E[sigma^2] * B.
# Eloglik = -n/2 log(2 pi) - n/2 (log b - digamma(a)) - 0.5 (A * a/b + B).
#
# params$alpha0 / params$beta0 are the inverse-gamma prior hyperparameters;
# the sigma^2 posterior is IG(a_post, b_post) below.
#' @keywords internal
nig_eloglik <- function(data, params, model) {
  n <- data$n
  # Marginal expected residual sum of squares E_q[||y - Xb||^2].
  ERSS_marg <- get_ER2(data, model)
  a_post <- (params$alpha0 + n) / 2
  b_post <- (params$beta0 + ERSS_marg) / 2
  # Per-variable shat2 inflation; scalar 1 when not in use.
  tau_v <- if (!is.null(model$shat2_inflation)) model$shat2_inflation else 1
  pw <- model$predictor_weights
  B <- 0
  for (l in seq_len(nrow(model$alpha))) {
    # r0^(l)_j = V_l / (V_l + tau_j / w_j): per-variable shrinkage factor.
    r0_l <- model$V[l] / (model$V[l] + tau_v / pw)
    B <- B + sum(model$alpha[l, ] * r0_l * tau_v)
  }
  # E[sigma^2] = b_post / (a_post - 1) for the IG posterior, so A is the
  # sigma^2-free part of the expected squared residuals.
  A <- ERSS_marg - (b_post / (a_post - 1)) * B
  # E[1/sigma^2] = a_post/b_post; E[log sigma^2] = log(b_post) - digamma(a_post).
  -n / 2 * log(2 * pi) - n / 2 * (log(b_post) - digamma(a_post)) -
    0.5 * (A * a_post / b_post + B)
}
# Log-likelihood and posterior moments for fixed mixture prior
# (estimate_prior_method = "fixed_mixture"). Evaluates BFs on a
# pre-specified variance grid with given mixture weights.
#' @keywords internal
loglik_mixture <- function(data, params, model, ser_stats, l, ...) {
  UseMethod("loglik_mixture")
}
#' @keywords internal
loglik_mixture.default <- function(data, params, model, ser_stats, l, ...) {
  # Shared implementation for all data types.
  # compute_ser_statistics() (type-specific) has already produced betahat and shat2.
  model <- loglik_mixture_common(params, model, ser_stats, l)
  return(model)
}
# Generic: posterior moments under the mixture prior for effect l.
#' @keywords internal
calculate_posterior_moments_mixture <- function(data, params, model, l, ...) {
  UseMethod("calculate_posterior_moments_mixture")
}
#' @keywords internal
calculate_posterior_moments_mixture.default <- function(data, params, model, l, ...) {
  # Shared implementation: mixture posterior from stored lbf_grid and ser_stats
  model <- calculate_posterior_moments_mixture_common(params, model, l)
  return(model)
}
# Log-likelihood for prior variance optimization
# Generic: with l supplied, stores SER results in the model; with l = NULL,
# returns the model-level log BF for use inside the optimizer.
#' @keywords internal
loglik <- function(data, params, model, V, ser_stats, l = NULL, ...) {
  UseMethod("loglik")
}
#' @keywords internal
loglik.default <- function(data, params, model, V, ser_stats, l = NULL, ...) {
  stop("loglik: no method for class '", class(data)[1], "'")
}
# Negative log-likelihood for optimization (handles both log and linear scales)
# Generic: objective passed to the 1-D prior-variance optimizer.
#' @keywords internal
neg_loglik <- function(data, params, model, V_param, ser_stats, ...) {
  UseMethod("neg_loglik")
}
#' @keywords internal
neg_loglik.default <- function(data, params, model, V_param, ser_stats, ...) {
  stop("neg_loglik: no method for class '", class(data)[1], "'")
}
# EM update for prior variance
# Generic: returns the EM estimate of the prior variance V for one effect.
#' @keywords internal
em_update_prior_variance <- function(data, params, model, alpha, moments, V_init) {
  UseMethod("em_update_prior_variance")
}
# Default: standard SuSiE EM update V = sum_j alpha_j * E[b_j^2]; under the
# NIG prior the update is delegated to the dedicated NIG EM routine.
#' @keywords internal
em_update_prior_variance.default <- function(data, params, model, alpha, moments, V_init) {
  # isTRUE() is NULL- and NA-safe, and matches how use_NIG is tested in
  # update_variance_components.default.
  if (isTRUE(params$use_NIG)) {
    nig_ss <- get_nig_sufficient_stats(data, model)
    return(update_prior_variance_NIG_EM(data$n, model$predictor_weights,
                                        model$residuals, nig_ss$yy, nig_ss$sxy,
                                        alpha, V_init, params$alpha0, params$beta0,
                                        nig_ss$tau))
  }
  # Standard EM update: posterior-weighted second moment of the effect.
  sum(alpha * moments$post_mean2)
}
# =============================================================================
# MODEL UPDATES & FITTING
#
# Functions for iterative model updates and variance component estimation.
# These handle the dynamic aspects of model fitting including fitted value
# updates and variance component estimation.
#
# Functions: update_fitted_values, update_variance_components, update_derived_quantities
# =============================================================================
# Update fitted values
# Generic: refreshes the stored fit (e.g. Xr) after effect l has been updated.
#' @keywords internal
update_fitted_values <- function(data, params, model, l, ...) {
  UseMethod("update_fitted_values")
}
#' @keywords internal
update_fitted_values.default <- function(data, params, model, l, ...) {
  stop("update_fitted_values: no method for class '", class(data)[1], "'")
}
# Update variance components
# Generic: re-estimates the residual variance after a full IBSS sweep,
# returning a list with the new sigma2.
#' @keywords internal
update_variance_components <- function(data, params, model, ...) {
  UseMethod("update_variance_components")
}
#' @keywords internal
update_variance_components.default <- function(data, params, model, ...) {
  if (isTRUE(params$use_NIG)) {
    # Posterior mean of IG((alpha0+n)/2, (beta0+ERSS)/2)
    # (mean of IG(a, b) is b / (a - 1), giving the "- 2" in the denominator).
    sigma2 <- (params$beta0 + get_ER2(data, model)) /
      (params$alpha0 + data$n - 2)
  } else {
    # Standard estimate from the expected squared residuals.
    sigma2 <- est_residual_variance(data, model)
  }
  return(list(sigma2 = sigma2))
}
# Update derived quantities after variance component changes
# Generic: classes recompute any cached quantities that depend on sigma2.
#' @keywords internal
update_derived_quantities <- function(data, params, model) {
  UseMethod("update_derived_quantities")
}
# Default: nothing to refresh; hand the model back as-is.
#' @keywords internal
update_derived_quantities.default <- function(data, params, model) {
  model
}
# =============================================================================
# OUTPUT GENERATION & POST-PROCESSING
#
# Functions for generating final results and summary statistics.
# These process fitted models into interpretable outputs including
# credible sets, variable names, and fitted values.
#
# Functions: get_scale_factors, get_intercept, get_fitted, get_cs,
# get_variable_names, get_zscore, cleanup_model
# =============================================================================
# Get column scale factors
# Generic: per-column scaling applied to X, used to map effect estimates
# back to the original scale of the variables.
#' @keywords internal
get_scale_factors <- function(data, params, ...) {
  UseMethod("get_scale_factors")
}
#' @keywords internal
get_scale_factors.default <- function(data, params, ...) {
  stop("get_scale_factors: no method for class '", class(data)[1], "'")
}
# Get intercept
# Generic: intercept of the fitted model on the original data scale.
#' @keywords internal
get_intercept <- function(data, params, model, ...) {
  UseMethod("get_intercept")
}
#' @keywords internal
get_intercept.default <- function(data, params, model, ...) {
  stop("get_intercept: no method for class '", class(data)[1], "'")
}
# Get fitted values
# Generic: fitted response values; NULL when unavailable for a data class.
#' @keywords internal
get_fitted <- function(data, params, model, ...) {
  UseMethod("get_fitted")
}
#' @keywords internal
get_fitted.default <- function(data, params, model, ...) {
  return(NULL)
}
# Get credible sets
# Generic: computes credible sets from the fitted model for reporting.
#' @keywords internal
get_cs <- function(data, params, model, ...) {
  UseMethod("get_cs")
}
#' @keywords internal
get_cs.default <- function(data, params, model, ...) {
  stop("get_cs: no method for class '", class(data)[1], "'")
}
# Get variable names
# Generic: attaches variable names from the data to model components.
#' @keywords internal
get_variable_names <- function(data, model, ...) {
  UseMethod("get_variable_names")
}
#' @keywords internal
get_variable_names.default <- function(data, model, ...) {
  stop("get_variable_names: no method for class '", class(data)[1], "'")
}
# Get univariate z-scores
# Generic: single-variable regression z-scores; NULL when not computed.
#' @keywords internal
get_zscore <- function(data, params, model, ...) {
  UseMethod("get_zscore")
}
#' @keywords internal
get_zscore.default <- function(data, params, model, ...) {
  return(NULL)
}
# Clean up model object by removing temporary computational fields
# Generic: strips per-iteration scratch fields before the fit is returned.
#' @keywords internal
cleanup_model <- function(data, params, model, ...) {
  UseMethod("cleanup_model")
}
#' Class-specific extra fields to strip in cleanup_model.default
#'
#' Default returns `character(0)`. Subclasses (e.g., mfsusieR's
#' `raw_residuals`, mvsusieR's `Y_imputed`/`llik_cache`) override
#' to add their per-class scratch fields. Result is unioned with
#' the standard temp_fields list inside `cleanup_model.default`.
#' @keywords internal
cleanup_extra_fields <- function(data) {
  UseMethod("cleanup_extra_fields")
}
# Default: no class-specific scratch fields.
#' @keywords internal
cleanup_extra_fields.default <- function(data) {
  character(0)
}
#' @keywords internal
cleanup_model.default <- function(data, params, model, ...) {
  # Scratch fields common to all data types, extended with any
  # class-specific fields declared via cleanup_extra_fields().
  scratch <- c("null_weight", "predictor_weights", "runtime",
               "prev_elbo", "prev_alpha",
               "residuals", "fitted_without_l", "residual_variance",
               "shat2_inflation",
               cleanup_extra_fields(data))
  # Drop in one shot; intersect() keeps this a no-op for absent fields.
  present <- intersect(scratch, names(model))
  model[present] <- NULL
  return(model)
}
================================================
FILE: R/individual_data_methods.R
================================================
# =============================================================================
# DATA INITIALIZATION & CONFIGURATION
#
# Functions for data object setup, configuration, and preprocessing.
# These prepare data objects for model fitting and handle data-specific
# configurations like unmappable effects.
#
# Functions: configure_data, get_var_y
# =============================================================================
# Configure individual data for specified method
#' @keywords internal
configure_data.individual <- function(data, params) {
  mode <- params$unmappable_effects
  # "none" and the ash variants keep individual-level data as-is.
  if (mode %in% c("none", "ash", "ash_filter_archived"))
    return(configure_data.default(data, params))
  # Remaining modes (e.g. "inf") require a sufficient-statistics conversion.
  warning_message("Individual-level data will be converted to sufficient statistics for unmappable effects methods (this step may take a while for a large data set)")
  convert_individual_to_ss(data, params)
}
# Get variance of y
# Sample variance of the (possibly one-column) response vector.
#' @keywords internal
#' @importFrom stats var
get_var_y.individual <- function(data, ...) {
  var(drop(data$y))
}
# =============================================================================
# MODEL INITIALIZATION & SETUP
#
# Functions for initializing model objects and setting up initial states.
# These create model matrices, initialize fitted values, and prepare
# the SuSiE model for iterative fitting.
#
# Functions: initialize_susie_model, initialize_fitted, validate_prior, track_ibss_fit
# =============================================================================
# Initialize SuSiE model
# Builds the base model matrices, attaches per-variable predictor weights,
# and adds NIG / ash bookkeeping fields when those modes are requested.
#' @keywords internal
initialize_susie_model.individual <- function(data, params, var_y, ...) {
  # Base model
  model <- initialize_matrices(data, params, var_y)
  # Append predictor weights
  # NOTE(review): assumes attr(data$X, "d") was set upstream (presumably
  # the column sums of squares of the scaled X) — confirm where "d" is set.
  model$predictor_weights <- attr(data$X, "d")
  # Initialize NIG parameters: per-effect residual-variance modes and
  # per-effect marginal log-likelihoods (filled during fitting).
  if (params$use_NIG) {
    model$rv <- rep(1, params$L)
    model$marginal_loglik <- rep(as.numeric(NA), params$L)
  }
  # Initialize ash (Mr.ASH) tracking fields
  if (params$unmappable_effects == "ash") {
    model <- init_ash_fields(model, data$n, data$p, params$L, is_individual = TRUE)
  } else if (params$unmappable_effects == "ash_filter_archived") {
    model <- init_ash_fields_filter_archived(model, data$n, data$p, params$L, is_individual = TRUE)
  }
  return(model)
}
# Initialize fitted values
# Xr = X %*% b where b is the posterior-mean effect summed over slots
# (colSums of alpha * mu).
#' @keywords internal
initialize_fitted.individual <- function(data, mat_init) {
  return(list(Xr = compute_Xb(data$X, colSums(mat_init$alpha * mat_init$mu))))
}
# Validate prior variance
# Individual-level data relies on the shared default prior check.
#' @keywords internal
validate_prior.individual <- function(data, params, model, ...) {
  validate_prior.default(data, params, model, ...)
}
# Track core parameters across iterations
#' @keywords internal
track_ibss_fit.individual <- function(data, params, model, tracking, iter, elbo, ...) {
  # Always record the common snapshot first.
  tracking <- track_ibss_fit.default(data, params, model, tracking, iter, elbo, ...)
  # For the ash variants, additionally record the ash variance component.
  if (params$unmappable_effects %in% c("ash", "ash_filter_archived") &&
      isTRUE(params$track_fit))
    tracking[[iter]]$tau2 <- model$tau2
  return(tracking)
}
# =============================================================================
# SINGLE EFFECT REGRESSION & ELBO
#
# Core functions for single effect regression computation and ELBO calculation.
# These handle the mathematical core of SuSiE including residual computation, SER
# statistics, posterior moments, and log-likelihood calculations for the ELBO.
#
# Functions: compute_residuals, compute_ser_statistics, SER_posterior_e_loglik,
# calculate_posterior_moments, compute_kl, get_ER2, Eloglik, loglik, neg_loglik
# =============================================================================
# Compute residuals for single effect regression
# Forms the residuals with effect l removed from the fit, and caches
# X'R, the leave-l-out fitted values, and the raw residuals on the model.
#' @keywords internal
compute_residuals.individual <- function(data, params, model, l, ...) {
  # Remove lth effect from fitted values (scaled by slot weight)
  sw_l <- get_slot_weight(model, l)
  Xr_without_l <- model$Xr - sw_l * compute_Xb(data$X, model$alpha[l, ] * model$mu[l, ])
  # Compute residuals
  if (params$unmappable_effects %in% c("ash", "ash_filter_archived")) {
    # Subtract both sparse effects (without l) and ash theta
    R <- data$y - Xr_without_l - model$X_theta
  } else {
    R <- data$y - Xr_without_l
  }
  # X'R drives the SER statistics for effect l.
  XtR <- compute_Xty(data$X, R)
  # Store unified residuals in model
  model$residuals <- XtR
  model$fitted_without_l <- Xr_without_l
  model$raw_residuals <- R
  model$residual_variance <- model$sigma2 # Standard residual variance
  return(model)
}
# Compute SER statistics
# Per-variable single-effect estimates (betahat) and their sampling
# variances (shat2), plus settings for the prior-variance optimizer:
# a log-scale starting point and search bounds.
#' @keywords internal
compute_ser_statistics.individual <- function(data, params, model, l, ...) {
  betahat <- (1 / model$predictor_weights) * model$residuals
  shat2 <- model$residual_variance / model$predictor_weights
  # Initialize the optimizer at log of the largest moment-based variance
  # estimate (floored at 1 so the log is well-defined).
  list(betahat      = betahat,
       shat2        = shat2,
       optim_init   = log(max(c(betahat^2 - shat2, 1), na.rm = TRUE)),
       optim_bounds = c(-30, 15),
       optim_scale  = "log")
}
# Posterior expected log-likelihood for single effect regression
# E_q[log p(R | b_l, sigma^2)]: expands ||R - X b||^2 using the first two
# posterior moments of b_l; the quadratic term uses predictor_weights
# (presumably diag(X'X) — confirm upstream where attr "d" is set).
#' @keywords internal
SER_posterior_e_loglik.individual <- function(data, params, model, l) {
  # First and second posterior moments of b_l (elementwise over variables).
  Eb <- model$alpha[l, ] * model$mu[l, ]
  Eb2 <- model$alpha[l, ] * model$mu2[l, ]
  return(-0.5 * data$n * log(2 * pi * model$sigma2) -
    0.5 / model$sigma2 * (sum(model$raw_residuals * model$raw_residuals)
      - 2 * sum(model$raw_residuals * compute_Xb(data$X, Eb)) +
      sum(model$predictor_weights * Eb2)))
}
# Calculate posterior moments for single effect regression
# Fills model$mu[l, ] (posterior mean) and model$mu2[l, ] (posterior second
# moment) given prior variance V, with a NIG branch when use_NIG is set.
#' @keywords internal
calculate_posterior_moments.individual <- function(data, params, model, V, l, ...) {
  if (params$use_NIG) {
    if (V <= 0) {
      # Zero variance case: the effect is shrunk to exactly zero.
      post_mean <- rep(0, data$p)
      post_mean2 <- rep(0, data$p)
      model$rv[l] <- 1
    } else {
      # Compute posterior moments for NIG prior
      nig_ss <- get_nig_sufficient_stats(data, model)
      moments <- compute_posterior_moments_NIG(data$n, model$predictor_weights,
                                               model$residuals, nig_ss$yy, nig_ss$sxy,
                                               V, params$alpha0, params$beta0, nig_ss$tau)
      post_mean <- moments$post_mean
      post_mean2 <- moments$post_mean2
      # Compute weighted average of residual variance modes using PIPs
      model$rv[l] <- sum(model$alpha[l, ] * moments$rv)
    }
  } else {
    # Standard Gaussian posterior calculations:
    # posterior precision = prior precision + data precision.
    post_var <- (1 / V + model$predictor_weights / model$residual_variance)^(-1)
    post_mean <- (1 / model$residual_variance) * post_var * model$residuals
    post_mean2 <- post_var + post_mean^2
  }
  # Store posterior moments in model
  model$mu[l, ] <- post_mean
  model$mu2[l, ] <- post_mean2
  return(model)
}
# Calculate KL divergence
# KL term for effect l written into model$KL[l]; NIG and Gaussian priors
# take different routes.
#' @keywords internal
compute_kl.individual <- function(data, params, model, l) {
  if (params$use_NIG) {
    # NIG KL only valid for L=1 (gIBSS for L>1 has no coherent ELBO; supp. line 503)
    if (params$L == 1) {
      ki <- nig_kl_inputs(data, params, model, l)
      kl <- compute_kl_NIG(model$alpha[l, ], model$mu[l, ], model$mu2[l, ],
                           model$pi, model$V[l],
                           a0 = params$alpha0 / 2, b0 = params$beta0 / 2,
                           a_post = ki$a_post, b_post = ki$b_post,
                           s_j_sq = ki$s_j_sq)
    } else {
      # No coherent KL for L > 1 under NIG; contribute nothing to the ELBO.
      kl <- 0
    }
  } else {
    # Standard Gaussian KL divergence:
    # log-likelihood under the fit = lbf + null-model log-likelihood of the
    # raw residuals; KL = -loglik + posterior expected log-likelihood.
    loglik_term <- model$lbf[l] + sum(dnorm(model$raw_residuals, 0, sqrt(model$sigma2), log = TRUE))
    kl <- -loglik_term + SER_posterior_e_loglik(data, params, model, l)
  }
  # Store in model and return
  model$KL[l] <- kl
  return(model)
}
# Expected squared residuals
# E_q[||y - Xb||^2] for individual-level data, with slot-weight and ash
# corrections where those features are active.
#' @keywords internal
get_ER2.individual <- function(data, model) {
  # Per-effect fitted values (presumably one row per effect — confirm
  # compute_MXt's orientation) and per-effect posterior second moments.
  Xr_L <- compute_MXt(model$alpha * model$mu, data$X)
  postb2 <- model$alpha * model$mu2
  # For ash, subtract theta contribution from residuals
  y_adj <- if (!is.null(model$X_theta)) data$y - model$X_theta else data$y
  # Slot-weight correction: E[||y - sum_l c_l X beta^(l)||^2] under Bern(chat_l)
  # = ||y - Xr||^2 + sum_l chat_l * E[||X b^(l)||^2] - chat_l^2 * ||X bbar_l||^2
  # When slot_weights is NULL (all weights = 1), reduces to the standard formula.
  sw <- if (!is.null(model$slot_weights)) model$slot_weights else rep(1, nrow(model$alpha))
  per_slot_Eb2 <- as.vector(postb2 %*% model$predictor_weights) # L-vector
  per_slot_Xb2 <- rowSums(Xr_L^2) # L-vector
  return(sum((y_adj - model$Xr)^2) + sum(sw * per_slot_Eb2 - sw^2 * per_slot_Xb2))
}
# Expected log-likelihood
# Gaussian log-likelihood with E_q[||y - Xb||^2] from get_ER2 plugged in.
#' @keywords internal
Eloglik.individual <- function(data, model) {
  return(-data$n / 2 * log(2 * pi * model$sigma2) -
    1 / (2 * model$sigma2) * get_ER2(data, model))
}
# Log Bayes factors and posterior inclusion weights for one single effect.
# With l supplied, stores alpha / lbf / lbf_variable in the model and returns
# it; with l = NULL, returns only the model-level log BF (used inside the
# prior-variance optimizer).
#' @importFrom Matrix colSums
#' @importFrom stats dnorm
#' @importFrom stats cor
#' @keywords internal
loglik.individual <- function(data, params, model, V, ser_stats, l = NULL, ...) {
  # Check if using NIG prior
  if (params$use_NIG) {
    # Compute log Bayes factors for NIG prior
    nig_ss <- get_nig_sufficient_stats(data, model)
    lbf <- compute_lbf_NIG(data$n, model$predictor_weights,
                           model$residuals, nig_ss$yy, nig_ss$sxy,
                           V, params$alpha0, params$beta0, nig_ss$tau)
  } else {
    # Standard Gaussian prior log Bayes factors:
    # ratio of N(0, V + shat2) to N(0, shat2) densities at betahat.
    lbf <- dnorm(ser_stats$betahat, 0, sqrt(V + ser_stats$shat2), log = TRUE) -
      dnorm(ser_stats$betahat, 0, sqrt(ser_stats$shat2), log = TRUE)
  }
  # Stabilize logged Bayes Factor
  stable_res <- lbf_stabilization(lbf, model$pi, ser_stats$shat2)
  # Compute posterior weights
  weights_res <- compute_posterior_weights(stable_res$lpo)
  # Store in model if l is provided, otherwise return lbf_model for prior variance optimization
  if (!is.null(l)) {
    # Store results in model
    model$alpha[l, ] <- weights_res$alpha
    model$lbf[l] <- weights_res$lbf_model
    model$lbf_variable[l, ] <- stable_res$lbf
    # Compute and store marginal log-likelihood for NIG prior
    # (nig_ss is only defined when use_NIG is TRUE, matching this branch).
    if (params$use_NIG) {
      model$marginal_loglik[l] <- compute_marginal_loglik(weights_res$lbf_model, data$n,
                                                          nig_ss$yy, params$alpha0, params$beta0,
                                                          TRUE)
    }
    return(model)
  } else {
    return(weights_res$lbf_model)
  }
}
#' @keywords internal
neg_loglik.individual <- function(data, params, model, V_param, ser_stats, ...) {
  # V_param always lives on the log scale for individual-level data, so
  # exponentiate before evaluating; negate for minimization.
  -loglik.individual(data, params, model, exp(V_param), ser_stats)
}
# =============================================================================
# MODEL UPDATES & FITTING
#
# Functions for iterative model updates and variance component estimation.
# These handle the dynamic aspects of model fitting including fitted value
# updates and variance component estimation.
#
# Functions: update_fitted_values, update_variance_components, update_derived_quantities
# =============================================================================
# Update fitted values
# Re-adds effect l's (slot-weighted) contribution to the fit cached
# without effect l by compute_residuals.individual.
#' @keywords internal
update_fitted_values.individual <- function(data, params, model, l, ...) {
  sw_l <- get_slot_weight(model, l)
  model$Xr <- model$fitted_without_l + sw_l * compute_Xb(data$X, model$alpha[l, ] * model$mu[l, ])
  return(model)
}
# Update variance components for individual data
# Dispatches on the unmappable-effects mode; anything other than the
# ash variants falls through to the shared default.
#' @keywords internal
update_variance_components.individual <- function(data, params, model, ...) {
  switch(params$unmappable_effects,
         # Original filter-based masking (archived for internal diagnostics)
         ash_filter_archived = update_ash_variance_components_filter_archived(data, model, params),
         # c_hat + 3 LD-interference heuristics
         ash = update_ash_variance_components(data, model, params),
         update_variance_components.default(data, params, model, ...))
}
# Update derived quantities for individual data
#' @keywords internal
update_derived_quantities.individual <- function(data, params, model) {
  if (params$unmappable_effects %in% c("ash", "ash_filter_archived")) {
    # For ash, recompute full Xr including sparse effects only
    # (theta is tracked separately via X_theta)
    # Use slot_weights (c_hat) if available to maintain consistency
    # with the c_hat-weighted Xr from ibss_fit + update_c_hat.
    sw <- if (!is.null(model$slot_weights)) model$slot_weights else rep(1, nrow(model$alpha))
    b <- colSums(sw * model$alpha * model$mu)
    model$Xr <- as.vector(compute_Xb(data$X, b))
    return(model)
  }
  # Non-ash modes: nothing to refresh beyond the default (a no-op).
  return(update_derived_quantities.default(data, params, model))
}
# =============================================================================
# OUTPUT GENERATION & POST-PROCESSING
#
# Functions for generating final results and summary statistics.
# These process fitted models into interpretable outputs including
# credible sets, variable names, and fitted values.
#
# Functions: get_scale_factors, get_intercept, get_fitted, get_cs,
# get_variable_names, get_zscore
# =============================================================================
# Get column scale factors
# Returns the per-column scale recorded on X during standardization.
# `...` added to match the generic's signature (R CMD check S3 consistency);
# no behavioral change for existing callers.
#' @keywords internal
get_scale_factors.individual <- function(data, params, ...) {
  return(attr(data$X, "scaled:scale"))
}
# Get intercept
# Maps the fitted effects back to the original X scale and offsets by the
# response mean; 0 when the model was fit without an intercept.
#' @keywords internal
get_intercept.individual <- function(data, params, model, ...) {
  if (!params$intercept)
    return(0)
  centers <- attr(data$X, "scaled:center")
  scales <- attr(data$X, "scaled:scale")
  # Posterior-mean effects on the original (unscaled) variable scale.
  b_orig <- colSums(model$alpha * model$mu) / scales
  data$mean_y - sum(centers * b_orig)
}
# Get Fitted Values
# Fitted response on the original scale: Xr plus the response mean when an
# intercept was fit, plus the ash theta contribution when present. Named
# by the response names, falling back to the rownames of X.
#' @keywords internal
get_fitted.individual <- function(data, params, model, ...) {
  yhat <- if (params$intercept) model$Xr + data$mean_y else model$Xr
  # Include ash theta contribution
  if (!is.null(model$X_theta))
    yhat <- yhat + model$X_theta
  yhat <- drop(yhat)
  names(yhat) <- if (is.null(names(data$y))) rownames(data$X) else names(data$y)
  yhat
}
# Get Credible Sets
# Returns NULL when coverage or min_abs_corr is unset (CS reporting
# disabled); otherwise delegates to susie_get_cs with X for purity.
#' @keywords internal
get_cs.individual <- function(data, params, model, ...) {
  if (is.null(params$coverage) || is.null(params$min_abs_corr)) {
    return(NULL)
  }
  return(susie_get_cs(model,
                      X = data$X,
                      coverage = params$coverage,
                      min_abs_corr = params$min_abs_corr,
                      n_purity = params$n_purity))
}
# Get Variable Names
# Attaches the column names of X to the model's per-variable quantities.
#' @keywords internal
get_variable_names.individual <- function(data, model, ...) {
  return(assign_names(data, model, colnames(data$X)))
}
# Get univariate z-score
# Single-variable regression z-scores for each column of X; skipped
# entirely when compute_univariate_zscore is FALSE.
#' @keywords internal
get_zscore.individual <- function(data, params, model, ...) {
  if (isFALSE(params$compute_univariate_zscore)) {
    return(get_zscore.default(data, params, model))
  }
  X <- data$X
  if (!is.matrix(X)) {
    warning_message(
      "Calculation of univariate regression z-scores is not ",
      "implemented specifically for sparse or trend filtering ",
      "matrices, so this step may be slow if the matrix is large; ",
      "to skip this step set compute_univariate_zscore = FALSE"
    )
  }
  # Drop the appended null-weight column before computing z-scores.
  if (!is.null(model$null_weight) && model$null_weight != 0) {
    X <- X[, 1:(ncol(X) - 1)]
  }
  return(calc_z(X, data$y, center = params$intercept, scale = params$standardize))
}
# Clean up model object for individual data
# Removes the shared scratch fields, then the individual-, NIG-, and
# ash-specific runtime fields before the fit is returned.
#' @keywords internal
cleanup_model.individual <- function(data, params, model, ...) {
  # Remove common fields
  model <- cleanup_model.default(data, params, model, ...)
  # Remove individual-specific temporary fields
  model$raw_residuals <- NULL
  # Remove NIG specific temporary fields
  if (params$use_NIG) {
    model$marginal_loglik <- NULL
  }
  # Remove ash-specific runtime fields
  if (!is.null(params$unmappable_effects) && params$unmappable_effects == "ash") {
    model <- cleanup_ash_fields(model)
  } else if (!is.null(params$unmappable_effects) && params$unmappable_effects == "ash_filter_archived") {
    model <- cleanup_ash_fields_filter_archived(model)
  }
  return(model)
}
================================================
FILE: R/iterative_bayesian_stepwise_selection.R
================================================
# =============================================================================
# IBSS INITIALIZATION
#
# Initializes the SuSiE model object for Iterative Bayesian Stepwise Selection.
# Sets up model matrices, handles model_init, and prepares for IBSS.
# =============================================================================
#' Initialize IBSS model
#'
#' Creates and initializes the model object for the IBSS algorithm.
#' Dispatches on the data class; see \code{ibss_initialize.default} for
#' the shared implementation.
#'
#' @param data Data object (individual, ss, or rss_lambda)
#' @param params Validated params object
#'
#' @return Initialized model object ready for the IBSS iteration loop.
#' @importFrom utils modifyList
#' @export
#' @keywords internal
ibss_initialize <- function(data, params) {
  UseMethod("ibss_initialize")
}
#' @rdname ibss_initialize
#' @export
#' @keywords internal
ibss_initialize.default <- function(data, params) {
  # Outcome variance: default residual variance and prior-variance scale.
  var_y <- get_var_y(data)
  # Cannot fit more single effects than there are variables.
  if (data$p < params$L) {
    params$L <- data$p
  }
  # Default the residual variance to var(y) when not supplied.
  if (is.null(params$residual_variance)) {
    params$residual_variance <- var_y
  }
  # For multivariate models, residual_variance can be a matrix; the scalar
  # checks below apply only to the non-matrix (univariate) case.
  if (!is.matrix(params$residual_variance)) {
    if (!is.numeric(params$residual_variance)) {
      stop("Input residual variance sigma2 must be numeric.")
    }
    params$residual_variance <- as.numeric(params$residual_variance)
    if (length(params$residual_variance) != 1) {
      stop("Input residual variance sigma2 must be a scalar.")
    }
    if (params$residual_variance <= 0) {
      stop("Residual variance sigma2 must be positive (is your var(Y) zero?).")
    }
  }
  # Handle model initialization (warm start from a previous fit).
  if (!is.null(params$model_init)) {
    # Validate the contents of model_init
    validate_init(data, params)
    # Prune effects with zero prior variance
    model_init_pruned <- prune_single_effects(params$model_init)
    # Adjust the number of effects to match the pruned initialization
    adjustment <- adjust_L(params, model_init_pruned, var_y)
    params$L <- adjustment$L
    # Create base model with all required fields
    mat_init <- initialize_susie_model(data, params, var_y)
    # Merge with adjusted model_init (init values override fresh fields)
    mat_init <- modifyList(mat_init, adjustment$model_init)
    # Reset iteration-specific values so stale KL/lbf from the init are
    # not mistaken for values computed against the current data
    mat_init$KL <- rep(as.numeric(NA), params$L)
    mat_init$lbf <- rep(as.numeric(NA), params$L)
  } else {
    # Create fresh model
    mat_init <- initialize_susie_model(data, params, var_y)
  }
  # Initialize fitted values and null index
  fitted <- initialize_fitted(data, mat_init)
  null_index <- initialize_null_index(data, mat_init)
  # Preserve model class set by initialize_susie_model (e.g., "mvsusie")
  model_class <- class(mat_init)
  # Return assembled SuSiE object
  model <- c(mat_init,
             list(null_index = null_index),
             fitted)
  # Use the class from initialize_susie_model if it inherits from "susie",
  # otherwise default to "susie"
  if (inherits(mat_init, "susie")) {
    class(model) <- model_class
  } else {
    class(model) <- "susie"
  }
  model$converged <- FALSE
  # Initialize slot activity (c_hat) if specified via params$slot_prior
  sp <- params$slot_prior
  if (!is.null(sp)) {
    if (!is.slot_prior(sp))
      stop("slot_prior must be created by slot_prior_betabinom() or ",
           "slot_prior_poisson(). ",
           "Got class: ", paste(class(sp), collapse=", "))
    L <- nrow(model$alpha)
    if (inherits(sp, "slot_prior_betabinom")) {
      # Beta-Binomial slot prior: start each slot at the prior mean
      # a/(a+b), capped just below 1, unless a length-L init is supplied.
      a_beta <- sp$a_beta
      b_beta <- sp$b_beta
      prior_mean <- a_beta / (a_beta + b_beta)
      if (!is.null(sp$c_hat_init) && length(sp$c_hat_init) == L) {
        c_hat <- sp$c_hat_init
      } else {
        c_hat <- rep(min(prior_mean, 1 - 1e-10), L)
      }
      model$slot_weights <- c_hat
      model$c_hat_state <- list(
        prior_type = "betabinom", a_beta = a_beta, b_beta = b_beta,
        update_schedule = sp$update_schedule,
        skip_threshold_multiplier = sp$skip_threshold_multiplier,
        skip_threshold = 0
      )
    } else {
      # Gamma-Poisson slot prior: C is the expected number of active
      # slots, nu the prior strength; (a_g, b_g) are Gamma shape/rate.
      C_val <- sp$C
      nu <- sp$nu
      if (!is.null(sp$c_hat_init) && length(sp$c_hat_init) == L) {
        c_hat <- sp$c_hat_init
        a_g <- nu + sum(c_hat)
      } else {
        c_hat <- rep(min(C_val / L, 1 - 1e-10), L)
        a_g <- nu + C_val
      }
      b_g <- nu / max(C_val, 1e-6) + 1
      model$slot_weights <- c_hat
      model$c_hat_state <- list(
        prior_type = "poisson", C = C_val, nu = nu, a_g = a_g, b_g = b_g,
        update_schedule = sp$update_schedule,
        skip_threshold_multiplier = sp$skip_threshold_multiplier,
        skip_threshold = 0
      )
    }
    # Fitted values must reflect the c_hat weighting from iteration one.
    model <- recompute_fitted_weighted(data, model)
  }
  # SS-path lambda_bias is stored as a scalar on the model. Only
  # allocate when R_mismatch is active so R_mismatch = "none" returns to the
  # un-augmented behavior (compute_shat2_inflation falls back to 0
  # when model$lambda_bias is NULL). The rss_lambda dispatch is out
  # of scope and continues to use a length-L vector.
  R_mismatch_mode <- if (!is.null(params$R_mismatch)) params$R_mismatch else "none"
  if (R_mismatch_mode != "none" && !is.null(data$R_finite_B) &&
      inherits(data, c("ss", "ss_mixture"))) {
    model$lambda_bias <- 0
    model$B_corrected <- data$R_finite_B
    model$R_finite_B <- data$R_finite_B
  }
  return(model)
}
# =============================================================================
# IBSS FITTING
#
# Updates all L single effects in the SuSiE model for one IBSS iteration.
# Calls single_effect_update for each effect and validates prior variance estimates.
# =============================================================================
#' Run one IBSS sweep over all single effects
#'
#' @param data Data object (individual, ss, or rss_lambda)
#' @param params Validated params object
#' @param model Current SuSiE model object
#'
#' @return Updated SuSiE model object with new alpha, mu, mu2, V, lbf, KL, and
#' fitted values
#'
#' @keywords internal
#' @noRd
ibss_fit <- function(data, params, model) {
  L <- nrow(model$alpha)
  # Slot-activity machinery is active only when ibss_initialize attached
  # a c_hat_state (i.e., a slot_prior was supplied).
  use_c_hat <- !is.null(model$c_hat_state)
  # SS / ss_mixture: lambda_bias / B_corrected are scalars set per-sweep
  # by fit_R_mismatch at the end of the sweep. rss_lambda does not carry
  # these (lambda > 0 + R_mismatch != "none" errors at entry). No reset
  # needed.
  if (L > 0) {
    for (l in seq_len(L)) {
      # Skip slots whose activity fell below the adaptive threshold;
      # their SER update would be wasted work.
      if (use_c_hat &&
          model$slot_weights[l] < model$c_hat_state$skip_threshold) {
        next
      }
      model <- single_effect_update(data, params, model, l)
      if (use_c_hat) {
        model <- update_c_hat(data, model, l)
      }
    }
  }
  # Gamma-Poisson batch shape update (once per sweep); the "sequential"
  # schedule instead updates a_g inside update_c_hat after every slot.
  if (use_c_hat && model$c_hat_state$update_schedule == "batch" &&
      model$c_hat_state$prior_type != "betabinom") {
    model$c_hat_state$a_g <- model$c_hat_state$nu + sum(model$slot_weights)
  }
  # Adaptive skip threshold: baseline c_hat with lbf=0, scaled by multiplier
  if (use_c_hat && model$c_hat_state$skip_threshold_multiplier > 0) {
    st <- model$c_hat_state
    L_val <- nrow(model$alpha)
    if (st$prior_type == "betabinom") {
      # Self-consistent baseline: one Newton step for k_{-l} = k_total - baseline
      k_total <- sum(model$slot_weights)
      approx <- log(st$a_beta + k_total) - log(st$b_beta + L_val - 1 - k_total)
      k_others <- k_total - 1 / (1 + exp(-approx))
      baseline_logodds <- log(st$a_beta + k_others) -
        log(st$b_beta + L_val - 1 - k_others)
    } else {
      baseline_logodds <- digamma(st$a_g) - log(st$b_g) - log(L_val)
    }
    c_hat_baseline <- 1 / (1 + exp(-baseline_logodds))
    model$c_hat_state$skip_threshold <-
      st$skip_threshold_multiplier * c_hat_baseline
  }
  # Region-level R-bias fit at the end of the sweep, before validate.
  # No-op when R_mismatch = "none" or on the rss_lambda dispatch (out of
  # scope; that path keeps the legacy per-slot fit). Reuses the
  # existing estimate_lambda_bias optimizer; only the cadence and
  # storage shape change (was per-slot inside the SER step; now
  # scalar at sweep boundary).
  if (inherits(data, c("ss", "ss_mixture"))) {
    old_lambda_bias <- model$lambda_bias
    model <- fit_R_mismatch(data, params, model)
    new_lambda_bias <- model$lambda_bias
    # Record how far the bias estimate moved this sweep; check_convergence
    # uses this to force one more sweep after a material lambda update.
    if (!is.null(old_lambda_bias) || !is.null(new_lambda_bias)) {
      old <- if (is.null(old_lambda_bias)) 0 else old_lambda_bias
      new <- if (is.null(new_lambda_bias)) 0 else new_lambda_bias
      model$runtime$lambda_bias_diff <- max(abs(new - old))
    }
  }
  # Validate prior variance is reasonable
  validate_prior(data, params, model)
  return(model)
}
# =============================================================================
# SLOT ACTIVITY (c_hat) HELPERS
#
# c_hat[l] = posterior probability that slot l is active.
# Beta-Binomial: logit(c_l) = log(a + k_{-l}) - log(b + L-1 - k_{-l}) + lbf_l
# Gamma-Poisson: logit(c_l) = psi(a_g) - log(b_g) - log(L) + lbf_l
# =============================================================================
#' Update c_hat for slot l after its SER step.
#' @keywords internal
#' @noRd
update_c_hat <- function(data, model, l) {
  state <- model$c_hat_state
  n_slots <- nrow(model$alpha)
  prev_weight <- model$slot_weights[l]
  # Treat a missing/non-finite log-Bayes-factor as zero evidence.
  evidence <- model$lbf[l]
  if (!is.finite(evidence)) evidence <- 0
  is_betabinom <- state$prior_type == "betabinom"
  logit_c <- if (is_betabinom) {
    k_rest <- sum(model$slot_weights[-l])
    log(state$a_beta + k_rest) -
      log(state$b_beta + n_slots - 1 - k_rest) + evidence
  } else {
    digamma(state$a_g) - log(state$b_g) - log(n_slots) + evidence
  }
  # Clamp the log-odds before the logistic transform to avoid overflow.
  logit_c <- min(20, max(-20, logit_c))
  new_weight <- 1 / (1 + exp(-logit_c))
  model$slot_weights[l] <- new_weight
  delta <- new_weight - prev_weight
  # Keep fitted values in sync with the changed slot weight.
  if (abs(delta) > 1e-15) {
    model <- adjust_fitted_for_c_hat(
      data, model, model$alpha[l, ] * model$mu[l, ], delta)
  }
  # Gamma shape update (sequential mode; Beta-Binomial uses k_rest directly)
  if (!is_betabinom && state$update_schedule == "sequential") {
    model$c_hat_state$a_g <- state$nu + sum(model$slot_weights)
  }
  return(model)
}
#' Add delta_weight * R*b_bar_l to the fitted-values field (Xr/XtXr/Rz).
#' @keywords internal
#' @noRd
adjust_fitted_for_c_hat <- function(data, model, b_bar_l, delta_weight) {
  field <- detect_fitted_field(model)
  # Individual data ("Xr") uses the design matrix; summary-data fields
  # ("XtXr"/"Rz") go through the R-multiplication helper.
  increment <- if (field == "Xr") {
    compute_Xb(data$X, b_bar_l)
  } else {
    compute_Rv(data, b_bar_l, model$X_meta)
  }
  model[[field]] <- model[[field]] + delta_weight * as.vector(increment)
  return(model)
}
#' Identify which fitted-values field the model carries.
#' Checks "Rz", then "XtXr", then "Xr", in that priority order.
#' @keywords internal
#' @noRd
detect_fitted_field <- function(model) {
  for (candidate in c("Rz", "XtXr", "Xr")) {
    if (candidate %in% names(model))
      return(candidate)
  }
  stop("Cannot detect fitted-values field on model object.")
}
#' Recompute fitted = sum_l c_hat[l] * R * bbar[l] from scratch.
#' @keywords internal
#' @noRd
recompute_fitted_weighted <- function(data, model) {
  field <- detect_fitted_field(model)
  # Activity-weighted sum of posterior mean effects:
  # b_weighted[j] = sum_l c_hat[l] * alpha[l, j] * mu[l, j].
  # (c_hat recycles down the L rows of the L x p matrix.)
  b_weighted <- colSums(model$slot_weights * (model$alpha * model$mu))
  fitted_values <- if (field == "Xr") {
    compute_Xb(data$X, b_weighted)
  } else {
    compute_Rv(data, b_weighted, model$X_meta)
  }
  model[[field]] <- as.vector(fitted_values)
  return(model)
}
# =============================================================================
# IBSS FINALIZATION
#
# Finalizes the SuSiE model after convergence or maximum number of iterations
# reached. Computes credible sets, PIPs, intercept, fitted values, and z-scores.
# =============================================================================
#' Finalize IBSS model
#'
#' Computes credible sets, PIPs, z-scores, and cleans up temporary
#' fields from the model object.
#'
#' @param data Data object (individual, ss, or rss_lambda)
#' @param params Validated params object
#' @param model Converged model object
#' @param elbo ELBO values (optional)
#' @param iter Number of iterations completed
#' @param tracking Tracking data (optional)
#'
#' @return Finalized model object with credible sets and PIPs.
#' @export
#' @keywords internal
ibss_finalize <- function(data, params, model, elbo = NULL, iter = NA_integer_,
                          tracking = NULL) {
  # Append ELBO & iteration count to model output
  model$niter <- iter
  # Intercept & Fitted Values
  model$X_column_scale_factors <- get_scale_factors(data, params)
  model$intercept <- get_intercept(data, params, model)
  model$fitted <- get_fitted(data, params, model)
  # Posterior Inclusion Probabilities, credible sets, z-scores
  model$sets <- get_cs(data, params, model)
  model$pip <- susie_get_pip(model, prior_tol = params$prior_tol)
  model$z <- get_zscore(data, params, model)
  # Tracking Across Iterations
  if (params$track_fit) model$trace <- tracking
  # Assign Variable Names
  model <- get_variable_names(data, model)
  # R diagnostics (from data -> model, following sets/pip/z pattern).
  # SS / ss_mixture paths store lambda_bias / B_corrected as scalars
  # (set by fit_R_mismatch once per sweep). The rss_lambda dispatch keeps
  # the per-slot vector form. Copy whatever shape lives on the model.
  R_finite_diagnostics <- data$R_finite_diagnostics
  if (!is.null(R_finite_diagnostics)) {
    model$R_finite_diagnostics <- R_finite_diagnostics
    if (!is.null(model$lambda_bias))
      model$R_finite_diagnostics$lambda_bias <- model$lambda_bias
    if (!is.null(model$B_corrected))
      model$R_finite_diagnostics$B_corrected <- model$B_corrected
    if (!is.null(data$R_mismatch))
      model$R_finite_diagnostics$R_mismatch <- data$R_mismatch
    # shat2_inflation is a multiplicative factor; report as penalty above 1.
    if (!is.null(model$shat2_inflation))
      model$R_finite_diagnostics$per_variable_penalty <- as.vector(model$shat2_inflation - 1)
    # Q_art / artifact fields are present only for R_mismatch = "map_qc"
    # (set by fit_R_mismatch). Copy whichever exist.
    for (fld in c("Q_art", "artifact_flag", "artifact_evaluable",
                  "low_eigen_count", "low_eigen_fraction", "eig_delta",
                  "mode_label"))
      if (!is.null(model[[fld]]))
        model$R_finite_diagnostics[[fld]] <- model[[fld]]
  }
  # Multi-panel omega weights
  if (!is.null(model$omega))
    model$omega_weights <- model$omega
  # Store Gamma-Poisson c_hat results on output for user access
  # and for susieAnn to extract (a_g, b_g needed for genome-wide nu update).
  if (!is.null(model$c_hat_state)) {
    model$c_hat <- model$slot_weights
    model$C_hat <- sum(model$slot_weights)
    if (model$c_hat_state$prior_type == "betabinom") {
      model$a_beta <- model$c_hat_state$a_beta
      model$b_beta <- model$c_hat_state$b_beta
    } else {
      model$a_g <- model$c_hat_state$a_g
      model$b_g <- model$c_hat_state$b_g
    }
    model$c_hat_state <- NULL # cleanup internal state
  }
  # Clean up temporary computational fields
  model <- cleanup_model(data, params, model)
  return(model)
}
================================================
FILE: R/mixture_prior.R
================================================
# =============================================================================
# FIXED MIXTURE PRIOR
#
# Shared implementations for estimate_prior_method = "fixed_mixture".
# Evaluates Bayes factors on a pre-specified variance grid with given mixture
# weights, computes mixture posterior moments, and stores per-grid BF matrix.
#
# These functions are data-type-agnostic: they operate on betahat and shat2
# produced by the type-specific compute_ser_statistics().
# =============================================================================
#' Resolve fixed mixture prior parameters
#'
#' Called from susie, susie_ss, and susie_rss to handle the
#' prior_variance_grid / mixture_weights parameters. Supplying a
#' non-NULL prior_variance_grid forces estimate_prior_method to
#' "fixed_mixture" (with estimate_prior_variance disabled) and
#' defaults mixture_weights to uniform over the grid; otherwise the
#' requested method is matched against the supported choices.
#'
#' @keywords internal
resolve_mixture_prior <- function(estimate_prior_method,
                                  estimate_prior_variance,
                                  prior_variance_grid,
                                  mixture_weights) {
  if (is.null(prior_variance_grid)) {
    # Scalar-prior path: just validate the requested method.
    estimate_prior_method <- match.arg(estimate_prior_method,
                                       c("optim", "EM", "simple"))
  } else {
    n_grid <- length(prior_variance_grid)
    # Default to uniform weights over the grid.
    if (is.null(mixture_weights))
      mixture_weights <- rep(1 / n_grid, n_grid)
    stopifnot(
      length(mixture_weights) == n_grid,
      all(prior_variance_grid > 0),
      all(mixture_weights >= 0),
      abs(sum(mixture_weights) - 1) < 1e-8
    )
    estimate_prior_method <- "fixed_mixture"
    estimate_prior_variance <- FALSE
  }
  list(
    estimate_prior_method = estimate_prior_method,
    estimate_prior_variance = estimate_prior_variance,
    prior_variance_grid = prior_variance_grid,
    mixture_weights = mixture_weights
  )
}
#' Compute mixture log-Bayes factors and posterior inclusion probabilities
#'
#' For each grid point k and variant j, computes the Wakefield approximate
#' Bayes factor (ABF), then forms the mixture BF as a weighted sum over grid
#' points. Stores the full p x K log-BF matrix in model$lbf_grid[[l]] for
#' downstream use (e.g., mixsqp M-step in susieAnn).
#'
#' @param params Params object with prior_variance_grid (K-vector) and
#'   mixture_weights (K-vector summing to 1)
#' @param model Current model object with pi (prior weights)
#' @param ser_stats List with betahat (p-vector) and shat2 (p-vector)
#' @param l Effect index
#'
#' @return Updated model with alpha[l,], lbf[l], lbf_variable[l,], lbf_grid[[l]]
#'
#' @keywords internal
loglik_mixture_common <- function(params, model, ser_stats, l) {
  grid <- params$prior_variance_grid # K-vector
  w <- params$mixture_weights # K-vector
  K <- length(grid)
  betahat <- ser_stats$betahat # p-vector
  shat2 <- ser_stats$shat2 # p-vector (may contain Inf)
  p <- length(betahat)
  # Compute p x K matrix of log-BFs (Wakefield ABF at each grid point)
  # Uses pmax only for the ABF computation to avoid log(0), matching the
  # scalar path which applies pmax inside loglik.rss_lambda
  shat2_safe <- pmax(shat2, .Machine$double.eps)
  lbf_grid <- matrix(0, nrow = p, ncol = K)
  for (k in seq_len(K)) {
    V_k <- grid[k]
    # Wakefield ABF in log form: shrinkage term plus evidence term.
    lbf_grid[, k] <- -0.5 * log(1 + V_k / shat2_safe) +
      0.5 * betahat^2 * V_k / (shat2_safe * (V_k + shat2_safe))
  }
  # Mixture log-BF per variant: log(sum_k w_k * exp(lbf_jk))
  # For K=1 this reduces to lbf_grid[,1] exactly (no numerical error)
  if (K == 1L) {
    lbf_mix <- lbf_grid[, 1]
  } else {
    # Row-wise log-sum-exp with the max subtracted for numerical stability.
    log_w <- log(w)
    lbf_mix <- apply(lbf_grid, 1, function(row) {
      shifted <- row + log_w
      max_val <- max(shifted)
      max_val + log(sum(exp(shifted - max_val)))
    })
  }
  # Store per-grid BF matrix for M-step (e.g., mixsqp in susieAnn)
  if (is.null(model$lbf_grid)) {
    model$lbf_grid <- vector("list", nrow(model$alpha))
  }
  model$lbf_grid[[l]] <- lbf_grid
  # Cache ser_stats for calculate_posterior_moments_mixture_common,
  # which removes the cache after use.
  model$.ser_stats <- ser_stats
  # Compute posterior inclusion probabilities using existing machinery.
  # Pass raw shat2 (not pmax'd) to lbf_stabilization, matching the scalar path.
  stable_res <- lbf_stabilization(lbf_mix, model$pi, shat2)
  weights_res <- compute_posterior_weights(stable_res$lpo)
  model$alpha[l, ] <- weights_res$alpha
  model$lbf[l] <- weights_res$lbf_model
  model$lbf_variable[l, ] <- stable_res$lbf
  return(model)
}
#' Compute mixture posterior moments
#'
#' For each grid point k, computes the conjugate normal posterior moments
#' (mean, variance) given prior variance V_k. The mixture posterior is a
#' responsibility-weighted average over grid points, with
#' r_jk = w_k * BF_jk / sum_k' w_k' * BF_jk' computed in log space.
#'
#' Uses betahat and shat2 from ser_stats (produced by the data-type-specific
#' compute_ser_statistics), so this function is data-type-agnostic.
#'
#' @param params Params object with prior_variance_grid and mixture_weights
#' @param model Model with lbf_grid[[l]] (p x K), alpha[l,] already computed,
#'   and ser_stats cached from loglik_mixture_common
#' @param l Effect index
#'
#' @return Updated model with mu[l,] and mu2[l,]
#'
#' @keywords internal
calculate_posterior_moments_mixture_common <- function(params, model, l) {
  grid <- params$prior_variance_grid
  weights <- params$mixture_weights
  n_grid <- length(grid)
  bf_log <- model$lbf_grid[[l]]        # p x K log-BF matrix
  bhat <- model$.ser_stats$betahat     # cached by loglik_mixture_common
  s2 <- model$.ser_stats$shat2
  s2_safe <- pmax(s2, .Machine$double.eps)
  n_var <- length(bhat)
  # Responsibilities in log space: shift each row by its max before
  # exponentiating, then normalize rows to sum to one.
  log_post <- sweep(bf_log, 2, log(weights + .Machine$double.eps), "+")
  resp <- exp(log_post - apply(log_post, 1, max))
  resp <- resp / rowSums(resp)
  # Conjugate normal posterior moments at each grid point.
  mean_by_grid <- matrix(0, n_var, n_grid)
  mean2_by_grid <- matrix(0, n_var, n_grid)
  for (k in seq_len(n_grid)) {
    post_var <- grid[k] * s2_safe / (grid[k] + s2_safe)
    post_mu <- post_var / s2_safe * bhat
    mean_by_grid[, k] <- post_mu
    mean2_by_grid[, k] <- post_var + post_mu^2 # E[beta^2]
  }
  # Responsibility-weighted mixture moments.
  model$mu[l, ] <- rowSums(resp * mean_by_grid)
  model$mu2[l, ] <- rowSums(resp * mean2_by_grid)
  # Drop the cached ser_stats now that they have been consumed.
  model$.ser_stats <- NULL
  return(model)
}
================================================
FILE: R/model_methods.R
================================================
# =============================================================================
# MODEL-LEVEL S3 METHODS
# S3 generics dispatched on model class (model field access, initialization,
# convergence, ELBO)
# =============================================================================
#' Get prior variance for effect l
#' @keywords internal
get_prior_variance_l <- function(model, l) {
  UseMethod("get_prior_variance_l")
}
#' Default: prior variances live in the length-L vector model$V.
#' @keywords internal
get_prior_variance_l.default <- function(model, l) {
  model$V[l]
}
#' Set prior variance for effect l
#' @keywords internal
set_prior_variance_l <- function(model, l, V) {
  UseMethod("set_prior_variance_l")
}
#' Default: writes into model$V[l] and returns the modified model.
#' @keywords internal
set_prior_variance_l.default <- function(model, l, V) {
  model$V[l] <- V
  model
}
#' Get posterior inclusion probabilities for effect l
#' @keywords internal
get_alpha_l <- function(model, l) {
  UseMethod("get_alpha_l")
}
#' Default: row l of the L x p alpha matrix.
#' @keywords internal
get_alpha_l.default <- function(model, l) {
  model$alpha[l, ]
}
#' Get posterior moments for effect l (for EM prior variance update)
#' @keywords internal
get_posterior_moments_l <- function(model, l) {
  UseMethod("get_posterior_moments_l")
}
#' Default: first and second posterior moments from mu / mu2 row l.
#' @keywords internal
get_posterior_moments_l.default <- function(model, l) {
  list(post_mean = model$mu[l, ], post_mean2 = model$mu2[l, ])
}
#' Get PIP-weighted posterior mean for effect l (alpha * mu)
#' @keywords internal
get_posterior_mean_l <- function(model, l) {
  UseMethod("get_posterior_mean_l")
}
#' Default: elementwise product of PIPs and posterior means for row l.
#' @keywords internal
get_posterior_mean_l.default <- function(model, l) {
  model$alpha[l, ] * model$mu[l, ]
}
#' Get sum of PIP-weighted posterior means across all effects
#' @keywords internal
get_posterior_mean_sum <- function(model) {
  UseMethod("get_posterior_mean_sum")
}
#' Default: column sums of alpha * mu, i.e. the overall posterior mean
#' coefficient vector (length p).
#' @keywords internal
get_posterior_mean_sum.default <- function(model) {
  colSums(model$alpha * model$mu)
}
# =============================================================================
# MODEL INITIALIZATION
#
# Initialize core model matrices and parameter storage.
# =============================================================================
#' Allocate the core SuSiE model matrices (alpha, mu, mu2, V, ...).
#' Dispatches on the class of `data`; `var_y` scales the prior variance.
#' @keywords internal
initialize_matrices <- function(data, params, var_y) {
  UseMethod("initialize_matrices")
}
#' Default matrix initialization: uniform alpha, zero means, NA diagnostics.
#' @keywords internal
initialize_matrices.default <- function(data, params, var_y) {
  n_effects <- params$L
  n_vars <- data$p
  na_effects <- rep(as.numeric(NA), n_effects)
  # alpha starts uniform over variables; mu/mu2 start at zero; KL/lbf are
  # NA until the first IBSS sweep fills them in.
  list(
    alpha = matrix(1 / n_vars, n_effects, n_vars),
    mu = matrix(0, n_effects, n_vars),
    mu2 = matrix(0, n_effects, n_vars),
    V = expand_scaled_prior_variance(params$scaled_prior_variance, var_y, n_effects),
    KL = na_effects,
    lbf = na_effects,
    lbf_variable = matrix(as.numeric(NA), n_effects, n_vars),
    sigma2 = params$residual_variance,
    pi = params$prior_weights,
    null_weight = params$null_weight,
    predictor_weights = rep(as.numeric(NA), n_vars)
  )
}
# =============================================================================
# VARIANCE UPDATE
#
# Update residual variance (and possibly other variance components) after
# each IBSS iteration.
# =============================================================================
#' Update the model's variance components after one IBSS iteration.
#' Dispatches on the class of `data`.
#' @keywords internal
#' @importFrom utils modifyList
update_model_variance <- function(data, params, model) {
  UseMethod("update_model_variance")
}
#' Default variance update: re-estimate, clamp to bounds, refresh derived
#' quantities. A no-op when residual-variance estimation is disabled.
#' @keywords internal
update_model_variance.default <- function(data, params, model) {
  if (!isTRUE(params$estimate_residual_variance))
    return(model)
  # Merge the freshly estimated variance components into the model.
  model <- modifyList(model, update_variance_components(data, params, model))
  # Clamp sigma2 to the user-specified bounds.
  lower <- params$residual_variance_lowerbound
  upper <- params$residual_variance_upperbound
  model$sigma2 <- min(max(model$sigma2, lower), upper)
  # Quantities derived from sigma2 must be refreshed after the change.
  update_derived_quantities(data, params, model)
}
# =============================================================================
# CONVERGENCE CHECKING
# =============================================================================
#' Decide whether IBSS has converged after iteration `iter`, setting
#' model$converged; also emits per-iteration verbose output.
#' @keywords internal
check_convergence <- function(data, params, model, elbo, iter) {
  UseMethod("check_convergence")
}
#' Format the per-iter sigma2 cell for verbose output
#'
#' Default returns the scalar sigma2 in `%.4f`. Subclasses
#' (e.g., mfsusieR's list-of-vectors sigma2; mvsusieR's
#' matrix sigma2) override to a compact summary string of
#' fixed width.
#' @keywords internal
format_sigma2_summary <- function(model) {
  UseMethod("format_sigma2_summary")
}
#' Default: scalar sigma2 printed with 4 decimal places.
#' @keywords internal
format_sigma2_summary.default <- function(model) {
  sprintf("%.4f", model$sigma2)
}
#' Append class-specific extra-diag columns to the verbose row
#'
#' Default returns an empty string. Subclasses override to inject
#' columns such as `max_pi_null`, `max_KL_l`, alpha-entropy
#' n_eff. Output is appended after the V column in the per-iter
#' tabular line.
#' @keywords internal
format_extra_diag <- function(model) {
  UseMethod("format_extra_diag")
}
#' Default: reports the SS-path lambda-inflation scalar (and, when all
#' finite B_corrected entries agree, a single B_eff value); "" when no
#' lambda_bias is present on the model.
#' @keywords internal
format_extra_diag.default <- function(model) {
  if (is.null(model$lambda_bias))
    return("")
  lambda_infl <- model$lambda_bias
  # Zero-masking of small finite values happens at source in
  # estimate_lambda_bias; we just sanitize non-finite for display.
  lambda_infl[!is.finite(lambda_infl)] <- 0
  # The SS path stores lambda_bias as a scalar; a vector here means a
  # dispatch mix-up upstream.
  if (length(lambda_infl) != 1)
    stop("lambda_bias must be a scalar on the SS path.")
  lb <- paste0("lambda_infl=", format(lambda_infl, digits = 2,
                                      scientific = TRUE))
  if (!is.null(model$B_corrected)) {
    B_corrected <- model$B_corrected
    # Only show B_eff when all finite entries are identical (one value
    # to report); otherwise omit it from the row.
    if (length(unique(B_corrected[is.finite(B_corrected)])) == 1) {
      lb <- paste0(lb, " B_eff=", format(B_corrected[which(is.finite(B_corrected))[1]],
                                         digits = 4, scientific = FALSE))
    }
  }
  lb
}
#' Default convergence check: ELBO-based, with a PIP/alpha fallback.
#' Sets model$converged (and model$convergence_reason on the PIP path),
#' and prints the per-iteration verbose row when requested.
#' @keywords internal
check_convergence.default <- function(data, params, model, elbo, iter) {
  verbose <- isTRUE(params$verbose)
  V_str <- format_V_summary(model$V)
  chat_str <- format_chat_summary(model)
  sigma2_str <- format_sigma2_summary(model)
  extra_str <- format_extra_diag(model)
  # Tabular verbose format (ELBO-convergence path): columns are
  # iter, ELBO, delta, sigma2, mem, V (variable-width, last),
  # plus optional class-specific extras after V.
  verbose_row_fmt <- "%4d %11.4f %9s %-9s %-7s %s%s%s"
  verbose_header <- sprintf("%-4s %11s %9s %-9s %-7s %s%s",
                            "iter", "ELBO", "delta", "sigma2", "mem", "V",
                            if (nzchar(extra_str)) " extras" else "")
  # Skip convergence check on first iteration -- there is no previous
  # ELBO to compare against yet.
  if (iter == 1) {
    model$converged <- FALSE
    if (verbose) {
      elbo_val <- elbo[iter + 1]
      if (!is.na(elbo_val) && is.finite(elbo_val)) {
        message(verbose_header)
        message(sprintf(verbose_row_fmt,
                        iter, elbo_val, "-", sigma2_str,
                        sprintf("%.2f GB", mem_used_gb()),
                        paste0(V_str, chat_str),
                        if (nzchar(extra_str)) paste0(" ", extra_str) else "",
                        ""))
      } else {
        # No usable ELBO this iteration: fall back to a compact line.
        message(sprintf("iter %3d: sigma2=%s, V=%s%s [mem: %.2f GB]",
                        iter, sigma2_str, V_str, chat_str, mem_used_gb()))
      }
    }
    return(model)
  }
  # Calculate difference in ELBO values
  ELBO_diff <- elbo[iter + 1] - model$runtime$prev_elbo
  ELBO_failed <- is.na(ELBO_diff) || is.infinite(ELBO_diff)
  # PIP-based path: either requested explicitly, or used as a fallback
  # when the ELBO is unusable this iteration.
  if (params$convergence_method == "pip" || ELBO_failed) {
    if (ELBO_failed && params$convergence_method == "elbo") {
      warning_message(paste0("Iteration ", iter, " produced an NA/infinite ELBO",
                             " value. Using pip-based convergence this iteration."))
    }
    # PIP/alpha convergence. pip_stall_window is reused as the maximum
    # short-cycle lag; it is no longer a "no improvement" stop.
    model <- check_alpha_pip_cycle_convergence(data, params, model)
    pip_diff <- model$runtime$pip_diff
    lambda_diff <- if (!is.null(model$runtime$lambda_bias_diff))
      model$runtime$lambda_bias_diff else 0
    # Coordinate EB guard: fit_R_mismatch runs after the SER sweep, so a material
    # lambda update must be consumed by one more sweep before convergence.
    if (isTRUE(model$converged) && lambda_diff > params$tol) {
      model$converged <- FALSE
      model$convergence_reason <- paste0("lambda_infl_changed(",
                                         format(lambda_diff, digits = 3,
                                                scientific = TRUE), ")")
    }
    if (verbose) {
      conv_tag <- if (model$converged)
        paste0(" -- converged (", model$convergence_reason, ")")
      else
        ""
      lambda_str <- if (lambda_diff > 0)
        paste0(", max|d(lambda_infl)|=", format(lambda_diff, digits = 3,
                                                scientific = TRUE))
      else ""
      message(sprintf("iter %3d: max|d(alpha,PIP)|=%.2e%s, V=%s%s%s%s [mem: %.2f GB]",
                      iter, pip_diff, lambda_str, V_str, chat_str,
                      if (nzchar(extra_str)) paste0(", ", extra_str) else "",
                      conv_tag, mem_used_gb()))
    }
    # Ash-based unmappable-effect modes get one final ash pass at convergence.
    if (model$converged && !is.null(params$unmappable_effects) &&
        params$unmappable_effects %in% c("ash", "ash_filter_archived")) {
      model <- run_final_ash_pass(data, params, model)
    }
    return(model)
  }
  # Converge when ELBO stabilizes: small non-negative change.
  # A large negative ELBO_diff means the objective dropped, not convergence.
  if (ELBO_diff < -params$tol) {
    warning_message(sprintf("ELBO decreased by %.2e at iteration %d",
                            -ELBO_diff, iter))
  }
  model$converged <- (ELBO_diff >= 0 && ELBO_diff < params$tol)
  lambda_diff <- if (!is.null(model$runtime$lambda_bias_diff))
    model$runtime$lambda_bias_diff else 0
  # Coordinate EB guard: fit_R_mismatch runs after the SER sweep, so a material
  # lambda update must be consumed by one more sweep before declaring convergence.
  if (isTRUE(model$converged) && lambda_diff > params$tol)
    model$converged <- FALSE
  if (verbose)
    message(sprintf(verbose_row_fmt,
                    iter, elbo[iter + 1],
                    sprintf("%.2e", ELBO_diff),
                    sigma2_str,
                    sprintf("%.2f GB", mem_used_gb()),
                    paste0(V_str, chat_str),
                    if (nzchar(extra_str)) paste0(" ", extra_str) else "",
                    if (model$converged) " converged" else ""))
  # Ash-based unmappable-effect modes get one final ash pass at convergence.
  if (model$converged && !is.null(params$unmappable_effects) &&
      params$unmappable_effects %in% c("ash", "ash_filter_archived")) {
    model <- run_final_ash_pass(data, params, model)
  }
  return(model)
}
# =============================================================================
# OBJECTIVE FUNCTION (ELBO)
# =============================================================================
#' Compute the SuSiE ELBO (evidence lower bound)
#'
#' Building-block function used by downstream packages implementing
#' custom IBSS loops. Dispatches on the class of \code{data}.
#'
#' @param data Data object.
#' @param params Params object.
#' @param model Model object.
#'
#' @return Scalar ELBO value.
#'
#' @export
#' @keywords internal
get_objective <- function(data, params, model) {
  UseMethod("get_objective")
}
#' @export
#' @keywords internal
get_objective.default <- function(data, params, model) {
  # Four objective variants, selected by the model configuration:
  # infinitesimal unmappable effects, NIG (L = 1), NIG (L > 1),
  # and the standard SuSiE ELBO.
  if (!is.null(params$unmappable_effects) && params$unmappable_effects == "inf") {
    # Compute omega: per-effect precision terms diag(X'OmegaX) + 1/V[l]
    L <- nrow(model$alpha)
    omega_res <- compute_omega_quantities(data, model$tau2, model$sigma2)
    omega <- matrix(0, L, data$p)
    for (l in seq_len(L)) {
      omega[l, ] <- omega_res$diagXtOmegaX + 1 / model$V[l]
    }
    # Compute total ELBO for infinitesimal effects model
    objective <- compute_elbo_inf(
      model$alpha, model$mu, omega, model$lbf,
      model$sigma2, model$tau2, data$n, data$p,
      data$eigen_vectors, data$eigen_values,
      data$VtXty, data$yty
    )
  } else if (params$use_NIG && nrow(model$alpha) == 1) {
    # NIG with a single effect: the marginal log-likelihood is exact.
    objective <- model$marginal_loglik[1]
  } else if (isTRUE(params$use_NIG)) {
    # NIG L>1: KL[l] is gated to 0 (gIBSS has no coherent ELBO); use the
    # proper variational expected log-likelihood.
    objective <- nig_eloglik(data, params, model)
  } else {
    # Standard ELBO computation. `na.rm = TRUE` so subclasses that
    # leave KL[l] = NA on null-effect rows (mfsusieR, mvsusieR) do
    # not need to override get_objective just to skip NAs.
    objective <- Eloglik(data, model) - sum(model$KL, na.rm = TRUE)
  }
  # Add slot prior ELBO terms when c_hat is active.
  # Without these, the ELBO is missing the prior and entropy contributions
  # from the slot activity model.
  if (!is.null(model$c_hat_state)) {
    objective <- objective + slot_prior_elbo(model)
  }
  # An infinite ELBO indicates a numerical failure upstream; fail loudly.
  if (is.infinite(objective)) {
    stop("get_objective() produced an infinite ELBO value")
  }
  return(objective)
}
# =============================================================================
# EFFECT TRIMMING
#
# Zero out effects with negligible prior variance after convergence.
# =============================================================================
#' Reset effects whose prior variance fell below params$prior_tol to
#' their prior state. Dispatches on the class of `data`.
#' @keywords internal
trim_null_effects <- function(data, params, model) {
  UseMethod("trim_null_effects")
}
#' Default trimming: effects with V below prior_tol are reset to the prior
#' (alpha = pi) with zeroed moments and diagnostics.
#' @keywords internal
trim_null_effects.default <- function(data, params, model) {
  null_rows <- which(model$V < params$prior_tol)
  n_null <- length(null_rows)
  if (n_null == 0)
    return(model)
  model$V[null_rows] <- 0
  # Each trimmed row of alpha reverts to the prior weights pi.
  model$alpha[null_rows, ] <- matrix(model$pi, nrow = n_null,
                                     ncol = length(model$pi), byrow = TRUE)
  for (field in c("mu", "mu2", "lbf_variable"))
    model[[field]][null_rows, ] <- 0
  model$lbf[null_rows] <- 0
  model$KL[null_rows] <- 0
  return(model)
}
================================================
FILE: R/mr.ash.R
================================================
#' @title Multiple Regression with Adaptive Shrinkage
#'
#' @description Model fitting algorithms for Multiple Regression with
#' Adaptive Shrinkage ("Mr.ASH"). Mr.ASH is a variational empirical
#' Bayes (VEB) method for multiple linear regression. The fitting
#' algorithms (locally) maximize the approximate marginal likelihood
#' (the "evidence lower bound", or ELBO) via coordinate-wise updates.
#'
#' @details Mr.ASH is a statistical inference method for the following
#' multiple linear regression model: \deqn{y | X, \beta, \sigma^2 ~
#' N(X \beta, \sigma^2 I_n),} in which the regression coefficients
#' \eqn{\beta} admit a mixture-of-normals prior, \deqn{\beta | \pi,
#' \sigma ~ g = \sum_{k=1}^K N(0, \sigma^2 \sigma_k^2).} Each mixture
#' component in the prior, \eqn{g}, is a normal density centered at
#' zero, with variance \eqn{\sigma^2 \sigma_k^2}.
#'
#' The fitting algorithm, if run for a large enough number of
#' iterations, will find an approximate posterior for the regression
#' coefficients, denoted by \eqn{q(\beta)}, residual variance
#' parameter \eqn{\sigma^2}, and prior mixture weights \eqn{\pi_1,
#' \ldots, \pi_K} maximizing the evidence lower bound, \deqn{F(q, \pi,
#' \sigma^2) = E_q \log p(y | X, \beta, \sigma^2) - \sum_{j=1}^p
#' D_{KL}(q_j || g),} where \eqn{D_{KL}(q_j || g)} denotes the
#' Kullback-Leibler (KL) divergence, a measure of the "distance"
#' between (approximate) posterior \eqn{q_j(\beta_j)} and prior
#' \eqn{g(\beta_j)}. The fitting algorithm iteratively updates the
#' approximate posteriors \eqn{q_1, \ldots, q_p}, separately for each
#' \eqn{j = 1, \ldots, p} (in an order determined by
#' \code{update.order}), then separately updates the mixture weights
#' \eqn{\pi} and the residual variance \eqn{\sigma^2}. This
#' coordinate-wise update scheme iterates until the convergence
#' criterion is met, or until the algorithm hits an upper bound on
#' the number of iterations (specified by \code{max.iter}). Coordinate-wise
#' optimization algorithms for model fitting are implemented in C++ for
#' efficient handling of large-scale data
#'
#' See \sQuote{References} for more details about the model and
#' algorithm.
#'
#' @param X The input matrix, of dimension (n,p); each column is a
#' single predictor; and each row is an observation vector. Here, n is
#' the number of samples and p is the number of predictors. The matrix
#' cannot be sparse.
#'
#' @param y The observed continuously-valued responses, a vector of
#' length n.
#'
#' @param Z The covariate matrix, of dimension (n,k), where k is the
#' number of covariates. This feature is not yet implemented;
#' \code{Z} must be set to \code{NULL}.
#'
#' @param sa2 The vector of prior mixture component variances. The
#' variances should be in increasing order, starting at zero; that is,
#' \code{sort(sa2)} should be the same as \code{sa2}. When \code{sa2}
#' is \code{NULL}, the default setting is used, \code{sa2[k] =
#' (2^(0.05*(k-1)) - 1)^2}, for \code{k = 1:20}. For this default
#' setting, \code{sa2[1] = 0}, and \code{sa2[20]} is roughly 1.
#'
#' @param method_q The algorithm used to update the variational
#' approximation to the posterior distribution of the regression
#' coefficients, \code{method = "sigma_dep_q"} and \code{method =
#' "sigma_indep_q"}, take different approaches to updating the
#' residual variance \eqn{\sigma^2}.
#'
#' @param method_g \code{method = "caisa"}, an abbreviation of
#' "Coordinate Ascent Iterative Shrinkage Algorithm", fits the model
#' by approximate EM; it iteratively updates the variational
#' approximation to the posterior distribution of the regression
#' coefficients (the approximate E-step) and the model parameters
#' (mixture weights and residual covariance) in an approximate
#' M-step. Settings \code{method = "block"} and
#' \code{method = "accelerate"} are considered experimental.
#'
#' @param max.iter The maximum number of outer loop iterations allowed.
#'
#' @param min.iter The minimum number of outer loop iterations allowed.
#'
#' @param beta.init The initial estimate of the (approximate)
#' posterior mean regression coefficients. This should be \code{NULL},
#' or a vector of length p. When \code{beta.init} is \code{NULL}, the
#' posterior mean coefficients are all initially set to zero.
#'
#' @param update.pi If \code{update.pi = TRUE}, the mixture
#' proportions in the mixture-of-normals prior are estimated from the
#' data. In the manuscript, \code{update.pi = TRUE}.
#'
#' @param pi The initial estimate of the mixture proportions
#' \eqn{\pi_1, \ldots, \pi_K}. If \code{pi} is \code{NULL}, the
#' mixture weights are initialized to \code{rep(1/K,K)}, where
#' \code{K = length(sa2)}.
#'
#' @param update.sigma2 If \code{update.sigma2 = TRUE}, the residual
#' variance \eqn{\sigma^2} is estimated from the data. In the manuscript,
#' \code{update.sigma2 = TRUE}.
#'
#' @param sigma2 The initial estimate of the residual variance,
#' \eqn{\sigma^2}. If \code{sigma2 = NULL}, the residual variance is
#' initialized to the empirical variance of the residuals based on the
#' initial estimates of the regression coefficients, \code{beta.init},
#' after removing linear effects of the intercept and any covariates.
#'
#' @param update.order The order in which the co-ordinate ascent
#' updates for estimating the posterior mean coefficients are
#' performed. \code{update.order} can be \code{NULL}, \code{"random"},
#' or any permutation of \eqn{(1,\ldots,p)}, where \code{p} is the number
#' of columns in the input matrix \code{X}. When \code{update.order}
#' is \code{NULL}, the co-ordinate ascent updates are performed in
#' order in which they appear in \code{X}; this is equivalent to
#' setting \code{update.order = 1:p}. When \code{update.order =
#' "random"}, the co-ordinate ascent updates are performed in a
#' randomly generated order, and this random ordering is different at
#' each outer-loop iteration.
#'
#' @param standardize The logical flag for standardization of the
#' columns of X variable, prior to the model fitting. The coefficients
#' are always returned on the original scale.
#'
#' @param intercept When \code{intercept = TRUE}, an intercept is
#' included in the regression model.
#'
#' @param tol Additional settings controlling behaviour of the model
#' fitting algorithm. \code{tol$convtol} controls the termination
#' criterion for the model fitting. The outer-loop updates stop when
#' the relative L2 change in the estimates of the posterior mean
#' coefficients is less than \code{convtol}, i.e., \code{||beta_new -
#' beta_old||_2 / max(1, ||beta_old||_2) < convtol}.
#' \code{tol$epstol} is a small, positive number added to the
#' likelihoods to avoid logarithms of zero.
#'
#' @param verbose If \code{verbose = TRUE}, some information about the
#' status of the model fitting is printed to the console.
#'
#' @return A list object with the following elements:
#'
#' \item{intercept}{The estimated intercept.}
#'
#' \item{beta}{A vector containing posterior mean estimates of the
#' regression coefficients for all predictors.}
#'
#' \item{sigma2}{The estimated residual variance.}
#'
#' \item{pi}{A vector containing the estimated mixture
#' proportions.}
#'
#' \item{iter}{The number of outer-loop iterations that were
#' performed.}
#'
#' \item{update.order}{The ordering used for performing the
#' coordinate-wise updates. For \code{update.order = "random"}, the
#' orderings for outer-loop iterations are provided in a vector of
#' length \code{p*max.iter}, where \code{p} is the number of predictors.}
#'
#' \item{varobj}{A vector of length \code{numiter}, containing the
#' value of the variational objective (equal to the negative "evidence
#' lower bound") attained at each (outer-loop) model fitting
#' iteration. Note that the objective does not account for the
#' intercept term, even when \code{intercept = TRUE}; therefore, this
#' value should be interpreted as being an approximation to the
#' marginal likelihood \emph{conditional} on the estimate of the
#' intercept.}
#'
#' \item{data}{The preprocessed data (X, Z, y) provided as input to the model
#' fitting algorithm. \code{data$w} is equal to
#' \code{diag(crossprod(X))}, in which \code{X} is the preprocessed
#' data matrix. Additionally, \code{data$sa2} gives the prior variances
#' used.}
#'
#' @seealso \code{\link{get.full.posterior}}, \code{\link{predict.mr.ash}}
#'
#' @references
#'
#' Y. Kim (2020), Bayesian shrinkage methods for high dimensional
#' regression. Ph.D. thesis, University of Chicago.
#'
#' @useDynLib susieR, .registration = TRUE
#'
#' @importFrom utils modifyList
#' @importFrom stats var
#'
#' @examples
#' ### generate synthetic data
#' set.seed(1)
#' n = 200
#' p = 300
#' X = matrix(rnorm(n*p),n,p)
#' beta = double(p)
#' beta[1:10] = 1:10
#' y = X %*% beta + rnorm(n)
#'
#' ### fit Mr.ASH
#' fit.mr.ash = mr.ash(X,y, method_q = "sigma_indep_q")
#' # fit.mr.ash = mr.ash(X,y, method_q = "sigma_dep_q")
#'
#' ### prediction routine
#' Xnew = matrix(rnorm(n*p),n,p)
#' ynew = Xnew %*% beta + rnorm(n)
#' ypred = predict(fit.mr.ash, Xnew)
#'
#' ### test error
#' rmse = norm(ynew - ypred, '2') / sqrt(n)
#'
#' ### coefficients
#' betahat = predict(fit.mr.ash, type = "coefficients")
#' # this equals c(fit.mr.ash$intercept, fit.mr.ash$beta)
#'
#' @export
#'
mr.ash = function(X, y, Z = NULL, sa2 = NULL,
                  method_q = c("sigma_dep_q","sigma_indep_q"),
                  method_g = c("caisa","accelerate","block"),
                  max.iter = 1000, min.iter = 1,
                  beta.init = NULL,
                  update.pi = TRUE, pi = NULL,
                  update.sigma2 = TRUE, sigma2 = NULL,
                  update.order = NULL,
                  standardize = FALSE, intercept = TRUE,
                  tol = set_default_tolerance(),
                  verbose = TRUE){

  # Problem dimensions.
  n = nrow(X)
  p = ncol(X)

  # Check the prior mixture variances: all non-negative, and the first
  # component must be the point mass at zero.
  if (!is.null(sa2)) {
    if (any(sa2 < 0)) {
      stop ("all the mixture component variances must be non-negative.")
    }
    if (sa2[1] != 0) {
      stop ("the first mixture component variance sa2[1] must be 0.")
    }
  }

  # Covariates are not yet supported.
  if (!is.null(Z)) {
    stop("covariates are not currently fully implemented; Z should be set to NULL")
  }

  # Resolve method choices.
  # NOTE(review): method_g is validated here but not forwarded to
  # caisa_cpp below -- confirm whether it is meant to select a backend.
  method_q = match.arg(method_q)
  method_g = match.arg(method_g)

  # Merge user-specified tolerances with the defaults.
  tol0 = set_default_tolerance()
  tol = modifyList(tol0,tol,keep.null = TRUE)

  # Center (and optionally standardize) the data, removing covariate
  # effects.
  data = remove_covariate(X, y, Z, standardize, intercept)

  # Initialize the posterior mean coefficients.
  if ( is.null(beta.init) ){
    data$beta = as.vector(double(p))
  } else {
    if (standardize) {
      # beta.init is on the original scale; convert it to the
      # standardized scale used internally.
      data$beta = as.vector(beta.init) * attr(data$X,"scaled:scale")
    } else {
      data$beta = as.vector(beta.init)
    }
  }
  data$beta[1] = data$beta[1] + 0 # force a copy so beta.init is not modified

  # Initial residuals.
  r = data$y - data$X %*% data$beta

  # Initialize the residual variance from the residuals if not given.
  if (is.null(sigma2))
    sigma2 = c(var.n(r))

  # Precompute x_j^T x_j for each column j.
  w = colSums(data$X^2)
  data$w = w

  # Default grid of prior mixture variances: 25 components starting at
  # zero, rescaled by the typical column norm.
  if ( is.null(sa2) ) {
    sa2 = (2^((0:24) / 25) - 1)^2
    sa2 = sa2 / median(data$w) * n
  }
  K = length(sa2)
  data$sa2 = sa2

  # Initialize the mixture weights. When beta.init is provided, derive
  # them from the (approximate) posterior assignment probabilities
  # implied by the initial coefficients; otherwise use uniform weights.
  if ( is.null(pi) ) {
    if ( is.null(beta.init) ){
      Phi = matrix(1,p,K)/K
      pi = rep(1,K)/K
    } else {
      S = outer(1/w, sa2, '+') * sigma2
      Phi = -data$beta^2/S/2 - log(S)/2
      # Subtract row maxima before exponentiating for numerical stability.
      Phi = exp(Phi - apply(Phi,1,max))
      Phi = Phi / rowSums(Phi)
      pi = colMeans(Phi)
    }
  } else
    Phi = matrix(rep(pi, each = p), nrow = p)
  pi[1] <- pi[1] + 0 # force a copy so the input pi is not modified

  # Build the coordinate update order (0-based indices for C++).
  if ( is.null(update.order) ) {
    o = rep(0:(p-1), max.iter)
  } else if (is.numeric(update.order)) {
    o = rep(update.order - 1, max.iter)
  } else if (identical(update.order, "random")) {
    o = random_order(p, max.iter)
  } else {
    # Previously an unrecognized value fell through and left `o`
    # undefined, producing an obscure error later.
    stop("update.order should be NULL, \"random\", or a permutation of 1:p")
  }

  # Run the coordinate ascent algorithm (implemented in C++).
  out = caisa_cpp (data$X, w, sa2, pi, data$beta, r, sigma2, o,
                   max.iter, min.iter, tol$convtol, tol$epstol,
                   method_q, update.pi, update.sigma2, verbose)

  ## Convert to plain numeric vectors (drop matrix dim from Armadillo)
  out$beta = c(out$beta)
  out$pi = c(out$pi)
  out$sigma2 = c(out$sigma2)
  # (A dead branch rescaling beta for method_q == "sigma_scaled_beta"
  # was removed: match.arg() above never yields that value.)

  ## polish return object: recover the intercept (and any covariate
  ## effects) from the preprocessing quantities.
  out$intercept = c(data$ZtZiZy - data$ZtZiZX %*% out$beta)
  data["beta"] = NULL
  out$data = data
  out$update.order = o

  ## rescale beta back to the original scale if X was standardized
  if (standardize)
    out$beta = out$beta / attr(data$X, "scaled:scale")
  class(out) <- c("mr.ash", "list")

  ## warn if the largest prior variance appears too small
  if (update.pi && out$pi[K] > 1/K) {
    warning(sprintf(paste("The mixture proportion associated with the",
                          "largest prior variance is greater than %0.2e;",
                          "this indicates that the model fit could be",
                          "improved by using a larger setting of the",
                          "prior variance. Consider increasing the range",
                          "of the variances \"sa2\"."),1/K))
  }
  return(out)
}
#' @title Extract Regression Coefficients from Mr.ASH Fit
#'
#' @description Retrieve posterior mean estimates of the regression
#' coefficients in a Mr.ASH model.
#'
#' @param object A Mr.ASH fit, usually the result of calling
#' \code{mr.ash}.
#'
#' @param ... Additional arguments passed to the default S3 method.
#'
#' @return A p+1 vector. The first element gives the estimated
#' intercept, and the remaining p elements are the estimated
#' regression coefficients.
#'
#' @examples
#' ## generate synthetic data
#' set.seed(1)
#' n = 200
#' p = 300
#' X = matrix(rnorm(n*p),n,p)
#' beta = double(p)
#' beta[1:10] = 1:10
#' y = X %*% beta + rnorm(n)
#'
#' ## fit mr.ash
#' fit.mr.ash = mr.ash(X, y)
#'
#' ## coefficient
#' coef.mr.ash = coef(fit.mr.ash)
#' intercept = coef.mr.ash[1]
#' beta = coef.mr.ash[-1]
#'
#' @importFrom stats coef
#'
#' @export coef.mr.ash
#'
#' @export
#'
coef.mr.ash = function (object, ...) {
  # The estimated intercept comes first, followed by the p posterior
  # mean regression coefficients.
  with(object, c(intercept, beta))
}
#' @title Predict Outcomes or Extract Coefficients from Mr.ASH Fit
#'
#' @description This function predicts outcomes (y) given the observed
#' variables (X) and a Mr.ASH model; alternatively, retrieve the
#' estimates of the regression coefficients.
#'
#' @param object A mr_ash fit, usually the result of calling
#' \code{mr.ash}.
#'
#' @param newx The input matrix, of dimension (n,p); each column is a
#' single predictor; and each row is an observation vector. Here, n is
#' the number of samples and p is the number of predictors. When
#' \code{newx} is \code{NULL}, the fitted values for the training data
#' are provided.
#'
#' @param type The type of output. For \code{type = "response"},
#' predicted or fitted outcomes are returned; for \code{type =
#' "coefficients"}, the estimated coefficients are returned.
#'
#' @param ... Additional arguments passed to the default S3 method.
#'
#' @return For \code{type = "response"}, predicted or fitted outcomes
#' are returned; for \code{type = "coefficients"}, the estimated
#' coefficients are returned.
#'
#' @examples
#' ## generate synthetic data
#' set.seed(1)
#' n = 200
#' p = 300
#' X = matrix(rnorm(n*p),n,p)
#' beta = double(p)
#' beta[1:10] = 1:10
#' y = X %*% beta + rnorm(n)
#'
#' ## fit mr.ash
#' fit.mr.ash = mr.ash(X, y)
#'
#' ## predict
#' Xnew = matrix(rnorm(n*p),n,p)
#' ypred = predict(fit.mr.ash, Xnew)
#'
#' @importFrom stats predict
#'
#' @export predict.mr.ash
#'
#' @export
#'
predict.mr.ash = function(object,newx = NULL,
                          type=c("response","coefficients"),...) {
  type <- match.arg(type)

  # Coefficient extraction does not use newx.
  if (type == "coefficients") {
    if (!missing(newx))
      stop("Do not supply newx when predicting coefficients")
    return(coef(object))
  }

  # Without new data, return the fitted values from training.
  if (missing(newx))
    return(object$fitted)

  # Predictions are only supported when the sole covariate is the
  # intercept column.
  if (!all(object$data$Z == 1))
    stop("predict.mr.ash is not implemented for covariates Z other than ",
         "intercept")
  b <- coef(object)[-1]
  drop(object$intercept + newx %*% b)
}
set_default_tolerance = function() {
  # Default convergence settings for mr.ash: epstol guards against
  # taking logarithms of zero; convtol is the relative-change threshold
  # used to stop the outer loop.
  list(epstol = 1e-12, convtol = 1e-4)
}
#' @title Approximation Posterior Expectations from Mr.ASH Fit
#'
#' @description Recover the parameters specifying the variational
#' approximation to the posterior distribution of the regression
#' coefficients. To streamline the model fitting implementation, and
#' to reduce memory requirements, \code{\link{mr.ash}} does not store
#' all the parameters needed to specify the approximate posterior.
#'
#' @param fit A Mr.ASH fit obtained, for example, by running
#' \code{mr.ash}.
#'
#' @return A list object with the following elements:
#'
#' \item{phi}{An p x K matrix containing the posterior assignment
#' probabilities, where p is the number of predictors, and K is the
#' number of mixture components. (Each row of \code{phi} should sum to
#' 1.)}
#'
#' \item{m}{An p x K matrix containing the posterior means conditional
#' on assignment to each mixture component.}
#'
#' \item{s2}{An p x K matrix containing the posterior variances
#' conditional on assignment to each mixture component.}
#'
#' @examples
#' ## generate synthetic data
#' set.seed(1)
#' n = 200
#' p = 300
#' X = matrix(rnorm(n*p),n,p)
#' beta = double(p)
#' beta[1:10] = 1:10
#' y = X %*% beta + rnorm(n)
#'
#' ## fit mr.ash
#' fit.mr.ash = mr.ash(X, y)
#'
#' ## recover full posterior
#' full.post = get.full.posterior(fit.mr.ash)
#'
#' @export
#'
get.full.posterior <- function(fit) {
  # compute residuals at the posterior mean coefficients
  r = fit$data$y - fit$data$X %*% fit$beta
  # compute bw (per-variable x_j'r + w_j*beta_j) and s2, the p x K
  # matrix of posterior variances conditional on each mixture component
  bw = as.vector((t(fit$data$X) %*% r) + fit$data$w * fit$beta)
  s2 = fit$sigma2 / outer(fit$data$w, 1/fit$data$sa2, '+')
  # compute m (p x K conditional posterior means) and phi (posterior
  # assignment probabilities)
  m = bw * s2 / fit$sigma2
  # log unnormalized assignment probabilities, p x K
  phi = -log(1 + outer(fit$data$w,fit$data$sa2))/2 + m * (bw/2/fit$sigma2)
  # exponentiate after subtracting row maxima (numerical stability);
  # transposing to K x p lets the length-K vector fit$pi recycle down
  # columns, weighting component k by pi[k]
  phi = c(fit$pi) * t(exp(phi - apply(phi,1,max)))
  # transpose back to p x K and normalize each row to sum to 1: the
  # colSums of the K x p matrix are the per-variable totals, and the
  # length-p vector recycles down the rows of the p x K matrix
  phi = t(phi) / colSums(phi)
  return (list(phi = phi, m = m, s2 = s2))
}
# Gibbs sampling counterpart to mr.ash (not exported). Prepares the data
# exactly like mr.ash -- removes covariates, initializes beta and the
# residuals -- then hands off to a sampler.
#
# NOTE(review): the "gibbs sampling" call below invokes gibbs.sampling
# itself with a completely different argument layout (data$X, w, sa2,
# pi, ...), which recurses indefinitely. It presumably should call a
# compiled sampling routine instead -- confirm the intended backend
# before using this function.
gibbs.sampling = function(X, y, pi, sa2 = (2^((0:19) / 20) - 1)^2,
                          max.iter = 1500, burn.in = 500,
                          standardize = FALSE, intercept = TRUE,
                          sigma2 = NULL, beta.init = NULL,
                          verbose = TRUE){
  # get sizes
  n = nrow(X)
  p = ncol(X)
  # remove covariates
  data = remove_covariate(X, y, NULL, standardize, intercept)
  # initialize the coefficients (zero unless beta.init is given)
  if ( is.null(beta.init) )
    data$beta = as.vector(double(p))
  else
    data$beta = as.vector(beta.init)
  # initialize r
  r = data$y - data$X %*% data$beta
  # sigma2 (note: uses stats::var here, unlike mr.ash which uses var.n)
  if ( is.null(sigma2) )
    sigma2 = c(var(r))
  # precalculate x_j^T x_j for each column j
  w = colSums(data$X^2)
  data$w = w
  # gibbs sampling (see NOTE(review) above: this is a self-call)
  out = gibbs.sampling(data$X, w, sa2, pi, data$beta, r, sigma2, max.iter, burn.in, verbose)
  out$data = data
  # recover the intercept/covariate effects on the original scale
  out$mu = c(data$ZtZiZy - data$ZtZiZX %*% out$beta)
  return (out)
}
var.n = function(x) {
  # Population (maximum-likelihood) variance: divide by n rather than
  # the n - 1 used by stats::var.
  dev = x - mean(x)
  sum(dev^2) / length(dev)
}
================================================
FILE: R/mr.ash.rss.R
================================================
#' @title Bayesian Multiple Regression with Mixture-of-Normals Prior (RSS)
#'
#' @description This function performs Bayesian multiple regression with a
#' mixture-of-normals prior using summary statistics (RSS: Regression with
#' Summary Statistics). It uses a C++ implementation for efficient computation.
#'
#' @param bhat Numeric vector of observed effect sizes (standardized).
#' @param shat Numeric vector of standard errors of effect sizes.
#' @param R Numeric matrix of the correlation matrix.
#' @param var_y Numeric value of the variance of the outcome.
#' If NULL, it is set to Inf (effects on standardized scale).
#' @param n Integer value of the sample size.
#' @param s0 Numeric vector of prior variances for the mixture components.
#' @param w0 Numeric vector of prior weights for the mixture components.
#' @param sigma2_e Numeric value of the initial error variance estimate.
#' If \code{NULL} (default), initialized to \code{var_y} (matching
#' \code{mr.ash} behavior of using residual variance with zero
#' initialization), or 1 when \code{var_y = Inf}.
#' @param mu1_init Numeric vector of initial values for the posterior mean of
#' the coefficients. Default is \code{numeric(0)} (initialize to zero).
#' @param tol Numeric value of the convergence tolerance. Default is 1e-8.
#' @param max_iter Integer value of the maximum number of iterations.
#' Default is 1e5.
#' @param z Numeric vector of Z-scores. If not provided, computed as
#' \code{bhat / shat}.
#' @param update_w0 Logical value indicating whether to update the mixture
#' weights. Default is TRUE.
#' @param update_sigma Logical value indicating whether to update the error
#' variance. Default is TRUE.
#' @param compute_ELBO Logical value indicating whether to compute the
#' Evidence Lower Bound (ELBO). Default is TRUE.
#' @param standardize Logical value indicating whether to standardize the
#' input data. Default is FALSE.
#'
#' @return A list containing the following components:
#' \describe{
#' \item{beta}{Numeric vector of posterior mean coefficients (same as mu1).}
#' \item{sigma2}{Numeric value of the residual variance (same as sigma2_e).}
#' \item{pi}{Numeric vector of mixture weights (same as w0).}
#' \item{iter}{Integer, number of iterations performed.}
#' \item{varobj}{Numeric vector of ELBO values per iteration.}
#' \item{mu1}{Numeric vector of the posterior mean of the coefficients.}
#' \item{sigma2_1}{Numeric vector of the posterior variance of the coefficients.}
#' \item{w1}{Numeric matrix of the posterior assignment probabilities.}
#' \item{sigma2_e}{Numeric value of the error variance.}
#' \item{w0}{Numeric vector of the mixture weights.}
#' \item{ELBO}{Numeric value of the Evidence Lower Bound (if \code{compute_ELBO = TRUE}).}
#' }
#'
#' @examples
#' # Generate example data
#' set.seed(985115)
#' n <- 350
#' p <- 16
#' sigmasq_error <- 0.5
#' zeroes <- rbinom(p, 1, 0.6)
#' beta.true <- rnorm(p, 1, sd = 4)
#' beta.true[zeroes] <- 0
#'
#' X <- cbind(matrix(rnorm(n * p), nrow = n))
#' X <- scale(X, center = TRUE, scale = FALSE)
#' y <- X %*% matrix(beta.true, ncol = 1) + rnorm(n, 0, sqrt(sigmasq_error))
#' y <- scale(y, center = TRUE, scale = FALSE)
#'
#' # Set the prior
#' K <- 9
#' sigma0 <- c(0.001, .1, .5, 1, 5, 10, 20, 30, .005)
#' omega0 <- rep(1 / K, K)
#'
#' # Calculate summary statistics
#' b.hat <- sapply(1:p, function(j) {
#' summary(lm(y ~ X[, j]))$coefficients[-1, 1]
#' })
#' s.hat <- sapply(1:p, function(j) {
#' summary(lm(y ~ X[, j]))$coefficients[-1, 2]
#' })
#' R.hat <- cor(X)
#' var_y <- var(y)
#' sigmasq_init <- 1.5
#'
#' # Run mr.ash.rss
#' out <- mr.ash.rss(b.hat, s.hat,
#' R = R.hat, var_y = var_y, n = n,
#' sigma2_e = sigmasq_init, s0 = sigma0, w0 = omega0,
#' mu1_init = rep(0, ncol(X)), tol = 1e-8, max_iter = 1e5,
#' update_w0 = TRUE, update_sigma = TRUE, compute_ELBO = TRUE,
#' standardize = FALSE
#' )
#'
#' @export
mr.ash.rss <- function(bhat, shat, R, var_y, n,
                       s0, w0,
                       sigma2_e = NULL, mu1_init = numeric(0),
                       tol = 1e-8, max_iter = 1e5, z = numeric(0),
                       update_w0 = TRUE, update_sigma = TRUE,
                       compute_ELBO = TRUE, standardize = FALSE) {
  # A missing outcome variance means effects are on the standardized scale.
  if (is.null(var_y)) var_y <- Inf

  # Compute z-scores from bhat/shat when z is not supplied. Accept NULL
  # or any zero-length vector: the previous check,
  # identical(z, numeric(0)), missed NULL and integer(0).
  if (is.null(z) || length(z) == 0) z <- bhat / shat

  # Default sigma2_e: use var_y when available (matches mr.ash behavior of
  # initializing to var(residuals) when beta.init is zero, since var(y - X*0) = var(y)).
  # When var_y is Inf (standardized scale), default to 1.
  if (is.null(sigma2_e)) {
    sigma2_e <- if (is.finite(var_y)) var_y else 1
  }

  # All of the model fitting is done by the C++ implementation.
  result <- mr_ash_rss_cpp(
    bhat = bhat, shat = shat, z = z, R = R,
    var_y = var_y, n = n, sigma2_e = sigma2_e,
    s0 = s0, w0 = w0, mu1_init = mu1_init,
    tol = tol, max_iter = max_iter,
    update_w0 = update_w0, update_sigma = update_sigma,
    compute_ELBO = compute_ELBO, standardize = standardize
  )

  # Add mr.ash-compatible output names for consistency
  result$beta <- c(result$mu1)                # posterior mean coefficients
  result$sigma2 <- c(result$sigma2_e)         # residual variance (scalar)
  result$pi <- c(result$w0)                   # mixture weights
  result$iter <- as.integer(c(result$iter))   # iteration count
  result$varobj <- c(result$varobj)           # ELBO per iteration
  return(result)
}
================================================
FILE: R/predict.susie.R
================================================
#' @title Extract regression coefficients from susie fit
#'
#' @param object A susie fit.
#'
#' @param \dots Additional arguments passed to the generic \code{coef}
#' method.
#'
#' @return A p+1 vector, the first element being an intercept, and the
#' remaining p elements being estimated regression coefficients.
#'
#' @importFrom stats coef
#'
#' @method coef susie
#'
#' @export coef.susie
#'
#' @export
#'
coef.susie <- function(object, ...) {
  # Posterior mean effect of each variable, summed over the single
  # effects and mapped back to the scale of the original X.
  scale_factors <- object$X_column_scale_factors
  b <- colSums(object$alpha * object$mu) / scale_factors
  # Add the unmappable (theta) effects when they are present.
  if (!is.null(object$theta))
    b <- b + object$theta / scale_factors
  c(object$intercept, b)
}
#' @title Predict outcomes or extract coefficients from susie fit.
#'
#' @param object A susie fit.
#'
#' @param newx A new value for X at which to do predictions.
#'
#' @param type The type of output. For \code{type = "response"},
#' predicted or fitted outcomes are returned; for \code{type =
#' "coefficients"}, the estimated coefficients are returned.
#'
#' @param \dots Other arguments used by generic predict function. These
#' extra arguments are not used here.
#'
#' @return For \code{type = "response"}, predicted or fitted outcomes
#' are returned; for \code{type = "coefficients"}, the estimated
#' coefficients are returned. If the susie fit has intercept =
#' \code{NA} (which is common when using \code{susie_ss}) then
#' predictions are computed using an intercept of 0, and a warning is
#' emitted.
#'
#' @importFrom stats coef
#'
#' @method predict susie
#'
#' @export predict.susie
#'
#' @export
#'
predict.susie <- function(object, newx = NULL,
                          type = c("response", "coefficients"), ...) {
  type <- match.arg(type)

  # Coefficient extraction ignores newx entirely.
  if (type == "coefficients") {
    if (!missing(newx))
      stop("Do not supply newx when predicting coefficients")
    return(coef(object))
  }

  # Without new data, return the fitted values from training.
  if (missing(newx))
    return(object$fitted)

  b <- coef(object)[-1]
  if (is.na(object$intercept)) {
    # No intercept was estimated (common for susie_ss fits); predict
    # with an intercept of 0 and warn.
    warning_message("The prediction assumes intercept = 0")
    drop(newx %*% b)
  } else {
    drop(object$intercept + newx %*% b)
  }
}
================================================
FILE: R/refinement.R
================================================
#' Block coordinate ascent for iterative model refinement.
#'
#' Generic framework for post-convergence refinement of fitted models.
#' Repeatedly applies a block update (\code{step_fn}) to a fitted model,
#' monitoring ELBO for convergence and stability. Both CS refinement
#' (\code{\link{run_refine}}) and residual variance estimation (mvsusieR)
#' are instances of block coordinate ascent over different parameter blocks.
#'
#' Convergence is reached when either:
#' \itemize{
#' \item \code{step_fn} returns \code{converged = TRUE}
#' (the block update signals no further improvement), or
#' \item the relative ELBO change falls below \code{tol}
#' (ELBO stabilized across block updates).
#' }
#'
#' A warning is issued if the ELBO decreases between iterations.
#'
#' @param model Fitted model (e.g., from \code{susie_workhorse} or
#' \code{mvsusie_workhorse}).
#' @param data Data object passed to \code{step_fn}.
#' @param step_fn A function with signature
#' \code{function(model, data, iter)} that performs one block coordinate
#' update. Must return a named list with elements:
#' \describe{
#' \item{model}{(required) The updated model.}
#' \item{data}{(optional) Updated data object, e.g. after changing
#' residual variance. If \code{NULL}, the data is unchanged.}
#' \item{converged}{(optional) Logical; if \code{TRUE}, stop
#' iterating.}
#' \item{log_msg}{(optional) Character string appended to verbose
#' output.}
#' }
#' @param max_iter Maximum number of block ascent iterations
#' (default 100).
#' @param tol Convergence tolerance for relative ELBO change
#' (default 1e-3).
#' @param verbose If \code{TRUE}, print progress each iteration
#' (default \code{FALSE}).
#'
#' @return The refined model, with \code{model$converged} set to
#' \code{TRUE} or \code{FALSE}.
#'
#' @export
block_coordinate_ascent <- function(model, data, step_fn,
                                    max_iter = 100, tol = 1e-3,
                                    verbose = FALSE) {
  elbo_before <- susie_get_objective(model)
  model_before <- model
  done <- FALSE
  for (it in seq_len(max_iter)) {
    step <- step_fn(model, data, it)
    model <- step$model
    # The step may also update the data object (e.g. residual variance).
    if (!is.null(step$data))
      data <- step$data
    elbo_now <- susie_get_objective(model)
    delta <- elbo_now - elbo_before
    if (delta < 0) {
      # The block update made the objective worse: discard it, restore
      # the previous model, and stop.
      if (verbose)
        message(sprintf(
          "Block ascent iter %d: update did not improve ELBO (change=%.4g); ",
          it, delta),
          "rejecting update and stopping.")
      model <- model_before
      done <- TRUE
      break
    }
    if (verbose) {
      progress <- sprintf("Block ascent iter %d: ELBO=%.4f, change=%.4g",
                          it, elbo_now, delta)
      if (!is.null(step$log_msg))
        progress <- paste0(progress, ", ", step$log_msg)
      message(progress)
    }
    # The step function may declare convergence directly.
    if (isTRUE(step$converged)) {
      done <- TRUE
      break
    }
    # Otherwise stop once the relative ELBO change falls below tolerance.
    if (abs(delta) / max(1, abs(elbo_now)) < tol) {
      done <- TRUE
      break
    }
    elbo_before <- elbo_now
    model_before <- model
  }
  model$converged <- done
  if (!done)
    warning_message("Block coordinate ascent did not converge in ",
                    max_iter, " iterations")
  return(model)
}
# Credible set refinement via block coordinate ascent.
#
# For each credible set, perturbs prior weights (zeroing out the CS
# variables) and re-fits via a two-step procedure:
# Step 1: fit with zeroed CS weights (explores alternative signals)
# Step 2: re-fit with original weights, warm-started from Step 1
# The best candidate (highest ELBO) is accepted if it improves beyond
# tolerance. This is repeated until no further improvement.
#
# @keywords internal
run_refine <- function(model, data, params) {
  # Any user-supplied initialization is ignored: refinement always
  # restarts fits from perturbed prior weights.
  if (!is.null(params$model_init))
    warning_message("The given model_init is not used in refinement")
  # Original (per-variable) prior weights, reused by every candidate fit.
  pw_s <- extract_prior_weights(model)
  # One block coordinate step: try refining each CS, pick best candidate.
  cs_refine_step <- function(model, data, iter) {
    # Nothing to refine when the fit has no credible sets.
    if (is.null(model$sets) || length(model$sets$cs) == 0)
      return(list(model = model, converged = TRUE))
    candidates <- list()
    for (cs_idx in seq_along(model$sets$cs)) {
      # Zero out prior weights for variables in this CS
      pw_cs <- pw_s
      pw_cs[model$sets$cs[[cs_idx]]] <- 0
      # If every variable now has zero weight there is nothing to fit.
      if (all(pw_cs == 0)) break
      # Step 1: fit with zeroed CS weights (no initialization)
      p1 <- params
      p1$prior_weights <- reconstruct_full_weights(pw_cs, model$null_weight)
      p1$null_weight <- model$null_weight
      p1$model_init <- NULL
      p1$verbose <- FALSE
      p1$track_fit <- FALSE
      p1$refine <- FALSE  # avoid recursive refinement inside the refit
      m1 <- susie_workhorse(data, p1)
      # Step 2: re-fit with original weights, warm-started from Step 1
      init <- list(alpha = m1$alpha, mu = m1$mu, mu2 = m1$mu2)
      class(init) <- "susie"
      p2 <- params
      p2$prior_weights <- reconstruct_full_weights(pw_s, model$null_weight)
      p2$null_weight <- model$null_weight
      p2$model_init <- init
      p2$verbose <- FALSE
      p2$track_fit <- FALSE
      p2$refine <- FALSE
      candidates <- c(candidates, list(susie_workhorse(data, p2)))
    }
    if (length(candidates) == 0)
      return(list(model = model, converged = TRUE))
    # Compare candidate ELBOs against the current model's ELBO.
    elbos <- sapply(candidates, susie_get_objective)
    current_elbo <- susie_get_objective(model)
    if (max(elbos) - current_elbo > params$tol) {
      # Accept best candidate
      list(model = candidates[[which.max(elbos)]])
    } else {
      # No improvement beyond tolerance -- converged
      list(model = model, converged = TRUE)
    }
  }
  block_coordinate_ascent(model, data, cs_refine_step,
                          max_iter = 100, tol = params$tol,
                          verbose = params$verbose)
}
================================================
FILE: R/rss_lambda_methods.R
================================================
# =============================================================================
# OMEGA OPTIMIZATION TOLERANCES
#
# Named constants for multi-panel mixture weight optimization.
# Collected here to avoid scattered magic numbers.
# =============================================================================
# Tolerances used by the multi-panel omega (mixture weight) optimizer.
.omega_tol <- list(
  convergence = 1e-3, # max|delta omega| below which future updates are skipped
  grid_spacing = 0.25, # K=2 warm-start grid resolution
  fw_stop = 1e-6, # Frank-Wolfe improvement stopping criterion
  fw_max_iter = 5L # Frank-Wolfe max iterations
)
# =============================================================================
# DATA INITIALIZATION & CONFIGURATION
#
# Functions for data object setup, configuration, and preprocessing.
# These prepare data objects for model fitting and handle data-specific
# configurations like unmappable effects.
#
# Functions: configure_data, get_var_y
# =============================================================================
# Configure data: rss_lambda shares the default configuration pipeline.
#' @keywords internal
configure_data.rss_lambda <- function(data, params) {
  configure_data.default(data, params)
}
# Variance of y: z-scores are standardized, so this is always 1.
#' @keywords internal
get_var_y.rss_lambda <- function(data, ...) {
  1
}
# =============================================================================
# MODEL INITIALIZATION & SETUP
#
# Functions for initializing model objects and setting up initial states.
# These create model matrices, initialize fitted values, and prepare
# the SuSiE model for iterative fitting.
#
# Functions: initialize_susie_model, initialize_fitted, validate_prior, track_ibss_fit
# =============================================================================
# Initialize SuSiE model for the rss_lambda dispatch: build the base
# matrices, then precompute SinvRj and RjSinvRj in the eigenbasis of R.
#' @keywords internal
initialize_susie_model.rss_lambda <- function(data, params, var_y, ...) {
  model <- initialize_matrices(data, params, var_y)
  decomp <- get_eigen_R(data, model)
  evals <- decomp$values
  evecs <- decomp$vectors
  evecs_t <- t(evecs)
  Dinv <- compute_Dinv(model, data)
  # S^{-1} R = V diag(Dinv * D) V'; diag(R S^{-1} R) via columnwise sums.
  model$SinvRj <- evecs %*% (Dinv * evals * evecs_t)
  model$RjSinvRj <- colSums(evecs_t * (Dinv * evals^2 * evecs_t))
  model
}
# Initialize fitted values: Rz = R %*% (posterior mean of b).
#' @keywords internal
initialize_fitted.rss_lambda <- function(data, mat_init) {
  b_bar <- colSums(mat_init$alpha * mat_init$mu)
  list(Rz = as.vector(compute_Rv(data, b_bar)))
}
# Validate prior variance: delegate to the default method unchanged.
#' @keywords internal
validate_prior.rss_lambda <- function(data, params, model, ...) {
  validate_prior.default(data, params, model, ...)
}
# Track core parameters per IBSS iteration: default behavior applies.
#' @keywords internal
track_ibss_fit.rss_lambda <- function(data, params, model, tracking, iter, elbo, ...) {
  track_ibss_fit.default(data, params, model, tracking, iter, elbo, ...)
}
# =============================================================================
# SINGLE EFFECT REGRESSION & ELBO
#
# Core functions for single effect regression computation and ELBO calculation.
# These handle the mathematical core of SuSiE including residual computation, SER
# statistics, posterior moments, and log-likelihood calculations for the ELBO.
#
# Functions: compute_residuals, compute_ser_statistics, SER_posterior_e_loglik,
# calculate_posterior_moments, compute_kl, get_ER2, Eloglik, loglik, neg_loglik
# =============================================================================
# Compute residuals for single effect regression: remove the l-th
# (slot-weighted) effect from the fitted values, then form z - Rz_{-l}.
#' @keywords internal
compute_residuals.rss_lambda <- function(data, params, model, l, ...) {
  weight_l <- get_slot_weight(model, l)
  fitted_minus_l <- model$Rz -
    weight_l * compute_Rv(data, model$alpha[l, ] * model$mu[l, ])
  model$residuals <- data$z - fitted_minus_l
  model$fitted_without_l <- fitted_minus_l
  # RSS lambda works on the z-score scale, so residual variance is fixed at 1.
  model$residual_variance <- 1
  model
}
# Compute SER statistics: marginal effect estimates and their variances
# in the S^{-1}-weighted basis, plus prior-variance optimization settings.
#' @keywords internal
compute_ser_statistics.rss_lambda <- function(data, params, model, l, ...) {
  shat2 <- 1 / model$RjSinvRj
  betahat <- as.vector(crossprod(model$SinvRj, model$residuals)) * shat2
  # Warm start for log-scale optimization, floored at 1e-6.
  warm_start <- log(max(c(betahat^2 - shat2, 1e-6), na.rm = TRUE))
  list(
    betahat = betahat,
    shat2 = shat2,
    optim_init = warm_start,
    optim_bounds = c(-30, 15),
    optim_scale = "log"
  )
}
# SER posterior expected log-likelihood for the l-th effect, using the
# posterior first (Eb) and second (Eb2) moments under alpha.
#' @keywords internal
SER_posterior_e_loglik.rss_lambda <- function(data, params, model, l) {
  Eb <- model$alpha[l, ] * model$mu[l, ]
  Eb2 <- model$alpha[l, ] * model$mu2[l, ]
  decomp <- get_eigen_R(data, model)
  Dinv <- compute_Dinv(model, data)
  rR <- compute_Rv(data, model$residuals)
  # S^{-1} E[b] via the eigen decomposition of R.
  SinvEb <- decomp$vectors %*% (Dinv * crossprod(decomp$vectors, Eb))
  -0.5 * (-2 * sum(rR * SinvEb) + sum(model$RjSinvRj * Eb2))
}
# Calculate posterior moments for single effect regression.
# Given prior variance V, fills row l of model$mu and model$mu2 with the
# posterior mean and second moment implied by shat2 = 1 / RjSinvRj.
#' @keywords internal
calculate_posterior_moments.rss_lambda <- function(data, params, model, V, l, ...) {
  shat2 <- 1 / model$RjSinvRj
  post_var <- V * shat2 / (V + shat2)
  signal <- as.vector(crossprod(model$SinvRj, model$residuals))
  # Reuse shat2 (the original recomputed 1 / model$RjSinvRj here).
  betahat <- signal * shat2
  post_mean <- post_var / shat2 * betahat
  post_mean2 <- post_var + post_mean^2
  # Store posterior moments in model
  model$mu[l, ] <- post_mean
  model$mu2[l, ] <- post_mean2
  return(model)
}
# KL divergence for the l-th effect; the generic default applies unchanged.
#' @keywords internal
compute_kl.rss_lambda <- function(data, params, model, l) {
  compute_kl.default(data, params, model, l)
}
# Expected squared residuals under the model, computed entirely in the
# eigenbasis of R so no p x p linear solve is required. Combines five
# terms: the data quadratic form, the cross term with the posterior mean,
# the posterior-mean quadratic form, a per-effect correction, and the
# second-moment trace term.
#' @keywords internal
get_ER2.rss_lambda <- function(data, model) {
  # Eigen decomposition components (D = eigenvalues, V = eigenvectors of R)
  eigen_R <- get_eigen_R(data, model)
  D <- eigen_R$values
  V <- eigen_R$vectors
  # Inverse eigenvalues of S -- presumably 1/(sigma2*D + lambda), matching
  # the spectrum used in Eloglik.rss_lambda; confirm in compute_Dinv.
  Dinv <- compute_Dinv(model, data)
  # Cached quantities: V'z, overall posterior mean zbar, per-variant
  # posterior second moments
  Vtz <- get_Vtz(data, model)
  zbar <- model$zbar
  postb2 <- model$diag_postb2
  # z^T S^{-1} z (use model z_null_norm2 if omega changed, else data)
  # When lambda=0, null-space components are projected out (ignored).
  z_null_norm2 <- if (!is.null(model$z_null_norm2)) model$z_null_norm2 else data$z_null_norm2
  zSinvz <- sum((Dinv * Vtz) * Vtz)
  if (data$lambda > 0) zSinvz <- zSinvz + z_null_norm2 / data$lambda
  # Cross term. NOTE(review): tmp = V Dinv D V'z, i.e. S^{-1} R z (note the
  # extra factor of D), so this is -2 zbar^T S^{-1} R z rather than the
  # -2 zbar^T S^{-1} z the original comment stated -- confirm against the
  # derivation.
  tmp <- V %*% (Dinv * (D * Vtz))
  term2 <- -2 * sum(tmp * zbar)
  # zbar^T R S^{-1} R zbar
  Vtzbar <- crossprod(V, zbar)
  term3 <- sum((Vtzbar^2) * (Dinv * D^2))
  # RZ2 = sum((Z %*% RSinvR) * Z); model$Z is presumably the L x p matrix of
  # per-effect posterior means (TODO confirm), so term4 removes within-effect
  # cross terms double-counted in term3.
  VtZ <- model$Z %*% V
  term4 <- sum((VtZ^2) %*% (Dinv * D^2))
  # diag(RSinvR)^T postb2: per-variant second-moment (trace) contribution.
  # rep(..., each = nrow(V)) scales column k of V^2 by (Dinv * D^2)[k].
  diag_RSinvR <- rowSums((V^2) * rep(Dinv * D^2, each = nrow(V)))
  term5 <- sum(diag_RSinvR * postb2)
  return(zSinvz + term2 + term3 - term4 + term5)
}
# Expected log-likelihood of z under N(R b, S) with S diagonalized by the
# eigenbasis of R; d = sigma2 * D + lambda are the eigenvalues of S.
#' @keywords internal
Eloglik.rss_lambda <- function(data, model) {
  evals <- get_eigen_R(data, model)$values
  d <- model$sigma2 * evals + data$lambda
  # Zero entries (possible when lambda = 0) belong to the null space of R
  # and are projected out of the Gaussian log-likelihood.
  d_pos <- d[d > 0]
  -(length(d_pos) / 2) * log(2 * pi) - 0.5 * sum(log(d_pos)) -
    0.5 * get_ER2.rss_lambda(data, model)
}
# Log-likelihood for RSS: Wakefield approximate Bayes factors on the
# z-score scale, stabilized and converted into posterior inclusion weights.
#' @keywords internal
loglik.rss_lambda <- function(data, params, model, V, ser_stats, l = NULL, ...) {
  # Floor shat2 at machine epsilon so the ratio stays finite.
  s2 <- pmax(ser_stats$shat2, .Machine$double.eps)
  bhat <- ser_stats$betahat
  lbf <- -0.5 * log(1 + V / s2) + 0.5 * bhat^2 * V / (s2 * (V + s2))
  # Numerically stabilize the log Bayes factors before forming weights.
  stabilized <- lbf_stabilization(lbf, model$pi, ser_stats$shat2)
  posterior <- compute_posterior_weights(stabilized$lpo)
  if (is.null(l)) {
    # Prior-variance optimization path: only the model-level lbf is needed.
    return(posterior$lbf_model)
  }
  # Store the l-th effect's posterior weights and Bayes factors.
  model$alpha[l, ] <- posterior$alpha
  model$lbf[l] <- posterior$lbf_model
  model$lbf_variable[l, ] <- stabilized$lbf
  model
}
# Negative log-likelihood wrapper for the prior-variance optimizer.
#' @keywords internal
neg_loglik.rss_lambda <- function(data, params, model, V_param, ser_stats, ...) {
  # RSS lambda always optimizes on the log scale, so map back to V first.
  -loglik.rss_lambda(data, params, model, exp(V_param), ser_stats)
}
# =============================================================================
# MODEL UPDATES & FITTING
#
# Functions for iterative model updates and variance component estimation.
# These handle the dynamic aspects of model fitting including fitted value
# updates and variance component estimation.
#
# Functions: update_fitted_values, update_model_variance,
#   update_variance_components, update_derived_quantities
# =============================================================================
# Update fitted values: add the (slot-weighted) l-th effect back onto the
# leave-one-out fit, then refresh the precomputed RSS-lambda terms.
#' @keywords internal
update_fitted_values.rss_lambda <- function(data, params, model, l, ...) {
  weight_l <- get_slot_weight(model, l)
  effect_l <- as.vector(compute_Rv(data, model$alpha[l, ] * model$mu[l, ]))
  model$Rz <- model$fitted_without_l + weight_l * effect_l
  precompute_rss_lambda_terms(data, model)
}
# Update model variance: re-estimate sigma2 (when requested), clamp it to
# the user-specified bounds, and refresh the derived eigen quantities.
#' @keywords internal
update_model_variance.rss_lambda <- function(data, params, model) {
  if (!isTRUE(params$estimate_residual_variance)) return(model)
  model <- modifyList(model, update_variance_components(data, params, model))
  # Clamp sigma2 to [lowerbound, upperbound].
  model$sigma2 <- min(max(model$sigma2, params$residual_variance_lowerbound),
                      params$residual_variance_upperbound)
  update_derived_quantities(data, params, model)
}
# Update variance components: 1-D maximization of the expected
# log-likelihood over sigma2 on (1e-4, 1 - lambda].
#' @keywords internal
#' @importFrom stats optimize
update_variance_components.rss_lambda <- function(data, params, model, ...) {
  if (!isTRUE(params$estimate_residual_variance)) return(list())
  sigma2_max <- 1 - data$lambda
  eloglik_at <- function(s2) {
    candidate <- model
    candidate$sigma2 <- s2
    Eloglik.rss_lambda(data, candidate)
  }
  sigma2_hat <- optimize(eloglik_at, interval = c(1e-4, sigma2_max),
                         maximum = TRUE)$maximum
  # Prefer the boundary when the interior optimum is no better.
  if (eloglik_at(sigma2_hat) < eloglik_at(sigma2_max))
    sigma2_hat <- sigma2_max
  list(sigma2 = sigma2_hat)
}
# Update derived quantities: recompute SinvRj and RjSinvRj after sigma2
# (and hence S) has changed.
#' @keywords internal
update_derived_quantities.rss_lambda <- function(data, params, model) {
  decomp <- get_eigen_R(data, model)
  Dinv <- compute_Dinv(model, data)
  evals <- decomp$values
  evecs_t <- t(decomp$vectors)
  # S^{-1} R and the diagonal of R S^{-1} R in the eigenbasis.
  model$SinvRj <- decomp$vectors %*% (Dinv * evals * evecs_t)
  model$RjSinvRj <- colSums(evecs_t * (Dinv * (evals^2) * evecs_t))
  model
}
# =============================================================================
# OUTPUT GENERATION & POST-PROCESSING
#
# Functions for generating final results and summary statistics.
# These process fitted models into interpretable outputs including
# credible sets, variable names, and fitted values.
#
# Functions: get_scale_factors, get_intercept, get_fitted, get_cs,
#   get_variable_names, get_zscore, cleanup_model
# =============================================================================
# Scale factors: z-scores need no rescaling, so all factors are 1.
#' @keywords internal
get_scale_factors.rss_lambda <- function(data, params) {
  rep(1, data$p)
}
# Intercept: passed through unchanged from the data object.
#' @keywords internal
get_intercept.rss_lambda <- function(data, params, model, ...) {
  data$intercept_value
}
# Fitted values: the default extraction applies unchanged.
#' @keywords internal
get_fitted.rss_lambda <- function(data, params, model, ...) {
  get_fitted.default(data, params, model, ...)
}
# Credible sets: skipped when coverage/purity settings are absent;
# otherwise purity is computed from X when available, else from R.
#' @keywords internal
get_cs.rss_lambda <- function(data, params, model, ...) {
  if (is.null(params$coverage) || is.null(params$min_abs_corr))
    return(NULL)
  if (is.null(data$X)) {
    # Precomputed-R path: normalize to a correlation matrix for purity.
    susie_get_cs(model,
                 Xcorr = safe_cov2cor(data$R),
                 check_symmetric = FALSE,
                 coverage = params$coverage,
                 min_abs_corr = params$min_abs_corr,
                 n_purity = params$n_purity)
  } else {
    susie_get_cs(model,
                 X = data$X,
                 coverage = params$coverage,
                 min_abs_corr = params$min_abs_corr,
                 n_purity = params$n_purity)
  }
}
# Variable names: taken from the names of the z-score vector.
#' @keywords internal
get_variable_names.rss_lambda <- function(data, model, ...) {
  assign_names(data, model, names(data$z))
}
# Get univariate z-scores: delegate to the default method. Forward `...`
# for consistency with the other wrappers in this file (get_fitted,
# validate_prior, track_ibss_fit), which all pass extra arguments through;
# the original silently dropped them.
#' @keywords internal
get_zscore.rss_lambda <- function(data, params, model, ...) {
  return(get_zscore.default(data, params, model, ...))
}
# Clean up model object for RSS lambda data: strip the common temporary
# fields, then drop the RSS-lambda-specific working quantities.
#' @keywords internal
cleanup_model.rss_lambda <- function(data, params, model, ...) {
  model <- cleanup_model.default(data, params, model, ...)
  temp_fields <- c("SinvRj", "RjSinvRj", "Rz", "Z", "zbar", "diag_postb2",
                   "eigen_R", "Vtz", "z_null_norm2",
                   "residuals", "fitted_without_l", "residual_variance")
  # Setting a list element to NULL removes it.
  for (f in intersect(temp_fields, names(model)))
    model[[f]] <- NULL
  model
}
================================================
FILE: R/rss_mismatch.R
================================================
# RSS R-reference mismatch handling.
#
# Single home for code that targets the discrepancy between the
# supplied R reference and the target population. Active on the SS
# / ss_mixture dispatches; the rss_lambda dispatch (lambda > 0) does
# NOT use any of this (entry-level errors block lambda > 0 with
# R_finite or R_mismatch != "none").
#
# * 1-D MAP optimizer for the variance component lambda_bias
# (estimate_lambda_bias)
# * per-variable inflation factor used inside the SER step
# (compute_shat2_inflation)
# * model-state storage helper for per-slot inflation diagnostics
# (apply_inflation_state)
# * per-sweep region-level fit (fit_R_mismatch)
# * residual R-mismatch QC diagnostic Q_art (R_mismatch = "map_qc")
#
# Storage convention on the model:
# model$lambda_bias scalar set once per sweep by fit_R_mismatch
# model$B_corrected 1 / (1/B + lambda_bias)
# model$shat2_inflation per-variable inflation vector of length p,
# consumed by the SER step.
# =============================================================================
# FINITE-REFERENCE SETUP AND DIAGNOSTICS
# =============================================================================
# Resolve R_finite into an explicit reference sample size B.
# R_finite = TRUE is only meaningful when the reference factor X is available;
# for precomputed R, the caller must provide B explicitly.
#' @keywords internal
resolve_R_finite <- function(R_finite, X = NULL, is_multi_panel = FALSE) {
  if (is.null(R_finite))
    return(NULL)
  # TRUE means "take B from the reference factor matrix itself".
  if (isTRUE(R_finite)) {
    if (is.null(X))
      stop("R_finite = TRUE requires X input. ",
           "When using a precomputed R matrix, provide a positive number ",
           "specifying the reference sample size B instead.")
    return(if (is_multi_panel) min(vapply(X, nrow, integer(1))) else nrow(X))
  }
  is_valid <- is.numeric(R_finite) && all(is.finite(R_finite)) &&
    all(R_finite > 0)
  if (!is_valid)
    stop("R_finite must be NULL, TRUE, or positive numeric value(s).")
  if (!is_multi_panel) {
    if (length(R_finite) != 1)
      stop("R_finite must be NULL, TRUE, or a single positive number.")
    return(as.numeric(R_finite))
  }
  # Multi-panel: recycle a scalar across panels, or require one B per panel.
  K <- if (is.null(X)) length(R_finite) else length(X)
  if (length(R_finite) == 1)
    return(rep(as.numeric(R_finite), K))
  if (length(R_finite) == K)
    return(as.numeric(R_finite))
  stop("For multi-panel input, R_finite must be TRUE, a single positive ",
       "number, or one positive number per panel.")
}
# Compute finite-reference R diagnostics (debiased Frobenius norm,
# effective rank, r/B ratio, per-variant diagonal deviation from 1).
# Used by both summary_stats_constructor and rss_lambda_constructor.
#
# @param X Factor matrix (B x p), or NULL.
# @param R Precomputed R matrix (p x p), or NULL.
# @param B Reference panel sample size.
# @param p Number of variants.
# @param x_is_standardized If TRUE, X has been standardized so X'X = R_hat
#   directly (no normalization). If FALSE, R_hat = X'X/B so the Frobenius
#   norm needs a /B^2 correction.
# @return List with B, p, R_frob_sq_debiased, effective_rank, r_over_B,
#   Rhat_diag_deviation.
#' @keywords internal
compute_R_finite_diagnostics <- function(X = NULL, R = NULL, B, p,
                                         x_is_standardized = FALSE) {
  # Frobenius norm of R_hat and its diagonal, from whichever input exists.
  if (!is.null(X)) {
    gram <- tcrossprod(X)      # B x B; ||XX'||_F^2 == ||X'X||_F^2
    frob_sq <- sum(gram^2)
    diag_hat <- colSums(X^2)
    if (!x_is_standardized) {
      # R_hat = X'X / B, so rescale both quantities.
      frob_sq <- frob_sq / nrow(X)^2
      diag_hat <- diag_hat / nrow(X)
    }
  } else if (!is.null(R)) {
    frob_sq <- sum(R^2)
    diag_hat <- diag(R)
  } else {
    # No reference available: fall back to the identity matrix.
    frob_sq <- p
    diag_hat <- rep(1, p)
  }
  # Debiased Frobenius norm (Ledoit-Wolf unbiased estimator).
  frob_sq_db <- (B * frob_sq - p^2) / (B + 1)
  eff_rank <- p^2 / max(frob_sq_db, 1)
  list(
    B = B,
    p = p,
    R_frob_sq_debiased = frob_sq_db,
    effective_rank = eff_rank,
    r_over_B = eff_rank / B,
    Rhat_diag_deviation = abs(diag_hat - 1)
  )
}
# =============================================================================
# 1-D MAP OPTIMIZER FOR lambda_bias
# =============================================================================
# Estimate extra R-bias variance beyond finite-reference uncertainty.
# Likelihood on the z-score residual scale,
#   tau_j^2 = sigma2 + (1/R_finite_B + lambda_bias) * s_j,
# with a half-Cauchy(prior_scale) prior on u = sqrt(lambda_bias).
# The Fisher-information boundary SE,
#   SE_0 = sqrt(2) * sigma2 / sqrt(sum(s^2)),
# defines a data-driven floor: estimates below 0.1 * SE_0 are zeroed.
# This both suppresses Brent boundary noise and replaces ad-hoc display
# thresholds with one rule; "none" short-circuits before optimization.
#' @keywords internal
estimate_lambda_bias <- function(r, s, sigma2, R_finite_B, method) {
  # "none" (or a missing method) disables the MAP fit entirely.
  if (is.null(method) || method == "none")
    return(0)
  usable <- is.finite(r) & is.finite(s) & s > .Machine$double.eps
  if (!any(usable) || !is.finite(sigma2) || sigma2 <= .Machine$double.eps)
    return(0)
  r2 <- r[usable]^2
  s_use <- s[usable]
  base_var <- sigma2 + s_use / R_finite_B
  # Method-of-moments excesses give a generous upper search bound.
  excess <- (r2 - base_var) / s_use
  excess <- excess[is.finite(excess) & excess > 0]
  prior_scale <- sqrt(max(1 / R_finite_B, 1 / 10000))
  lambda_cap <- max(c(1, 100 / R_finite_B, 100 * prior_scale^2, 10 * excess),
                    na.rm = TRUE)
  # Negative log-posterior in u = sqrt(lambda_bias), half-Cauchy prior on u.
  neg_log_post <- function(u) {
    tau <- base_var + u^2 * s_use
    0.5 * sum(log(tau) + r2 / tau) + log1p((u / prior_scale)^2)
  }
  lambda_hat <- optimize(neg_log_post,
                         interval = c(0, sqrt(lambda_cap)))$minimum^2
  s_norm_sq <- sum(s_use^2)
  if (s_norm_sq <= 0) return(0)
  # Fisher-information boundary SE defines the data-driven zero floor.
  se_floor <- sqrt(2) * sigma2 / sqrt(s_norm_sq)
  if (lambda_hat < 0.1 * se_floor) 0 else lambda_hat
}
# =============================================================================
# PER-VARIABLE INFLATION
# =============================================================================
# SS-path per-variable inflation factor tau_j^2 / sigma2 with
#   tau_j^2 = sigma2 + (1/R_finite_B + lambda_bias) * (eta_j^2 + v_g),
#   eta_j^2 = XtXr_without_l[j]^2 / (n-1)   (z-score scale)
#   v_g     = sum(b_minus_l * XtXr_without_l).
# Reads the region-level scalar lambda_bias from the model (set once per
# sweep by fit_R_mismatch); this function only applies that scalar to the
# slot-specific signal scale. Returns NULL when no inflation applies,
# otherwise a list whose lambda_bias / B_corrected slots are NULL so that
# apply_inflation_state does not write per-slot diagnostics on the SS path.
#' @keywords internal
compute_shat2_inflation <- function(data, model, XtXr_without_l, b_minus_l, r) {
  # Prefer the model-level B (may be refreshed per sweep) over the data's.
  B <- if (!is.null(model$R_finite_B)) model$R_finite_B else data$R_finite_B
  if (is.null(B) || model$sigma2 <= .Machine$double.eps)
    return(NULL)
  # Genetic-variance term (clamped at zero) plus per-variant signal scale.
  v_g <- max(0, sum(b_minus_l * XtXr_without_l))
  xi <- XtXr_without_l^2 / (data$n - 1) + v_g
  bias <- if (is.null(model$lambda_bias)) 0 else model$lambda_bias
  # NOTE: argument r is accepted for call-site compatibility but unused here.
  list(infl = 1 + (1 / B + bias) * xi / model$sigma2,
       lambda_bias = NULL,
       B_corrected = NULL)
}
# =============================================================================
# MODEL-STATE STORAGE FOR PER-SLOT INFLATION DIAGNOSTICS
# =============================================================================
# Unpack the inflation list from compute_shat2_inflation into the model.
# Sets model$shat2_inflation to the per-variant inflation vector (or clears
# it when no inflation applies this step). The per-slot writes to
# model$lambda_bias[l] / model$B_corrected[l] below are dormant in the
# current code: SS / ss_mixture callers always pass
# infl_state$lambda_bias = NULL (the scalar lambda_bias is set once per
# sweep by fit_R_mismatch). The machinery is retained as an inert
# back-compat shim until the storage shape is unified.
#' @keywords internal
apply_inflation_state <- function(model, infl_state, l) {
  if (is.null(infl_state)) {
    # No inflation this step: clear any stale vector.
    model$shat2_inflation <- NULL
    return(model)
  }
  model$shat2_inflation <- infl_state$infl
  n_slots <- nrow(model$alpha)
  if (!is.null(infl_state$lambda_bias)) {
    # (Re)allocate the per-slot vector if missing or of the wrong length.
    if (is.null(model$lambda_bias) || length(model$lambda_bias) != n_slots)
      model$lambda_bias <- rep(0, n_slots)
    model$lambda_bias[l] <- infl_state$lambda_bias
  }
  if (!is.null(infl_state$B_corrected)) {
    if (is.null(model$B_corrected) || length(model$B_corrected) != n_slots)
      model$B_corrected <- rep(NA_real_, n_slots)
    model$B_corrected[l] <- infl_state$B_corrected
  }
  model
}
# =============================================================================
# PER-SWEEP REGION-LEVEL fit_R_mismatch
# =============================================================================
#' Fit the region-level lambda_bias from the post-sweep fitted residual.
#'
#' Math (see archive/ld_mismatch_generativemodel.tex):
#' beta_bar = colSums(slot_weight * alpha * mu) (full posterior mean, betahat scale)
#' XtXr_full = X'X * beta_bar = (n-1) * R * beta_bar
#' r_fit = (data$Xty - XtXr_full) / sqrt(n-1) (z-scale fitted residual)
#' eta_fit_j^2 = XtXr_full[j]^2 / (n-1) (z-scale per-variant signal)
#' v_g_fit = sum(beta_bar * XtXr_full) (= beta_bar_z' R beta_bar_z)
#' xi_fit_j = eta_fit_j^2 + v_g_fit
#' MAP for lambda_bias on the working likelihood
#' r_fit_j ~ N(0, sigma2 + (1/B + lambda) * xi_fit_j)
#' with half-Cauchy(scale = sqrt(max(1/B, 1e-4))) prior on sqrt(lambda).
#' Fisher SE zero-mask applied (see estimate_lambda_bias).
#'
#' Replaces the per-slot re-fit that used to live inside
#' compute_shat2_inflation, which estimated lambda_bias from the
#' intra-sweep r_full_z that drifts as the slot loop progresses.
#'
#' For mode = "map_qc" the same lambda_bias fit is followed by the
#' Q_art residual artifact diagnostic; see compute_Q_art. The
#' artifact diagnostic emits an R warning when flagged; it does
#' not change lambda_bias or the SER likelihood.
#'
#' @keywords internal
#' @noRd
fit_R_mismatch <- function(data, params, model) {
  # Mode defaults to "none", which disables R-mismatch handling entirely.
  R_mismatch <- if (!is.null(params$R_mismatch)) params$R_mismatch else "none"
  if (R_mismatch == "none") return(model)
  # Reference panel size B: model-level value wins over the data's.
  R_finite_B <- if (!is.null(model$R_finite_B)) model$R_finite_B else data$R_finite_B
  # Bail out when there is no finite-reference B or sigma2 is degenerate.
  if (is.null(R_finite_B) || !is.finite(model$sigma2) ||
      model$sigma2 <= .Machine$double.eps)
    return(model)
  # Only the SS / ss_mixture dispatches use this machinery (see file header).
  if (!inherits(data, c("ss", "ss_mixture"))) return(model)
  # Full posterior mean on the betahat scale, slot-weighted across effects.
  sw <- if (!is.null(model$slot_weights)) model$slot_weights else
    rep(1, nrow(model$alpha))
  b_full <- colSums(sw * model$alpha * model$mu)
  # X'X * b_bar: use the cached value when available, else recompute via R.
  XtXr_full <- if (!is.null(model$XtXr))
    model$XtXr
  else compute_Rv(data, b_full)
  nm1 <- if (!is.null(data$nm1)) data$nm1 else (data$n - 1)
  if (!is.finite(nm1) || nm1 <= 0) return(model)
  # z-scale fitted residual and per-variant signal scale (eta^2 + v_g);
  # see the roxygen header for the derivation.
  r_fit_z <- (data$Xty - XtXr_full) / sqrt(nm1)
  v_g_full <- max(sum(b_full * XtXr_full), 0)
  s_full <- XtXr_full^2 / nm1 + v_g_full
  # Region-level MAP fit; B_corrected folds lambda_bias into an effective B.
  model$lambda_bias <- estimate_lambda_bias(r_fit_z, s_full, model$sigma2,
                                            R_finite_B, R_mismatch)
  model$B_corrected <- 1 / (1 / R_finite_B + model$lambda_bias)
  if (R_mismatch == "map_qc") {
    # Q_art diagnostic: fraction of residual energy in low-eigenvalue
    # directions of R (see compute_Q_art). Purely diagnostic; does not
    # change lambda_bias or the SER likelihood.
    eigen_R <- get_R_mismatch_eigen(data, model)
    if (is.null(eigen_R))
      stop("R_mismatch = 'map_qc' requires data$eigen_R; ",
           "summary_stats_constructor should have cached it.")
    eig_delta_rel <- if (!is.null(params$eig_delta_rel))
      params$eig_delta_rel else 1e-3
    eig_delta_abs <- if (!is.null(params$eig_delta_abs))
      params$eig_delta_abs else 0
    art <- compute_Q_art(eigen_R, r_fit_z, eig_delta_rel, eig_delta_abs)
    threshold <- if (!is.null(params$artifact_threshold))
      params$artifact_threshold else 0.1
    flagged <- isTRUE(art$evaluable) && isTRUE(art$Q_art > threshold)
    model$Q_art <- art$Q_art
    model$artifact_evaluable <- art$evaluable
    model$artifact_flag <- flagged
    model$low_eigen_count <- art$low_eigen_count
    model$low_eigen_fraction <- art$low_eigen_count /
      length(eigen_R$values)
    model$eig_delta <- art$eig_delta
    if (flagged) {
      msg <- paste0("Residual R-bias artifact detected (Q_art = ",
                    sprintf("%.3g", art$Q_art),
                    " > threshold ", sprintf("%.3g", threshold),
                    "). Fine-mapping results may be unreliable with ",
                    "this R reference. Consider allele/QC review, ",
                    "multi-reference analysis, or conservative fallback.")
      model$mode_label <- "warning"
      # NOTE(review): the message is emitted via both warning_message()
      # (project helper, definition not visible here) and base warning();
      # confirm this duplication is intentional.
      warning_message(msg)
      warning(msg, call. = FALSE)
    } else {
      model$mode_label <- "normal"
    }
  }
  model
}
# Eigen accessor for map_qc. The ordinary SS path stores data$eigen_R.
# The ss_mixture path can change R through omega, so recover the current
# mixture spectrum from panel_R when omega is available; otherwise fall
# back to the initialized X_meta crossproduct.
#' @keywords internal
get_R_mismatch_eigen <- function(data, model) {
  # A model-level cache always wins.
  if (!is.null(model$eigen_R))
    return(model$eigen_R)
  is_mixture <- inherits(data, "ss_mixture")
  # Plain SS path: the data-level spectrum is valid as-is.
  if (!is.null(data$eigen_R) && !is_mixture)
    return(data$eigen_R)
  if (is_mixture) {
    if (!is.null(model$omega) && !is.null(data$omega_cache)) {
      # Fast path: rebuild the mixture spectrum from the reduced cache.
      eig <- eigen_from_reduced(data$omega_cache, model$omega,
                                data$K, data$p)
      eig$values <- pmax(eig$values, 0)
      return(eig)
    }
    if (!is.null(model$omega) && !is.null(data$panel_R)) {
      # Direct path: omega-weighted sum of the panel correlation matrices.
      R_mix <- Reduce("+", Map(function(w, R) w * R, model$omega, data$panel_R))
      R_mix <- 0.5 * (R_mix + t(R_mix))
      eig <- eigen(R_mix, symmetric = TRUE)
      eig$values <- pmax(eig$values, 0)
      return(eig)
    }
    if (!is.null(data$X)) {
      # Fallback: spectrum of the initialized X_meta crossproduct.
      R_init <- crossprod(data$X) / data$nm1
      R_init <- 0.5 * (R_init + t(R_init))
      eig <- eigen(R_init, symmetric = TRUE)
      eig$values <- pmax(eig$values, 0)
      return(eig)
    }
  }
  data$eigen_R
}
# =============================================================================
# Q_art residual R-bias artifact diagnostic
# =============================================================================
# Fraction of fitted-residual energy in low-eigenvalue directions of R.
#   delta   = max(eig_delta_abs, eig_delta_rel * max(d))
#   A_delta = {k : d_k <= delta}
#   Q_art   = sum_{k in A_delta} (v_k' r_fit)^2 / sum(r_fit^2)
# When the fitted residual is well-explained by R, energy in
# low-eigenvalue directions is near the noise floor. A large Q_art
# flags residual structure that the supplied reference says should be
# weak or absent; allele/strand flips should still be checked by
# kriging-style diagnostics.
#
# Returns a list with Q_art (in [0, 1]), evaluable (FALSE when no
# low-eigenvalues exist or r_fit has negligible energy),
# low_eigen_count, eig_delta. Q_art is a heuristic proportion, not a
# calibrated test statistic.
#' @keywords internal
compute_Q_art <- function(eigen_R, r_fit, eig_delta_rel = 1e-3,
                          eig_delta_abs = 0,
                          residual_energy_floor = 1e-12) {
  d <- eigen_R$values
  # Low-eigenvalue cutoff relative to the spectral radius.
  delta <- max(eig_delta_abs, eig_delta_rel * max(d))
  low_idx <- which(d <= delta)
  total_energy <- sum(r_fit^2)
  # Not evaluable without low directions or without residual energy.
  if (length(low_idx) == 0L || total_energy <= residual_energy_floor)
    return(list(Q_art = 0, evaluable = FALSE,
                low_eigen_count = length(low_idx), eig_delta = delta))
  coords <- as.numeric(crossprod(eigen_R$vectors[, low_idx, drop = FALSE],
                                 r_fit))
  list(Q_art = sum(coords^2) / total_energy, evaluable = TRUE,
       low_eigen_count = length(low_idx), eig_delta = delta)
}
================================================
FILE: R/single_effect_regression.R
================================================
# =============================================================================
# SINGLE EFFECT REGRESSION
#
# Performs single effect regression for the lth effect in the SuSiE model.
# Computes posterior moments, log Bayes factors, and optimizes prior variance.
# =============================================================================
#'
#' @param data Data object (individual, ss, or rss_lambda)
#' @param params Validated params object
#' @param model Current SuSiE model object
#' @param l Effect index being updated
#'
#' @return Updated model with alpha, mu, mu2, lbf, lbf_variable, V, and KL stored for the lth effect
#'
#' @keywords internal
#' @noRd
single_effect_regression <- function(data, params, model, l) {
# Fixed mixture prior path: evaluate BFs on a pre-specified variance grid
# with given mixture weights, bypassing scalar V optimization entirely.
# Activated by estimate_prior_method = "fixed_mixture" with non-NULL
# prior_variance_grid and mixture_weights
gitextract_f5azyrwd/
├── .Rbuildignore
├── .github/
│ ├── dependabot.yml
│ ├── rattler-build_container.df
│ ├── recipe/
│ │ ├── recipe.yaml
│ │ ├── variant_r44.yaml
│ │ └── variant_r45.yaml
│ └── workflows/
│ ├── ci.yml
│ ├── conda_build.yml
│ ├── dispatch_pkgdown_build.yml
│ └── release.yml
├── .gitignore
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── Makefile
├── NAMESPACE
├── R/
│ ├── cpp11.R
│ ├── diagnosis_reports.R
│ ├── example_dataset.R
│ ├── generic_methods.R
│ ├── individual_data_methods.R
│ ├── iterative_bayesian_stepwise_selection.R
│ ├── mixture_prior.R
│ ├── model_methods.R
│ ├── mr.ash.R
│ ├── mr.ash.rss.R
│ ├── predict.susie.R
│ ├── refinement.R
│ ├── rss_lambda_methods.R
│ ├── rss_mismatch.R
│ ├── single_effect_regression.R
│ ├── slot_prior.R
│ ├── sparse_multiplication.R
│ ├── ss_mixture_methods.R
│ ├── sufficient_stats_methods.R
│ ├── summary.susie.R
│ ├── susie.R
│ ├── susieR-package.R
│ ├── susie_auto.R
│ ├── susie_constructors.R
│ ├── susie_get_functions.R
│ ├── susie_plot.R
│ ├── susie_post_outcome_configuration.R
│ ├── susie_rss_utils.R
│ ├── susie_trendfilter.R
│ ├── susie_trendfilter_utils.R
│ ├── susie_utils.R
│ ├── susie_workhorse.R
│ └── univariate_regression.R
├── README.md
├── _pkgdown.yml
├── data/
│ ├── FinemappingConvergence.RData
│ ├── N2finemapping.RData
│ ├── N3finemapping.RData
│ ├── SummaryConsistency.RData
│ ├── data_small.RData
│ └── unmappable_data.RData
├── inst/
│ ├── CITATION
│ ├── analysis/
│ │ ├── optimize.Rmd
│ │ ├── test_susie_auto.Rmd
│ │ └── testing.Rmd
│ ├── code/
│ │ ├── caviar.R
│ │ ├── compute_ss_memory.R
│ │ ├── dap-g.py
│ │ ├── finemap.R
│ │ ├── finemap_1p4.R
│ │ ├── gen_original_results.R
│ │ ├── monitor_memory.py
│ │ ├── python_example/
│ │ │ ├── N3finemapping_python.ipynb
│ │ │ └── environment.yml
│ │ ├── simulate_lambda_pop_ld_bias.R
│ │ ├── small_sim.R
│ │ ├── sparse_matrix_strategy.Rmd
│ │ ├── summarize_small_sim.R
│ │ ├── susie_memory.R
│ │ └── susie_rss_memory.R
│ ├── datafiles/
│ │ ├── FinemappingConvergence1k.RData
│ │ ├── N3finemapping.CAVIAR.RData
│ │ ├── N3finemapping.DAP.RData
│ │ ├── N3finemapping.FINEMAP.RData
│ │ ├── SummaryConsistency1k.RData
│ │ └── small_sim_out_v0.14.48.RData
│ ├── misc/
│ │ ├── README_susie_v2.md
│ │ ├── format_r_code.sh
│ │ ├── post-commit.sh
│ │ ├── pre-commit.sh
│ │ └── uncrustify_default.cfg
│ └── notebooks/
│ ├── benchmark_mix_vs_sp.R
│ ├── small_sample_benchmark.ipynb
│ └── stochastic_ld_benchmark.ipynb
├── man/
│ ├── FinemappingConvergence.Rd
│ ├── N2finemapping.Rd
│ ├── N3finemapping.Rd
│ ├── SummaryConsistency.Rd
│ ├── absolute.order.Rd
│ ├── add_delta_features.Rd
│ ├── block_coordinate_ascent.Rd
│ ├── calculate_posterior_moments_mixture_common.Rd
│ ├── check_alpha_pip_cycle_convergence.Rd
│ ├── cleanup_extra_fields.Rd
│ ├── coef.mr.ash.Rd
│ ├── coef.susie.Rd
│ ├── collect_ash_diag.Rd
│ ├── compare_ash_methods.Rd
│ ├── compute_marginal_bhat_shat.Rd
│ ├── compute_suff_stat.Rd
│ ├── data_small.Rd
│ ├── diagnose_ash_filter_archived_iter.Rd
│ ├── diagnose_bb_ash_iter.Rd
│ ├── estimate_s_rss.Rd
│ ├── extract_bb_ash_features.Rd
│ ├── format_extra_diag.Rd
│ ├── format_sigma2_summary.Rd
│ ├── get.full.posterior.Rd
│ ├── get_alpha_l.Rd
│ ├── get_cs_correlation.Rd
│ ├── get_objective.Rd
│ ├── get_posterior_mean_l.Rd
│ ├── get_posterior_mean_sum.Rd
│ ├── get_posterior_moments_l.Rd
│ ├── get_prior_variance_l.Rd
│ ├── get_slot_weight.Rd
│ ├── ibss_finalize.Rd
│ ├── ibss_initialize.Rd
│ ├── is_symmetric_matrix.Rd
│ ├── kriging_rss.Rd
│ ├── label_diag_truth.Rd
│ ├── loglik_mixture_common.Rd
│ ├── mr.ash.Rd
│ ├── mr.ash.rss.Rd
│ ├── path.order.Rd
│ ├── post_loglik_prior_hook.Rd
│ ├── pre_loglik_prior_hook.Rd
│ ├── predict.mr.ash.Rd
│ ├── predict.susie.Rd
│ ├── print.summary.susie_post_outcome_configuration.Rd
│ ├── resolve_mixture_prior.Rd
│ ├── safe_cor.Rd
│ ├── safe_cov2cor.Rd
│ ├── scale_design_matrix.Rd
│ ├── set_prior_variance_l.Rd
│ ├── slot_prior_betabinom.Rd
│ ├── summary.susie.Rd
│ ├── summary.susie_post_outcome_configuration.Rd
│ ├── susie.Rd
│ ├── susieR-package.Rd
│ ├── susie_auto.Rd
│ ├── susie_get_methods.Rd
│ ├── susie_init_coef.Rd
│ ├── susie_plot_changepoint.Rd
│ ├── susie_plots.Rd
│ ├── susie_post_outcome_configuration.Rd
│ ├── susie_rss.Rd
│ ├── susie_rss_lambda.Rd
│ ├── susie_ss.Rd
│ ├── susie_trendfilter.Rd
│ ├── susie_workhorse.Rd
│ ├── univar.order.Rd
│ ├── univariate_regression.Rd
│ └── unmappable_data.Rd
├── pixi.toml
├── src/
│ ├── Makevars
│ ├── Makevars.win
│ ├── caisa.cpp
│ ├── cpp11.cpp
│ ├── mr_ash.h
│ ├── mr_ash_rss.cpp
│ └── mr_ash_rss.h
├── tests/
│ ├── README.md
│ ├── testthat/
│ │ ├── helper_nig_reference.R
│ │ ├── helper_reference.R
│ │ ├── helper_testthat.R
│ │ ├── reference/
│ │ │ ├── test_susie_auto_reference.R
│ │ │ ├── test_susie_nig_reference.R
│ │ │ ├── test_susie_reference.R
│ │ │ ├── test_susie_rss_lambda_reference.R
│ │ │ ├── test_susie_rss_reference.R
│ │ │ └── test_susie_ss_reference.R
│ │ ├── test_X_centering.R
│ │ ├── test_coef_predict.R
│ │ ├── test_compute_marginal_bhat_shat.R
│ │ ├── test_generic_methods.R
│ │ ├── test_ibss.R
│ │ ├── test_individual_data_methods.R
│ │ ├── test_l_greedy.R
│ │ ├── test_mixture_prior.R
│ │ ├── test_mr_ash_equivalence.R
│ │ ├── test_plotting.R
│ │ ├── test_post_outcome_configuration_summary.R
│ │ ├── test_refinement.R
│ │ ├── test_rss_lambda_methods.R
│ │ ├── test_rss_mismatch.R
│ │ ├── test_rss_utils.R
│ │ ├── test_single_effect_regression.R
│ │ ├── test_slot_prior.R
│ │ ├── test_slot_weights.R
│ │ ├── test_sparse_multiplication.R
│ │ ├── test_sufficient_stats_methods.R
│ │ ├── test_summary_print.R
│ │ ├── test_susie.R
│ │ ├── test_susie_ash_ss_equivalence.R
│ │ ├── test_susie_auto.R
│ │ ├── test_susie_constructors.R
│ │ ├── test_susie_get_functions.R
│ │ ├── test_susie_small.R
│ │ ├── test_susie_utils.R
│ │ ├── test_susie_workhorse.R
│ │ ├── test_trendfilter.R
│ │ └── test_univariate_regression.R
│ └── testthat.R
└── vignettes/
├── announcements.Rmd
├── finemapping.Rmd
├── finemapping_summary_statistics.Rmd
├── l0_initialization.Rmd
├── mwe.Rmd
├── small_sample.Rmd
├── sparse_susie_eval.Rmd
├── susie_refine.Rmd
├── susie_rss.Rmd
├── susie_unmappable_effects.Rmd
├── susierss_diagnostic.Rmd
└── trend_filtering.Rmd
SYMBOL INDEX (29 symbols across 7 files)
FILE: inst/code/dap-g.py
function write_dap_full (line 7) | def write_dap_full(x,y,prefix,r):
function run_dap_full (line 13) | def run_dap_full(prefix, args):
function write_dap_ss (line 17) | def write_dap_ss(z,prefix):
function run_dap_z (line 23) | def run_dap_z(ld, prefix, args):
function extract_dap_output (line 27) | def extract_dap_output(prefix):
function dap_single (line 49) | def dap_single(x, y, prefix, r, args):
function dap_single_z (line 54) | def dap_single_z(z, ld, prefix, args):
function dap_batch (line 59) | def dap_batch(X, Y, prefix, *args):
function dap_batch_z (line 62) | def dap_batch_z(z, ld, prefix, *args):
FILE: inst/code/monitor_memory.py
class ProcessTimer (line 29) | class ProcessTimer:
method __init__ (line 30) | def __init__(self, command, interval = 1):
method execute (line 35) | def execute(self):
method poll (line 49) | def poll(self):
method is_running (line 95) | def is_running(self):
method check_execution_state (line 98) | def check_execution_state(self):
method close (line 107) | def close(self,kill=False):
function takewhile_excluding (line 118) | def takewhile_excluding(iterable, value = ['|', '<', '>']):
FILE: src/caisa.cpp
function random_order (line 9) | [[cpp11::register]]
function caisa_cpp (line 15) | [[cpp11::register]]
FILE: src/cpp11.cpp
function _susieR_random_order (line 10) | SEXP _susieR_random_order(SEXP p, SEXP numiter) {
function _susieR_caisa_cpp (line 17) | SEXP _susieR_caisa_cpp(SEXP X, SEXP w, SEXP sa2, SEXP pi_init, SEXP beta...
function _susieR_mr_ash_rss_cpp (line 24) | SEXP _susieR_mr_ash_rss_cpp(SEXP bhat, SEXP shat, SEXP z, SEXP R, SEXP v...
function R_init_susieR (line 39) | attribute_visible void R_init_susieR(DllInfo* dll){
FILE: src/mr_ash.h
function random_order_impl (line 9) | inline arma::uvec random_order_impl(int p, int numiter) {
function outerAddition (line 17) | inline arma::mat outerAddition(const arma::vec& a, const arma::vec& b) {
function updatebetaj (line 25) | inline void updatebetaj(const arma::vec& xj, double wj,
FILE: src/mr_ash_rss.cpp
function mr_ash_rss_cpp (line 9) | [[cpp11::register]]
FILE: src/mr_ash_rss.h
function softmax_rss (line 20) | inline vec softmax_rss(const vec& x) {
variable w1_t (line 126) | mat w1_t(p, K, fill::zeros);
Condensed preview — 222 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (2,137K chars).
[
{
"path": ".Rbuildignore",
"chars": 394,
"preview": "^docs$\n^.gitignore$\n^appveyor\\.yml$\n^_pkgdown\\.yml$\n^Makefile$\n^\\.circleci$\n^susieR\\.Rproj$\n^\\.Rproj\\.user$\n^\\.travis\\.y"
},
{
"path": ".github/dependabot.yml",
"chars": 117,
"preview": "version: 2\nupdates:\n - package-ecosystem: \"github-actions\"\n directory: \"/\"\n schedule:\n interval: \"weekly\""
},
{
"path": ".github/rattler-build_container.df",
"chars": 305,
"preview": "FROM ghcr.io/prefix-dev/pixi:latest\n\nSHELL [\"/bin/bash\", \"-c\"]\nRUN apt-get update\nRUN apt-get install -y libgl1 ca-certi"
},
{
"path": ".github/recipe/recipe.yaml",
"chars": 1346,
"preview": "context:\n version: VERSION_PLACEHOLDER\n\npackage:\n name: r-susier\n version: ${{ version }}\n\nsource:\n path: susieR-${{"
},
{
"path": ".github/recipe/variant_r44.yaml",
"chars": 216,
"preview": "MACOSX_DEPLOYMENT_TARGET:\n - '11.0'\nc_stdlib_version:\n - if: linux\n then: 2.17\n - if: osx\n then: 11.0\nc_stdlib:"
},
{
"path": ".github/recipe/variant_r45.yaml",
"chars": 216,
"preview": "MACOSX_DEPLOYMENT_TARGET:\n - '11.0'\nc_stdlib_version:\n - if: linux\n then: 2.17\n - if: osx\n then: 11.0\nc_stdlib:"
},
{
"path": ".github/workflows/ci.yml",
"chars": 2230,
"preview": "name: Continuous Integration\n\non:\n push:\n branches: master\n pull_request:\n paths-ignore:\n - .github/*\n "
},
{
"path": ".github/workflows/conda_build.yml",
"chars": 6925,
"preview": "name: Build conda package\n\non:\n release:\n types: [published]\n workflow_dispatch:\n inputs:\n version:\n "
},
{
"path": ".github/workflows/dispatch_pkgdown_build.yml",
"chars": 995,
"preview": "name: Deploy website\non:\n push:\n branches: [\"master\"]\n workflow_dispatch:\n\nenv:\n GITHUB_TOKEN: ${{ secrets.GITHUB_"
},
{
"path": ".github/workflows/release.yml",
"chars": 6118,
"preview": "name: Upload new release\n\non:\n push:\n branches: [master]\n paths: [DESCRIPTION]\n workflow_dispatch:\n inputs:\n "
},
{
"path": ".gitignore",
"chars": 222,
"preview": "**/.Rhistory\n**/.DS_Store\n**/.Rapp.history\n**/.ipynb_checkpoints\n**/.virtual_documents\nsusieR.Rproj\n.Rproj.user\n.RData\n."
},
{
"path": "DESCRIPTION",
"chars": 2038,
"preview": "Encoding: UTF-8\nType: Package\nPackage: susieR\nTitle: Sum of Single Effects Linear Regression\nDescription: Implements met"
},
{
"path": "LICENSE",
"chars": 189,
"preview": "YEAR: 2017-2022\nCOPYRIGHT HOLDER: Gao Wang, Peter Carbonetto, Yuxin Zou, Kaiqian Zhang, Matthew Stephens\nORGANIZATION: G"
},
{
"path": "LICENSE.md",
"chars": 1637,
"preview": "Copyright (c) 2017-2022, Gao Wang, Peter Carbonetto, Yuxin Zou, Kaiqian Zhang, \nMatthew Stephens. \nAll rights reserved. "
},
{
"path": "Makefile",
"chars": 3918,
"preview": "# Makefile for susieR package\n.PHONY: all install document test test-coverage pkgdown lint style clean deep-clean check "
},
{
"path": "NAMESPACE",
"chars": 3482,
"preview": "# Generated by roxygen2: do not edit by hand\n\nS3method(coef,mr.ash)\nS3method(coef,susie)\nS3method(get_objective,default)"
},
{
"path": "R/cpp11.R",
"chars": 754,
"preview": "# Generated by cpp11: do not edit by hand\n\nrandom_order <- function(p, numiter) {\n .Call(`_susieR_random_order`, p, num"
},
{
"path": "R/diagnosis_reports.R",
"chars": 24262,
"preview": "# Diagnostic functions for SuSiE-ash filter\n#\n# Per-iteration functions (called from susie_utils.R, return data.frame):\n"
},
{
"path": "R/example_dataset.R",
"chars": 7612,
"preview": "#' @name N2finemapping\n#'\n#' @title Simulated Fine-mapping Data with Two Effect Variables\n#'\n#' @docType data\n#'\n#' @des"
},
{
"path": "R/generic_methods.R",
"chars": 15146,
"preview": "# =============================================================================\n# DATA INITIALIZATION & CONFIGURATION\n#\n"
},
{
"path": "R/individual_data_methods.R",
"chars": 16648,
"preview": "# =============================================================================\n# DATA INITIALIZATION & CONFIGURATION\n#\n"
},
{
"path": "R/iterative_bayesian_stepwise_selection.R",
"chars": 15185,
"preview": "# =============================================================================\n# IBSS INITIALIZATION\n#\n# Initializes th"
},
{
"path": "R/mixture_prior.R",
"chars": 6685,
"preview": "# =============================================================================\n# FIXED MIXTURE PRIOR\n#\n# Shared impleme"
},
{
"path": "R/model_methods.R",
"chars": 14061,
"preview": "# =============================================================================\n# MODEL-LEVEL S3 METHODS\n\n# S3 generics "
},
{
"path": "R/mr.ash.R",
"chars": 20493,
"preview": "#' @title Multiple Regression with Adaptive Shrinkage\n#' \n#' @description Model fitting algorithms for Multiple Regressi"
},
{
"path": "R/mr.ash.rss.R",
"chars": 5381,
"preview": "#' @title Bayesian Multiple Regression with Mixture-of-Normals Prior (RSS)\n#'\n#' @description This function performs Bay"
},
{
"path": "R/predict.susie.R",
"chars": 2236,
"preview": "#' @title Extract regression coefficients from susie fit\n#'\n#' @param object A susie fit.\n#'\n#' @param \\dots Additional "
},
{
"path": "R/refinement.R",
"chars": 6174,
"preview": "#' Block coordinate ascent for iterative model refinement.\n#'\n#' Generic framework for post-convergence refinement of fi"
},
{
"path": "R/rss_lambda_methods.R",
"chars": 13543,
"preview": "# =============================================================================\n# OMEGA OPTIMIZATION TOLERANCES\n#\n# Name"
},
{
"path": "R/rss_mismatch.R",
"chars": 16522,
"preview": "# RSS R-reference mismatch handling.\n#\n# Single home for code that targets the discrepancy between the\n# supplied R refe"
},
{
"path": "R/single_effect_regression.R",
"chars": 11882,
"preview": "# =============================================================================\n# SINGLE EFFECT REGRESSION\n#\n# Performs "
},
{
"path": "R/slot_prior.R",
"chars": 7503,
"preview": "#' @title Slot Activity Prior for SuSiE\n#'\n#' @description Construct a prior specification for the slot activity\n#' mo"
},
{
"path": "R/sparse_multiplication.R",
"chars": 3367,
"preview": "# @title Computes standardized.X %*% b using sparse multiplication trick\n# @param X an n by p unstandardized matrix with"
},
{
"path": "R/ss_mixture_methods.R",
"chars": 5998,
"preview": "# =============================================================================\n# SS MIXTURE PANEL METHODS\n#\n# Class c(\""
},
{
"path": "R/sufficient_stats_methods.R",
"chars": 23476,
"preview": "# =============================================================================\n# DATA INITIALIZATION & CONFIGURATION\n#\n"
},
{
"path": "R/summary.susie.R",
"chars": 2097,
"preview": "#' @title Summarize Susie Fit.\n#'\n#' @description \\code{summary} method for the \\dQuote{susie} class.\n#'\n#' @param objec"
},
{
"path": "R/susie.R",
"chars": 49424,
"preview": "# =============================================================================\n# SuSiE WITH INDIVIDUAL-LEVEL DATA\n# ==="
},
{
"path": "R/susieR-package.R",
"chars": 202,
"preview": "#' @keywords internal\n\"_PACKAGE\"\n\n# The following block is used by usethis to automatically manage\n# roxygen namespace t"
},
{
"path": "R/susie_auto.R",
"chars": 5705,
"preview": "#' @title Attempt at Automating SuSiE for Hard Problems\n#'\n#' @description \\code{susie_auto} is an attempt to automate r"
},
{
"path": "R/susie_constructors.R",
"chars": 55194,
"preview": "# =============================================================================\n# INDIVIDUAL-LEVEL DATA CONSTRUCTOR\n#\n# "
},
{
"path": "R/susie_get_functions.R",
"chars": 18897,
"preview": "#' @rdname susie_get_methods\n#'\n#' @title Inferences From Fitted SuSiE Model\n#'\n#' @description These functions access b"
},
{
"path": "R/susie_plot.R",
"chars": 13602,
"preview": "#' @rdname susie_plots\n#'\n#' @title SuSiE Plots.\n#'\n#' @description \\code{susie_plot} produces a per-variable summary of"
},
{
"path": "R/susie_post_outcome_configuration.R",
"chars": 36227,
"preview": "# Post-hoc causal-configuration probabilities for one or more SuSiE-class fits.\n#\n# Two algorithms live here, exposed th"
},
{
"path": "R/susie_rss_utils.R",
"chars": 25728,
"preview": "# =============================================================================\n# FUNDAMENTAL COMPUTATIONS\n#\n# Basic mat"
},
{
"path": "R/susie_trendfilter.R",
"chars": 4457,
"preview": "#' @title Apply susie to trend filtering (especially changepoint\n#' problems), a type of non-parametric regression.\n#'"
},
{
"path": "R/susie_trendfilter_utils.R",
"chars": 2801,
"preview": "# @title Compute unscaled X %*% b using the special structure of trend\n# filtering\n# @param order is the order of tren"
},
{
"path": "R/susie_utils.R",
"chars": 86206,
"preview": "# =============================================================================\n# FUNDAMENTAL BUILDING BLOCKS\n#\n# Basic "
},
{
"path": "R/susie_workhorse.R",
"chars": 4372,
"preview": "#' SuSiE workhorse function\n#'\n#' Main orchestration for the IBSS algorithm. When `params$L_greedy`\n#' is non-NULL, runs"
},
{
"path": "R/univariate_regression.R",
"chars": 13628,
"preview": "#' @title Perform Univariate Linear Regression Separately for Columns of X\n#' \n#' @description This function performs th"
},
{
"path": "README.md",
"chars": 5117,
"preview": "# susieR\n\n[](https://github.com/stephensl"
},
{
"path": "_pkgdown.yml",
"chars": 1908,
"preview": "url: https://stephenslab.github.io/susieR\n\ntemplate:\n bootstrap: 5\n light-switch: true\n math-rendering: katex\n bslib:\n "
},
{
"path": "inst/CITATION",
"chars": 5052,
"preview": "citHeader(\"To cite the susieR package, please use both:\")\n\nbibentry(bibtype = \"Article\",\n title = paste(\"A simpl"
},
{
"path": "inst/analysis/optimize.Rmd",
"chars": 1215,
"preview": "---\ntitle: \"optimize\"\nauthor: \"Matthew Stephens\"\ndate: \"4/15/2018\"\noutput: html_document\n---\n\n```{r setup, include=FALSE"
},
{
"path": "inst/analysis/test_susie_auto.Rmd",
"chars": 956,
"preview": "---\ntitle: \"Test susie auto\"\nauthor: \"Matthew Stephens\"\ndate: \"5/2/2018\"\noutput: html_document\n---\n\n```{r setup, include"
},
{
"path": "inst/analysis/testing.Rmd",
"chars": 3153,
"preview": "---\ntitle: \"test.Rmd\"\nauthor: \"Matthew Stephens\"\ndate: \"4/14/2018\"\noutput: html_document\n---\n\n```{r setup, include=FALSE"
},
{
"path": "inst/code/caviar.R",
"chars": 2131,
"preview": "#!/usr/bin/env Rscript\nlibrary(dplyr)\nlibrary(readr)\nlibrary(magrittr)\n\n#' CAVIAR I/O\nwrite_caviar_sumstats <- function("
},
{
"path": "inst/code/compute_ss_memory.R",
"chars": 678,
"preview": "# export MEM_CHECK_INTERVAL=0.01\n# python3 monitor_memory.py Rscript compute_ss_memory.R\n#\n# NOTES:\n#\n# - Without any im"
},
{
"path": "inst/code/dap-g.py",
"chars": 3177,
"preview": "#!/usr/bin/env python3\nimport sys\nimport subprocess\nimport pandas as pd\nimport numpy as np\n\ndef write_dap_full(x,y,prefi"
},
{
"path": "inst/code/finemap.R",
"chars": 3467,
"preview": "#!/usr/bin/env Rscript\nlibrary(dplyr)\nlibrary(readr)\nlibrary(magrittr)\n\n#' FINEMAP I/O\nwrite_finemap_sumstats <- functio"
},
{
"path": "inst/code/finemap_1p4.R",
"chars": 4005,
"preview": "#!/usr/bin/env Rscript\nlibrary(dplyr)\nlibrary(readr)\nlibrary(magrittr)\n\n#' FINEMAP I/O\nwrite_finemap_sumstats <- functio"
},
{
"path": "inst/code/gen_original_results.R",
"chars": 2126,
"preview": "## results from original susie\ndevtools::install_github(\"stephenslab/susieR\")\nlibrary(susieR)\n\ncreate_sparsity_mat = fun"
},
{
"path": "inst/code/monitor_memory.py",
"chars": 5134,
"preview": "#!/usr/bin/env python3\n#\n# Copyright (c) 2012 Realz Slaw, 2017 Gao Wang\n#\n# Permission is hereby granted, free of charge"
},
{
"path": "inst/code/python_example/N3finemapping_python.ipynb",
"chars": 14889,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"c7ac5176-713f-4752-b055-607a22a6dc3e\",\n \"metadata\": {},\n \"so"
},
{
"path": "inst/code/python_example/environment.yml",
"chars": 4702,
"preview": "name: susie_rpy2\nchannels:\n - conda-forge\n - defaults\ndependencies:\n - _libgcc_mutex=0.1=conda_forge\n - _openmp_mute"
},
{
"path": "inst/code/simulate_lambda_pop_ld_bias.R",
"chars": 25255,
"preview": "#!/usr/bin/env Rscript\n\n# Simulate GTEx-like eQTL summary statistics from real-LD genotype matrices,\n# then compare in-s"
},
{
"path": "inst/code/small_sim.R",
"chars": 1972,
"preview": "# Small script to evaluate the NIG prior version of SuSiE in\n# simulated data sets.\nlibrary(matrixStats)\nlibrary(susieR)"
},
{
"path": "inst/code/sparse_matrix_strategy.Rmd",
"chars": 5147,
"preview": "---\ntitle: \"Sparse matrix multiplication strategy\"\nauthor: \"Kaiqian Zhang\"\ndate: \"`r Sys.Date()`\"\noutput: rmarkdown::htm"
},
{
"path": "inst/code/summarize_small_sim.R",
"chars": 3500,
"preview": "# Script to summarize the results of running small_sim.R.\nlibrary(ggplot2)\nlibrary(cowplot)\nload(\"../datafiles/small_sim"
},
{
"path": "inst/code/susie_memory.R",
"chars": 718,
"preview": "# export MEM_CHECK_INTERVAL=0.01\n# python3 monitor_memory.py Rscript susie_memory.R\n#\n# NOTES:\n#\n# - Without any improve"
},
{
"path": "inst/code/susie_rss_memory.R",
"chars": 923,
"preview": "# export MEM_CHECK_INTERVAL=0.01\n# python3 monitor_memory.py Rscript susie_rss_memory.R\n#\n# NOTES:\n#\n# - Without any imp"
},
{
"path": "inst/misc/README_susie_v2.md",
"chars": 5691,
"preview": "# susieR 2.0 Architecture\n\n## Overview\n\nsusieR 2.0 implements a unified architecture incorporating various extensions to"
},
{
"path": "inst/misc/format_r_code.sh",
"chars": 1373,
"preview": "#!/bin/bash\n\n# Function to display error messages and exit the script\ndisplay_error() {\n echo \"Error: $1\"\n exit 1\n"
},
{
"path": "inst/misc/post-commit.sh",
"chars": 706,
"preview": "#!/bin/bash\n#\n# This script will be executed every time you run \"git commit\". It\n# will commit changes made to package D"
},
{
"path": "inst/misc/pre-commit.sh",
"chars": 1478,
"preview": "#!/bin/bash\n#\n# This script will be executed every time you run \"git commit\". It\n# will update the 4th digit of package "
},
{
"path": "inst/misc/uncrustify_default.cfg",
"chars": 149351,
"preview": "# Downloaded from https://raw.githubusercontent.com/uncrustify/uncrustify/refs/heads/master/documentation/htdocs/default"
},
{
"path": "inst/notebooks/benchmark_mix_vs_sp.R",
"chars": 15136,
"preview": "# Benchmark: Multi-panel mixture vs single-panel\n#\n# Metrics:\n# ELBO: mixture ELBO vs best single-panel ELBO (should "
},
{
"path": "inst/notebooks/small_sample_benchmark.ipynb",
"chars": 58156,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"id\": \"1a065c06\",\n \"metadata\": {},\n \"source\": [\n \"# Small-Sample"
},
{
"path": "inst/notebooks/stochastic_ld_benchmark.ipynb",
"chars": 28600,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": \"# Stochastic LD Correction Benchmark\\n\\n#"
},
{
"path": "man/FinemappingConvergence.Rd",
"chars": 1262,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/example_dataset.R\n\\docType{data}\n\\name{Fin"
},
{
"path": "man/N2finemapping.Rd",
"chars": 1744,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/example_dataset.R\n\\docType{data}\n\\name{N2f"
},
{
"path": "man/N3finemapping.Rd",
"chars": 1710,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/example_dataset.R\n\\docType{data}\n\\name{N3f"
},
{
"path": "man/SummaryConsistency.Rd",
"chars": 1127,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/example_dataset.R\n\\docType{data}\n\\name{Sum"
},
{
"path": "man/absolute.order.Rd",
"chars": 851,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/univariate_regression.R\n\\name{absolute.ord"
},
{
"path": "man/add_delta_features.Rd",
"chars": 1069,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/diagnosis_reports.R\n\\name{add_delta_featur"
},
{
"path": "man/block_coordinate_ascent.Rd",
"chars": 2040,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/refinement.R\n\\name{block_coordinate_ascent"
},
{
"path": "man/calculate_posterior_moments_mixture_common.Rd",
"chars": 993,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/mixture_prior.R\n\\name{calculate_posterior_"
},
{
"path": "man/check_alpha_pip_cycle_convergence.Rd",
"chars": 485,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_utils.R\n\\name{check_alpha_pip_cycle_"
},
{
"path": "man/cleanup_extra_fields.Rd",
"chars": 543,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/generic_methods.R\n\\name{cleanup_extra_fiel"
},
{
"path": "man/coef.mr.ash.Rd",
"chars": 975,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/mr.ash.R\n\\name{coef.mr.ash}\n\\alias{coef.mr"
},
{
"path": "man/coef.susie.Rd",
"chars": 547,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/predict.susie.R\n\\name{coef.susie}\n\\alias{c"
},
{
"path": "man/collect_ash_diag.Rd",
"chars": 1727,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/diagnosis_reports.R\n\\name{collect_ash_diag"
},
{
"path": "man/compare_ash_methods.Rd",
"chars": 663,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/diagnosis_reports.R\n\\name{compare_ash_meth"
},
{
"path": "man/compute_marginal_bhat_shat.Rd",
"chars": 2081,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/univariate_regression.R\n\\name{compute_marg"
},
{
"path": "man/compute_suff_stat.Rd",
"chars": 945,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_rss_utils.R\n\\name{compute_suff_stat}"
},
{
"path": "man/data_small.Rd",
"chars": 925,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/example_dataset.R\n\\docType{data}\n\\name{dat"
},
{
"path": "man/diagnose_ash_filter_archived_iter.Rd",
"chars": 541,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/diagnosis_reports.R\n\\name{diagnose_ash_fil"
},
{
"path": "man/diagnose_bb_ash_iter.Rd",
"chars": 924,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/diagnosis_reports.R\n\\name{diagnose_bb_ash_"
},
{
"path": "man/estimate_s_rss.Rd",
"chars": 2002,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_rss_utils.R\n\\name{estimate_s_rss}\n\\a"
},
{
"path": "man/extract_bb_ash_features.Rd",
"chars": 705,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/diagnosis_reports.R\n\\name{extract_bb_ash_f"
},
{
"path": "man/format_extra_diag.Rd",
"chars": 478,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/model_methods.R\n\\name{format_extra_diag}\n\\"
},
{
"path": "man/format_sigma2_summary.Rd",
"chars": 463,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/model_methods.R\n\\name{format_sigma2_summar"
},
{
"path": "man/get.full.posterior.Rd",
"chars": 1469,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/mr.ash.R\n\\name{get.full.posterior}\n\\alias{"
},
{
"path": "man/get_alpha_l.Rd",
"chars": 310,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/model_methods.R\n\\name{get_alpha_l}\n\\alias{"
},
{
"path": "man/get_cs_correlation.Rd",
"chars": 1368,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_get_functions.R\n\\name{get_cs_correla"
},
{
"path": "man/get_objective.Rd",
"chars": 484,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/model_methods.R\n\\name{get_objective}\n\\alia"
},
{
"path": "man/get_posterior_mean_l.Rd",
"chars": 351,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/model_methods.R\n\\name{get_posterior_mean_l"
},
{
"path": "man/get_posterior_mean_sum.Rd",
"chars": 356,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/model_methods.R\n\\name{get_posterior_mean_s"
},
{
"path": "man/get_posterior_moments_l.Rd",
"chars": 376,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/model_methods.R\n\\name{get_posterior_moment"
},
{
"path": "man/get_prior_variance_l.Rd",
"chars": 299,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/model_methods.R\n\\name{get_prior_variance_l"
},
{
"path": "man/get_slot_weight.Rd",
"chars": 888,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/generic_methods.R\n\\name{get_slot_weight}\n\\"
},
{
"path": "man/ibss_finalize.Rd",
"chars": 763,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/iterative_bayesian_stepwise_selection.R\n\\n"
},
{
"path": "man/ibss_initialize.Rd",
"chars": 597,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/iterative_bayesian_stepwise_selection.R\n\\n"
},
{
"path": "man/is_symmetric_matrix.Rd",
"chars": 366,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_utils.R\n\\name{is_symmetric_matrix}\n\\"
},
{
"path": "man/kriging_rss.Rd",
"chars": 2287,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_rss_utils.R\n\\name{kriging_rss}\n\\alia"
},
{
"path": "man/label_diag_truth.Rd",
"chars": 614,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/diagnosis_reports.R\n\\name{label_diag_truth"
},
{
"path": "man/loglik_mixture_common.Rd",
"chars": 952,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/mixture_prior.R\n\\name{loglik_mixture_commo"
},
{
"path": "man/mr.ash.Rd",
"chars": 9305,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/mr.ash.R\n\\name{mr.ash}\n\\alias{mr.ash}\n\\tit"
},
{
"path": "man/mr.ash.rss.Rd",
"chars": 4092,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/mr.ash.rss.R\n\\name{mr.ash.rss}\n\\alias{mr.a"
},
{
"path": "man/path.order.Rd",
"chars": 1347,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/univariate_regression.R\n\\name{path.order}\n"
},
{
"path": "man/post_loglik_prior_hook.Rd",
"chars": 455,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/single_effect_regression.R\n\\name{post_logl"
},
{
"path": "man/pre_loglik_prior_hook.Rd",
"chars": 468,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/single_effect_regression.R\n\\name{pre_logli"
},
{
"path": "man/predict.mr.ash.Rd",
"chars": 1589,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/mr.ash.R\n\\name{predict.mr.ash}\n\\alias{pred"
},
{
"path": "man/predict.susie.Rd",
"chars": 1100,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/predict.susie.R\n\\name{predict.susie}\n\\alia"
},
{
"path": "man/print.summary.susie_post_outcome_configuration.Rd",
"chars": 728,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_post_outcome_configuration.R\n\\name{p"
},
{
"path": "man/resolve_mixture_prior.Rd",
"chars": 704,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/mixture_prior.R\n\\name{resolve_mixture_prio"
},
{
"path": "man/safe_cor.Rd",
"chars": 767,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_utils.R\n\\name{safe_cor}\n\\alias{safe_"
},
{
"path": "man/safe_cov2cor.Rd",
"chars": 527,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_utils.R\n\\name{safe_cov2cor}\n\\alias{s"
},
{
"path": "man/scale_design_matrix.Rd",
"chars": 689,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_utils.R\n\\name{scale_design_matrix}\n\\"
},
{
"path": "man/set_prior_variance_l.Rd",
"chars": 302,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/model_methods.R\n\\name{set_prior_variance_l"
},
{
"path": "man/slot_prior_betabinom.Rd",
"chars": 3629,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/slot_prior.R\n\\name{slot_prior_betabinom}\n\\"
},
{
"path": "man/summary.susie.Rd",
"chars": 643,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/summary.susie.R\n\\name{summary.susie}\n\\alia"
},
{
"path": "man/summary.susie_post_outcome_configuration.Rd",
"chars": 3371,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_post_outcome_configuration.R\n\\name{s"
},
{
"path": "man/susie.Rd",
"chars": 16136,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie.R\n\\name{susie}\n\\alias{susie}\n\\title{"
},
{
"path": "man/susieR-package.Rd",
"chars": 1515,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susieR-package.R\n\\docType{package}\n\\name{s"
},
{
"path": "man/susie_auto.Rd",
"chars": 2932,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_auto.R\n\\name{susie_auto}\n\\alias{susi"
},
{
"path": "man/susie_get_methods.Rd",
"chars": 6255,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_get_functions.R\n\\name{susie_get_obje"
},
{
"path": "man/susie_init_coef.Rd",
"chars": 1019,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_get_functions.R\n\\name{susie_init_coe"
},
{
"path": "man/susie_plot_changepoint.Rd",
"chars": 1290,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_plot.R\n\\name{susie_plot_changepoint}"
},
{
"path": "man/susie_plots.Rd",
"chars": 3374,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_plot.R\n\\name{susie_plot}\n\\alias{susi"
},
{
"path": "man/susie_post_outcome_configuration.Rd",
"chars": 6095,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_post_outcome_configuration.R\n\\name{s"
},
{
"path": "man/susie_rss.Rd",
"chars": 15962,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie.R\n\\name{susie_rss}\n\\alias{susie_rss}"
},
{
"path": "man/susie_rss_lambda.Rd",
"chars": 9372,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie.R\n\\name{susie_rss_lambda}\n\\alias{sus"
},
{
"path": "man/susie_ss.Rd",
"chars": 11364,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie.R\n\\name{susie_ss}\n\\alias{susie_ss}\n\\"
},
{
"path": "man/susie_trendfilter.Rd",
"chars": 3238,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_trendfilter.R\n\\name{susie_trendfilte"
},
{
"path": "man/susie_workhorse.Rd",
"chars": 792,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/susie_workhorse.R\n\\name{susie_workhorse}\n\\"
},
{
"path": "man/univar.order.Rd",
"chars": 948,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/univariate_regression.R\n\\name{univar.order"
},
{
"path": "man/univariate_regression.Rd",
"chars": 1842,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/univariate_regression.R\n\\name{univariate_r"
},
{
"path": "man/unmappable_data.Rd",
"chars": 963,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/example_dataset.R\n\\docType{data}\n\\name{unm"
},
{
"path": "pixi.toml",
"chars": 1987,
"preview": "[workspace]\nname = \"r-susier\"\nchannels = [\"dnachun\", \"conda-forge\", \"bioconda\"]\nplatforms = [\"linux-64\", \"osx-arm64\"]\n\n["
},
{
"path": "src/Makevars",
"chars": 113,
"preview": "PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS)\nPKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)\n"
},
{
"path": "src/Makevars.win",
"chars": 114,
"preview": "PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) \nPKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)\n"
},
{
"path": "src/caisa.cpp",
"chars": 3639,
"preview": "#include <cpp11.hpp>\n#include <cpp11armadillo.hpp>\n#include \"mr_ash.h\"\n\nusing namespace cpp11;\nusing namespace arma;\n\n//"
},
{
"path": "src/cpp11.cpp",
"chars": 3797,
"preview": "// Generated by cpp11: do not edit by hand\n// clang-format off\n\n\n#include \"cpp11/declarations.hpp\"\n#include <R_ext/Visib"
},
{
"path": "src/mr_ash.h",
"chars": 1847,
"preview": "\n#ifndef MR_ASH_H\n#define MR_ASH_H\n#include <math.h>\n#include <cpp11armadillo.hpp>\n\n// Helper: build a random permutatio"
},
{
"path": "src/mr_ash_rss.cpp",
"chars": 1543,
"preview": "#include <cpp11.hpp>\n#include <cpp11armadillo.hpp>\n#include \"mr_ash_rss.h\"\n\nusing namespace cpp11;\nusing namespace arma;"
},
{
"path": "src/mr_ash_rss.h",
"chars": 12079,
"preview": "#ifndef MR_ASH_RSS_H\n#define MR_ASH_RSS_H\n\n#include <cpp11armadillo.hpp>\n#include <cmath>\n#include <vector>\n#include <st"
},
{
"path": "tests/README.md",
"chars": 596,
"preview": "# susieR Testing Framework\n\nThis directory contains the comprehensive test suite for the susieR 2.0 package, with **>1,0"
},
{
"path": "tests/testthat/helper_nig_reference.R",
"chars": 17283,
"preview": "# =============================================================================\n# HELPER FUNCTIONS FOR NIG REFERENCE COM"
},
{
"path": "tests/testthat/helper_reference.R",
"chars": 8809,
"preview": "# =============================================================================\n# HELPER FUNCTIONS FOR REFERENCE PACKAGE"
},
{
"path": "tests/testthat/helper_testthat.R",
"chars": 26272,
"preview": "# =============================================================================\n# HELPER FUNCTIONS FOR UNIT TESTS\n# ===="
},
{
"path": "tests/testthat/reference/test_susie_auto_reference.R",
"chars": 7734,
"preview": "# Source helper functions\nsource(file.path(\"..\", \"helper_reference.R\"), local = TRUE)\n\ncontext(\"susie_auto reference com"
},
{
"path": "tests/testthat/reference/test_susie_nig_reference.R",
"chars": 79634,
"preview": "# Source helper functions for NIG reference comparison\nsource(file.path(\"..\", \"helper_nig_reference.R\"), local = TRUE)\n\n"
},
{
"path": "tests/testthat/reference/test_susie_reference.R",
"chars": 82920,
"preview": "# Source helper functions\nsource(file.path(\"..\", \"helper_reference.R\"), local = TRUE)\n\ncontext(\"susie reference comparis"
},
{
"path": "tests/testthat/reference/test_susie_rss_lambda_reference.R",
"chars": 39827,
"preview": "# Source helper functions\nsource(file.path(\"..\", \"helper_reference.R\"), local = TRUE)\n\ncontext(\"susie_rss with lambda re"
},
{
"path": "tests/testthat/reference/test_susie_rss_reference.R",
"chars": 36156,
"preview": "# Source helper functions\nsource(file.path(\"..\", \"helper_reference.R\"), local = TRUE)\n\ncontext(\"susie_rss reference comp"
},
{
"path": "tests/testthat/reference/test_susie_ss_reference.R",
"chars": 38089,
"preview": "# Source helper functions\nsource(file.path(\"..\", \"helper_reference.R\"), local = TRUE)\n\ncontext(\"susie_ss reference compa"
},
{
"path": "tests/testthat/test_X_centering.R",
"chars": 4555,
"preview": "# Test that susie_rss gives equivalent results for raw, centered,\n# and standardized X inputs.\n\ncontext(\"X centering equ"
},
{
"path": "tests/testthat/test_coef_predict.R",
"chars": 6750,
"preview": "context(\"coef and predict S3 methods\")\n\n# =============================================================================\n"
},
{
"path": "tests/testthat/test_compute_marginal_bhat_shat.R",
"chars": 3013,
"preview": "# compute_marginal_bhat_shat\n#\n# Per-position marginal OLS regression helper. Used by susieR's own\n# T = 1 SER path (cos"
},
{
"path": "tests/testthat/test_generic_methods.R",
"chars": 14576,
"preview": "context(\"Generic methods infrastructure\")\n\n# ==========================================================================="
},
{
"path": "tests/testthat/test_ibss.R",
"chars": 24996,
"preview": "context(\"Iterative Bayesian Stepwise Selection (IBSS)\")\n\n# ============================================================="
},
{
"path": "tests/testthat/test_individual_data_methods.R",
"chars": 13309,
"preview": "context(\"S3 methods for individual data class\")\n\n# ====================================================================="
},
{
"path": "tests/testthat/test_l_greedy.R",
"chars": 3190,
"preview": "# Greedy-L outer loop in susie_workhorse.\n# Contracts: (1) L_greedy = NULL is bit-identical to fixed-L susie.\n# (2) L_gr"
},
{
"path": "tests/testthat/test_mixture_prior.R",
"chars": 12597,
"preview": "# Tests for estimate_prior_method = \"fixed_mixture\"\n#\n# Key invariant: a K=1 mixture with grid = c(V) and weights = c(1)"
},
{
"path": "tests/testthat/test_mr_ash_equivalence.R",
"chars": 6702,
"preview": "# =============================================================================\n# Test: mr.ash vs mr.ash.rss Equivalence"
},
{
"path": "tests/testthat/test_plotting.R",
"chars": 24203,
"preview": "context(\"Plotting functions\")\n\n# =============================================================================\n# SUSIE_P"
},
{
"path": "tests/testthat/test_post_outcome_configuration_summary.R",
"chars": 9299,
"preview": "# Tests for `summary.susie_post_outcome_configuration` and its print\n# method. The numerical algorithms (susiex / coloc_"
},
{
"path": "tests/testthat/test_refinement.R",
"chars": 21322,
"preview": "context(\"Refinement unit tests\")\n\n# =============================================================================\n# BASI"
},
{
"path": "tests/testthat/test_rss_lambda_methods.R",
"chars": 42977,
"preview": "context(\"S3 methods for rss_lambda data class\")\n\n# ====================================================================="
},
{
"path": "tests/testthat/test_rss_mismatch.R",
"chars": 6669,
"preview": "context(\"RSS R-reference mismatch (R_mismatch correction)\")\n\n# ---- API surface guards ----\n\ntest_that(\"R_mismatch = 'ma"
},
{
"path": "tests/testthat/test_rss_utils.R",
"chars": 16226,
"preview": "context(\"RSS utility functions\")\n\n# =============================================================================\n# FUND"
},
{
"path": "tests/testthat/test_single_effect_regression.R",
"chars": 10393,
"preview": "context(\"Single Effect Regression\")\n\n# =============================================================================\n# S"
},
{
"path": "tests/testthat/test_slot_prior.R",
"chars": 5716,
"preview": "context(\"slot_prior class\")\n\ntest_that(\"slot_prior_poisson constructs correctly\", {\n sp <- suppressMessages(slot_prior_"
},
{
"path": "tests/testthat/test_slot_weights.R",
"chars": 4167,
"preview": "# Tests for slot_weights mechanism\n#\n# Key invariant: slot_weights = rep(1, L) must produce identical results\n# to the s"
},
{
"path": "tests/testthat/test_sparse_multiplication.R",
"chars": 13155,
"preview": "context(\"sparse multiplication utilities\")\n\n# =========================================================================="
},
{
"path": "tests/testthat/test_sufficient_stats_methods.R",
"chars": 26857,
"preview": "context(\"S3 methods for sufficient statistics (ss) data class\")\n\n# ====================================================="
},
{
"path": "tests/testthat/test_summary_print.R",
"chars": 7272,
"preview": "context(\"summary and print S3 methods\")\n\n# ============================================================================="
},
{
"path": "tests/testthat/test_susie.R",
"chars": 36183,
"preview": "context(\"Main susie interface functions\")\n\n# ==========================================================================="
},
{
"path": "tests/testthat/test_susie_ash_ss_equivalence.R",
"chars": 5872,
"preview": "# =============================================================================\n# Test: SuSiE-ash (filter-archived) Indi"
},
{
"path": "tests/testthat/test_susie_auto.R",
"chars": 20179,
"preview": "context(\"susie_auto unit tests\")\n\n# =============================================================================\n# ALGO"
},
{
"path": "tests/testthat/test_susie_constructors.R",
"chars": 59068,
"preview": "context(\"SuSiE Data Constructors\")\n\n# =============================================================================\n# IN"
},
{
"path": "tests/testthat/test_susie_get_functions.R",
"chars": 27854,
"preview": "context(\"susie_get_* functions\")\n\n# =============================================================================\n# Get "
},
{
"path": "tests/testthat/test_susie_small.R",
"chars": 427,
"preview": "context(\"test_susie_small.R\")\n\ntest_that(paste(\"check that ELBO is monotonically increasing for \",\n \"esti"
},
{
"path": "tests/testthat/test_susie_utils.R",
"chars": 62902,
"preview": "context(\"Utility functions for susieR\")\n\n# ============================================================================="
},
{
"path": "tests/testthat/test_susie_workhorse.R",
"chars": 18081,
"preview": "context(\"SuSiE Workhorse - Main Orchestration\")\n\n# ====================================================================="
},
{
"path": "tests/testthat/test_trendfilter.R",
"chars": 16559,
"preview": "context(\"Trend filtering\")\n\n# =============================================================================\n# BASIC FUNC"
},
{
"path": "tests/testthat/test_univariate_regression.R",
"chars": 24579,
"preview": "context(\"Univariate regression\")\n\n# =============================================================================\n# BASI"
},
{
"path": "tests/testthat.R",
"chars": 55,
"preview": "library(testthat)\nlibrary(susieR)\ntest_check(\"susieR\")\n"
},
{
"path": "vignettes/announcements.Rmd",
"chars": 4892,
"preview": "---\ntitle: \"News and Updates\"\noutput: \n rmarkdown::html_vignette:\n toc: true\n toc_depth: 3\nvignette: >\n %\\Vignet"
},
{
"path": "vignettes/finemapping.Rmd",
"chars": 7706,
"preview": "---\ntitle: \"Fine-mapping example\"\nauthor: \"Gao Wang\"\ndate: \"`r Sys.Date()`\"\noutput: rmarkdown::html_vignette\nvignette: >"
}
]
// ... and 22 more files (download for full content)
About this extraction
This page contains the full source code of the stephenslab/susieR GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 222 files (1.9 MB), approximately 601.0k tokens, and a symbol index with 29 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — a free GitHub repo-to-text converter for AI. Built by Nikandr Surkov.