SYMBOL INDEX (1534 symbols across 207 files) FILE: _ext/archive.py function archive_handler (line 6) | def archive_handler(app): function setup (line 25) | def setup(app): FILE: _ext/df_tables.py class DFTable (line 5) | class DFTable(CSVTable): method __init__ (line 9) | def __init__(self, name, arguments, options, content, lineno, method get_csv_data (line 15) | def get_csv_data(self): method run (line 18) | def run(self): function setup (line 42) | def setup(app): FILE: _ext/local_documenter.py class LocalModuleDocumenter (line 7) | class LocalModuleDocumenter(ModuleDocumenter): method import_object (line 19) | def import_object(self, *args): method get_module_members (line 27) | def get_module_members(self): class LocalFunctionDocumenter (line 38) | class LocalFunctionDocumenter(FunctionDocumenter): method format_name (line 39) | def format_name(self) -> str: function setup (line 47) | def setup(app): FILE: _ext/neuron_tag.py function _in_list (line 285) | def _in_list(cur_file, file_list): function _splitall (line 290) | def _splitall(path): function _get_explicit_override (line 307) | def _get_explicit_override(cur_file): function _get_page_override (line 453) | def _get_page_override(cur_file): class NeuronTag (line 586) | class NeuronTag(SphinxDirective): method run (line 588) | def run(self): method _render (line 682) | def _render(self, text): function setup (line 692) | def setup(app): FILE: _ext/sphinx_plotly_directive.py function save_plotly_figure (line 182) | def save_plotly_figure(fig, path): function assign_last_line_into_variable (line 207) | def assign_last_line_into_variable(code, variable_name): function create_directive_block (line 236) | def create_directive_block(name, arguments, options, content): function create_code_block (line 290) | def create_code_block(code, language=None): function strip_last_line (line 301) | def strip_last_line(code): function ends_with_show (line 324) | def ends_with_show(code): function _option_boolean (line 357) | def _option_boolean(arg): function _option_context (line 369) | def _option_context(arg): function _option_format (line 375) | def _option_format(arg): function _option_fig_vars (line 379) | def _option_fig_vars(arg): function mark_plot_labels (line 383) | def mark_plot_labels(app, document): class PlotlyDirective (line 416) | class PlotlyDirective(Directive): method run (line 441) | def run(self): function setup (line 456) | def setup(app): function contains_doctest (line 489) | def contains_doctest(text): function unescape_doctest (line 501) | def unescape_doctest(text): function split_code_at_show (line 521) | def split_code_at_show(text): class FigureFile (line 625) | class FigureFile: method __init__ (line 626) | def __init__(self, basename, dirname): method filename (line 631) | def filename(self, format): method filenames (line 634) | def filenames(self): function out_of_date (line 638) | def out_of_date(original, derived): class PlotError (line 648) | class PlotError(RuntimeError): function run_code (line 652) | def run_code(code, code_path, ns=None, function_name=None, fig_vars=None): function get_plot_formats (line 727) | def get_plot_formats(config): function render_figures (line 745) | def render_figures( function run (line 855) | def run(arguments, content, options, state_machine, state, lineno): FILE: _ext/symlink.py function remove_symlink_handler (line 6) | def remove_symlink_handler(app, exception): function setup (line 21) | def setup(app): FILE: _utilities/add_meta.py function infer_meta (line 12) | def infer_meta(filepath: str, content: str) -> dict: function has_meta_field (line 67) | def has_meta_field(content: str, field: str) -> bool: function process_file (line 72) | def process_file(filepath: str, dry_run: bool = False): function main (line 162) | def main(): FILE: _utilities/audit_frameworks.py function _resolve_path (line 30) | def _resolve_path(ref: str, referencing_file: Path, root: Path) -> str |... function _resolve_to_files (line 49) | def _resolve_to_files(base: str, root: Path) -> list[str]: function extract_toctree_entries (line 71) | def extract_toctree_entries(content: str, filepath: Path, root: Path) ->... function extract_doc_refs (line 114) | def extract_doc_refs(content: str, filepath: Path, root: Path) -> set[str]: function extract_include_refs (line 126) | def extract_include_refs(content: str, filepath: Path, root: Path) -> se... function extract_ref_labels (line 138) | def extract_ref_labels(content: str) -> set[str]: function extract_label_definitions (line 143) | def extract_label_definitions(content: str) -> set[str]: function find_all_framework_files (line 152) | def find_all_framework_files(root: Path) -> tuple[set[str], set[str], se... function collect_all_references (line 178) | def collect_all_references(root: Path) -> tuple[set[str], set[str], set[... function build_label_to_file_map (line 213) | def build_label_to_file_map(root: Path) -> dict[str, str]: function detect_orphans (line 234) | def detect_orphans(root: Path) -> list[dict]: function _check_stale_python (line 300) | def _check_stale_python(content: str) -> list[str]: function _check_stale_sdk (line 312) | def _check_stale_sdk(content: str) -> list[str]: function _check_stale_os (line 322) | def _check_stale_os(content: str) -> list[str]: function _check_torch_neuron_unsupported_os (line 327) | def _check_torch_neuron_unsupported_os(content: str) -> list[str]: function _check_neuron_cc (line 339) | def _check_neuron_cc(content: str) -> list[str]: function detect_stale_pages (line 346) | def detect_stale_pages(root: Path) -> list[dict]: function generate_report (line 398) | def generate_report(orphans: list[dict], stale: list[dict]) -> str: function main (line 437) | def main(): FILE: _utilities/create_sitemap.py function create_sitemap (line 8) | def create_sitemap(root_dir, base_url): FILE: _utilities/format_build_logs.py function check_python_version (line 20) | def check_python_version(): function check_pip_installed (line 39) | def check_pip_installed(): function find_repo_root (line 50) | def find_repo_root(): function setup_venv (line 77) | def setup_venv(repo_parent): function get_venv_python (line 103) | def get_venv_python(venv_path): function get_venv_pip (line 110) | def get_venv_pip(venv_path): function install_requirements (line 117) | def install_requirements(repo_root, venv_pip): function run_sphinx_build (line 137) | def run_sphinx_build(repo_root, venv_path): function parse_build_log (line 189) | def parse_build_log(log_text): function categorize_issues (line 346) | def categorize_issues(issues): function format_markdown (line 371) | def format_markdown(errors, warnings, build_time): function main (line 449) | def main(): FILE: _utilities/inject_archive_meta.py function find_title_end (line 29) | def find_title_end(lines): function inject_meta_and_warning (line 61) | def inject_meta_and_warning(filepath, framework="MXNet"): function main (line 122) | def main(): FILE: _utilities/migrate_setup_content.py function find_rst_files (line 58) | def find_rst_files(base_dir: str) -> list[Path]: function find_references (line 70) | def find_references(content: str, file_path: Path) -> list[dict]: function apply_fix (line 130) | def apply_fix(file_path: Path, refs: list[dict]) -> bool: function main (line 156) | def main(): FILE: about-neuron/news-and-blogs/validate_articles.py function validate_url (line 39) | def validate_url(url): function validate_date (line 51) | def validate_date(date_str): function validate_article (line 60) | def validate_article(article, index, section): function main (line 157) | def main(): FILE: archive/neuronperf/test_simple_pt.py class Model (line 9) | class Model(torch.nn.Module): method forward (line 10) | def forward(self, x): FILE: archive/src/benchmark/pytorch/bert-base-cased_benchmark.py function get_batch (line 17) | def get_batch(tokenizer, sequence_length, batch_size): FILE: archive/src/benchmark/pytorch/bert-base-cased_compile.py function get_batch (line 17) | def get_batch(tokenizer, sequence_length, batch_size): FILE: archive/src/benchmark/pytorch/bert-base-uncased_benchmark.py function get_batch (line 17) | def get_batch(tokenizer, sequence_length, batch_size): FILE: archive/src/benchmark/pytorch/bert-base-uncased_compile.py function get_batch (line 17) | def get_batch(tokenizer, sequence_length, batch_size): FILE: archive/src/benchmark/pytorch/distilbert-base-uncased-finetuned-sst-2-english_benchmark.py function get_batch (line 17) | def get_batch(tokenizer, sequence_length, batch_size): FILE: archive/src/benchmark/pytorch/distilbert-base-uncased-finetuned-sst-2-english_compile.py function get_batch (line 17) | def get_batch(tokenizer, sequence_length, batch_size): FILE: archive/src/benchmark/pytorch/distilbert-base-uncased_benchmark.py function get_batch (line 17) | def get_batch(tokenizer, sequence_length, batch_size): FILE: archive/src/benchmark/pytorch/distilbert-base-uncased_compile.py function get_batch (line 17) | def get_batch(tokenizer, sequence_length, batch_size): FILE: archive/src/benchmark/pytorch/distilroberta-base_benchmark.py function get_batch (line 17) | def get_batch(tokenizer, sequence_length, batch_size): FILE: archive/src/benchmark/pytorch/distilroberta-base_compile.py function get_batch (line 17) | def get_batch(tokenizer, sequence_length, batch_size): FILE: archive/src/benchmark/pytorch/hf-google-vit_benchmark.py function benchmark (line 10) | def benchmark(batch_size): FILE: archive/src/benchmark/pytorch/hf-openai-clip_benchmark.py function benchmark (line 10) | def benchmark(model_name, batch_size): FILE: archive/src/benchmark/pytorch/hf_pretrained_wav2vec2_conformer_relpos_benchmark.py function benchmark (line 9) | def benchmark(): FILE: archive/src/benchmark/pytorch/hf_pretrained_wav2vec2_conformer_rope_benchmark.py function benchmark (line 9) | def benchmark(): FILE: archive/src/benchmark/pytorch/inf2_benchmark.py class GPT2Neuron (line 11) | class GPT2Neuron(torch.nn.Module): method __init__ (line 12) | def __init__(self, model) -> None: method forward (line 16) | def forward(self, input_ids, attention_mask): function benchmark (line 19) | def benchmark(model_name, batch_size, sequence_length): FILE: archive/src/benchmark/pytorch/opt_benchmark.py class Wrapper (line 23) | class Wrapper(torch.nn.Module): method __init__ (line 24) | def __init__(self, filename): method forward (line 32) | def forward(self, *inputs): function load_fn (line 36) | def load_fn(filename, **kwargs): function env_setup_fn (line 40) | def env_setup_fn(*_): function preprocess_fn (line 43) | def preprocess_fn(inputs): function postprocess_fn (line 46) | def postprocess_fn(outputs): function benchmark (line 49) | def benchmark(): FILE: archive/src/benchmark/pytorch/perceiver-multimodal_benchmark.py function benchmark (line 24) | def benchmark(n_runs, test_name, model, model_inputs): class LatencyCollector (line 56) | class LatencyCollector: method __init__ (line 57) | def __init__(self): method pre_hook (line 61) | def pre_hook(self, *args): method hook (line 64) | def hook(self, *args): method percentile (line 67) | def percentile(self, percent): class MultimodalPerceiverWrapper (line 76) | class MultimodalPerceiverWrapper(nn.Module): method __init__ (line 77) | def __init__(self, perceiver_model, nchunks, image_chunk_size, audio_c... method forward (line 84) | def forward(self, inputs: torch.FloatTensor, function custom_model_forward (line 162) | def custom_model_forward( function custom_decoder_query (line 191) | def custom_decoder_query(self, inputs, modality_sizes=None, inputs_witho... class EncoderWrapper (line 253) | class EncoderWrapper(nn.Module): method __init__ (line 254) | def __init__(self, encoder): method forward (line 258) | def forward(self, embedding_output, inputs, extended_attention_mask): class NeuronEncoder (line 262) | class NeuronEncoder(nn.Module): method __init__ (line 263) | def __init__(self, encoder_wrapper): method forward (line 267) | def forward(self, class DecoderWrapper (line 282) | class DecoderWrapper(nn.Module): method __init__ (line 283) | def __init__(self, decoder, decoder_query_audio, decoder_query_image, ... method forward (line 292) | def forward(self, z, query_mask, class NeuronDecoder (line 318) | class NeuronDecoder(nn.Module): method __init__ (line 319) | def __init__(self, decoder_wrapper): method forward (line 325) | def forward(self, z, query_mask, inputs, modality_sizes, inputs_withou... function autoencode_video (line 382) | def autoencode_video(images, audio, nchunks, image_chunk_size, audio_chu... FILE: archive/src/benchmark/pytorch/perceiver-multimodal_compile.py class MultimodalPerceiverWrapper (line 20) | class MultimodalPerceiverWrapper(nn.Module): method __init__ (line 21) | def __init__(self, perceiver_model, nchunks, image_chunk_size, audio_c... method forward (line 28) | def forward(self, inputs: torch.FloatTensor, function custom_model_forward (line 106) | def custom_model_forward( function custom_decoder_query (line 135) | def custom_decoder_query(self, inputs, modality_sizes=None, inputs_witho... class EncoderWrapper (line 197) | class EncoderWrapper(nn.Module): method __init__ (line 198) | def __init__(self, encoder): method forward (line 202) | def forward(self, embedding_output, inputs, extended_attention_mask): class NeuronEncoder (line 206) | class NeuronEncoder(nn.Module): method __init__ (line 207) | def __init__(self, encoder_wrapper): method forward (line 211) | def forward(self, class DecoderWrapper (line 226) | class DecoderWrapper(nn.Module): method __init__ (line 227) | def __init__(self, decoder, decoder_query_audio, decoder_query_image, ... method forward (line 236) | def forward(self, z, query_mask, class NeuronDecoder (line 262) | class NeuronDecoder(nn.Module): method __init__ (line 263) | def __init__(self, decoder_wrapper): method forward (line 269) | def forward(self, z, query_mask, inputs, modality_sizes, inputs_withou... FILE: archive/src/benchmark/pytorch/perceiver-vision_benchmark.py function get_batch (line 16) | def get_batch(batch_size): FILE: archive/src/benchmark/pytorch/perceiver-vision_compile.py function get_batch (line 16) | def get_batch(batch_size): FILE: archive/src/benchmark/pytorch/pixart_alpha_benchmark.py function benchmark (line 35) | def benchmark(n_runs, test_name, model, model_inputs): class LatencyCollector (line 69) | class LatencyCollector: method __init__ (line 70) | def __init__(self): method pre_hook (line 74) | def pre_hook(self, *args): method hook (line 77) | def hook(self, *args): method percentile (line 80) | def percentile(self, percent): class InferenceTextEncoderWrapper (line 89) | class InferenceTextEncoderWrapper(nn.Module): method __init__ (line 90) | def __init__(self, dtype, t: T5EncoderModel, seqlen: int): method forward (line 95) | def forward(self, text_input_ids, attention_mask=None): class InferenceTransformerWrapper (line 98) | class InferenceTransformerWrapper(nn.Module): method __init__ (line 99) | def __init__(self, transformer: Transformer2DModel): method forward (line 105) | def forward(self, hidden_states, encoder_hidden_states=None, timestep=... class SimpleWrapper (line 115) | class SimpleWrapper(nn.Module): method __init__ (line 116) | def __init__(self, model): method forward (line 119) | def forward(self, x): function get_pipe (line 124) | def get_pipe(resolution, dtype): FILE: archive/src/benchmark/pytorch/pixart_sigma_benchmark.py function benchmark (line 34) | def benchmark(n_runs, test_name, model, model_inputs): class LatencyCollector (line 68) | class LatencyCollector: method __init__ (line 69) | def __init__(self): method pre_hook (line 73) | def pre_hook(self, *args): method hook (line 76) | def hook(self, *args): method percentile (line 79) | def percentile(self, percent): class InferenceTextEncoderWrapper (line 88) | class InferenceTextEncoderWrapper(nn.Module): method __init__ (line 89) | def __init__(self, dtype, t: T5EncoderModel, seqlen: int): method forward (line 94) | def forward(self, text_input_ids, attention_mask=None): class InferenceTransformerWrapper (line 97) | class InferenceTransformerWrapper(nn.Module): method __init__ (line 98) | def __init__(self, transformer: Transformer2DModel): method forward (line 104) | def forward(self, hidden_states, encoder_hidden_states=None, timestep=... class SimpleWrapper (line 114) | class SimpleWrapper(nn.Module): method __init__ (line 115) | def __init__(self, model): method forward (line 118) | def forward(self, x): function get_pipe (line 123) | def get_pipe(resolution, dtype): FILE: archive/src/benchmark/pytorch/resnet50_benchmark.py function get_batch (line 12) | def get_batch(batch_size): FILE: archive/src/benchmark/pytorch/resnet50_compile.py function get_batch (line 14) | def get_batch(batch_size): FILE: archive/src/benchmark/pytorch/resnet_benchmark.py function get_batch (line 12) | def get_batch(batch_size): FILE: archive/src/benchmark/pytorch/resnet_compile.py function get_batch (line 12) | def get_batch(batch_size): FILE: archive/src/benchmark/pytorch/sd2_512_benchmark.py function benchmark (line 22) | def benchmark(n_runs, test_name, model, model_inputs): class LatencyCollector (line 56) | class LatencyCollector: method __init__ (line 57) | def __init__(self): method pre_hook (line 61) | def pre_hook(self, *args): method hook (line 64) | def hook(self, *args): method percentile (line 67) | def percentile(self, percent): class UNetWrap (line 77) | class UNetWrap(nn.Module): method __init__ (line 78) | def __init__(self, unet): method forward (line 82) | def forward(self, sample, timestep, encoder_hidden_states, cross_atten... class NeuronUNet (line 86) | class NeuronUNet(nn.Module): method __init__ (line 87) | def __init__(self, unetwrap): method forward (line 94) | def forward(self, sample, timestep, encoder_hidden_states, cross_atten... class NeuronTextEncoder (line 98) | class NeuronTextEncoder(nn.Module): method __init__ (line 99) | def __init__(self, text_encoder): method forward (line 106) | def forward(self, emb, attention_mask = None): function decode_latents (line 109) | def decode_latents(self, latents): class NeuronTypeConversionWrapper (line 135) | class NeuronTypeConversionWrapper(nn.Module): method __init__ (line 136) | def __init__(self, network): method forward (line 140) | def forward(self, x): FILE: archive/src/benchmark/pytorch/sd2_512_compile.py class UNetWrap (line 26) | class UNetWrap(nn.Module): method __init__ (line 27) | def __init__(self, unet): method forward (line 31) | def forward(self, sample, timestep, encoder_hidden_states, cross_atten... class NeuronUNet (line 35) | class NeuronUNet(nn.Module): method __init__ (line 36) | def __init__(self, unetwrap): method forward (line 43) | def forward(self, sample, timestep, encoder_hidden_states, cross_atten... class NeuronTextEncoder (line 47) | class NeuronTextEncoder(nn.Module): method __init__ (line 48) | def __init__(self, text_encoder): method forward (line 55) | def forward(self, emb, attention_mask = None): function get_attention_scores (line 60) | def get_attention_scores(self, query, key, attn_mask): function custom_badbmm (line 95) | def custom_badbmm(a, b): FILE: archive/src/benchmark/pytorch/sd2_768_benchmark.py function benchmark (line 22) | def benchmark(n_runs, test_name, model, model_inputs): class LatencyCollector (line 56) | class LatencyCollector: method __init__ (line 57) | def __init__(self): method pre_hook (line 61) | def pre_hook(self, *args): method hook (line 64) | def hook(self, *args): method percentile (line 67) | def percentile(self, percent): class UNetWrap (line 77) | class UNetWrap(nn.Module): method __init__ (line 78) | def __init__(self, unet): method forward (line 82) | def forward(self, sample, timestep, encoder_hidden_states, cross_atten... class NeuronUNet (line 86) | class NeuronUNet(nn.Module): method __init__ (line 87) | def __init__(self, unetwrap): method forward (line 94) | def forward(self, sample, timestep, encoder_hidden_states, cross_atten... class NeuronTextEncoder (line 98) | class NeuronTextEncoder(nn.Module): method __init__ (line 99) | def __init__(self, text_encoder): method forward (line 106) | def forward(self, emb, attention_mask = None): FILE: archive/src/benchmark/pytorch/sd2_768_compile.py class UNetWrap (line 24) | class UNetWrap(nn.Module): method __init__ (line 25) | def __init__(self, unet): method forward (line 29) | def forward(self, sample, timestep, encoder_hidden_states, cross_atten... class NeuronUNet (line 33) | class NeuronUNet(nn.Module): method __init__ (line 34) | def __init__(self, unetwrap): method forward (line 41) | def forward(self, sample, timestep, encoder_hidden_states, cross_atten... class NeuronTextEncoder (line 45) | class NeuronTextEncoder(nn.Module): method __init__ (line 46) | def __init__(self, text_encoder): method forward (line 53) | def forward(self, emb, attention_mask = None): function get_attention_scores (line 58) | def get_attention_scores(self, query, key, attn_mask): function custom_badbmm (line 93) | def custom_badbmm(a, b): FILE: archive/src/benchmark/pytorch/sd2_inpainting_benchmark.py function parse_argsuments (line 15) | def parse_argsuments(): class UNetWrap (line 24) | class UNetWrap(nn.Module): method __init__ (line 25) | def __init__(self, unet): method forward (line 29) | def forward(self, sample, timestep, encoder_hidden_states, cross_atten... class NeuronUNet (line 33) | class NeuronUNet(nn.Module): method __init__ (line 34) | def __init__(self, unetwrap): method forward (line 41) | def forward(self, sample, timestep, encoder_hidden_states, cross_atten... class NeuronTextEncoder (line 45) | class NeuronTextEncoder(nn.Module): method __init__ (line 46) | def __init__(self, text_encoder): method forward (line 53) | def forward(self, emb, attention_mask = None): function get_attention_scores (line 57) | def get_attention_scores(self, query, key, attn_mask): function custom_badbmm (line 91) | def custom_badbmm(a, b): function trace_vae_encoder (line 101) | def trace_vae_encoder(model_id, height, width): function trace_unet (line 122) | def trace_unet(model_id, height, width): function main (line 158) | def main(): FILE: archive/src/benchmark/pytorch/sd2_inpainting_inference.py function parse_argsuments (line 20) | def parse_argsuments(): function benchmark (line 32) | def benchmark(n_runs, test_name, model, model_inputs): class LatencyCollector (line 66) | class LatencyCollector: method __init__ (line 67) | def __init__(self): method pre_hook (line 71) | def pre_hook(self, *args): method hook (line 74) | def hook(self, *args): method percentile (line 77) | def percentile(self, percent): class UNetWrap (line 90) | class UNetWrap(nn.Module): method __init__ (line 91) | def __init__(self, unet): method forward (line 95) | def forward(self, sample, timestep, encoder_hidden_states, cross_atten... class NeuronUNet (line 99) | class NeuronUNet(nn.Module): method __init__ (line 100) | def __init__(self, unetwrap): method forward (line 107) | def forward(self, sample, timestep, encoder_hidden_states, timestep_co... class NeuronTextEncoder (line 111) | class NeuronTextEncoder(nn.Module): method __init__ (line 112) | def __init__(self, text_encoder): method forward (line 119) | def forward(self, emb, attention_mask = None): function get_attention_scores (line 123) | def get_attention_scores(self, query, key, attn_mask): function custom_badbmm (line 157) | def custom_badbmm(a, b): function main (line 163) | def main(): FILE: archive/src/benchmark/pytorch/sd_15_512_benchmark.py function benchmark (line 21) | def benchmark(n_runs, test_name, model, model_inputs): class LatencyCollector (line 55) | class LatencyCollector: method __init__ (line 56) | def __init__(self): method pre_hook (line 60) | def pre_hook(self, *args): method hook (line 63) | def hook(self, *args): method percentile (line 66) | def percentile(self, percent): class UNetWrap (line 76) | class UNetWrap(nn.Module): method __init__ (line 77) | def __init__(self, unet): method forward (line 81) | def forward(self, sample, timestep, encoder_hidden_states, cross_atten... class NeuronUNet (line 85) | class NeuronUNet(nn.Module): method __init__ (line 86) | def __init__(self, unetwrap): method forward (line 93) | def forward(self, sample, timestep, encoder_hidden_states, cross_atten... class NeuronTextEncoder (line 97) | class NeuronTextEncoder(nn.Module): method __init__ (line 98) | def __init__(self, text_encoder): method forward (line 105) | def forward(self, emb, attention_mask = None): class NeuronSafetyModelWrap (line 109) | class NeuronSafetyModelWrap(nn.Module): method __init__ (line 110) | def __init__(self, safety_model): method forward (line 114) | def forward(self, clip_inputs): FILE: archive/src/benchmark/pytorch/sd_15_512_compile.py function get_attention_scores (line 24) | def get_attention_scores(self, query, key, attn_mask): function cust_badbmm (line 59) | def cust_badbmm(a, b, scale): class UNetWrap (line 65) | class UNetWrap(nn.Module): method __init__ (line 66) | def __init__(self, unet): method forward (line 70) | def forward(self, sample, timestep, encoder_hidden_states, cross_atten... class NeuronUNet (line 74) | class NeuronUNet(nn.Module): method __init__ (line 75) | def __init__(self, unetwrap): method forward (line 82) | def forward(self, sample, timestep, encoder_hidden_states, cross_atten... class NeuronTextEncoder (line 86) | class NeuronTextEncoder(nn.Module): method __init__ (line 87) | def __init__(self, text_encoder): method forward (line 94) | def forward(self, emb, attention_mask = None): class NeuronSafetyModelWrap (line 98) | class NeuronSafetyModelWrap(nn.Module): method __init__ (line 99) | def __init__(self, safety_model): method forward (line 103) | def forward(self, clip_inputs): FILE: archive/src/benchmark/pytorch/sd_4x_upscaler_benchmark.py class UNetWrap (line 21) | class UNetWrap(nn.Module): method __init__ (line 22) | def __init__(self, unet): method forward (line 26) | def forward( class NeuronUNet (line 40) | class NeuronUNet(nn.Module): method __init__ (line 41) | def __init__(self, unetwrap): method forward (line 48) | def forward( class NeuronTextEncoder (line 66) | class NeuronTextEncoder(nn.Module): method __init__ (line 67) | def __init__(self, text_encoder): method forward (line 74) | def forward(self, emb, attention_mask=None): function benchmark (line 82) | def benchmark(n_runs, test_name, model, model_inputs): class LatencyCollector (line 116) | class LatencyCollector: method __init__ (line 117) | def __init__(self): method pre_hook (line 121) | def pre_hook(self, *args): method hook (line 124) | def hook(self, *args): method percentile (line 127) | def percentile(self, percent): FILE: archive/src/benchmark/pytorch/sd_4x_upscaler_compile.py function apply_neuron_attn_override (line 20) | def apply_neuron_attn_override( function get_attention_scores_neuron (line 40) | def get_attention_scores_neuron(self, query, key, attn_mask): function cust_badbmm (line 52) | def cust_badbmm(a, b, scale): function neuron_scaled_dot_product_attention (line 58) | def neuron_scaled_dot_product_attention( class UNetWrap (line 92) | class UNetWrap(nn.Module): method __init__ (line 93) | def __init__(self, unet): method forward (line 97) | def forward( class NeuronUNet (line 111) | class NeuronUNet(nn.Module): method __init__ (line 112) | def __init__(self, unetwrap): method forward (line 119) | def forward( class NeuronTextEncoder (line 137) | class NeuronTextEncoder(nn.Module): method __init__ (line 138) | def __init__(self, text_encoder): method forward (line 145) | def forward(self, emb, attention_mask=None): FILE: archive/src/benchmark/pytorch/sdxl_base_1024_benchmark.py function benchmark (line 22) | def benchmark(n_runs, test_name, model, model_inputs): class LatencyCollector (line 56) | class LatencyCollector: method __init__ (line 57) | def __init__(self): method pre_hook (line 61) | def pre_hook(self, *args): method hook (line 64) | def hook(self, *args): method percentile (line 67) | def percentile(self, percent): class UNetWrap (line 76) | class UNetWrap(nn.Module): method __init__ (line 77) | def __init__(self, unet): method forward (line 81) | def forward(self, sample, timestep, encoder_hidden_states, text_embeds... class NeuronUNet (line 90) | class NeuronUNet(nn.Module): method __init__ (line 91) | def __init__(self, unetwrap): method forward (line 99) | def forward(self, sample, timestep, encoder_hidden_states, added_cond_... class TextEncoderOutputWrapper (line 107) | class TextEncoderOutputWrapper(nn.Module): method __init__ (line 108) | def __init__(self, traceable_text_encoder, original_text_encoder): method forward (line 115) | def forward(self, text_input_ids, output_hidden_states=True): method __init__ (line 121) | def __init__(self, traceable_text_encoder, original_text_encoder): method forward (line 128) | def forward(self, text_input_ids, output_hidden_states=True): class TextEncoderOutputWrapper (line 120) | class TextEncoderOutputWrapper(nn.Module): method __init__ (line 108) | def __init__(self, traceable_text_encoder, original_text_encoder): method forward (line 115) | def forward(self, text_input_ids, output_hidden_states=True): method __init__ (line 121) | def __init__(self, traceable_text_encoder, original_text_encoder): method forward (line 128) | def forward(self, text_input_ids, output_hidden_states=True): class TraceableTextEncoder (line 132) | class TraceableTextEncoder(nn.Module): method __init__ (line 133) | def __init__(self, text_encoder): method forward (line 137) | def forward(self, text_input_ids): FILE: archive/src/benchmark/pytorch/sdxl_base_1024_compile.py function apply_neuron_attn_override (line 18) | def apply_neuron_attn_override( function get_attention_scores_neuron (line 41) | def get_attention_scores_neuron(self, query, key, attn_mask): function custom_badbmm (line 60) | def custom_badbmm(a, b, scale): function neuron_scaled_dot_product_attention (line 65) | def neuron_scaled_dot_product_attention( class UNetWrap (line 103) | class UNetWrap(nn.Module): method __init__ (line 104) | def __init__(self, unet): method forward (line 108) | def forward( class NeuronUNet (line 121) | class NeuronUNet(nn.Module): method __init__ (line 122) | def __init__(self, unetwrap): method forward (line 130) | def forward( class TextEncoderOutputWrapper (line 148) | class TextEncoderOutputWrapper(nn.Module): method __init__ (line 149) | def __init__(self, traceable_text_encoder, original_text_encoder): method forward (line 156) | def forward(self, text_input_ids, output_hidden_states=True): class TraceableTextEncoder (line 160) | class TraceableTextEncoder(nn.Module): method __init__ (line 161) | def __init__(self, text_encoder): method forward (line 165) | def forward(self, text_input_ids): FILE: archive/src/benchmark/pytorch/sdxl_base_and_refiner_1024_benchmark.py function benchmark (line 21) | def benchmark(n_runs, test_name, model, model_inputs): class LatencyCollector (line 55) | class LatencyCollector: method __init__ (line 56) | def __init__(self): method pre_hook (line 60) | def pre_hook(self, *args): method hook (line 63) | def hook(self, *args): method percentile (line 66) | def percentile(self, percent): class UNetWrap (line 75) | class UNetWrap(nn.Module): method __init__ (line 76) | def __init__(self, unet): method forward (line 80) | def forward(self, sample, timestep, encoder_hidden_states, text_embeds... class NeuronUNet (line 89) | class NeuronUNet(nn.Module): method __init__ (line 90) | def __init__(self, unetwrap): method forward (line 98) | def forward(self, sample, timestep, encoder_hidden_states, added_cond_... function run_refiner_and_base (line 107) | def run_refiner_and_base(base, refiner, prompt, n_steps=40, high_noise_f... FILE: archive/src/benchmark/pytorch/sdxl_base_and_refiner_1024_compile.py function get_attention_scores_neuron (line 16) | def get_attention_scores_neuron(self, query, key, attn_mask): function custom_badbmm (line 36) | def custom_badbmm(a, b, scale): class UNetWrap (line 42) | class UNetWrap(nn.Module): method __init__ (line 43) | def __init__(self, unet): method forward (line 47) | def forward(self, sample, timestep, encoder_hidden_states, text_embeds... class NeuronUNet (line 56) | class NeuronUNet(nn.Module): method __init__ (line 57) | def __init__(self, unetwrap): method forward (line 65) | def forward(self, sample, timestep, encoder_hidden_states, added_cond_... FILE: archive/src/benchmark/pytorch/unet_benchmark.py function get_batch (line 11) | def get_batch(batch_size): FILE: archive/src/benchmark/pytorch/unet_compile.py function get_batch (line 11) | def get_batch(batch_size): FILE: archive/src/benchmark/pytorch/vgg_benchmark.py function get_batch (line 12) | def get_batch(batch_size): FILE: archive/src/benchmark/pytorch/vgg_compile.py function get_batch (line 12) | def get_batch(batch_size): FILE: archive/torch-neuron/placement.py function set_neuron_cores (line 42) | def set_neuron_cores(trace: 'torch.jit.ScriptModule', start_nc: int=-1, ... function set_multicore (line 104) | def set_multicore(trace: 'torch.jit.ScriptModule'): function neuron_cores_context (line 142) | def neuron_cores_context(start_nc: int=-1, nc_count: int=-1): function multicore_context (line 208) | def multicore_context(): FILE: archive/tutorials/ssd300_demo/ssd300_detection.py function main (line 13) | def main(): FILE: archive/tutorials/ssd300_demo/ssd300_evaluation.py function get_val_dataset (line 19) | def get_val_dataset(val_annotate, val_coco_root): function main (line 26) | def main(): FILE: archive/tutorials/ssd300_demo/ssd300_evaluation_client.py function get_val_dataset (line 21) | def get_val_dataset(val_annotate, val_coco_root): function main (line 28) | def main(): FILE: archive/tutorials/ssd300_demo/ssd300_model.py function decode_jpeg_resize (line 19) | def decode_jpeg_resize(input_tensor, image_size): function preprocessor (line 35) | def preprocessor(input_tensor, image_size): function tf_Conv2d (line 43) | def tf_Conv2d(input_tensor, module, first_conv=False): function tf_BatchNorm2d (line 60) | def tf_BatchNorm2d(input_tensor, module): function tf_MaxPool2d (line 69) | def tf_MaxPool2d(input_tensor, module): function tf_Bottleneck (line 74) | def tf_Bottleneck(input_tensor, module): function tf_SequentialBottleneck (line 88) | def tf_SequentialBottleneck(tensor, seq, resnet): function tf_bbox_view (line 95) | def tf_bbox_view(detection_feed, modules, ndim): function tf_feature_extractor (line 111) | def tf_feature_extractor(input_tensor, resnet): function tf_box_predictor (line 128) | def tf_box_predictor(tensor, ssd300_torch): function tf_ssd300 (line 148) | def tf_ssd300(input_tensor, ssd300_torch): function scale_back_batch (line 155) | def scale_back_batch(bboxes_in, scores_in, scale_xy, scale_wh, dboxes_xy... function select_nms_outputs (line 180) | def select_nms_outputs(input_tensors): function postprocessor (line 184) | def postprocessor(ploc_ts, plabel_ts, bbox_scale_hw_ts, scale_xy, scale_... class DefaultBoxes (line 215) | class DefaultBoxes(object): method __init__ (line 217) | def __init__(self, fig_size, feat_size, steps, scales, aspect_ratios, method scale_xy (line 262) | def scale_xy(self): method scale_wh (line 266) | def scale_wh(self): method __call__ (line 269) | def __call__(self, order="ltrb"): function dboxes300_coco (line 274) | def dboxes300_coco(): function main (line 285) | def main(): FILE: conf.py function get_env_vars_from_gh (line 25) | def get_env_vars_from_gh(): function get_env_vars_from_rtd (line 33) | def get_env_vars_from_rtd(): function get_env_vars (line 44) | def get_env_vars(): FILE: containers/docker-example/training/mlp_train.py function main (line 22) | def main(): FILE: containers/docker-example/training/model.py class MLP (line 5) | class MLP(nn.Module): method __init__ (line 6) | def __init__(self, input_size = 28 * 28, output_size = 10, layers = [1... method forward (line 12) | def forward(self, x): FILE: nki/_ext/nki_directives.py class NKIExampleReader (line 37) | class NKIExampleReader(LiteralIncludeReader): method __init__ (line 39) | def __init__(self, filename: str, options: dict[str, Any], config: Con... method nki_example_filter (line 49) | def nki_example_filter( method read (line 86) | def read(self, location: Union[tuple[str, int], None] = None) -> tuple... class NKIExample (line 106) | class NKIExample(LiteralInclude): method run (line 116) | def run(self) -> list[Node]: function setup (line 180) | def setup(app: Sphinx) -> ExtensionMetadata: FILE: nki/api/nki/__init__.py function jit (line 6) | def jit(func=None, mode="auto", **kwargs): function simulate (line 58) | def simulate(kernel): FILE: nki/api/nki/collectives/__init__.py class NKIObject (line 6) | class NKIObject: class ReplicaGroup (line 11) | class ReplicaGroup(NKIObject): function all_gather (line 18) | def all_gather(srcs, dsts, replica_group, collective_dim): function all_reduce (line 39) | def all_reduce(srcs, dsts, replica_group, op): function all_to_all (line 57) | def all_to_all(srcs, dsts, replica_group, collective_dim): function all_to_all_v (line 74) | def all_to_all_v(srcs, dsts, replica_group, metadata_tensor, recv_counts... function collective_permute (line 93) | def collective_permute(srcs, dsts, source_target_pairs): function collective_permute_implicit (line 114) | def collective_permute_implicit(srcs_by_channel, dsts_by_channel, replic... function collective_permute_implicit_current_processing_rank_id (line 144) | def collective_permute_implicit_current_processing_rank_id(iteration_id,... function collective_permute_implicit_reduce (line 180) | def collective_permute_implicit_reduce(srcs0_by_channel, srcs1_by_channe... function rank_id (line 214) | def rank_id(): function reduce_scatter (line 221) | def reduce_scatter(srcs, dsts, replica_group, collective_dim, op): FILE: nki/api/nki/isa/__init__.py class NKIObject (line 8) | class NKIObject: class NkiValidationError (line 13) | class NkiValidationError(Exception): class VirtualRegister (line 18) | class VirtualRegister(NKIObject): class dge_mode (line 66) | class dge_mode(Enum): class dma_engine (line 79) | class dma_engine(Enum): class engine (line 90) | class engine(Enum): class matmul_perf_mode (line 109) | class matmul_perf_mode(Enum): class nc_version (line 118) | class nc_version(Enum): class oob_mode (line 129) | class oob_mode(Enum): class reduce_cmd (line 138) | class reduce_cmd(Enum): function activation (line 153) | def activation(dst, op, data, bias=None, scale=1.0, reduce_op=None, redu... function activation_reduce (line 248) | def activation_reduce(dst, op, data, reduce_op, reduce_res, bias=None, s... function affine_select (line 300) | def affine_select(dst, pattern, channel_multiplier, on_true_tile, on_fal... function bn_aggr (line 375) | def bn_aggr(dst, data, name=None): function bn_stats (line 398) | def bn_stats(dst, data, name=None): function core_barrier (line 435) | def core_barrier(data, cores, engine=engine.gpsimd, name=None): function dma_compute (line 491) | def dma_compute(dst, srcs, reduce_op, scales=None, unique_indices=True, ... function dma_copy (line 561) | def dma_copy(dst, src, oob_mode=oob_mode.error, dge_mode=dge_mode.unknow... function dma_transpose (line 697) | def dma_transpose(dst, src, axes=None, dge_mode=dge_mode.unknown, oob_mo... function dropout (line 779) | def dropout(dst, data, prob, name=None): function exponential (line 810) | def exponential(dst, src, max_value=0.0, reduce_res=None, reduce_cmd=red... function get_nc_version (line 890) | def get_nc_version(): function iota (line 899) | def iota(dst, pattern, offset=0, channel_multiplier=0, name=None): function local_gather (line 956) | def local_gather(dst, src_buffer, index, num_elem_per_idx=1, num_valid_i... function max8 (line 1010) | def max8(dst, src, name=None): function memset (line 1030) | def memset(dst, value, engine=engine.unknown, name=None): function nc_find_index8 (line 1046) | def nc_find_index8(dst, data, vals, name=None): function nc_match_replace8 (line 1070) | def nc_match_replace8(dst, data, vals, imm, dst_idx=None, name=None): function nc_matmul (line 1083) | def nc_matmul(dst, stationary, moving, is_stationary_onezero=False, is_m... function nc_matmul_mx (line 1242) | def nc_matmul_mx(dst, stationary, moving, stationary_scale, moving_scale... function nc_n_gather (line 1336) | def nc_n_gather(dst, data, indices, name=None): function nc_stream_shuffle (line 1385) | def nc_stream_shuffle(dst, src, shuffle_mask, name=None): function nc_transpose (line 1421) | def nc_transpose(dst, data, engine=engine.unknown, name=None): function nonzero_with_count (line 1462) | def nonzero_with_count(dst, src, index_offset=0, padding_val=-1, name=No... function quantize_mx (line 1563) | def quantize_mx(dst, src, dst_scale, name=None): function rand2 (line 1610) | def rand2(dst, min, max, name=None): function rand_get_state (line 1652) | def rand_get_state(dst, engine=engine.unknown, name=None): function rand_set_state (line 1683) | def rand_set_state(src_seeds, engine=engine.unknown, name=None): function range_select (line 1719) | def range_select(dst, on_true_tile, comp_op0, comp_op1, bound0, bound1, ... function reciprocal (line 1795) | def reciprocal(dst, data, name=None): function register_alloc (line 1823) | def register_alloc(x=None): function register_load (line 1866) | def register_load(dst, src): function register_move (line 1892) | def register_move(dst, src): function register_store (line 1921) | def register_store(dst, src): function rng (line 1947) | def rng(dst, engine=engine.unknown, name=None): function scalar_tensor_tensor (line 1988) | def scalar_tensor_tensor(dst, data, op0, operand0, op1, operand1, revers... function select_reduce (line 2047) | def select_reduce(dst, predicate, on_true, on_false, reduce_res=None, re... function sendrecv (line 2112) | def sendrecv(src, dst, send_to_rank, recv_from_rank, pipe_id, dma_engine... function sequence_bounds (line 2198) | def sequence_bounds(dst, segment_ids, name=None): function set_rng_seed (line 2222) | def set_rng_seed(src_seeds, name=None): function tensor_copy (line 2247) | def tensor_copy(dst, src, engine=engine.unknown, name=None): function tensor_copy_predicated (line 2275) | def tensor_copy_predicated(dst, src, predicate, reverse_pred=False, name... function tensor_partition_reduce (line 2305) | def tensor_partition_reduce(dst, op, data, name=None): function tensor_reduce (line 2314) | def tensor_reduce(dst, op, data, axis, negate=False, keepdims=False, nam... function tensor_scalar (line 2382) | def tensor_scalar(dst, data, op0, operand0, reverse0=False, op1=None, op... function tensor_scalar_cumulative (line 2439) | def tensor_scalar_cumulative(dst, src, op0, op1, imm0, imm1=None, reduce... function tensor_scalar_reduce (line 2498) | def tensor_scalar_reduce(dst, data, op0, operand0, reduce_op, reduce_res... function tensor_tensor (line 2534) | def tensor_tensor(dst, data1, data2, op, engine=engine.unknown, name=None): function tensor_tensor_scan (line 2578) | def tensor_tensor_scan(dst, data0, data1, initial, op0, op1, reverse0=Fa... FILE: nki/api/nki/language/__init__.py class MemoryRegion (line 5) | class MemoryRegion(Enum): class NKIObject (line 14) | class NKIObject: class tile_size (line 19) | class tile_size: class NkiTensor (line 39) | class NkiTensor(NKIObject): function abs (line 52) | def abs(x, dtype=None): function add (line 83) | def add(x, y, dtype=None): function affine_range (line 119) | def affine_range(start, stop=None, step=1): function all (line 149) | def all(x, axis, dtype=None): function arctan (line 167) | def arctan(x, dtype=None): function bitwise_and (line 198) | def bitwise_and(x, y, dtype=None): function bitwise_or (line 216) | def bitwise_or(x, y, dtype=None): function bitwise_xor (line 234) | def bitwise_xor(x, y, dtype=None): function broadcast_to (line 256) | def broadcast_to(x, shape, dtype=None): function ceil (line 276) | def ceil(x, dtype=None): function copy (line 311) | def copy(x, dtype=None): function cos (line 329) | def cos(x, dtype=None): function device_print (line 356) | def device_print(print_prefix, tensor): function divide (line 372) | def divide(x, y, dtype=None): function dropout (line 389) | def dropout(x, rate, dtype=None): function ds (line 404) | def ds(start, size): function dynamic_range (line 413) | def dynamic_range(start, stop=None, step=1): function empty_like (line 438) | def empty_like(x, dtype=None, buffer=None, name=''): function equal (line 455) | def equal(x, y, dtype=None): function erf (line 472) | def erf(x, dtype=None): function erf_dx (line 477) | def erf_dx(x, dtype=None): function exp (line 482) | def exp(x, dtype=None): function expand_dims (line 511) | def expand_dims(x, axis): function floor (line 560) | def floor(x, dtype=None): function fmod (line 595) | def fmod(x, y, dtype=None): function full (line 615) | def full(shape, fill_value, dtype, buffer=MemoryRegion.sbuf, name=''): function gather_flattened (line 631) | def gather_flattened(data, indices, axis=0, dtype=None): function gelu (line 663) | def gelu(x, dtype=None): function gelu_apprx_sigmoid (line 668) | def gelu_apprx_sigmoid(x, dtype=None): function gelu_apprx_sigmoid_dx (line 673) | def gelu_apprx_sigmoid_dx(x, dtype=None): function gelu_apprx_tanh (line 678) | def gelu_apprx_tanh(x, dtype=None): function gelu_dx (line 683) | def gelu_dx(x, dtype=None): function greater (line 688) | def greater(x, y, dtype=None): function greater_equal (line 705) | def greater_equal(x, y, dtype=None): function invert (line 737) | def invert(x, dtype=None): function is_hbm (line 754) | def is_hbm(buffer): function is_on_chip (line 759) | def is_on_chip(buffer): function is_psum (line 764) | def is_psum(buffer): function is_sbuf (line 769) | def is_sbuf(buffer): function left_shift (line 774) | def left_shift(x, y, dtype=None): function less (line 792) | def less(x, y, dtype=None): function less_equal (line 809) | def less_equal(x, y, dtype=None): function load (line 826) | def load(src, dtype=None): function load_transpose2d (line 839) | def load_transpose2d(src, dtype=None): function log (line 852) | def log(x, dtype=None): function logical_and (line 882) | def logical_and(x, y, dtype=None): function logical_not (line 900) | def logical_not(x, dtype=None): function logical_or (line 918) | def logical_or(x, y, dtype=None): function logical_xor (line 936) | def logical_xor(x, y, dtype=None): function matmul (line 954) | def matmul(x, y, transpose_x=False): function max (line 987) | def max(x, axis, dtype=None, keepdims=False): function maximum (line 1006) | def maximum(x, y, dtype=None): function mean (line 1041) | def mean(x, axis, dtype=None, keepdims=False): function min (line 1061) | def min(x, axis, dtype=None, keepdims=False): function minimum (line 1080) | def minimum(x, y, dtype=None): function mish (line 1115) | def mish(x, dtype=None): function mod (line 1120) | def mod(x, y, dtype=None): function multiply (line 1140) | def multiply(x, y, dtype=None): function ndarray (line 1175) | def ndarray(shape, dtype, buffer=MemoryRegion.sbuf, name='', address=None): function negative (line 1187) | def negative(x, dtype=None): function no_reorder (line 1220) | def no_reorder(): function not_equal (line 1249) | def not_equal(x, y, dtype=None): function num_programs (line 1266) | def num_programs(axes=0): function ones (line 1275) | def ones(shape, dtype, buffer=MemoryRegion.sbuf, name=''): function power (line 1292) | def power(x, y, dtype=None): function prod (line 1324) | def prod(x, axis, dtype=None, keepdims=False): function program_id (line 1343) | def program_id(axis=0): function program_ndim (line 1351) | def program_ndim(): function rand (line 1361) | def rand(shape, dtype, buffer=MemoryRegion.sbuf, name=''): function random_seed (line 1387) | def random_seed(seed): function reciprocal (line 1420) | def reciprocal(x, dtype=None): function relu (line 1451) | def relu(x, dtype=None): function right_shift (line 1456) | def right_shift(x, y, dtype=None): function rms_norm (line 1474) | def rms_norm(x, w, axis, n, epsilon=1e-06, dtype=None, compute_dtype=None): function rsqrt (line 1503) | def rsqrt(x, dtype=None): function sequential_range (line 1537) | def sequential_range(start, stop=None, step=1): function shared_identity_matrix (line 1570) | def shared_identity_matrix(n, dtype='uint8', dst=None): function sigmoid (line 1594) | def sigmoid(x, dtype=None): function sign (line 1599) | def sign(x, dtype=None): function silu (line 1630) | def silu(x, dtype=None): function silu_dx (line 1635) | def silu_dx(x, dtype=None): function sin (line 1640) | def sin(x, dtype=None): function softmax (line 1669) | def softmax(x, axis=-1, dtype=None): function softplus (line 1695) | def softplus(x, dtype=None): function sqrt (line 1700) | def sqrt(x, dtype=None): function square (line 1729) | def square(x, dtype=None): function static_range (line 1758) | def static_range(start, stop=None, step=1): function store (line 1785) | def store(dst, value): function subtract (line 1797) | def subtract(x, y, dtype=None): function sum (line 1832) | def sum(x, axis, dtype=None, keepdims=False): function tan (line 1851) | def tan(x, dtype=None): function tanh (line 1880) | def tanh(x, dtype=None): function transpose (line 1889) | def transpose(x, dtype=None): function trunc (line 1916) | def trunc(x, dtype=None): function var (line 1964) | def var(x, axis, dtype=None, keepdims=False): function where (line 1983) | def where(condition, x, y, dtype=None): function zeros (line 2030) | def zeros(shape, dtype, buffer=MemoryRegion.sbuf, name=''): function zeros_like (line 2047) | def zeros_like(x, dtype=None, buffer=None, name=''): FILE: nki/deep-dives/src/mxfp-matmul/mx_cpu_utils.py function validate_quantized_dtype (line 9) | def validate_quantized_dtype(dtype): function get_float32_exp (line 15) | def get_float32_exp(float_data): function get_mx_fp_max (line 23) | def get_mx_fp_max(mx_dtype): function get_mx_max_exp (line 35) | def get_mx_max_exp(mx_dtype): function get_p_contiguous_scale (line 47) | def get_p_contiguous_scale(hw_scale, data_p_size, p_offset=0): function quantize_mx_golden (line 61) | def quantize_mx_golden(in_tensor, out_quantized_dtype, ocp_saturation = ... function nc_matmul_mx_golden (line 112) | def nc_matmul_mx_golden(stationary_x4, moving_x4, stationary_scale, movi... function dequantize_mx_golden (line 185) | def dequantize_mx_golden(mx_data_x4, quantized_dtype, mx_scale): function generate_stabilized_mx_data (line 241) | def generate_stabilized_mx_data(quantized_dtype, shape, val_range=1.0): function pack_mx_data_into_x4 (line 318) | def pack_mx_data_into_x4(mx_data): function unpack_mx_data_from_x4 (line 345) | def unpack_mx_data_from_x4(packed_data, target_dtype): FILE: nki/deep-dives/src/mxfp-matmul/mx_kernel_utils.py function load_scales_scattered (line 18) | def load_scales_scattered(data_hbm, scale_hbm): function load_tensor_helper (line 82) | def load_tensor_helper(stationary_hbm, moving_hbm): function allocate_mx_tiles (line 104) | def allocate_mx_tiles(shape_unquantized, mx_dtype, alloc_scale: bool = T... function copy_data_strided (line 136) | def copy_data_strided(stationary_hbm, moving_hbm, use_tensor_copy: bool ... FILE: nki/deep-dives/src/mxfp-matmul/mx_kernels.py function kernel_offline_quantized_mx_matmul (line 19) | def kernel_offline_quantized_mx_matmul(stationary_mx_data, stationary_mx... function kernel_on_device_quantize_matmul_mx (line 83) | def kernel_on_device_quantize_matmul_mx(stationary_mx_data, stationary_m... function kernel_copy_strided_quantize_matmul_mx (line 148) | def kernel_copy_strided_quantize_matmul_mx(stationary_hbm, moving_hbm, m... function kernel_copy_strided_quantize_matmul_mx_packed_scale (line 210) | def kernel_copy_strided_quantize_matmul_mx_packed_scale(stationary_hbm, ... FILE: nki/deep-dives/src/mxfp-matmul/mx_toplevel.py function setup_compiler_workdir (line 24) | def setup_compiler_workdir(test_name): function compare_and_print_results (line 37) | def compare_and_print_results(res, golden, rtol=5e-2, atol=5e-2): function print_test_header (line 59) | def print_test_header(test_name): function run_offline_quantized_matmul_mx_test (line 68) | def run_offline_quantized_matmul_mx_test(quantized_dtype): function run_on_device_quantize_matmul_mx_test (line 105) | def run_on_device_quantize_matmul_mx_test(quantized_dtype_stationary, qu... function run_copy_strided_test (line 154) | def run_copy_strided_test(quantized_dtype, use_tensor_copy: bool = True,... FILE: nki/examples/average_pool2d/average_pool2d_jax.py function jax_average_pool_2D (line 15) | def jax_average_pool_2D(in_tensor, pool_size): FILE: nki/examples/average_pool2d/average_pool2d_nki_kernels.py function tensor_avgpool_kernel (line 15) | def tensor_avgpool_kernel(in_tensor, pool_size): function np_average_pool_2D (line 70) | def np_average_pool_2D(in_tensor, pool_size): FILE: nki/examples/fused_mamba/mamba_nki_kernels.py function mamba_v1 (line 18) | def mamba_v1(delta, u, A, B, C): function mamba_v2 (line 108) | def mamba_v2(delta, u, A, B, C): function mamba_v3 (line 198) | def mamba_v3(delta, u, A, B, C): function parse_args (line 306) | def parse_args(): FILE: nki/examples/fused_mamba/mamba_torch.py function associative_scan (line 18) | def associative_scan(deltaA, deltaB_u): function mamba_layer (line 37) | def mamba_layer(delta, A, B, u, C): function parse_args (line 56) | def parse_args(): FILE: nki/examples/getting_started_baremetal.py function nki_tensor_add_kernel (line 9) | def nki_tensor_add_kernel(a_input, b_input): FILE: nki/examples/getting_started_jax.py function nki_tensor_add_kernel (line 5) | def nki_tensor_add_kernel(a_input, b_input): FILE: nki/examples/getting_started_torch.py function nki_tensor_add_kernel (line 5) | def nki_tensor_add_kernel(a_input, b_input): FILE: nki/examples/index-case-1.py function tensor_split_kernel_ (line 6) | def tensor_split_kernel_(in_tensor): FILE: nki/examples/index-case-3.py function tensor_maxpool_kernel_ (line 5) | def tensor_maxpool_kernel_(in_tensor, sz_pool): FILE: nki/examples/layout-dynamic-loop.py function tensor_exp_kernel_ (line 6) | def tensor_exp_kernel_(in_tensor): FILE: nki/examples/layout-loop.py function tensor_exp_kernel_ (line 5) | def tensor_exp_kernel_(in_tensor): FILE: nki/examples/layout-pass.py function tensor_exp_kernel_ (line 5) | def tensor_exp_kernel_(in_tensor): FILE: nki/examples/layout-violation.py function tensor_exp_kernel_ (line 6) | def tensor_exp_kernel_(in_tensor): FILE: nki/examples/matrix_multiplication/matrix_multiplication_nki_kernels.py function nki_matmul_basic_ (line 16) | def nki_matmul_basic_(lhsT, rhs): function nki_matmul_tiled_ (line 74) | def nki_matmul_tiled_(lhsT, rhs): function nki_matmul_hoist_load_ (line 146) | def nki_matmul_hoist_load_(lhsT, rhs): function nki_matmul_block_free_dimension_ (line 231) | def nki_matmul_block_free_dimension_(lhsT, rhs): function nki_matmul_fully_optimized_ (line 349) | def nki_matmul_fully_optimized_( FILE: nki/examples/matrix_multiplication/matrix_multiplication_torch.py function check_match (line 45) | def check_match(nki_func): FILE: nki/examples/simulate/nki_simulate_example.py function add_kernel (line 11) | def add_kernel(a_ptr, b_ptr): FILE: nki/examples/tensor_addition/tensor_addition_nki_kernels.py function nki_tensor_add (line 16) | def nki_tensor_add(a_input, b_input): FILE: nki/examples/transpose2d/transpose2d_nki_kernels.py function tensor_transpose2D_kernel_ (line 15) | def tensor_transpose2D_kernel_(in_tensor, shape2D): FILE: nki/test/test_nki_isa_activation.py function nki_activation (line 16) | def nki_activation(a_tensor, b_tensor, c_tensor): class TestNkiIsaExamplesActivation (line 44) | class TestNkiIsaExamplesActivation(unittest.TestCase): method test_activation (line 45) | def test_activation(self): FILE: nki/test/test_nki_isa_affine_select.py function nki_affine_select (line 16) | def nki_affine_select(a_tensor): class TestNkiIsaExamplesAffineSelect (line 35) | class TestNkiIsaExamplesAffineSelect(unittest.TestCase): method test_affine_select (line 36) | def test_affine_select(self): FILE: nki/test/test_nki_isa_bn_stats.py function nki_bn_stats_bn_aggr_1 (line 18) | def nki_bn_stats_bn_aggr_1(a_tensor): function nki_bn_stats_bn_aggr_2 (line 42) | def nki_bn_stats_bn_aggr_2(b_tensor): class TestNkiIsaExamplesBnStatsBnAggr (line 78) | class TestNkiIsaExamplesBnStatsBnAggr(unittest.TestCase): method test_bn_stats_bn_aggr (line 79) | def test_bn_stats_bn_aggr(self): FILE: nki/test/test_nki_isa_copypredicated.py function nki_copy_predicated (line 18) | def nki_copy_predicated(predicate, on_true_tensor, on_false_tensor): class TestNkiIsaExamplescopy_predicated (line 45) | class TestNkiIsaExamplescopy_predicated(unittest.TestCase): method test_copy_predicated (line 46) | def test_copy_predicated(self): FILE: nki/test/test_nki_isa_dma_copy.py function nki_dma_copy (line 25) | def nki_dma_copy(a): function nki_indirect_load_oob_err (line 40) | def nki_indirect_load_oob_err(in_tensor): function nki_indirect_load_oob_error_negative (line 67) | def nki_indirect_load_oob_error_negative(in_tensor): function nki_indirect_load_oob_skip (line 96) | def nki_indirect_load_oob_skip(in_tensor): function nki_indirect_store_rmw (line 125) | def nki_indirect_store_rmw(in_tensor): function nki_indirect_store_oob_err (line 153) | def nki_indirect_store_oob_err(in_tensor): function nki_indirect_store_oob_err_negative (line 181) | def nki_indirect_store_oob_err_negative(in_tensor): function nki_indirect_store_oob_skip (line 211) | def nki_indirect_store_oob_skip(in_tensor): function nki_dma_copy_swdge (line 240) | def nki_dma_copy_swdge(in_tensor): function nki_dma_copy_hwdge (line 259) | def nki_dma_copy_hwdge(in_tensor): class TestNkiIsaExamplesTensorCopy (line 281) | class TestNkiIsaExamplesTensorCopy(unittest.TestCase): method test_tensor_copy (line 282) | def test_tensor_copy(self): method test_indirect_load_oob_err (line 291) | def test_indirect_load_oob_err(self): method test_indirect_load_oob_err_negative (line 302) | def test_indirect_load_oob_err_negative(self): method test_indirect_load_oob_skip (line 313) | def test_indirect_load_oob_skip(self): method test_indirect_store_rmw (line 327) | def test_indirect_store_rmw(self): method test_indirect_store_oob_err (line 340) | def test_indirect_store_oob_err(self): method test_indirect_store_oob_err_negative (line 353) | def test_indirect_store_oob_err_negative(self): method test_indirect_store_oob_skip (line 364) | def test_indirect_store_oob_skip(self): method test_dma_copy_swdge (line 377) | def test_dma_copy_swdge(self): method test_dma_copy_hwdge (line 383) | def test_dma_copy_hwdge(self): FILE: nki/test/test_nki_isa_dma_transpose.py function nki_dma_transpose_2d_hbm2sb (line 28) | def nki_dma_transpose_2d_hbm2sb(a): function nki_dma_transpose_2d_sb2sb (line 40) | def nki_dma_transpose_2d_sb2sb(a): function nki_dma_transpose_2d_hbm2sb_dge_xbar (line 53) | def nki_dma_transpose_2d_hbm2sb_dge_xbar(a): function nki_dma_transpose_2d_sb2sb_dge_xbar (line 65) | def nki_dma_transpose_2d_sb2sb_dge_xbar(a): function nki_dma_gather_transpose_3d_hbm2sb (line 78) | def nki_dma_gather_transpose_3d_hbm2sb(src_tensor, idx_tensor): function nki_dma_gather_transpose_3d_sb2sb (line 99) | def nki_dma_gather_transpose_3d_sb2sb(src_tensor, idx_tensor): class TestNkiIsaExamplesDmaTranspose (line 115) | class TestNkiIsaExamplesDmaTranspose(unittest.TestCase): method test_dma_transpose_2d (line 116) | def test_dma_transpose_2d(self): method test_dma_transpose_indirect (line 134) | def test_dma_transpose_indirect(self): FILE: nki/test/test_nki_isa_dropout.py function nki_dropout (line 16) | def nki_dropout(a_tensor, b_tensor): function nki_dropout_scalar (line 36) | def nki_dropout_scalar(in_tensor): class TestNkiIsaExamplesDropout (line 55) | class TestNkiIsaExamplesDropout(unittest.TestCase): method test_dropout (line 56) | def test_dropout(self): method test_dropout_scalar (line 67) | def test_dropout_scalar(self): FILE: nki/test/test_nki_isa_iota.py function nki_iota (line 17) | def nki_iota(): class TestNkiIsaExamplesIota (line 82) | class TestNkiIsaExamplesIota(unittest.TestCase): method test_iota (line 83) | def test_iota(self): FILE: nki/test/test_nki_isa_local_gather.py function nki_local_gather (line 18) | def nki_local_gather(src_buffer, index, num_elem_per_idx, num_valid_indi... class TestNkiIsaExamplesLocalGather (line 41) | class TestNkiIsaExamplesLocalGather(unittest.TestCase): method test_local_gather (line 42) | def test_local_gather(self): FILE: nki/test/test_nki_isa_max8.py function nki_max8 (line 17) | def nki_max8(): class TestNkiIsaExamplesMax8 (line 34) | class TestNkiIsaExamplesMax8(unittest.TestCase): method test_max8 (line 35) | def test_max8(self): FILE: nki/test/test_nki_isa_memset.py function nki_memset (line 22) | def nki_memset(): class TestNkiIsaExamplesMemset (line 38) | class TestNkiIsaExamplesMemset(unittest.TestCase): method test_memset (line 39) | def test_memset(self): FILE: nki/test/test_nki_isa_nc_find_index8.py function nki_max_index8 (line 17) | def nki_max_index8(): class TestNkiIsaExamplesMaxIndex8 (line 43) | class TestNkiIsaExamplesMaxIndex8(unittest.TestCase): method test_max_index8 (line 44) | def test_max_index8(self): FILE: nki/test/test_nki_isa_nc_match_replace8.py function nki_nc_match_replace8 (line 18) | def nki_nc_match_replace8(): function nki_nc_match_replace_indices8 (line 39) | def nki_nc_match_replace_indices8(in_tensor: nt.tensor, imm: np.float32): function nki_nc_match_replace_indices8_mask (line 69) | def nki_nc_match_replace_indices8_mask(in_tensor: nt.tensor, imm: np.flo... function nki_nc_match_replace_indices8_3d (line 102) | def nki_nc_match_replace_indices8_3d(data_tensor: nt.tensor): function nki_nc_match_replace_indices8_3d_inplace (line 142) | def nki_nc_match_replace_indices8_3d_inplace(data_tensor: nt.tensor): function match_and_get_index (line 179) | def match_and_get_index(data, vals): function get_replaced_output_and_max_indices (line 194) | def get_replaced_output_and_max_indices(a, imm=0): class TestNkiIsaExamplesMatchReplace8 (line 214) | class TestNkiIsaExamplesMatchReplace8(unittest.TestCase): method test_nc_match_replace8 (line 215) | def test_nc_match_replace8(self): method test_nc_match_replace_indices8 (line 230) | def test_nc_match_replace_indices8(self): method test_nc_match_replace_indices8_mask (line 241) | def test_nc_match_replace_indices8_mask(self): method test_nc_match_replace_indices8_3d (line 251) | def test_nc_match_replace_indices8_3d(self): method test_nc_match_replace_indices8_3d_inplace (line 261) | def test_nc_match_replace_indices8_3d_inplace(self): FILE: nki/test/test_nki_isa_nc_matmul.py function nki_nc_matmul (line 22) | def nki_nc_matmul(a_tensor, b_tensor, d_tensor, e_tensor, g_tensor, h_te... function nki_nc_matmul_double_row_gen3 (line 83) | def nki_nc_matmul_double_row_gen3(a_input, b_input): class TestNkiIsaExamplesNcMatmul (line 104) | class TestNkiIsaExamplesNcMatmul(unittest.TestCase): method test_nc_matmul (line 105) | def test_nc_matmul(self): method test_double_row_gen3 (line 127) | def test_double_row_gen3(self): FILE: nki/test/test_nki_isa_nc_stream_shuffle.py function nki_nc_stream_shuffle (line 18) | def nki_nc_stream_shuffle(in_tensor): function nki_nc_stream_shuffle_broadcast_partition (line 40) | def nki_nc_stream_shuffle_broadcast_partition(in_tensor): function nki_nc_stream_shuffle_broadcast_mask (line 63) | def nki_nc_stream_shuffle_broadcast_mask(in_tensor): class TestNkiIsaExamplesStreamShuffle (line 87) | class TestNkiIsaExamplesStreamShuffle(unittest.TestCase): method test_stream_shuffle (line 88) | def test_stream_shuffle(self): method test_broadcast_partition (line 94) | def test_broadcast_partition(self): method test_broadcast_mask (line 100) | def test_broadcast_mask(self): FILE: nki/test/test_nki_isa_nc_transpose.py function nki_nc_transpose (line 23) | def nki_nc_transpose(a_tensor, b_tensor): class TestNkiIsaExamplesSbTranspose (line 63) | class TestNkiIsaExamplesSbTranspose(unittest.TestCase): method test_nc_transpose (line 64) | def test_nc_transpose(self): FILE: nki/test/test_nki_isa_partition_reduce.py function nki_par_reduce (line 23) | def nki_par_reduce(a_tensor, b_tensor): function nki_par_reduce_nd_b (line 36) | def nki_par_reduce_nd_b(a_tensor, b_tensor): class TestNkiIsaExamplesPartitionReduce (line 50) | class TestNkiIsaExamplesPartitionReduce(unittest.TestCase): method test_par_reduce_nd (line 51) | def test_par_reduce_nd(self): method test_par_reduce_nd_b (line 58) | def test_par_reduce_nd_b(self): FILE: nki/test/test_nki_isa_range_select.py function nki_range_select_example (line 19) | def nki_range_select_example(on_true, bound0, bound1, compare_op0, compa... function nki_range_select_chaining (line 59) | def nki_range_select_chaining(on_true, bound0, bound1, compare_op0, comp... class TestNkiIsaExamplesRangeSelect (line 150) | class TestNkiIsaExamplesRangeSelect(unittest.TestCase): method test_range_select_example (line 151) | def test_range_select_example(self): method test_range_select_chaining (line 177) | def test_range_select_chaining(self): FILE: nki/test/test_nki_isa_reciprocal.py function reciprocal_kernel (line 22) | def reciprocal_kernel(in_tensor): class TestNkiExampleNisaReciprocal (line 35) | class TestNkiExampleNisaReciprocal(unittest.TestCase): method test_nisa_reciprocal (line 36) | def test_nisa_reciprocal(self): FILE: nki/test/test_nki_isa_reduce.py function nki_reduce (line 22) | def nki_reduce(a_tensor): class TestNkiIsaExamplesReduce (line 46) | class TestNkiIsaExamplesReduce(unittest.TestCase): method test_reduce (line 47) | def test_reduce(self): FILE: nki/test/test_nki_isa_select_reduce.py function nki_select_reduce_basic (line 16) | def nki_select_reduce_basic(predicate_data, on_true_data): function nki_select_reduce_with_reduction (line 49) | def nki_select_reduce_with_reduction(predicate_data, on_true_data, on_fa... function nki_select_reduce_reverse_pred (line 90) | def nki_select_reduce_reverse_pred(predicate_data, on_true_data): class TestNkiIsaExamplesSelectReduce (line 123) | class TestNkiIsaExamplesSelectReduce(unittest.TestCase): method test_select_reduce_basic (line 124) | def test_select_reduce_basic(self): method test_select_reduce_with_reduction (line 141) | def test_select_reduce_with_reduction(self): method test_select_reduce_reverse_pred (line 158) | def test_select_reduce_reverse_pred(self): FILE: nki/test/test_nki_isa_sequence_bounds.py function nki_sequence_bounds (line 17) | def nki_sequence_bounds(segment_ids): class TestNkiIsaExamplesSequenceBounds (line 44) | class TestNkiIsaExamplesSequenceBounds(unittest.TestCase): method test_sequence_bounds (line 45) | def test_sequence_bounds(self): FILE: nki/test/test_nki_isa_tensor_copy.py function nki_tensor_copy (line 24) | def nki_tensor_copy(in_tensor): class TestNkiIsaExamplesTensorCopy (line 40) | class TestNkiIsaExamplesTensorCopy(unittest.TestCase): method test_tensor_copy (line 41) | def test_tensor_copy(self): FILE: nki/test/test_nki_isa_tensor_scalar.py function nki_tensor_scalar (line 22) | def nki_tensor_scalar(a_tensor, c_tensor, e_tensor, f_tensor): class TestNkiIsaExamplesTensorScalar (line 79) | class TestNkiIsaExamplesTensorScalar(unittest.TestCase): method test_tensor_scalar (line 80) | def test_tensor_scalar(self): FILE: nki/test/test_nki_isa_tensor_scalar_cumulative.py function nki_tensor_scalar_cumulative_scalar (line 15) | def nki_tensor_scalar_cumulative_scalar( function nki_tensor_scalar_cumulative_vector (line 54) | def nki_tensor_scalar_cumulative_vector( function nki_tensor_scalar_cumulative_chain (line 95) | def nki_tensor_scalar_cumulative_chain( function nki_tensor_scan (line 145) | def nki_tensor_scan(src_data, op, initial): class TestNkiIsaExamplesTensorScalarCumulative (line 176) | class TestNkiIsaExamplesTensorScalarCumulative(unittest.TestCase): method test_tensor_scalar_cumulative_scalar1 (line 178) | def test_tensor_scalar_cumulative_scalar1(self): method test_tensor_scalar_cumulative_scalar2 (line 192) | def test_tensor_scalar_cumulative_scalar2(self): method test_tensor_scalar_cumulative_vector1 (line 206) | def test_tensor_scalar_cumulative_vector1(self): method test_tensor_scalar_cumulative_vector2 (line 221) | def test_tensor_scalar_cumulative_vector2(self): method test_tensor_scalar_cumulative_vector3 (line 236) | def test_tensor_scalar_cumulative_vector3(self): method test_tensor_scalar_cumulative_load_reduce1 (line 251) | def test_tensor_scalar_cumulative_load_reduce1(self): method test_tensor_scalar_cumulative_load_reduce2 (line 273) | def test_tensor_scalar_cumulative_load_reduce2(self): method test_tensor_scalar_cumulative_load_reduce3 (line 295) | def test_tensor_scalar_cumulative_load_reduce3(self): method test_tensor_scalar_cumulative_chain1 (line 317) | def test_tensor_scalar_cumulative_chain1(self): method test_tensor_scalar_cumulative_chain2 (line 336) | def test_tensor_scalar_cumulative_chain2(self): method test_tensor_scan (line 356) | def test_tensor_scan(self): FILE: nki/test/test_nki_isa_tensor_tensor.py function nki_tensor_tensor (line 23) | def nki_tensor_tensor(a_tensor, b_tensor): class TestNkiIsaExamplesTensorTensor (line 43) | class TestNkiIsaExamplesTensorTensor(unittest.TestCase): method test_tensor_tensor (line 44) | def test_tensor_tensor(self): FILE: nki/test/test_nki_isa_tensor_tensor_scan.py function nki_tensor_tensor_scan (line 23) | def nki_tensor_tensor_scan(a_tensor, b_tensor): class TestNkiIsaExamplesTensorTensorScan (line 49) | class TestNkiIsaExamplesTensorTensorScan(unittest.TestCase): method test_tensor_tensor_scan (line 50) | def test_tensor_tensor_scan(self): FILE: nki/test/test_nki_mask.py function nki_mask (line 22) | def nki_mask(in_tensor): class TestNkiIsaExamplesMask (line 41) | class TestNkiIsaExamplesMask(unittest.TestCase): method test_mask (line 42) | def test_mask(self): FILE: nki/test/test_nki_memory_semantics.py function simple_demo_kernel (line 8) | def simple_demo_kernel(a_ptr): class TestNkiMemorySemantics (line 24) | class TestNkiMemorySemantics(unittest.TestCase): method test_simulate_kernel (line 25) | def test_simulate_kernel(self): FILE: nki/test/test_nki_nl_add.py function add_tensors (line 20) | def add_tensors(a_tensor, b_tensor): function add_tensor_scalar (line 34) | def add_tensor_scalar(a_tensor): function add_broadcast_free_dim (line 48) | def add_broadcast_free_dim(a_tensor, b_tensor): function add_broadcast_par_dim (line 62) | def add_broadcast_par_dim(a_tensor, b_tensor): function add_broadcast_both_dims (line 76) | def add_broadcast_both_dims(a_tensor, b_tensor): function add_broadcast_each_dims (line 90) | def add_broadcast_each_dims(a_tensor, b_tensor): class TestNkiNlExampleAdd (line 103) | class TestNkiNlExampleAdd(unittest.TestCase): method test_add (line 104) | def test_add(self): method test_add_tensor_scalar (line 114) | def test_add_tensor_scalar(self): method test_add_broadcast_free_dim (line 124) | def test_add_broadcast_free_dim(self): method test_add_broadcast_par_dim (line 134) | def test_add_broadcast_par_dim(self): method test_add_broadcast_both_dims (line 144) | def test_add_broadcast_both_dims(self): method test_add_broadcast_each_dims (line 154) | def test_add_broadcast_each_dims(self): FILE: nki/test/test_nki_nl_atomic_rmw.py function atomic_rmw_indirect_indices (line 22) | def atomic_rmw_indirect_indices(in_tensor, indices_tensor, value_tensor): class TestNkiExampleNlLoad (line 54) | class TestNkiExampleNlLoad(unittest.TestCase): method test_atomic_rmw_indirect_indices (line 55) | def test_atomic_rmw_indirect_indices(self): FILE: nki/test/test_nki_nl_broadcast.py function test_nl_broadcast (line 20) | def test_nl_broadcast(in_tensor): class TestNkiExampleNlBroadcast (line 43) | class TestNkiExampleNlBroadcast(unittest.TestCase): method test_nl_broadcast_to (line 44) | def test_nl_broadcast_to(self): FILE: nki/test/test_nki_nl_dslice.py function example_kernel (line 17) | def example_kernel(in_tensor): class TestNkiExampleNlLoad (line 32) | class TestNkiExampleNlLoad(unittest.TestCase): method test_nl_load (line 33) | def test_nl_load(self): FILE: nki/test/test_nki_nl_gather_flattened.py function nki_gather_flattened (line 16) | def nki_gather_flattened(): class TestNkiExamplesGather (line 47) | class TestNkiExamplesGather(unittest.TestCase): method test_gather_flattened (line 48) | def test_gather_flattened(self): FILE: nki/test/test_nki_nl_load_store.py function example_kernel (line 22) | def example_kernel(in_tensor, use_scalar=False): function example_load_store_b (line 49) | def example_load_store_b(in_tensor): class TestNkiExampleNlLoad (line 71) | class TestNkiExampleNlLoad(unittest.TestCase): method test_nl_load (line 72) | def test_nl_load(self): method test_nl_load_scalar (line 78) | def test_nl_load_scalar(self): method test_load_store_3d (line 84) | def test_load_store_3d(self): FILE: nki/test/test_nki_nl_load_store_indirect.py function example_indirect_load_1 (line 24) | def example_indirect_load_1(data_tensor, idx_tensor): function example_indirect_load_2 (line 48) | def example_indirect_load_2(data_tensor): function example_indirect_save_1 (line 74) | def example_indirect_save_1(in_tensor, idx_tensor): function example_indirect_save_2 (line 98) | def example_indirect_save_2(in_tensor): class TestNkiExampleNlLoadStoreIndirect (line 122) | class TestNkiExampleNlLoadStoreIndirect(unittest.TestCase): method test_indirect_load_1 (line 123) | def test_indirect_load_1(self): method test_indirect_load_2 (line 131) | def test_indirect_load_2(self): method test_indirect_save_1 (line 139) | def test_indirect_save_1(self): method test_indirect_save_2 (line 146) | def test_indirect_save_2(self): FILE: nki/test/test_nki_nl_load_transpose2d.py function example_kernel_0 (line 23) | def example_kernel_0(in_tensor): function example_kernel_1 (line 38) | def example_kernel_1(in_tensor): class TestNkiExampleNlLoadTranspose2d (line 57) | class TestNkiExampleNlLoadTranspose2d(unittest.TestCase): method test_dma_transpose_load_0 (line 58) | def test_dma_transpose_load_0(self): method test_dma_transpose_load_1 (line 67) | def test_dma_transpose_load_1(self): FILE: nki/test/test_nki_nl_mgrid.py function example_kernel (line 22) | def example_kernel(in_tensor): function example_kernel_1 (line 36) | def example_kernel_1(in_tensor): class TestNkiExampleNlLoad (line 48) | class TestNkiExampleNlLoad(unittest.TestCase): method test_nl_load (line 49) | def test_nl_load(self): method test_nl_load_1 (line 56) | def test_nl_load_1(self): FILE: nki/test/test_nki_simulate_kernel.py function print_kernel (line 14) | def print_kernel(a_tensor): class TestNkiIsaExamplesSimulateKernel (line 30) | class TestNkiIsaExamplesSimulateKernel(unittest.TestCase): method test_simulate_kernel (line 31) | def test_simulate_kernel(self): FILE: nki/test/test_nki_spmd_grid.py function nki_spmd_kernel (line 15) | def nki_spmd_kernel(a): class TestNkiIsaExamplesTensorCopy (line 31) | class TestNkiIsaExamplesTensorCopy(unittest.TestCase): method test_spmd_grid (line 32) | def test_spmd_grid(self): FILE: nki/test/test_psum_modulo_alloc.py function num_elems (line 7) | def num_elems(shape): function linearize (line 10) | def linearize(shape, indices): function modulo_allocate_func (line 13) | def modulo_allocate_func(base, allocate_shape, scale): function mod_alloc (line 24) | def mod_alloc(base_addr: int, *, function allocated_loop_transpose (line 48) | def allocated_loop_transpose(a_ptr, tp_ptr): class TestNkiPSUMModuloAllocation (line 84) | class TestNkiPSUMModuloAllocation(unittest.TestCase): method test_simulate_kernel (line 85) | def test_simulate_kernel(self): FILE: nki/test/test_sbuf_modulo_alloc.py function num_elms (line 7) | def num_elms(shape): function linearize (line 10) | def linearize(shape, indices): function modulo_allocate_func (line 13) | def modulo_allocate_func(base, allocate_shape, scale): function mod_alloc (line 24) | def mod_alloc(base_addr: int, *, function allocated_loop_transpose (line 45) | def allocated_loop_transpose(a_ptr, tp_ptr): class TestNkiSBUFModuloAllocation (line 82) | class TestNkiSBUFModuloAllocation(unittest.TestCase): method test_simulate_kernel (line 83) | def test_simulate_kernel(self): FILE: src/benchmark/tensorflow/distilbert-base-uncased-finetuned-sst-2-english_benchmark.py function get_batch (line 17) | def get_batch(tokenizer, sequence_length, batch_size): FILE: src/benchmark/tensorflow/distilbert-base-uncased-finetuned-sst-2-english_compile.py function get_batch (line 17) | def get_batch(tokenizer, sequence_length, batch_size): FILE: src/examples/mxnet/data_parallel/benchmark_utils.py class Results (line 6) | class Results(): method __init__ (line 8) | def __init__(self, batch_size, num_cores=1): method add_result (line 15) | def add_result(self, latency_array, end_times, start_times): method report (line 20) | def report(self, f, window_size=1): FILE: src/examples/mxnet/data_parallel/parallel.py function consumer (line 9) | def consumer(model_file, sample_input, input_queue, result_queue): class NeuronSimpleDataParallel (line 35) | class NeuronSimpleDataParallel: method __init__ (line 36) | def __init__(self, model_file, num_neuron_cores, sample_input): method start_continuous_inference (line 60) | def start_continuous_inference(self): method warmup (line 64) | def warmup(self, batch): method infer (line 67) | def infer(self, batch): method stop (line 72) | def stop(self): method add_result (line 76) | def add_result(self, callback_fn): method add_all_results (line 83) | def add_all_results(self, callback_fn): FILE: src/examples/pytorch/bert_tutorial/bert_benchmark_utils.py class BertTestDataset (line 11) | class BertTestDataset(torch.utils.data.Dataset): method __init__ (line 14) | def __init__(self, tsv_file, tokenizer, max_length=128, transform=None): method __len__ (line 33) | def __len__(self): method __getitem__ (line 36) | def __getitem__(self, idx): class BertResults (line 61) | class BertResults(): method __init__ (line 63) | def __init__(self, batch_size, num_cores=1): method add_result (line 72) | def add_result(self, correct_count, inference_count, latency_array, en... method report (line 80) | def report(self, f, window_size=1): FILE: src/examples/pytorch/bert_tutorial/parallel.py function consumer (line 9) | def consumer(model, input_queue): class NeuronSimpleDataParallel (line 25) | class NeuronSimpleDataParallel(): method __init__ (line 27) | def __init__(self, model_file, num_neuron_cores, batch_size=1): method eval (line 43) | def eval(self): method train (line 47) | def train(self): method start_continuous_inference (line 51) | def start_continuous_inference(self): method infer (line 55) | def infer(self, batch, input_id, callback_fn): method stop (line 58) | def stop(self): FILE: src/examples/pytorch/byoc_sm_bert_tutorial/code/inference.py function model_fn (line 11) | def model_fn(model_dir): function input_fn (line 20) | def input_fn(serialized_input_data, content_type=JSON_CONTENT_TYPE): function predict_fn (line 31) | def predict_fn(input_data, models): function output_fn (line 51) | def output_fn(prediction_output, accept=JSON_CONTENT_TYPE): FILE: src/examples/pytorch/libtorch_demo/bert_neuronx/detect_instance.py function get_instance_type (line 18) | def get_instance_type() -> str: function get_num_neuroncores (line 38) | def get_num_neuroncores(instance_type: Optional[str] = None) -> int: function get_num_neuroncores_v3 (line 59) | def get_num_neuroncores_v3() -> int: FILE: src/examples/pytorch/libtorch_demo/example_app/example_app.cpp function Input (line 27) | Input get_input(const std::string& sentence_1, const std::string& senten... function get_batch (line 73) | std::vector get_batch(const std::vector& inputs) function sanity_check (line 92) | int sanity_check(const std::string& model_filename) function benchmark (line 142) | void benchmark(const std::string& model_filename, const std::vector tuple: method worker_thread (line 199) | def worker_thread(self, worker_id): method run (line 226) | def run(self): method stop (line 267) | def stop(self): method results (line 279) | def results(self) -> dict: class StatsThread (line 306) | class StatsThread(threading.Thread): method __init__ (line 309) | def __init__(self, interval: float): method run (line 316) | def run(self): method join (line 323) | def join(self, **kwargs): function _combine_results (line 328) | def _combine_results(results: List[dict]) -> dict: function _get_num_workers (line 351) | def _get_num_workers(pipeline_size: int) -> int: function get_instance_type (line 356) | def get_instance_type() -> str: function _get_cost_per_hour (line 371) | def _get_cost_per_hour(instance_type: str) -> float: function _get_max_neuroncores (line 386) | def _get_max_neuroncores(instance_type: str = None) -> int: function _get_num_gpus (line 404) | def _get_num_gpus(instance_type: str = None) -> int: function _get_num_devices (line 430) | def _get_num_devices(device_type: str, instance_type: str = None) -> int: function _sanitize_inputs (line 443) | def _sanitize_inputs(inputs, batch_sizes: Union[int, List[int]], dataset... function set_verbosity (line 474) | def set_verbosity(verbosity: int): function compile (line 488) | def compile( function run_benchmarker (line 673) | def run_benchmarker(benchmarker, duration, pipe=None): function _run_benchmarker_new_interpreter (line 703) | def _run_benchmarker_new_interpreter(benchmarker, duration): function _run_benchmarkers_multiprocess (line 797) | def _run_benchmarkers_multiprocess( function _run_benchmarkers_multithreaded (line 838) | def _run_benchmarkers_multithreaded( function run_benchmarkers (line 857) | def run_benchmarkers( function _get_env_setup_fn (line 896) | def _get_env_setup_fn(benchmarker_id: int, benchmarker_config: dict, env... function _get_setup_fn (line 958) | def _get_setup_fn(benchmarker_id: int, benchmarker_config: dict, setup_fn): function _get_device_id (line 970) | def _get_device_id(benchmarker_id: int, benchmarker_config: dict): function benchmark (line 987) | def benchmark( FILE: src/neuronperf/src/neuronperf/cpu/cpu.py class DummyModel (line 18) | class DummyModel: method __call__ (line 19) | def __call__(self, x): function benchmark (line 25) | def benchmark(model_class, inputs, *args, **kwargs): FILE: src/neuronperf/src/neuronperf/logging.py function _get_stream_handlers (line 15) | def _get_stream_handlers(level = logging.DEBUG): FILE: src/neuronperf/src/neuronperf/model_index.py function generate_id (line 31) | def generate_id(length: int = 8): function generate_name (line 38) | def generate_name(model_name: str): function _create (line 43) | def _create(model_name: str, compile_info: list) -> dict: function create (line 56) | def create( function delete (line 93) | def delete(filename: str): function copy (line 117) | def copy(old_index: Union[str, dict], new_index: str, new_dir: str) -> str: function move (line 138) | def move(old_index: str, new_index: str, new_dir: str) -> str: function _sanitize (line 145) | def _sanitize(*model_indexes: Union[str, dict]) -> List[dict]: function append (line 176) | def append(*model_indexes: Union[str, dict]) -> dict: function save (line 216) | def save(model_index: dict, filename: str = None, root_dir=None) -> str: function load (line 233) | def load(filename) -> dict: function filter_configs (line 262) | def filter_configs(configs, filter_name, filter_values) -> List: function filter (line 276) | def filter(index: Union[str, dict], **kwargs) -> dict: FILE: src/neuronperf/src/neuronperf/mxnet/mxnet.py class _MXNetModelWrapper (line 28) | class _MXNetModelWrapper: method __init__ (line 29) | def __init__(self, device_id, sym, args, aux): method __call__ (line 38) | def __call__(self, inputs): function change_dir (line 66) | def change_dir(new_dir): function _load_fn (line 75) | def _load_fn(model_filename, **kwargs): function _compile_fn (line 81) | def _compile_fn(model, example_inputs, models_dir, model_name, **kwargs): function compile (line 100) | def compile(model, inputs, *args, **kwargs): function benchmark (line 104) | def benchmark(model_filename, inputs, *args, **kwargs): FILE: src/neuronperf/src/neuronperf/reporting.py function _validate_config (line 84) | def _validate_config(config): function _validate_results (line 95) | def _validate_results(results): function _get_report_name (line 106) | def _get_report_name(model_name: str) -> str: function get_report (line 110) | def get_report( function get_reports (line 243) | def get_reports(results, cost_per_hour: float = None) -> List[dict]: function print_reports (line 267) | def print_reports(reports: List[dict], cols=PRINT_COLS, sort_by="through... function write_csv (line 298) | def write_csv(reports: List[dict], filename: str = None, cols=CSV_COLS): function write_json (line 325) | def write_json(reports: List[dict], filename: str = None): FILE: src/neuronperf/src/neuronperf/scripts/run_benchmark_file.py function main (line 6) | def main(): FILE: src/neuronperf/src/neuronperf/tensorflow/tensorflow.py function _load_fn (line 22) | def _load_fn(model_file, **kwargs): function _compile_fn (line 34) | def _compile_fn(model, inputs, models_dir, model_name, **kwargs): function compile (line 64) | def compile(model, inputs, *args, **kwargs): function benchmark (line 68) | def benchmark(model_filename, inputs, *args, **kwargs): FILE: src/neuronperf/src/neuronperf/timing.py function timestamp_convert (line 29) | def timestamp_convert(timestamps, class Timer (line 47) | class Timer(): method __init__ (line 48) | def __init__(self, method __enter__ (line 56) | def __enter__(self): method __exit__ (line 59) | def __exit__(self, type, value, traceback): method __delitem__ (line 62) | def __delitem__(self, index): method __getitem__ (line 66) | def __getitem__(self, index): method __iter__ (line 72) | def __iter__(self): method __len__ (line 75) | def __len__(self): method __str__ (line 78) | def __str__(self): method start (line 81) | def start(self): method stop (line 87) | def stop(self): method next (line 92) | def next(self): method reset (line 97) | def reset(self): method insert (line 101) | def insert(self, timestamps: tuple, time_unit: str): method start_timestamps (line 112) | def start_timestamps(self, time_unit: str = None): method end_timestamps (line 116) | def end_timestamps(self, time_unit: str = None): method timestamps (line 120) | def timestamps(self, time_unit: str = None): method durations (line 128) | def durations(self, time_unit: str = None): method total_duration (line 137) | def total_duration(self, time_unit: str = None): method avg (line 147) | def avg(self, time_unit: str = None): FILE: src/neuronperf/src/neuronperf/torch/torch.py function _compile_fn (line 24) | def _compile_fn(model, example_inputs, models_dir, model_name, **kwargs): function _load_fn (line 45) | def _load_fn(model_filename, **kwargs): function _class_load_fn (line 53) | def _class_load_fn(model_class, **kwargs): function compile (line 59) | def compile(model, inputs, *args, **kwargs): function _get_dataset_loader_fn (line 64) | def _get_dataset_loader_fn(dataset, loop): function benchmark (line 100) | def benchmark(model_filename, inputs, *args, dataset_inputs=False, loop_... FILE: src/neuronperf/test/test_neuronperf.py function test_timer (line 16) | def test_timer(): function test_timestamp_convert (line 52) | def test_timestamp_convert(): function test_model_index_create_from_file (line 65) | def test_model_index_create_from_file(): function test_model_index_create_delete_save_load (line 75) | def test_model_index_create_delete_save_load(): function test_model_index_copy (line 96) | def test_model_index_copy(): function test_model_index_copy_2 (line 125) | def test_model_index_copy_2(): function test_model_index_move (line 154) | def test_model_index_move(): function test_model_index_append (line 181) | def test_model_index_append(): function test_model_index_filter (line 198) | def test_model_index_filter(): function test_benchmarker (line 219) | def test_benchmarker(): function test_benchmark_multithread (line 234) | def test_benchmark_multithread(): function test_benchmark_multithread_2 (line 264) | def test_benchmark_multithread_2(): function test_benchmark_multiprocess (line 288) | def test_benchmark_multiprocess(): function test_benchmark_multiinterpreter (line 315) | def test_benchmark_multiinterpreter(): function test_reporting (line 337) | def test_reporting():