SYMBOL INDEX (3167 symbols across 352 files)

FILE: android/MLCChat/bundle_weight.py
  function main (line 12) | def main(apk_path: Path, package_output_path: Path):
  function _parse_apk_path (line 42) | def _parse_apk_path(path: str) -> Path:

FILE: android/MLCEngineExample/bundle_weight.py
  function main (line 12) | def main(apk_path: Path, package_output_path: Path):
  function _parse_apk_path (line 42) | def _parse_apk_path(path: str) -> Path:

FILE: android/mlc4j/prepare_libs.py
  function run_cmake (line 16) | def run_cmake(mlc4j_path: Path):
  function run_cmake_build (line 56) | def run_cmake_build():
  function run_cmake_install (line 71) | def run_cmake_install():
  function main (line 86) | def main(mlc_llm_source_dir: Path):

FILE: android/mlc4j/src/cpp/tvm_runtime.h
  function namespace (line 49) | namespace tvm {

FILE: android/mlc4j/src/main/java/ai/mlc/mlcllm/JSONFFIEngine.java
  class JSONFFIEngine (line 9) | public class JSONFFIEngine {
    method JSONFFIEngine (line 23) | public JSONFFIEngine() {
    method initBackgroundEngine (line 39) | public void initBackgroundEngine(KotlinFunction callback) {
    method reload (line 55) | public void reload(String engineConfigJSONStr) {
    method chatCompletion (line 59) | public void chatCompletion(String requestJSONStr, String requestId) {
    method runBackgroundLoop (line 63) | public void runBackgroundLoop() {
    method runBackgroundStreamBackLoop (line 67) | public void runBackgroundStreamBackLoop() {
    method exitBackgroundLoop (line 71) | public void exitBackgroundLoop() {
    method unload (line 75) | public void unload() {
    type KotlinFunction (line 79) | public interface KotlinFunction {
      method invoke (line 80) | void invoke(String arg);
    method reset (line 83) | public void reset() {

FILE: cpp/json_ffi/conv_template.cc
  type mlc (line 8) | namespace mlc {
    type llm (line 9) | namespace llm {
      type json_ffi (line 10) | namespace json_ffi {
        function ModelVisionConfig (line 16) | ModelVisionConfig ModelVisionConfig::FromJSON(const tvm::ffi::json...
        function ModelConfig (line 85) | ModelConfig ModelConfig::FromJSON(const tvm::ffi::json::Object& js...
        function MessagePlaceholders (line 147) | MessagePlaceholders MessagePlaceholderFromString(const std::string...
        function TryGetFunctionCallingString (line 193) | Result<std::optional<std::string>> TryGetFunctionCallingString(
        function CreatePrompt (line 224) | Result<std::vector<Data>> CreatePrompt(const Conversation& conv,

FILE: cpp/json_ffi/conv_template.h
  function namespace (line 21) | namespace llm {

FILE: cpp/json_ffi/image_utils.cc
  type mlc (line 9) | namespace mlc {
    type llm (line 10) | namespace llm {
      type json_ffi (line 11) | namespace json_ffi {
        class MemoryBufferStream (line 15) | class MemoryBufferStream : public tvm::support::Stream {
          method MemoryBufferStream (line 20) | MemoryBufferStream(const char* data, size_t size) : data_(data),...
          method Read (line 22) | size_t Read(void* ptr, size_t size) override {
          method Write (line 35) | size_t Write(const void* ptr, size_t size) override {
        function Base64DecodedSize (line 46) | size_t Base64DecodedSize(const std::string& base64_str) {
        function LoadImageFromBase64 (line 58) | Result<Tensor> LoadImageFromBase64(const std::string& base64_str) {
        function Tensor (line 78) | Tensor ClipPreprocessor(Tensor image_data, int target_size, DLDevi...

FILE: cpp/json_ffi/image_utils.h
  function namespace (line 16) | namespace mlc {

FILE: cpp/json_ffi/json_ffi_engine.cc
  type mlc (line 15) | namespace mlc {
    type llm (line 16) | namespace llm {
      type json_ffi (line 17) | namespace json_ffi {
        class JSONFFIEngineImpl (line 154) | class JSONFFIEngineImpl : public JSONFFIEngine, public ffi::Module...
          method InitBackgroundEngine (line 170) | void InitBackgroundEngine(int device_type, int device_id,
          method Reload (line 189) | void Reload(String engine_config_json_str) {
          method Unload (line 209) | void Unload() { this->engine_->Unload(); }
          method Reset (line 211) | void Reset() { this->engine_->Reset(); }
          method RunBackgroundLoop (line 213) | void RunBackgroundLoop() { this->engine_->RunBackgroundLoop(); }
          method RunBackgroundStreamBackLoop (line 215) | void RunBackgroundStreamBackLoop() { this->engine_->RunBackgroun...
          method String (line 217) | String GetResponseFromStreamOutput(Array<RequestStreamOutput> de...
        function TVM_FFI_STATIC_INIT_BLOCK (line 299) | TVM_FFI_STATIC_INIT_BLOCK() {

FILE: cpp/json_ffi/json_ffi_engine.h
  function namespace (line 16) | namespace mlc {

FILE: cpp/json_ffi/openai_api_protocol.cc
  type mlc (line 10) | namespace mlc {
    type llm (line 11) | namespace llm {
      type json_ffi (line 12) | namespace json_ffi {

FILE: cpp/json_ffi/openai_api_protocol.h
  function namespace (line 22) | namespace llm {

FILE: cpp/metadata/model.cc
  type mlc (line 7) | namespace mlc {
    type llm (line 8) | namespace llm {
      function ModelMetadata (line 76) | ModelMetadata ModelMetadata::FromJSON(const tvm::ffi::json::Object& ...
      function ModelMetadata (line 139) | ModelMetadata ModelMetadata::FromModule(Module module, const tvm::ff...

FILE: cpp/metadata/model.h
  function namespace (line 18) | namespace llm {

FILE: cpp/multi_gpu/builtin.cc
  type mlc (line 18) | namespace mlc {
    type llm (line 19) | namespace llm {
      type multi_gpu (line 20) | namespace multi_gpu {
        function ObjectRef (line 28) | ObjectRef DispatchFunctionByGroup(tvm::ffi::AnyView vm_arg,
        function ObjectRef (line 59) | ObjectRef SendFromLastGroupToWorker0(Tensor send, Optional<Tensor>...
        function TVM_FFI_STATIC_INIT_BLOCK (line 90) | TVM_FFI_STATIC_INIT_BLOCK() {

FILE: cpp/multi_gpu/multi_gpu_loader.cc
  type mlc (line 29) | namespace mlc {
    type llm (line 30) | namespace llm {
      type multi_gpu (line 31) | namespace multi_gpu {
        class RangeTimer (line 42) | class RangeTimer {
          method RangeTimer (line 44) | explicit RangeTimer(DurationType* result)
        class PreprocessorPool (line 59) | class PreprocessorPool {
          method PreprocessorPool (line 61) | explicit PreprocessorPool(const ModelMetadata& model_metadata, M...
          method Tensor (line 79) | Tensor Apply(Tensor param, const ModelMetadata::Param& param_inf...
        type ParamInfo (line 96) | struct ParamInfo {
        function Tensor (line 101) | Tensor RecvFromGlobalWorker0(Device device, const ModelMetadata::P...
        function Tensor (line 108) | Tensor BroadcastOrShardAndScatter(Tensor param, const ModelMetadat...
        function Tensor (line 127) | Tensor ReceiveBroadcastedOrSharded(Device device, const ModelMetad...
        function FormatDuration (line 143) | std::string FormatDuration(DurationType duration) {
        function LoadMultiGPU (line 150) | Array<Optional<Tensor>> LoadMultiGPU(const std::string& model_path...
        function LoadMultiGPUPresharded (line 250) | Array<Optional<Tensor>> LoadMultiGPUPresharded(const std::string& ...
        function TVM_FFI_STATIC_INIT_BLOCK (line 314) | TVM_FFI_STATIC_INIT_BLOCK() {

FILE: cpp/serve/config.cc
  type mlc (line 18) | namespace mlc {
    type llm (line 19) | namespace llm {
      type serve (line 20) | namespace serve {
        function TVM_FFI_STATIC_INIT_BLOCK (line 22) | TVM_FFI_STATIC_INIT_BLOCK() {
        function TotalDetectGlobalMemory (line 27) | uint64_t TotalDetectGlobalMemory(DLDevice device) {
        function GenerationConfig (line 365) | GenerationConfig GenerationConfig::GetDefaultFromModelConfig(
        function EngineConfig (line 423) | EngineConfig EngineConfig::FromJSONAndInferredConfig(
        function String (line 511) | String EngineConfigNode::AsJSONString() const {
        type ModelConfigLimits (line 550) | struct ModelConfigLimits {
        function BytesToMegabytesString (line 560) | inline std::string BytesToMegabytesString(double bytes) {
        function GetModelConfigLimits (line 570) | Result<ModelConfigLimits> GetModelConfigLimits(
        type MemUsageEstimationResult (line 646) | struct MemUsageEstimationResult {
        function EstimateMemoryUsageOnMode (line 653) | Result<MemUsageEstimationResult> EstimateMemoryUsageOnMode(
        function ModelsUseKVCache (line 1065) | Result<bool> ModelsUseKVCache(const std::vector<tvm::ffi::json::Ob...

FILE: cpp/serve/config.h
  type ResponseFormat (line 34) | struct ResponseFormat {
  type class (line 50) | enum class
  type class (line 55) | enum class
  type class (line 63) | enum class
  function class (line 72) | class DisaggConfig {
  function class (line 94) | class DebugConfig {
  function class (line 117) | class GenerationConfigNode : public Object {
  function class (line 149) | class GenerationConfig : public ObjectRef {
  type class (line 192) | enum class
  type class (line 199) | enum class
  function SpeculativeMode (line 207) | enum class SpeculativeMode : int {

FILE: cpp/serve/data.cc
  type mlc (line 12) | namespace mlc {
    type llm (line 13) | namespace llm {
      type serve (line 14) | namespace serve {
        function TVM_FFI_STATIC_INIT_BLOCK (line 16) | TVM_FFI_STATIC_INIT_BLOCK() {
        function SplitData (line 26) | std::pair<Array<Data>, Array<Data>> SplitData(const Array<Data>& o...
        function ObjectRef (line 78) | ObjectRef TextDataNode::GetEmbedding(Model model, ObjectRef* dst, ...
        function TVM_FFI_STATIC_INIT_BLOCK (line 83) | TVM_FFI_STATIC_INIT_BLOCK() {
        function ObjectRef (line 106) | ObjectRef TokenDataNode::GetEmbedding(Model model, ObjectRef* dst,...
        function TVM_FFI_STATIC_INIT_BLOCK (line 110) | TVM_FFI_STATIC_INIT_BLOCK() {
        function ObjectRef (line 136) | ObjectRef ImageDataNode::GetEmbedding(Model model, ObjectRef* dst,...
        function TVM_FFI_STATIC_INIT_BLOCK (line 140) | TVM_FFI_STATIC_INIT_BLOCK() {
        function TokenToLogProbJSON (line 151) | inline void TokenToLogProbJSON(const Tokenizer& tokenizer, const T...
        function RequestStreamOutput (line 226) | RequestStreamOutput RequestStreamOutput::Usage(String request_id,
        function TVM_FFI_STATIC_INIT_BLOCK (line 234) | TVM_FFI_STATIC_INIT_BLOCK() {

FILE: cpp/serve/data.h
  function namespace (line 23) | namespace mlc {

FILE: cpp/serve/draft_token_workspace_manager.cc
  type mlc (line 10) | namespace mlc {
    type llm (line 11) | namespace llm {
      type serve (line 12) | namespace serve {
        function TVM_FFI_STATIC_INIT_BLOCK (line 14) | TVM_FFI_STATIC_INIT_BLOCK() { DraftTokenWorkspaceManagerObj::Regis...

FILE: cpp/serve/draft_token_workspace_manager.h
  function namespace (line 17) | namespace mlc {

FILE: cpp/serve/engine.cc
  type mlc (line 39) | namespace mlc {
    type llm (line 40) | namespace llm {
      type serve (line 41) | namespace serve {
        class EngineModule (line 47) | class EngineModule
          method Init (line 1043) | void Init(const std::string& engine_config_json_str, Device device,
          method Create (line 1054) | static ffi::Module Create() { return ffi::Module(tvm::ffi::make_...
          method AddRequest (line 1056) | void AddRequest(Request request) { return GetEngine()->AddReques...
          method Abort (line 1058) | void Abort(const String& request_id) { return GetEngine()->Abort...
          method Request (line 1060) | Request CreateRequest(String id, Array<Data> inputs, String gene...
          method Step (line 1067) | void Step() { return GetEngine()->Step(); }
          method FRequestStreamCallback (line 1069) | FRequestStreamCallback GetRequestStreamCallback() {
          method SetRequestStreamCallback (line 1073) | void SetRequestStreamCallback(FRequestStreamCallback request_str...
          method Reset (line 1077) | void Reset() { return GetEngine()->Reset(); }
          method String (line 1080) | String JSONMetrics() { return GetEngine()->JSONMetrics(); }
          method Engine (line 1083) | Engine* GetEngine() {
        function GetTokenizerInfo (line 50) | inline std::optional<TokenizerInfo> GetTokenizerInfo(const tvm::ff...
        function GetEnvSocketHostPort (line 72) | inline std::pair<std::optional<std::string>, int> GetEnvSocketHost...
        function StreamBackErrorImpl (line 86) | void StreamBackErrorImpl(Request request, FRequestStreamCallback r...
        function AbortRequestImpl (line 104) | void AbortRequestImpl(EngineState estate, const Array<Model>& mode...
        class MockEchoEngineImpl (line 158) | class MockEchoEngineImpl : public Engine {
          method Create (line 160) | static Result<EngineCreationOutput> Create(const std::string& en...
          method Reset (line 188) | void Reset() final {}
          method Empty (line 190) | bool Empty() final { return request_map_.empty(); }
          method SetRequestStreamCallback (line 192) | void SetRequestStreamCallback(FRequestStreamCallback request_str...
          method FRequestStreamCallback (line 196) | FRequestStreamCallback GetRequestStreamCallback() final { return...
          method AddRequest (line 198) | void AddRequest(Request request) final {
          method AbortRequest (line 260) | void AbortRequest(const String& request_id) {
          method AbortAllRequests (line 283) | void AbortAllRequests() final {
          method Step (line 294) | void Step() final {
          method String (line 321) | String JSONMetrics() final { return "{}"; }
          method DebugCallFuncOnAllAllWorker (line 324) | void DebugCallFuncOnAllAllWorker(const String& func_name, Option...
          type MockRequestState (line 327) | struct MockRequestState {
        class EngineImpl (line 344) | class EngineImpl : public Engine {
          method Create (line 350) | static Result<EngineCreationOutput> Create(const std::string& en...
          method Reset (line 505) | void Reset() final {
          method Empty (line 513) | bool Empty() final { return estate_->running_queue.empty() && es...
          method String (line 515) | String JSONMetrics() final { return tvm::ffi::json::Stringify(es...
          method FRequestStreamCallback (line 517) | FRequestStreamCallback GetRequestStreamCallback() final {
          method SetRequestStreamCallback (line 521) | void SetRequestStreamCallback(FRequestStreamCallback request_str...
          method StreamBackError (line 526) | void StreamBackError(Request request, String finish_reason) {
          method HandleSpecialRequests (line 532) | void HandleSpecialRequests(Request request) {
          method HandleDisaggRequest (line 550) | bool HandleDisaggRequest(Request request) {
          method AddRequest (line 665) | void AddRequest(Request request) final {
          method AbortRequest (line 727) | void AbortRequest(const String& request_id) final {
          method AbortAllRequests (line 731) | void AbortAllRequests() final {
          method Step (line 746) | void Step() final {
          method CreateDiscoSession (line 769) | std::tuple<Optional<Session>, int, std::vector<int>> CreateDisco...
          method DebugCallFuncOnAllAllWorker (line 884) | void DebugCallFuncOnAllAllWorker(const String& func_name, Option...
          method AutoDecideEngineConfig (line 890) | Result<EngineConfig> AutoDecideEngineConfig(
          method SetThreadMaxConcurrency (line 964) | void SetThreadMaxConcurrency() {
          method GetGrammarFromResponseFormat (line 978) | std::optional<xgrammar::CompiledGrammar> GetGrammarFromResponseF...
        function ClearGlobalMemoryManager (line 1022) | void ClearGlobalMemoryManager() {
        class EngineModule (line 1028) | class EngineModule : public ffi::ModuleObj {
          method Init (line 1043) | void Init(const std::string& engine_config_json_str, Device device,
          method Create (line 1054) | static ffi::Module Create() { return ffi::Module(tvm::ffi::make_...
          method AddRequest (line 1056) | void AddRequest(Request request) { return GetEngine()->AddReques...
          method Abort (line 1058) | void Abort(const String& request_id) { return GetEngine()->Abort...
          method Request (line 1060) | Request CreateRequest(String id, Array<Data> inputs, String gene...
          method Step (line 1067) | void Step() { return GetEngine()->Step(); }
          method FRequestStreamCallback (line 1069) | FRequestStreamCallback GetRequestStreamCallback() {
          method SetRequestStreamCallback (line 1073) | void SetRequestStreamCallback(FRequestStreamCallback request_str...
          method Reset (line 1077) | void Reset() { return GetEngine()->Reset(); }
          method String (line 1080) | String JSONMetrics() { return GetEngine()->JSONMetrics(); }
          method Engine (line 1083) | Engine* GetEngine() {
        function TVM_FFI_STATIC_INIT_BLOCK (line 1092) | TVM_FFI_STATIC_INIT_BLOCK() {

FILE: cpp/serve/engine.h
  function namespace (line 15) | namespace mlc {

FILE: cpp/serve/engine_actions/action.cc
  type mlc (line 8) | namespace mlc {
    type llm (line 9) | namespace llm {
      type serve (line 10) | namespace serve {
        function TVM_FFI_STATIC_INIT_BLOCK (line 12) | TVM_FFI_STATIC_INIT_BLOCK() { EngineActionObj::RegisterReflection(...

FILE: cpp/serve/engine_actions/action.h
  function namespace (line 18) | namespace mlc {

FILE: cpp/serve/engine_actions/action_commons.cc
  type mlc (line 10) | namespace mlc {
    type llm (line 11) | namespace llm {
      type serve (line 12) | namespace serve {
        function CreateEngineActions (line 14) | Array<EngineAction> CreateEngineActions(Array<Model> models, Engin...
        function RemoveRequestFromModel (line 137) | void RemoveRequestFromModel(EngineState estate, int64_t req_intern...
        function RemoveRequestStateEntry (line 151) | void RemoveRequestStateEntry(EngineState estate, const Array<Model...
        function ProcessFinishedRequestStateEntries (line 175) | void ProcessFinishedRequestStateEntries(
        function ActionStepPostProcess (line 238) | void ActionStepPostProcess(Array<Request> requests, EngineState es...
        function RequestStateEntry (line 331) | RequestStateEntry PreemptLastRunningRequestStateEntry(
        function ApplyLogitProcessorAndSample (line 427) | std::pair<Tensor, std::vector<SampleResult>> ApplyLogitProcessorAn...

FILE: cpp/serve/engine_actions/action_commons.h
  function namespace (line 19) | namespace mlc {

FILE: cpp/serve/engine_actions/auto_spec_decode.cc
  type mlc (line 13) | namespace mlc {
    type llm (line 14) | namespace llm {
      type serve (line 15) | namespace serve {
        class AutoSpecDecodeActionObj (line 21) | class AutoSpecDecodeActionObj : public EngineActionObj {
          method AutoSpecDecodeActionObj (line 23) | explicit AutoSpecDecodeActionObj(Array<EngineAction> spec_decode...
          method Step (line 30) | Array<Request> Step(EngineState estate) final {
          method CalculateDraftLength (line 54) | int CalculateDraftLength(EngineState estate, int num_running_rse...
        function EngineAction (line 80) | EngineAction EngineAction::AutoSpecDecode(std::vector<EngineAction...

FILE: cpp/serve/engine_actions/batch_decode.cc
  type mlc (line 17) | namespace mlc {
    type llm (line 18) | namespace llm {
      type serve (line 19) | namespace serve {
        class BatchDecodeActionObj (line 29) | class BatchDecodeActionObj : public EngineActionObj {
          method BatchDecodeActionObj (line 31) | explicit BatchDecodeActionObj(Array<Model> models, Tokenizer tok...
          method Step (line 42) | Array<Request> Step(EngineState estate) final {
          method CanDecode (line 203) | bool CanDecode(int num_rsentries) {
          method RetokenizeWithNewToken (line 215) | std::pair<int, std::vector<int32_t>> RetokenizeWithNewToken(Requ...
          method CommitTokenMayRetokenize (line 254) | void CommitTokenMayRetokenize(RequestStateEntry rsentry, Request...
        function EngineAction (line 316) | EngineAction EngineAction::BatchDecode(Array<Model> models, Tokeni...

FILE: cpp/serve/engine_actions/batch_draft.cc
  type mlc (line 14) | namespace mlc {
    type llm (line 15) | namespace llm {
      type serve (line 16) | namespace serve {
        class BatchDraftActionObj (line 23) | class BatchDraftActionObj : public EngineActionObj {
          method BatchDraftActionObj (line 25) | explicit BatchDraftActionObj(Array<Model> models, LogitProcessor...
          method Step (line 38) | Array<Request> Step(EngineState estate) final {
          method CanDecode (line 304) | bool CanDecode(int num_rsentries) {
          method PrefillLaggedTokensByChunk (line 316) | void PrefillLaggedTokensByChunk(const Array<RequestModelState>& ...
        function EngineAction (line 395) | EngineAction EngineAction::BatchDraft(Array<Model> models, LogitPr...

FILE: cpp/serve/engine_actions/batch_jumpforward.cc
  type mlc (line 18) | namespace mlc {
    type llm (line 19) | namespace llm {
      type serve (line 20) | namespace serve {
        class BatchJumpForwardActionObj (line 27) | class BatchJumpForwardActionObj : public EngineActionObj {
          method BatchJumpForwardActionObj (line 29) | explicit BatchJumpForwardActionObj(Array<Model> models, Tokenize...
          method Step (line 35) | Array<Request> Step(EngineState estate) final {
          method CheckMemForJumpForward (line 103) | bool CheckMemForJumpForward(int num_rsentries) {
          method CanJumpForward (line 111) | bool CanJumpForward(const RequestStateEntry& rsentry) {
          method RetokenizeWithNewString (line 133) | std::tuple<int, std::vector<int32_t>, std::string> RetokenizeWit...
          method HandleRollback (line 188) | void HandleRollback(const RequestStateEntry& rsentry, RequestMod...
        function EngineAction (line 231) | EngineAction EngineAction::BatchJumpForward(Array<Model> models, T...

FILE: cpp/serve/engine_actions/batch_prefill_base.cc
  type mlc (line 12) | namespace mlc {
    type llm (line 13) | namespace llm {
      type serve (line 14) | namespace serve {
        function HasPrefillSpace (line 16) | bool HasPrefillSpace(int num_required_pages, bool sliding_window_e...

FILE: cpp/serve/engine_actions/batch_prefill_base.h
  function namespace (line 13) | namespace mlc {

FILE: cpp/serve/engine_actions/batch_verify.cc
  type mlc (line 19) | namespace mlc {
    type llm (line 20) | namespace llm {
      type serve (line 21) | namespace serve {
        class BatchVerifyActionObj (line 28) | class BatchVerifyActionObj : public EngineActionObj {
          method BatchVerifyActionObj (line 30) | explicit BatchVerifyActionObj(Array<Model> models, LogitProcesso...
          method Step (line 44) | Array<Request> Step(EngineState estate) final {
          type DraftRequestStateEntries (line 277) | struct DraftRequestStateEntries {
          method DraftRequestStateEntries (line 292) | DraftRequestStateEntries GetDraftsToVerify(EngineState estate) {
          method CanVerify (line 337) | bool CanVerify(int num_required_pages) {
        function EngineAction (line 369) | EngineAction EngineAction::BatchVerify(Array<Model> models, LogitP...

FILE: cpp/serve/engine_actions/disagg_prepare_recv.cc
  type mlc (line 12) | namespace mlc {
    type llm (line 13) | namespace llm {
      type serve (line 14) | namespace serve {
        class DisaggPrepareReceiveActionObj (line 21) | class DisaggPrepareReceiveActionObj : public BatchPrefillBaseActio...
          method DisaggPrepareReceiveActionObj (line 23) | explicit DisaggPrepareReceiveActionObj(Array<Model> models, Engi...
          method Step (line 34) | Array<Request> Step(EngineState estate) final {
          method GetRequestStateEntriesToPrefill (line 186) | std::optional<PrefillInput> GetRequestStateEntriesToPrefill(Engi...
          method CanPrefill (line 324) | bool CanPrefill(EngineState estate, int num_prefill_rsentries, i...
          method MatchPrefixCache (line 354) | int MatchPrefixCache(EngineState estate, PrefillInput* input) fi...
        function EngineAction (line 432) | EngineAction EngineAction::DisaggPrepareReceive(Array<Model> model...

FILE: cpp/serve/engine_actions/disagg_remote_send.cc
  type mlc (line 9) | namespace mlc {
    type llm (line 10) | namespace llm {
      type serve (line 11) | namespace serve {
        class DisaggRemoteSendActionObj (line 19) | class DisaggRemoteSendActionObj : public BatchPrefillBaseActionObj {
          method DisaggRemoteSendActionObj (line 21) | explicit DisaggRemoteSendActionObj(Array<Model> models,
          method Step (line 40) | Array<Request> Step(EngineState estate) final {
          method GetRequestStateEntriesToPrefill (line 174) | std::vector<PrefillInput> GetRequestStateEntriesToPrefill(Engine...
          method MatchPrefixCache (line 385) | int MatchPrefixCache(EngineState estate, PrefillInput* input) fi...
        function EngineAction (line 487) | EngineAction EngineAction::DisaggRemoteSend(

FILE: cpp/serve/engine_actions/eagle_batch_draft.cc
  type mlc (line 14) | namespace mlc {
    type llm (line 15) | namespace llm {
      type serve (line 16) | namespace serve {
        class EagleBatchDraftActionObj (line 23) | class EagleBatchDraftActionObj : public EngineActionObj {
          method EagleBatchDraftActionObj (line 25) | explicit EagleBatchDraftActionObj(Array<Model> models, LogitProc...
          method Step (line 38) | Array<Request> Step(EngineState estate) final {
          method CanDecode (line 190) | bool CanDecode(int num_rsentries) {
        function EngineAction (line 220) | EngineAction EngineAction::EagleBatchDraft(Array<Model> models, Lo...

FILE: cpp/serve/engine_actions/eagle_batch_verify.cc
  type mlc (line 19) | namespace mlc {
    type llm (line 20) | namespace llm {
      type serve (line 21) | namespace serve {
        class EagleBatchVerifyActionObj (line 28) | class EagleBatchVerifyActionObj : public EngineActionObj {
          method EagleBatchVerifyActionObj (line 30) | explicit EagleBatchVerifyActionObj(Array<Model> models, LogitPro...
          method Step (line 44) | Array<Request> Step(EngineState estate) final {
          type DraftRequestStateEntries (line 347) | struct DraftRequestStateEntries {
          method DraftRequestStateEntries (line 362) | DraftRequestStateEntries GetDraftsToVerify(EngineState estate) {
          method CanVerify (line 397) | bool CanVerify(int num_required_pages) {
          method UpdateRequestStatesWithDraftProposals (line 402) | void UpdateRequestStatesWithDraftProposals(const Array<RequestMo...
        function EngineAction (line 447) | EngineAction EngineAction::EagleBatchVerify(

FILE: cpp/serve/engine_actions/eagle_new_request_prefill.cc
  type mlc (line 9) | namespace mlc {
    type llm (line 10) | namespace llm {
      type serve (line 11) | namespace serve {
        class EagleNewRequestPrefillActionObj (line 17) | class EagleNewRequestPrefillActionObj : public BatchPrefillBaseAct...
          method EagleNewRequestPrefillActionObj (line 19) | explicit EagleNewRequestPrefillActionObj(Array<Model> models, Lo...
          method Step (line 33) | Array<Request> Step(EngineState estate) final {
          method UpdateRequestStatesWithDraftProposals (line 344) | void UpdateRequestStatesWithDraftProposals(
          method MatchPrefixCache (line 393) | int MatchPrefixCache(EngineState estate, PrefillInput* input) fi...
        function EngineAction (line 485) | EngineAction EngineAction::EagleNewRequestPrefill(

FILE: cpp/serve/engine_actions/new_request_prefill.cc
  type mlc (line 9) | namespace mlc {
    type llm (line 10) | namespace llm {
      type serve (line 11) | namespace serve {
        class NewRequestPrefillActionObj (line 17) | class NewRequestPrefillActionObj : public BatchPrefillBaseActionObj {
          method NewRequestPrefillActionObj (line 19) | explicit NewRequestPrefillActionObj(Array<Model> models, LogitPr...
          method Step (line 30) | Array<Request> Step(EngineState estate) final {
          method MatchPrefixCache (line 280) | int MatchPrefixCache(EngineState estate, PrefillInput* input) fi...
        function EngineAction (line 352) | EngineAction EngineAction::NewRequestPrefill(Array<Model> models, ...

FILE: cpp/serve/engine_state.cc
  type mlc (line 7) | namespace mlc {
    type llm (line 8) | namespace llm {
      type serve (line 9) | namespace serve {
        function TVM_FFI_STATIC_INIT_BLOCK (line 11) | TVM_FFI_STATIC_INIT_BLOCK() { EngineStateObj::RegisterReflection(); }
        function RequestState (line 28) | RequestState EngineStateObj::GetRequestState(Request request) {

FILE: cpp/serve/engine_state.h
  function namespace (line 16) | namespace mlc {

FILE: cpp/serve/event_trace_recorder.cc
  type mlc (line 19) | namespace mlc {
    type llm (line 20) | namespace llm {
      type serve (line 21) | namespace serve {
        type detail (line 25) | namespace detail {
          type PairHash (line 27) | struct PairHash {
        class EventTraceRecorderImpl (line 39) | class EventTraceRecorderImpl : public EventTraceRecorderObj {
          method AddEvent (line 41) | void AddEvent(const String& request_id, const std::string& event...
          method AddEvent (line 52) | void AddEvent(const Array<String>& request_ids, const std::strin...
          method DumpJSON (line 65) | std::string DumpJSON() final {
          method AddEventInternal (line 124) | void AddEventInternal(const std::string& request_id, const std::...
        function EventTraceRecorder (line 146) | EventTraceRecorder EventTraceRecorder::Create() {
        function TVM_FFI_STATIC_INIT_BLOCK (line 150) | TVM_FFI_STATIC_INIT_BLOCK() {

FILE: cpp/serve/event_trace_recorder.h
  function namespace (line 16) | namespace mlc {

FILE: cpp/serve/function_table.cc
  type mlc (line 24) | namespace mlc {
    type llm (line 25) | namespace llm {
      type serve (line 26) | namespace serve {
        function GetDiscoWorkerCPUBinding (line 28) | Optional<IntTuple> GetDiscoWorkerCPUBinding(int num_workers) {
        function Function (line 53) | Function FunctionTable::SessionFuncAsPackedFunc(Session sess, DRef...
        function ObjectRef (line 155) | ObjectRef FunctionTable::LoadParams(const std::string& model_path,...
        function ObjectRef (line 294) | ObjectRef FunctionTable::Empty(Shape shape, DataType dtype, Device...
        function ObjectRef (line 305) | ObjectRef FunctionTable::CopyToWorker0(const Tensor& host_array, S...

FILE: cpp/serve/function_table.h
  function namespace (line 23) | namespace mlc {

FILE: cpp/serve/logit_processor.cc
  type mlc (line 13) | namespace mlc {
    type llm (line 14) | namespace llm {
      type serve (line 15) | namespace serve {
        function CopyArray (line 17) | inline void CopyArray(Tensor src, Tensor dst, TVMStreamHandle copy...
        function SyncCopyStream (line 22) | inline void SyncCopyStream(Device device, TVMStreamHandle compute_...
        function TVM_FFI_STATIC_INIT_BLOCK (line 34) | TVM_FFI_STATIC_INIT_BLOCK() { LogitProcessorObj::RegisterReflectio...
        class LogitProcessorImpl (line 36) | class LogitProcessorImpl : public LogitProcessorObj {
          method LogitProcessorImpl (line 39) | explicit LogitProcessorImpl(int max_num_token, int vocab_size, F...
          method InplaceUpdateLogits (line 99) | void InplaceUpdateLogits(Tensor logits,                         ...
          method Tensor (line 153) | Tensor ComputeProbsFromLogits(Tensor logits, const Array<Generat...
          method UpdateWithLogitBias (line 212) | void UpdateWithLogitBias(Tensor logits, const Array<GenerationCo...
          method UpdateWithPenalty (line 269) | void UpdateWithPenalty(Tensor logits, const Array<GenerationConf...
          method UpdateWithMask (line 371) | void UpdateWithMask(Tensor logits, const Array<RequestModelState...

FILE: cpp/serve/logit_processor.h
  function namespace (line 19) | namespace mlc {

FILE: cpp/serve/metrics.cc
  type mlc (line 12) | namespace mlc {
    type llm (line 13) | namespace llm {
      type serve (line 14) | namespace serve {

FILE: cpp/serve/metrics.h
  function namespace (line 15) | namespace mlc {
  function Reset (line 87) | void Reset() {
  function GetPrefillTime (line 100) | struct RequestMetrics {
  function Reset (line 148) | void Reset() {
  function UpdateDraftTimeByBatchSize (line 168) | struct EngineMetrics {
  function UpdateVerifyTimeByBatchSize (line 227) | void UpdateVerifyTimeByBatchSize(int effective_batch_size, double time) {
  function RequestFinishUpdate (line 237) | void RequestFinishUpdate(const RequestMetrics& request_metrics) {

FILE: cpp/serve/model.cc
  type mlc (line 21) | namespace mlc {
    type llm (line 22) | namespace llm {
      type serve (line 23) | namespace serve {
        function TVM_FFI_STATIC_INIT_BLOCK (line 27) | TVM_FFI_STATIC_INIT_BLOCK() { ModelObj::RegisterReflection(); }
        class ModelImpl (line 29) | class ModelImpl
          method ModelImpl (line 64) | explicit ModelImpl(String reload_lib_path, String model_path, tv...
          method ObjectRef (line 85) | ObjectRef TokenEmbed(IntTuple token_ids, ObjectRef* dst, int off...
          method ObjectRef (line 126) | ObjectRef ImageEmbed(const Tensor& image, ObjectRef* dst, int of...
          method CanGetLogits (line 154) | bool CanGetLogits() final {
          method Tensor (line 158) | Tensor GetLogits(const ObjectRef& hidden_states) final {
          method GetMultiStepLogits (line 184) | Array<Tensor> GetMultiStepLogits(const ObjectRef& hidden_states)...
          method ObjectRef (line 200) | ObjectRef FuseEmbedHidden(const ObjectRef& embeddings, const Obj...
          method Tensor (line 243) | Tensor BatchPrefill(const ObjectRef& embeddings, const std::vect...
          method ObjectRef (line 352) | ObjectRef BatchPrefillToLastHidden(const ObjectRef& embedding_or...
          method Tensor (line 420) | Tensor BatchDecode(const ObjectRef& embeddings, const std::vecto...
          method Tensor (line 488) | Tensor BatchTreeDecode(const ObjectRef& embeddings, const std::v...
          method ObjectRef (line 561) | ObjectRef BatchDecodeToLastHidden(const ObjectRef& hidden_states...
          method Tensor (line 612) | Tensor BatchVerify(const ObjectRef& embeddings, const std::vecto...
          method ObjectRef (line 684) | ObjectRef BatchVerifyToLastHidden(const ObjectRef& embeddings,
          method CreateKVCache (line 752) | void CreateKVCache(int page_size, int max_num_sequence, int64_t ...
          method AddNewSequence (line 783) | void AddNewSequence(int64_t seq_id) final {
          method ForkSequence (line 790) | void ForkSequence(int64_t parent_seq_id, int64_t child_seq_id, i...
          method RemoveSequence (line 798) | void RemoveSequence(int64_t seq_id) final {
          method PopNFromKVCache (line 806) | void PopNFromKVCache(int64_t seq_id, int num_tokens) final {
          method CommitAcceptedTokenTreeNodesToKVCache (line 813) | void CommitAcceptedTokenTreeNodesToKVCache(
          method EnableSlidingWindowForSeq (line 822) | void EnableSlidingWindowForSeq(int64_t seq_id) final {
          method IntTuple (line 832) | IntTuple DisaggPrepareKVRecv(int64_t seq_id, int length) final {
          method DisaggMarkKVSend (line 851) | void DisaggMarkKVSend(int64_t seq_id, int begin_pos, IntTuple co...
          method ModelMetadata (line 866) | ModelMetadata GetMetadata() const final { return ft_.model_metad...
          method GetNumAvailablePages (line 868) | int GetNumAvailablePages() const final {
          method GetCurrentTotalSequenceLength (line 877) | int GetCurrentTotalSequenceLength() const final {
          method LoadParams (line 888) | void LoadParams() final { this->params_ = ft_.LoadParams(model_,...
          method SetMaxNumSequence (line 890) | void SetMaxNumSequence(int max_num_sequence) final {
          method SetPrefillChunkSize (line 896) | void SetPrefillChunkSize(int prefill_chunk_size) final {
          method LogitProcessor (line 913) | LogitProcessor CreateLogitProcessor(int max_num_token,
          method Sampler (line 919) | Sampler CreateSampler(int max_num_sample, int num_models,
          method EstimateHostCPURequirement (line 929) | int EstimateHostCPURequirement() const final {
          method GetSlidingWindowSize (line 934) | int GetSlidingWindowSize() const final { return sliding_window_s...
          method GetAttentionSinkSize (line 936) | int GetAttentionSinkSize() const final { return attention_sink_s...
          method ObjectRef (line 938) | ObjectRef AllocEmbeddingTensor() final {
          method ObjectRef (line 961) | ObjectRef AllocHiddenStatesTensor() final {
          method Reset (line 985) | void Reset() final {
          method DraftTokenWorkspaceManager (line 994) | DraftTokenWorkspaceManager CreateDraftTokenWorkspaceManager(int ...
          method ObjectRef (line 999) | ObjectRef GatherHiddenStates(const ObjectRef& input, const std::...
          method ScatterHiddenStates (line 1018) | void ScatterHiddenStates(const ObjectRef& input, const std::vect...
          method Tensor (line 1028) | Tensor GatherDraftProbs(const Tensor& input, const std::vector<i...
          method ScatterDraftProbs (line 1041) | void ScatterDraftProbs(const Tensor& input, const std::vector<in...
          method GetMedusaLogits (line 1051) | Array<Tensor> GetMedusaLogits(const ObjectRef& hidden_states) {
          method DebugCallFuncOnAllAllWorker (line 1064) | void DebugCallFuncOnAllAllWorker(const String& func_name, Option...
          method LoadModelConfigJSON (line 1070) | void LoadModelConfigJSON(const tvm::ffi::json::Object& config) {
        function Model (line 31) | Model Model::Create(String reload_lib_path, String model_path,
        class ModelImpl (line 58) | class ModelImpl : public ModelObj {
          method ModelImpl (line 64) | explicit ModelImpl(String reload_lib_path, String model_path, tv...
          method ObjectRef (line 85) | ObjectRef TokenEmbed(IntTuple token_ids, ObjectRef* dst, int off...
          method ObjectRef (line 126) | ObjectRef ImageEmbed(const Tensor& image, ObjectRef* dst, int of...
          method CanGetLogits (line 154) | bool CanGetLogits() final {
          method Tensor (line 158) | Tensor GetLogits(const ObjectRef& hidden_states) final {
          method GetMultiStepLogits (line 184) | Array<Tensor> GetMultiStepLogits(const ObjectRef& hidden_states)...
          method ObjectRef (line 200) | ObjectRef FuseEmbedHidden(const ObjectRef& embeddings, const Obj...
          method Tensor (line 243) | Tensor BatchPrefill(const ObjectRef& embeddings, const std::vect...
          method ObjectRef (line 352) | ObjectRef BatchPrefillToLastHidden(const ObjectRef& embedding_or...
          method Tensor (line 420) | Tensor BatchDecode(const ObjectRef& embeddings, const std::vecto...
          method Tensor (line 488) | Tensor BatchTreeDecode(const ObjectRef& embeddings, const std::v...
          method ObjectRef (line 561) | ObjectRef BatchDecodeToLastHidden(const ObjectRef& hidden_states...
          method Tensor (line 612) | Tensor BatchVerify(const ObjectRef& embeddings, const std::vecto...
          method ObjectRef (line 684) | ObjectRef BatchVerifyToLastHidden(const ObjectRef& embeddings,
          method CreateKVCache (line 752) | void CreateKVCache(int page_size, int max_num_sequence, int64_t ...
          method AddNewSequence (line 783) | void AddNewSequence(int64_t seq_id) final {
          method ForkSequence (line 790) | void ForkSequence(int64_t parent_seq_id, int64_t child_seq_id, i...
          method RemoveSequence (line 798) | void RemoveSequence(int64_t seq_id) final {
          method PopNFromKVCache (line 806) | void PopNFromKVCache(int64_t seq_id, int num_tokens) final {
          method CommitAcceptedTokenTreeNodesToKVCache (line 813) | void CommitAcceptedTokenTreeNodesToKVCache(
          method EnableSlidingWindowForSeq (line 822) | void EnableSlidingWindowForSeq(int64_t seq_id) final {
          method IntTuple (line 832) | IntTuple DisaggPrepareKVRecv(int64_t seq_id, int length) final {
          method DisaggMarkKVSend (line 851) | void DisaggMarkKVSend(int64_t seq_id, int begin_pos, IntTuple co...
          method ModelMetadata (line 866) | ModelMetadata GetMetadata() const final { return ft_.model_metad...
          method GetNumAvailablePages (line 868) | int GetNumAvailablePages() const final {
          method GetCurrentTotalSequenceLength (line 877) | int GetCurrentTotalSequenceLength() const final {
          method LoadParams (line 888) | void LoadParams() final { this->params_ = ft_.LoadParams(model_,...
          method SetMaxNumSequence (line 890) | void SetMaxNumSequence(int max_num_sequence) final {
          method SetPrefillChunkSize (line 896) | void SetPrefillChunkSize(int prefill_chunk_size) final {
          method LogitProcessor (line 913) | LogitProcessor CreateLogitProcessor(int max_num_token,
          method Sampler (line 919) | Sampler CreateSampler(int max_num_sample, int num_models,
          method EstimateHostCPURequirement (line 929) | int EstimateHostCPURequirement() const final {
          method GetSlidingWindowSize (line 934) | int GetSlidingWindowSize() const final { return sliding_window_s...
          method GetAttentionSinkSize (line 936) | int GetAttentionSinkSize() const final { return attention_sink_s...
          method ObjectRef (line 938) | ObjectRef AllocEmbeddingTensor() final {
          method ObjectRef (line 961) | ObjectRef AllocHiddenStatesTensor() final {
          method Reset (line 985) | void Reset() final {
          method DraftTokenWorkspaceManager (line 994) | DraftTokenWorkspaceManager CreateDraftTokenWorkspaceManager(int ...
          method ObjectRef (line 999) | ObjectRef GatherHiddenStates(const ObjectRef& input, const std::...
          method ScatterHiddenStates (line 1018) | void ScatterHiddenStates(const ObjectRef& input, const std::vect...
          method Tensor (line 1028) | Tensor GatherDraftProbs(const Tensor& input, const std::vector<i...
          method ScatterDraftProbs (line 1041) | void ScatterDraftProbs(const Tensor& input, const std::vector<in...
          method GetMedusaLogits (line 1051) | Array<Tensor> GetMedusaLogits(const ObjectRef& hidden_states) {
          method DebugCallFuncOnAllAllWorker (line 1064) | void DebugCallFuncOnAllAllWorker(const String& func_name, Option...
          method LoadModelConfigJSON (line 1070) | void LoadModelConfigJSON(const tvm::ffi::json::Object& config) {
        function TVM_FFI_STATIC_INIT_BLOCK (line 1127) | TVM_FFI_STATIC_INIT_BLOCK() {

FILE: cpp/serve/model.h
  type ModelWorkspace (line 39) | struct ModelWorkspace {
  function ObjectRef (line 49) | ObjectRef hidden_states{nullptr};

FILE: cpp/serve/prefix_cache.cc
  type mlc (line 10) | namespace mlc {
    type llm (line 11) | namespace llm {
      type serve (line 12) | namespace serve {
        function TVM_FFI_STATIC_INIT_BLOCK (line 16) | TVM_FFI_STATIC_INIT_BLOCK() { PrefixCacheObj::RegisterReflection(); }
        class PrefixCacheImpl (line 21) | class PrefixCacheImpl : public PrefixCacheObj {
          method PrefixCacheImpl (line 28) | explicit PrefixCacheImpl(size_t max_num_recycling_seqs, PrefixCa...
          method PrefixCacheMatchedResult (line 48) | PrefixCacheMatchedResult InsertSequence(int64_t seq_id, std::vec...
          method ExtendSequence (line 149) | void ExtendSequence(int64_t seq_id, const std::vector<int32_t>& ...
          method CommitSequenceExtention (line 153) | void CommitSequenceExtention() final {
          method RollBackSequence (line 176) | void RollBackSequence(int64_t seq_id, size_t num_tokens) final {
          method RecycleSequence (line 190) | void RecycleSequence(int64_t seq_id, bool lazy = true) final {
          method TryFreeMemory (line 224) | bool TryFreeMemory() final {
          method HasSequence (line 250) | bool HasSequence(int64_t seq_id) final { return radix_tree_->Has...
          method Reset (line 255) | void Reset() final {
          method PrefixCacheMode (line 265) | PrefixCacheMode Mode() final { return PrefixCacheMode::kRadix; }
          method ReuseRecyclingSequence (line 268) | void ReuseRecyclingSequence(int64_t seq_id) {
          type SequenceState (line 280) | enum class SequenceState : int {
        class NoPrefixCache (line 344) | class NoPrefixCache : public PrefixCacheObj {
          method PrefixCacheMatchedResult (line 355) | PrefixCacheMatchedResult InsertSequence(int64_t seq_id, std::vec...
          method ExtendSequence (line 367) | void ExtendSequence(int64_t seq_id, const std::vector<int32_t>& ...
          method CommitSequenceExtention (line 371) | void CommitSequenceExtention() final {
          method RollBackSequence (line 381) | void RollBackSequence(int64_t seq_id, size_t num_tokens) final {
          method RecycleSequence (line 394) | void RecycleSequence(int64_t seq_id, bool lazy = true) final {
          method TryFreeMemory (line 404) | bool TryFreeMemory() final {
          method HasSequence (line 414) | bool HasSequence(int64_t seq_id) final {
          method Reset (line 422) | void Reset() final {}
          method PrefixCacheMode (line 424) | PrefixCacheMode Mode() final { return PrefixCacheMode::kDisable; }
        function PrefixCache (line 427) | PrefixCache PrefixCache::CreateRadixPrefixCache(size_t max_num_rec...
        function PrefixCache (line 434) | PrefixCache PrefixCache::CreateNoPrefixCache() {

FILE: cpp/serve/prefix_cache.h
  function namespace (line 20) | namespace mlc {

FILE: cpp/serve/radix_tree.cc
  type mlc (line 11) | namespace mlc {
    type llm (line 12) | namespace llm {
      type serve (line 13) | namespace serve {
        function TVM_FFI_STATIC_INIT_BLOCK (line 17) | TVM_FFI_STATIC_INIT_BLOCK() { PagedRadixTreeObj::RegisterReflectio...
        type SequenceIDNode (line 22) | struct SequenceIDNode {
        class SequenceIDNodePool (line 35) | class SequenceIDNodePool {
          method SequenceIDNodePool (line 38) | SequenceIDNodePool() {
          method SequenceIDNode (line 50) | SequenceIDNode* Allocate(int64_t seq_id, SequenceIDNode* next) {
          method Free (line 68) | void Free(SequenceIDNode* node) {
          method Reset (line 77) | void Reset() {
          method NewNodeBlock_ (line 107) | void NewNodeBlock_() {
        type RadixPage (line 137) | struct RadixPage {
          method Extend (line 172) | void Extend(const int32_t* suffix, size_t suffix_length) {
          method AddSequence (line 185) | void AddSequence(SequenceIDNodePool* pool, int64_t id) { seq_ids...
          method PopSequence (line 193) | void PopSequence(SequenceIDNodePool* pool, int64_t id) {
          method GetLocalSequence (line 222) | std::vector<int64_t> GetLocalSequence() {
          method FindAnyChildSequence (line 236) | int32_t FindAnyChildSequence() {
          method FindAllChildSequence (line 246) | std::vector<int64_t> FindAllChildSequence() {
          method Iterate (line 263) | void Iterate(CallbackFunc f) {
          method RadixPage (line 274) | RadixPage* GetLastSibling() {
          method RadixPage (line 287) | RadixPage* FindChild(int64_t first_token) {
          method InsertChild (line 297) | void InsertChild(RadixPage* child) {
          method RemoveChild (line 307) | void RemoveChild(RadixPage* child) {
          method Mergeable (line 325) | bool Mergeable() {
          method MatchPrefix (line 341) | size_t MatchPrefix(const int32_t* prefix, size_t prefix_length) {
        class RadixPagePool (line 356) | class RadixPagePool {
          method RadixPagePool (line 359) | RadixPagePool() {
          method RadixPage (line 369) | RadixPage* Allocate() {
          method Free (line 389) | void Free(RadixPage* page) {
          method FreeCapacity (line 400) | size_t FreeCapacity() { return free_page_indices_.size() * kPage...
          method Reset (line 405) | void Reset() {
          method NewPageBlock_ (line 443) | void NewPageBlock_() {
        class PagedRadixTreeImpl (line 460) | class PagedRadixTreeImpl : public PagedRadixTreeObj {
          method PagedRadixTreeImpl (line 471) | explicit PagedRadixTreeImpl() {
          method HasSequence (line 487) | bool HasSequence(int64_t seq_id) { return seq2page.find(seq_id) ...
          method IntTuple (line 495) | IntTuple GetSequence(int64_t seq_id) {
          method MatchPrefix (line 514) | std::pair<size_t, std::vector<int64_t>> MatchPrefix(const std::v...
          method GetSequenceLength (line 528) | size_t GetSequenceLength(int64_t seq_id) {
          method ForkSequence (line 547) | void ForkSequence(int64_t seq_id, int64_t parent_seq_id, size_t ...
          method AddSequence (line 572) | void AddSequence(int64_t seq_id) {
          method ExtendSequence (line 585) | void ExtendSequence(int64_t seq_id, const std::vector<int32_t>& ...
          method RollBackSequence (line 625) | void RollBackSequence(int64_t seq_id, size_t num_tokens) {
          method RemoveSequence (line 672) | void RemoveSequence(int64_t seq_id) {
          method FreeCapacity (line 692) | size_t FreeCapacity() { return radix_page_pool->FreeCapacity(); }
          method Reset (line 694) | void Reset() {
          method MergePage (line 717) | void MergePage(RadixPage* page) {
          method RadixPage (line 743) | RadixPage* SplitPage(RadixPage* page, size_t offset) {
          method MatchSequence (line 779) | std::tuple<RadixPage*, size_t, size_t> MatchSequence(RadixPage* ...
        function PagedRadixTree (line 801) | PagedRadixTree PagedRadixTree::Create() {
        function TVM_FFI_STATIC_INIT_BLOCK (line 805) | TVM_FFI_STATIC_INIT_BLOCK() {

FILE: cpp/serve/radix_tree.h
  function namespace (line 15) | namespace mlc {

FILE: cpp/serve/request.cc
  type mlc (line 13) | namespace mlc {
    type llm (line 14) | namespace llm {
      type serve (line 15) | namespace serve {
        function TVM_FFI_STATIC_INIT_BLOCK (line 19) | TVM_FFI_STATIC_INIT_BLOCK() { RequestNode::RegisterReflection(); }
        function Request (line 47) | Request Request::FromUntokenized(const Request& request, const Tok...
        function TVM_FFI_STATIC_INIT_BLOCK (line 71) | TVM_FFI_STATIC_INIT_BLOCK() {

FILE: cpp/serve/request.h
  function namespace (line 18) | namespace mlc {

FILE: cpp/serve/request_state.cc
  type mlc (line 10) | namespace mlc {
    type llm (line 11) | namespace llm {
      type serve (line 12) | namespace serve {
        function TVM_FFI_STATIC_INIT_BLOCK (line 14) | TVM_FFI_STATIC_INIT_BLOCK() {
        function RequestStreamOutput (line 117) | RequestStreamOutput RequestActionPostProcWorkspace::GetStreamOutpu...

FILE: cpp/serve/request_state.h
  function namespace (line 23) | namespace llm {

FILE: cpp/serve/sampler/cpu_sampler.cc
  type mlc (line 16) | namespace mlc {
    type llm (line 17) | namespace llm {
      type serve (line 18) | namespace serve {
        function TVM_FFI_STATIC_INIT_BLOCK (line 20) | TVM_FFI_STATIC_INIT_BLOCK() { SamplerObj::RegisterReflection(); }
        function TokenProbPair (line 35) | TokenProbPair SampleTopPFromProb(Tensor prob, int unit_offset, int...
        function RenormalizeProbByTopP (line 172) | void RenormalizeProbByTopP(Tensor prob, int unit_offset, double to...
        type detail (line 262) | namespace detail {
          function ComputeTopProbsImpl (line 266) | std::vector<TokenProbPair> ComputeTopProbsImpl(const float* p_pr...
        function ComputeTopProbs (line 302) | inline std::vector<TokenProbPair> ComputeTopProbs(Tensor prob, int...
        class CPUSampler (line 327) | class CPUSampler : public SamplerObj {
          method CPUSampler (line 329) | explicit CPUSampler(Optional<EventTraceRecorder> trace_recorder)
          method Tensor (line 332) | Tensor BatchRenormalizeProbsByTopP(Tensor probs_on_device,      ...
          method BatchSampleTokensWithProbBeforeTopP (line 375) | std::vector<SampleResult> BatchSampleTokensWithProbBeforeTopP(
          method BatchSampleTokensWithProbAfterTopP (line 392) | std::vector<SampleResult> BatchSampleTokensWithProbAfterTopP(
          method BatchVerifyDraftTokensWithProbAfterTopP (line 402) | std::pair<std::vector<std::vector<SampleResult>>, std::vector<int>>
          method BatchSampleTokensImpl (line 506) | std::vector<SampleResult> BatchSampleTokensImpl(Tensor probs_on_...
          method Tensor (line 546) | Tensor CopyProbsToCPU(Tensor probs_on_device) {
        function Sampler (line 582) | Sampler Sampler::CreateCPUSampler(Optional<EventTraceRecorder> tra...

FILE: cpp/serve/sampler/gpu_sampler.cc
  type mlc (line 14) | namespace mlc {
    type llm (line 15) | namespace llm {
      type serve (line 16) | namespace serve {
        function FlashInferSamplingAvailable (line 18) | inline bool FlashInferSamplingAvailable(Device device) {
        function CopyArray (line 32) | inline void CopyArray(Tensor src, Tensor dst, TVMStreamHandle copy...
        function SyncCopyStream (line 37) | inline void SyncCopyStream(Device device, TVMStreamHandle compute_...
        class GPUSampler (line 49) | class GPUSampler : public SamplerObj {
          method GPUSampler (line 51) | explicit GPUSampler(int max_num_sample, int vocab_size, Function...
          method Tensor (line 122) | Tensor BatchRenormalizeProbsByTopP(Tensor probs_on_device,      ...
          method BatchSampleTokensWithProbBeforeTopP (line 177) | std::vector<SampleResult> BatchSampleTokensWithProbBeforeTopP(
          method BatchSampleTokensWithProbAfterTopP (line 188) | std::vector<SampleResult> BatchSampleTokensWithProbAfterTopP(
          method BatchVerifyDraftTokensWithProbAfterTopP (line 199) | std::pair<std::vector<std::vector<SampleResult>>, std::vector<int>>
          method BatchSampleTokensImpl (line 358) | std::vector<SampleResult> BatchSampleTokensImpl(Tensor probs_on_...
          method CollectSampleResult (line 409) | std::vector<SampleResult> CollectSampleResult(const std::vector<...
          method ChunkSampleTokensImpl (line 438) | std::vector<SampleResult> ChunkSampleTokensImpl(Tensor probs_on_...
          method Tensor (line 478) | Tensor GenerateUniformSamples(const std::vector<RandomGenerator*...
          method Tensor (line 491) | Tensor GenerateUniformSamples(const std::vector<RandomGenerator*...
          method Tensor (line 507) | Tensor CopySampleIndicesToGPU(const std::vector<int>& sample_ind...
          method CheckTopP (line 519) | bool CheckTopP(const Array<GenerationConfig>& generation_cfg,
          method CheckProbValues (line 544) | bool CheckProbValues(const Array<GenerationConfig>& generation_cfg,
          method SampleOnGPU (line 565) | std::vector<Tensor> SampleOnGPU(Tensor probs_on_device, Tensor u...
          method CopyArraysToCPU (line 655) | std::vector<Tensor> CopyArraysToCPU(const std::vector<Tensor>& d...
        function Sampler (line 746) | Sampler Sampler::CreateGPUSampler(int max_num_sample, int vocab_si...

FILE: cpp/serve/sampler/sampler.h
  function namespace (line 20) | namespace mlc {

FILE: cpp/serve/threaded_engine.cc
  type mlc (line 22) | namespace mlc {
    type llm (line 23) | namespace llm {
      type serve (line 24) | namespace serve {
        type InstructionKind (line 30) | enum class InstructionKind : int {
        class ThreadedEngineImpl (line 40) | class ThreadedEngineImpl : public ThreadedEngine {
          method InitThreadedEngine (line 42) | void InitThreadedEngine(Device device, Optional<Function> reques...
          method Reload (line 51) | void Reload(String engine_config_json_str) final {
          method Unload (line 73) | void Unload() final {
          method Reset (line 96) | void Reset() final {
          method AddRequest (line 109) | void AddRequest(Request request) final {
          method AbortRequest (line 122) | void AbortRequest(const String& request_id) final {
          method RunBackgroundLoop (line 135) | void RunBackgroundLoop() final {
          method RunBackgroundStreamBackLoop (line 190) | void RunBackgroundStreamBackLoop() final {
          method ExitBackgroundLoop (line 222) | void ExitBackgroundLoop() final {
          method GenerationConfig (line 233) | GenerationConfig GetDefaultGenerationConfig() const final {
          method Request (line 239) | Request CreateRequest(String id, Array<Data> inputs, String gene...
          method EngineConfig (line 246) | EngineConfig GetCompleteEngineConfig() const final {
          method String (line 251) | String GetCompleteEngineConfigJSONString() const {
          method DebugCallFuncOnAllAllWorker (line 255) | void DebugCallFuncOnAllAllWorker(const String& func_name, Option...
          method EngineReloadImpl (line 270) | void EngineReloadImpl(const std::string& engine_config_json_str) {
          method EngineUnloadImpl (line 300) | void EngineUnloadImpl() {
        class ThreadedEngineModule (line 383) | class ThreadedEngineModule : public ThreadedEngineImpl, public ffi...
        function TVM_FFI_STATIC_INIT_BLOCK (line 403) | TVM_FFI_STATIC_INIT_BLOCK() {

FILE: cpp/serve/threaded_engine.h
  function namespace (line 13) | namespace mlc {

FILE: cpp/support/debug_utils.h
  function namespace (line 11) | namespace mlc {

FILE: cpp/support/dynamic_bitset.h
  function namespace (line 15) | namespace mlc {
  function const (line 89) | bool operator[](int index) const {
  function Set (line 98) | void Set() {
  function Reset (line 114) | void Reset() {
  function Reset (line 120) | void Reset(int index) { Set(index, false); }

FILE: cpp/support/encoding.cc
  type mlc (line 11) | namespace mlc {
    type llm (line 12) | namespace llm {
      function PrintAsUTF8 (line 14) | std::string PrintAsUTF8(TCodepoint codepoint) {
      function PrintAsEscaped (line 39) | std::string PrintAsEscaped(
      function PrintAsEscaped (line 68) | std::string PrintAsEscaped(uint8_t raw_char) { return PrintAsEscaped...
      function PrintAsEscaped (line 70) | std::string PrintAsEscaped(std::string raw_str) {
      function HandleUTF8FirstByte (line 79) | std::tuple<bool, int, TCodepoint> HandleUTF8FirstByte(uint8_t byte) {
      function ParseNextUTF8 (line 108) | std::pair<TCodepoint, const char*> ParseNextUTF8(const char* utf8, U...
      function ParseUTF8 (line 133) | std::vector<TCodepoint> ParseUTF8(const char* utf8, UTF8ErrorPolicy ...
      function HexCharToInt (line 146) | inline int HexCharToInt(char c) {
      function ParseNextUTF8OrEscaped (line 158) | std::pair<TCodepoint, const char*> ParseNextUTF8OrEscaped(

FILE: cpp/support/encoding.h
  function TCodepoint (line 62) | enum CharHandlingError : TCodepoint {

FILE: cpp/support/json_parser.h
  function namespace (line 18) | namespace mlc {
  function namespace (line 205) | namespace details {

FILE: cpp/support/load_bytes_from_file.h
  function namespace (line 14) | namespace mlc {

FILE: cpp/support/progress_bar.h
  function namespace (line 12) | namespace mlc {

FILE: cpp/support/random.h
  function namespace (line 12) | namespace mlc {

FILE: cpp/support/result.h
  function namespace (line 14) | namespace mlc {

FILE: cpp/support/utils.h
  function namespace (line 18) | namespace mlc {

FILE: cpp/support/vlm_utils.cc
  type mlc (line 9) | namespace mlc {
    type llm (line 10) | namespace llm {
      function CalculateResizeShape (line 12) | void CalculateResizeShape(tvm::runtime::Tensor image_data, std::stri...
      function CalculatePadShape (line 31) | void CalculatePadShape(tvm::runtime::Tensor image_data, std::string ...
      function CalculateCropShape (line 47) | void CalculateCropShape(tvm::runtime::Tensor image_data, std::string...

FILE: cpp/support/vlm_utils.h
  function namespace (line 13) | namespace mlc {

FILE: cpp/tokenizers/streamer.cc
  type mlc (line 17) | namespace mlc {
    type llm (line 18) | namespace llm {
      function TVM_FFI_STATIC_INIT_BLOCK (line 20) | TVM_FFI_STATIC_INIT_BLOCK() {
      function TVM_FFI_STATIC_INIT_BLOCK (line 146) | TVM_FFI_STATIC_INIT_BLOCK() {
      function CreatePartialMatchTable (line 162) | inline std::vector<int> CreatePartialMatchTable(const String& str) {
      function TVM_FFI_STATIC_INIT_BLOCK (line 269) | TVM_FFI_STATIC_INIT_BLOCK() {

FILE: cpp/tokenizers/streamer.h
  function namespace (line 17) | namespace mlc {

FILE: cpp/tokenizers/tokenizers.cc
  type mlc (line 24) | namespace mlc {
    type llm (line 25) | namespace llm {
      function TVM_FFI_STATIC_INIT_BLOCK (line 27) | TVM_FFI_STATIC_INIT_BLOCK() {
      function String (line 34) | String TokenizerInfoNode::AsJSONString() const {
      function TokenizerInfo (line 42) | TokenizerInfo TokenizerInfo::FromJSONString(String json_string) {
      function DynamicBitset (line 104) | const DynamicBitset& TokenizerObj::GetPrefixTokenMask() {
      function Tokenizer (line 143) | Tokenizer Tokenizer::FromPath(const String& _path, std::optional<Tok...
      function TokenizerInfo (line 197) | TokenizerInfo Tokenizer::DetectTokenizerInfo(const String& path_str) {
      function ByteFallbackDecoder (line 360) | inline std::string ByteFallbackDecoder(const std::string& token) {
      function SpaceReplacerDecoder (line 375) | inline std::string SpaceReplacerDecoder(const std::string& token) {
      function ByteLevelDecoder (line 395) | inline std::string ByteLevelDecoder(const std::string& token) {
      function PostProcessToken (line 440) | inline std::string PostProcessToken(const std::string& token,
      function TVM_FFI_STATIC_INIT_BLOCK (line 478) | TVM_FFI_STATIC_INIT_BLOCK() {
      function TVM_FFI_STATIC_INIT_BLOCK (line 507) | TVM_FFI_STATIC_INIT_BLOCK() {

FILE: cpp/tokenizers/tokenizers.h
  function namespace (line 23) | namespace llm {

FILE: examples/python/microserving/custom_router.py
  class CustomRouter (line 13) | class CustomRouter(Router):
    method translate_request (line 16) | async def translate_request(

FILE: examples/rest/nodejs/sample_langchain.ts
  function print (line 21) | function print(str: string) {

FILE: examples/rest/python/sample_client.py
  class color (line 6) | class color:

FILE: examples/rest/python/sample_langchain.py
  class color (line 30) | class color:
  function llm_chain_example (line 43) | def llm_chain_example():
  function load_qa_chain_example (line 62) | def load_qa_chain_example():
  function retrieval_qa_sotu_example (line 73) | def retrieval_qa_sotu_example():
  function retrieval_qa_mlc_docs_example (line 117) | def retrieval_qa_mlc_docs_example():

FILE: examples/rest/python/sample_openai.py
  class color (line 9) | class color:

FILE: python/mlc_llm/__init__.py
  function _create_socket_session_local_workers (line 14) | def _create_socket_session_local_workers(num_workers):

FILE: python/mlc_llm/__main__.py
  function main (line 11) | def main():

FILE: python/mlc_llm/base.py
  function _load_mlc_llm_lib (line 15) | def _load_mlc_llm_lib():
  function _debug_cuda_profiler_start (line 28) | def _debug_cuda_profiler_start() -> None:
  function _debug_cuda_profiler_stop (line 37) | def _debug_cuda_profiler_stop() -> None:

FILE: python/mlc_llm/bench/__main__.py
  function _parse_num_concurrent_requests (line 34) | def _parse_num_concurrent_requests(num_str: Optional[str]) -> Optional[L...
  function _parse_request_rate (line 43) | def _parse_request_rate(request_rate_str: Optional[str]) -> Optional[Lis...
  function _parse_mlc_engine_config (line 56) | def _parse_mlc_engine_config(config_str: Optional[str]) -> EngineConfig:
  function _launch_mlc_server (line 76) | def _launch_mlc_server(args: argparse.argparse.Namespace):
  function run_pipeline (line 88) | def run_pipeline(
  function query_mlc_server_metrics (line 119) | def query_mlc_server_metrics(host: str, port: int):
  function main (line 129) | def main(args: argparse.argparse.Namespace):

FILE: python/mlc_llm/bench/api_endpoint.py
  class APIEndPoint (line 18) | class APIEndPoint:
    method __init__ (line 23) | def __init__(self, include_server_metrics: bool = False) -> None:
    method __aenter__ (line 26) | async def __aenter__(self) -> Self:
    method __aexit__ (line 29) | async def __aexit__(self, exc_type, exc_value, tb) -> None:
    method __call__ (line 32) | async def __call__(self, request: RequestRecord) -> RequestRecord:
  class OpenAIChatEndPoint (line 36) | class OpenAIChatEndPoint(APIEndPoint):
    method __init__ (line 39) | def __init__(  # pylint: disable=too-many-arguments
    method __aenter__ (line 57) | async def __aenter__(self) -> Self:
    method __aexit__ (line 63) | async def __aexit__(self, exc_type, exc_value, tb) -> None:
    method __call__ (line 66) | async def __call__(  # pylint: disable=too-many-branches,too-many-stat...
  class OpenAIEndPoint (line 186) | class OpenAIEndPoint(APIEndPoint):
    method __init__ (line 189) | def __init__(  # pylint: disable=too-many-arguments
    method __aenter__ (line 212) | async def __aenter__(self) -> Self:
    method __aexit__ (line 218) | async def __aexit__(self, exc_type, exc_value, tb) -> None:
    method __call__ (line 221) | async def __call__(  # pylint: disable=too-many-branches,too-many-stat...
  class TensorRTLLMEndPoint (line 318) | class TensorRTLLMEndPoint(APIEndPoint):
    method __init__ (line 321) | def __init__(  # pylint: disable=too-many-arguments
    method __aenter__ (line 333) | async def __aenter__(self) -> Self:
    method __aexit__ (line 339) | async def __aexit__(self, exc_type, exc_value, tb) -> None:
    method __call__ (line 342) | async def __call__(  # pylint: disable=too-many-branches,too-many-loca...
  function create_api_endpoint (line 448) | def create_api_endpoint(args: argparse.Namespace) -> APIEndPoint:

FILE: python/mlc_llm/bench/dataset.py
  class Dataset (line 22) | class Dataset:  # pylint: disable=too-few-public-methods
    method generate_request_records (line 35) | def generate_request_records(
  class ShareGPTDataset (line 46) | class ShareGPTDataset(Dataset):  # pylint: disable=too-few-public-methods
    method __init__ (line 52) | def __init__(
    method generate_request_records (line 109) | def generate_request_records(
  class LoogleDataset (line 170) | class LoogleDataset(Dataset):  # pylint: disable=too-few-public-methods
    method __init__ (line 183) | def __init__(self, tokenizer: AutoTokenizer, testset_name: str) -> None:
    method generate_request_records (line 210) | def generate_request_records(  # pylint: disable=too-many-locals
  class LLMPerfDataset (line 264) | class LLMPerfDataset(Dataset):  # pylint: disable=too-few-public-methods
    method __init__ (line 267) | def __init__(self, dataset_path: str, num_requests: int, tokenizer: Au...
    method generate_request_records (line 285) | def generate_request_records(  # pylint: disable=too-many-arguments,to...
  class JSONModeEvalDataset (line 345) | class JSONModeEvalDataset(Dataset):  # pylint: disable=too-few-public-me...
    method __init__ (line 348) | def __init__(self, tokenizer: AutoTokenizer) -> None:
    method generate_request_records (line 365) | def generate_request_records(
  class ReActDataset (line 407) | class ReActDataset(Dataset):  # pylint: disable=too-few-public-methods
    method __init__ (line 484) | def __init__(  # pylint: disable=too-many-locals
    method generate_request_records (line 550) | def generate_request_records(
  class WildChatDataset (line 590) | class WildChatDataset(Dataset):  # pylint: disable=too-few-public-methods
    method __init__ (line 595) | def __init__(self, tokenizer: AutoTokenizer, apply_chat_template: bool...
    method generate_request_records (line 650) | def generate_request_records(  # pylint: disable=too-many-locals
  class AzureLLMInferenceDataset (line 711) | class AzureLLMInferenceDataset(Dataset):  # pylint: disable=too-few-publ...
    method __init__ (line 718) | def __init__(self, dataset_path: str, tokenizer: AutoTokenizer) -> None:
    method generate_request_records (line 741) | def generate_request_records(  # pylint: disable=too-many-locals
  function create_dataset (line 817) | def create_dataset(  # pylint: disable=too-many-return-statements,too-ma...

FILE: python/mlc_llm/bench/evaluation/gsm8k.py
  function extract_answer (line 21) | def extract_answer(text: str, regex: re.Pattern, select_index: int) -> str:
  function extract_ground_truth (line 34) | def extract_ground_truth(text: str) -> str:
  function strict_extract_answer (line 39) | def strict_extract_answer(text: str) -> str:
  function flexible_extract_answer (line 44) | def flexible_extract_answer(text: str) -> str:
  function create_few_shot_prompt (line 49) | def create_few_shot_prompt(n_shot: int, use_cot: bool, random_order=Fals...
  function create_prompt (line 157) | def create_prompt(question: str, n_shot: int, use_cot: bool, random_orde...
  function parse_args (line 167) | def parse_args():
  function send_request (line 184) | async def send_request(
  function evaluate (line 209) | async def evaluate(  # pylint: disable=too-many-arguments, too-many-locals

FILE: python/mlc_llm/bench/evaluation/mmlu.py
  function parse_args (line 81) | def parse_args():
  function send_request (line 97) | async def send_request(
  function evaluate (line 128) | async def evaluate(  # pylint: disable=too-many-arguments, too-many-locals

FILE: python/mlc_llm/bench/request_processor.py
  class RequestProcessor (line 30) | class RequestProcessor:  # pylint: disable=too-few-public-methods
    method __call__ (line 36) | def __call__(self, request_records: List[RequestRecord]) -> List[Reque...
  class LogMessage (line 40) | class LogMessage(RequestProcessor):  # pylint: disable=too-few-public-me...
    method __init__ (line 43) | def __init__(self, message: str) -> None:
    method __call__ (line 46) | def __call__(self, request_records: List[RequestRecord]) -> List[Reque...
  class SampleRequests (line 51) | class SampleRequests(RequestProcessor):  # pylint: disable=too-few-publi...
    method __init__ (line 54) | def __init__(self, num_requests: int, take_first_x_requests: bool = Fa...
    method __call__ (line 60) | def __call__(self, request_records: List[RequestRecord]) -> List[Reque...
    method _sample_from_plain_request_records (line 71) | def _sample_from_plain_request_records(
    method _sample_from_grouped_request_records (line 93) | def _sample_from_grouped_request_records(
  class AttachModelName (line 124) | class AttachModelName(RequestProcessor):  # pylint: disable=too-few-publ...
    method __init__ (line 127) | def __init__(self, model: str) -> None:
    method __call__ (line 130) | def __call__(self, request_records: List[RequestRecord]) -> List[Reque...
  class AttachRequestRateTimestamp (line 136) | class AttachRequestRateTimestamp(RequestProcessor):  # pylint: disable=t...
    method __init__ (line 139) | def __init__(self, request_rate: np.float32) -> None:
    method __call__ (line 142) | def __call__(self, request_records: List[RequestRecord]) -> List[Reque...
  class AttachExecutionFeature (line 151) | class AttachExecutionFeature(RequestProcessor):  # pylint: disable=too-f...
    method __init__ (line 154) | def __init__(self, exec_feature: Dict[str, Any]) -> None:
    method __call__ (line 157) | def __call__(self, request_records: List[RequestRecord]) -> List[Reque...
  class AttachStreamFlag (line 164) | class AttachStreamFlag(RequestProcessor):  # pylint: disable=too-few-pub...
    method __init__ (line 167) | def __init__(self, stream: Optional[bool]) -> None:
    method __call__ (line 170) | def __call__(self, request_records: List[RequestRecord]) -> List[Reque...
  class AttachSamplingOptions (line 178) | class AttachSamplingOptions(RequestProcessor):  # pylint: disable=too-fe...
    method __init__ (line 181) | def __init__(self, temperature: float, top_p: float, ignore_eos: bool)...
    method __call__ (line 186) | def __call__(self, request_records: List[RequestRecord]) -> List[Reque...
  class ScaleTimestamp (line 198) | class ScaleTimestamp(RequestProcessor):  # pylint: disable=too-few-publi...
    method __init__ (line 201) | def __init__(self, timestamp_scale: float):
    method __call__ (line 204) | def __call__(self, request_records: List[RequestRecord]) -> List[Reque...
  class MetricAnalyzer (line 214) | class MetricAnalyzer(RequestProcessor):  # pylint: disable=too-few-publi...
    method __init__ (line 217) | def __init__(self, tokenizer: AutoTokenizer) -> None:
    method __call__ (line 220) | def __call__(self, request_records: List[RequestRecord]) -> List[Reque...
  class WarmupAndRun (line 255) | class WarmupAndRun(RequestProcessor):  # pylint: disable=too-few-public-...
    method __init__ (line 258) | def __init__(  # pylint: disable=too-many-arguments
    method generate_fake_warmup_requests (line 272) | def generate_fake_warmup_requests(  # pylint: disable=missing-function...
    method __call__ (line 291) | def __call__(self, request_records: List[RequestRecord]) -> List[Reque...
    method _process_warmup_requests (line 324) | def _process_warmup_requests(self, warmup_requests: List[RequestRecord...
  class SequentialProcessor (line 341) | class SequentialProcessor(RequestProcessor):  # pylint: disable=too-few-...
    method __init__ (line 346) | def __init__(self, *processors: RequestProcessor) -> None:
    method __call__ (line 349) | def __call__(self, request_records: List[RequestRecord]) -> List[Reque...
  class Executor (line 355) | class Executor(RequestProcessor):  # pylint: disable=too-few-public-methods
    method __init__ (line 358) | def __init__(
    method __call__ (line 368) | def __call__(self, request_records: List[RequestRecord]) -> List[Reque...
  class FixedConcurrentRequestExecutor (line 372) | class FixedConcurrentRequestExecutor(Executor):  # pylint: disable=too-f...
    method __init__ (line 375) | def __init__(  # pylint: disable=too-many-arguments
    method __call__ (line 391) | def __call__(self, request_records: List[RequestRecord]) -> List[Reque...
    method _process_task (line 422) | def _process_task(
  class FixTimestampExecutor (line 484) | class FixTimestampExecutor(Executor):  # pylint: disable=too-few-public-...
    method __init__ (line 487) | def __init__(  # pylint: disable=too-many-arguments
    method __call__ (line 503) | def __call__(self, request_records: List[RequestRecord]) -> List[Reque...
    method _process_task (line 540) | def _process_task(
  function create_pipelines (line 603) | def create_pipelines(  # pylint: disable=too-many-branches

FILE: python/mlc_llm/bench/request_record.py
  class ServerMetrics (line 14) | class ServerMetrics(BaseModel):
  class Metrics (line 27) | class Metrics(BaseModel):
  class RequestRecord (line 45) | class RequestRecord(BaseModel):
  class GroupedRequestRecord (line 57) | class GroupedRequestRecord(RequestRecord):
  function generate_metrics_summary (line 67) | def generate_metrics_summary(
  function _compute_metrics_statistics (line 116) | def _compute_metrics_statistics(
  function convert_reports_to_df (line 161) | def convert_reports_to_df(reports: List[Dict[str, Any]]) -> pd.DataFrame:
  function pretty_print_report (line 177) | def pretty_print_report(report: Dict[str, Any]) -> None:  # pylint: disa...

FILE: python/mlc_llm/cli/calibrate.py
  function main (line 10) | def main(argv):

FILE: python/mlc_llm/cli/chat.py
  function main (line 8) | def main(argv):

FILE: python/mlc_llm/cli/check_device.py
  function _check_device (line 10) | def _check_device(device: Device) -> bool:
  function main (line 17) | def main():

FILE: python/mlc_llm/cli/compile.py
  function main (line 27) | def main(argv):

FILE: python/mlc_llm/cli/convert_weight.py
  function main (line 17) | def main(argv):

FILE: python/mlc_llm/cli/delivery.py
  class OverrideConfigs (line 33) | class OverrideConfigs(BaseModel):
  class ModelDeliveryTask (line 46) | class ModelDeliveryTask(BaseModel):
  class ModelDeliveryList (line 71) | class ModelDeliveryList(BaseModel):
    method from_json (line 83) | def from_json(cls: Type[T], json_dict: Dict[str, Any]) -> T:
    method to_json (line 93) | def to_json(self) -> Dict[str, Any]:
  function _clone_repo (line 100) | def _clone_repo(model: Union[str, Path], hf_local_dir: Optional[str]) ->...
  function _run_quantization (line 120) | def _run_quantization(
  function _get_current_log (line 207) | def _get_current_log(log: str) -> ModelDeliveryList:
  function _generate_model_delivery_diff (line 219) | def _generate_model_delivery_diff(  # pylint: disable=too-many-locals
  function _main (line 281) | def _main(  # pylint: disable=too-many-locals, too-many-arguments
  function main (line 369) | def main():

FILE: python/mlc_llm/cli/gen_config.py
  function main (line 14) | def main(argv):

FILE: python/mlc_llm/cli/lib_delivery.py
  class ModelInfo (line 23) | class ModelInfo:  # pylint: disable=too-many-instance-attributes
  class DeferredScope (line 36) | class DeferredScope:
    method __init__ (line 39) | def __init__(self):
    method add (line 42) | def add(self, func: Callable[[], None]):
    method __enter__ (line 46) | def __enter__(self):
    method __exit__ (line 49) | def __exit__(self, exc_type, exc_value, traceback):
    method create_temp_dir (line 54) | def create_temp_dir(self) -> Path:
  function _run_compilation (line 61) | def _run_compilation(model_info: ModelInfo, repo_dir: Path) -> bool:
  function _main (line 122) | def _main(  # pylint: disable=too-many-locals
  function main (line 175) | def main():

FILE: python/mlc_llm/cli/model_metadata.py
  function _extract_metadata (line 19) | def _extract_metadata(model_lib: Path) -> Dict[str, Any]:
  function _report_all (line 29) | def _report_all(metadata: Dict[str, Any]) -> None:
  function _read_dynamic_shape (line 46) | def _read_dynamic_shape(shape: List[Union[int, str]], config: Union[Dict...
  function _compute_memory_usage (line 74) | def _compute_memory_usage(metadata: Dict[str, Any], config: Union[Dict, ...
  function _report_memory_usage (line 91) | def _report_memory_usage(metadata: Dict[str, Any], config: Union[Dict, C...
  function main (line 145) | def main():

FILE: python/mlc_llm/cli/package.py
  function main (line 12) | def main(argv):

FILE: python/mlc_llm/cli/router.py
  function main (line 8) | def main(argv):

FILE: python/mlc_llm/cli/serve.py
  class EngineConfigOverride (line 15) | class EngineConfigOverride:  # pylint: disable=too-many-instance-attributes
    method __repr__ (line 36) | def __repr__(self) -> str:
    method from_str (line 65) | def from_str(source: str) -> "EngineConfigOverride":
  function main (line 106) | def main(argv):

FILE: python/mlc_llm/cli/worker.py
  function main (line 32) | def main():

FILE: python/mlc_llm/compiler_pass/attach_cuda_graph_alloc_init_func.py
  class AttachCUDAGraphAllocInitFunc (line 8) | class AttachCUDAGraphAllocInitFunc:  # pylint: disable=too-few-public-me...
    method __init__ (line 11) | def __init__(self):
    method transform_module (line 14) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...

FILE: python/mlc_llm/compiler_pass/attach_embedding_allocator.py
  class AttachAllocEmbeddingTensorFunc (line 10) | class AttachAllocEmbeddingTensorFunc:  # pylint: disable=too-few-public-...
    method __init__ (line 13) | def __init__(self, metadata: Dict[str, Any]):
    method transform_module (line 16) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...

FILE: python/mlc_llm/compiler_pass/attach_logit_processor.py
  class AttachLogitProcessFunc (line 14) | class AttachLogitProcessFunc:  # pylint: disable=too-few-public-methods
    method __init__ (line 17) | def __init__(self, target: tvm.target.Target):
    method transform_module (line 27) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
  function _get_apply_logit_bias_inplace_cpu (line 41) | def _get_apply_logit_bias_inplace_cpu():
  function _get_apply_logit_bias_inplace (line 72) | def _get_apply_logit_bias_inplace(target: tvm.target.Target):
  function _get_apply_penalty_inplace_cpu (line 112) | def _get_apply_penalty_inplace_cpu():
  function _get_apply_penalty_inplace (line 156) | def _get_apply_penalty_inplace(target: tvm.target.Target):
  function _get_apply_bitmask_inplace_cpu (line 210) | def _get_apply_bitmask_inplace_cpu():
  function _get_apply_bitmask_inplace (line 246) | def _get_apply_bitmask_inplace(target: tvm.target.Target):

FILE: python/mlc_llm/compiler_pass/attach_sampler.py
  class AttachGPUSamplingFunc (line 15) | class AttachGPUSamplingFunc:  # pylint: disable=too-few-public-methods
    method __init__ (line 18) | def __init__(self, target: tvm.target.Target, variable_bounds: Dict[st...
    method transform_module (line 29) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
  function _attach_multinomial_sampling_func (line 68) | def _attach_multinomial_sampling_func(bb: relax.BlockBuilder):
  function _attach_argsort_func (line 119) | def _attach_argsort_func(bb: relax.BlockBuilder):
  function full (line 142) | def full(var_result: T.handle, value: T.int32):
  function _attach_sample_with_top_p (line 152) | def _attach_sample_with_top_p(bb: relax.BlockBuilder):  # pylint: disabl...
  function _attach_renormalize_by_top_p (line 236) | def _attach_renormalize_by_top_p(bb: relax.BlockBuilder, target: tvm.tar...
  function _attach_take_probs_func (line 267) | def _attach_take_probs_func(bb: relax.BlockBuilder):
  function _attach_batch_verifier (line 343) | def _attach_batch_verifier(bb: relax.BlockBuilder):

FILE: python/mlc_llm/compiler_pass/attach_softmax_with_temperature.py
  class AttachSoftmaxWithTemperature (line 15) | class AttachSoftmaxWithTemperature:  # pylint: disable=too-few-public-me...
    method __init__ (line 18) | def __init__(
    method transform_module (line 24) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
  class _Rewriter (line 30) | class _Rewriter(PyExprMutator):  # pylint: disable=abstract-method
    method __init__ (line 31) | def __init__(
    method transform (line 44) | def transform(self) -> IRModule:
  function _get_lse_and_softmax_func (line 99) | def _get_lse_and_softmax_func(  # pylint: disable=too-many-locals,too-ma...

FILE: python/mlc_llm/compiler_pass/attach_spec_decode_aux_funcs.py
  class AttachSpecDecodeAuxFuncs (line 10) | class AttachSpecDecodeAuxFuncs:  # pylint: disable=too-few-public-methods
    method __init__ (line 15) | def __init__(self, tensor_parallel_shards: int):
    method transform_module (line 18) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
  function _get_scatter_2d_inplace (line 40) | def _get_scatter_2d_inplace(dtype: str, global_symbol: str):
  function _get_gather_2d_inplace (line 58) | def _get_gather_2d_inplace(dtype: str, global_symbol: str):
  function _add_scatter_hidden_states (line 76) | def _add_scatter_hidden_states(bb: BlockBuilder, tensor_parallel_shards:...
  function _add_gather_hidden_states (line 102) | def _add_gather_hidden_states(bb: BlockBuilder, tensor_parallel_shards: ...

FILE: python/mlc_llm/compiler_pass/attach_support_info.py
  class AttachVariableBounds (line 13) | class AttachVariableBounds:  # pylint: disable=too-few-public-methods
    method __init__ (line 16) | def __init__(self, variable_bounds: Dict[str, int]):
    method transform_module (line 21) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
  class AttachAdditionalPrimFuncs (line 32) | class AttachAdditionalPrimFuncs:  # pylint: disable=too-few-public-methods
    method __init__ (line 35) | def __init__(self, functions: Dict[str, tir.PrimFunc]):
    method transform_module (line 38) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
  class AttachMemoryPlanAttr (line 46) | class AttachMemoryPlanAttr:  # pylint: disable=too-few-public-methods
    method transform_module (line 49) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
  class AttachCUDAGraphSymbolicCaptureHints (line 58) | class AttachCUDAGraphSymbolicCaptureHints:  # pylint: disable=too-few-pu...
    method __init__ (line 61) | def __init__(self, hints: Dict[str, List[str]]):
    method transform_module (line 64) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
  class AttachPipelineParallelStages (line 79) | class AttachPipelineParallelStages:  # pylint: disable=too-few-public-me...
    method __init__ (line 82) | def __init__(self, pipeline_parallel_shards: int):
    method transform_module (line 85) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
  class AttachSequenceLengthPaddingFactor (line 108) | class AttachSequenceLengthPaddingFactor:  # pylint: disable=too-few-publ...
    method __init__ (line 111) | def __init__(self, target: tvm.target.Target, metadata: Dict[str, Any]):
    method transform_module (line 115) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...

FILE: python/mlc_llm/compiler_pass/blas_dispatch.py
  class BLASDispatch (line 17) | class BLASDispatch:  # pylint: disable=too-few-public-methods,broad-exce...
    method __init__ (line 20) | def __init__(self, target: tvm.target.Target) -> None:
    method transform_module (line 34) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...

FILE: python/mlc_llm/compiler_pass/clean_up_tir_attrs.py
  class CleanUpTIRAttrs (line 10) | class CleanUpTIRAttrs:  # pylint: disable=too-few-public-methods
    method __init__ (line 13) | def __init__(self, attrs: List[str]):
    method transform_module (line 16) | def transform_module(

FILE: python/mlc_llm/compiler_pass/dispatch_kv_cache_creation.py
  function extract_creation_args (line 16) | def extract_creation_args(func: relax.Function) -> Dict[str, Any]:
  class DispatchKVCacheCreation (line 79) | class DispatchKVCacheCreation:  # pylint: disable=too-many-instance-attr...
    method __init__ (line 82) | def __init__(
    method transform_module (line 104) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
    method attach_kv_cache_metadata (line 135) | def attach_kv_cache_metadata(self, kwargs: Dict[str, Any]):
    method create_tir_paged_kv_cache (line 144) | def create_tir_paged_kv_cache(
    method create_flashinfer_paged_kv_cache (line 182) | def create_flashinfer_paged_kv_cache(

FILE: python/mlc_llm/compiler_pass/dispatch_triton_kernel.py
  class _Rewriter (line 21) | class _Rewriter(PyExprMutator):  # pylint: disable=abstract-method
    method __init__ (line 22) | def __init__(self, mod: IRModule, target: tvm.target.Target) -> None:
    method transform (line 28) | def transform(self) -> tvm.IRModule:  # pylint: disable=too-many-locals
    method visit_call_ (line 44) | def visit_call_(self, call: relax.Call) -> relax.Expr:  # pylint: disa...
    method w8a8_block_fp8_matmul (line 62) | def w8a8_block_fp8_matmul(  # pylint: disable=too-many-locals
    method w8a8_block_fp8_group_matmul (line 106) | def w8a8_block_fp8_group_matmul(  # pylint: disable=too-many-locals
  class DispatchTritonKernel (line 158) | class DispatchTritonKernel:  # pylint: disable=too-many-instance-attribu...
    method __init__ (line 161) | def __init__(self, target: tvm.target.Target) -> None:
    method transform_module (line 169) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...

FILE: python/mlc_llm/compiler_pass/estimate_memory_usage.py
  class AttachMetadataWithMemoryUsage (line 17) | class AttachMetadataWithMemoryUsage:  # pylint: disable=too-few-public-m...
    method __init__ (line 20) | def __init__(self, metadata: Dict[str, Any]):
    method transform_module (line 23) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
  class _MemoryEstimator (line 40) | class _MemoryEstimator(PyExprVisitor):
    method __init__ (line 43) | def __init__(self) -> None:
    method run (line 49) | def run(self, mod: IRModule) -> Dict[str, int]:
    method visit_call_ (line 65) | def visit_call_(self, call: relax.Call) -> None:  # pylint: disable=ar...
    method _builtin_tensor_alloc (line 72) | def _builtin_tensor_alloc(self, shape: relax.Expr, dtype_str: str) -> ...
    method _storage_alloc (line 83) | def _storage_alloc(self, size: relax.Expr) -> None:

FILE: python/mlc_llm/compiler_pass/fuse_add_norm.py
  function _get_add_rms_norm_decode (line 16) | def _get_add_rms_norm_decode(hidden_size: int, eps: float, TX: int, in_d...
  function _get_add_rms_norm_prefill (line 87) | def _get_add_rms_norm_prefill(hidden_size: int, eps: float, TX: int, in_...
  class FuseAddRMSNorm (line 156) | class FuseAddRMSNorm:  # pylint: disable=too-few-public-methods
    method __init__ (line 159) | def __init__(self, target: tvm.target.Target) -> None:
    method transform_module (line 169) | def transform_module(self, mod: tvm.IRModule, _ctx: tvm.transform.Pass...
  class _FuseAddRMSNormRewriter (line 175) | class _FuseAddRMSNormRewriter(PyExprMutator):  # pylint: disable=abstrac...
    method __init__ (line 176) | def __init__(self, mod: tvm.IRModule, target: tvm.target.Target):
    method transform (line 183) | def transform(self) -> tvm.IRModule:  # pylint: disable=too-many-locals
    method visit_call_ (line 193) | def visit_call_(self, call: relax.Call) -> relax.Expr:  # pylint: disa...

FILE: python/mlc_llm/compiler_pass/fuse_dequantize_matmul_ewise.py
  class FuseDequantizeMatmulEwise (line 9) | class FuseDequantizeMatmulEwise:  # pylint: disable=too-few-public-methods
    method transform_module (line 12) | def transform_module(
  function _pattern (line 37) | def _pattern(match_ewise: int, n_aux_tensor: int):

FILE: python/mlc_llm/compiler_pass/fuse_dequantize_take.py
  class FuseDequantizeTake (line 15) | class FuseDequantizeTake:  # pylint: disable=too-few-public-methods
    method transform_module (line 18) | def transform_module(  # pylint: disable=too-many-locals
  function _pattern (line 52) | def _pattern(n_aux_tensor: int, match_tir_vars: bool):

FILE: python/mlc_llm/compiler_pass/fuse_dequantize_transpose.py
  class FuseDequantizeTranspose (line 11) | class FuseDequantizeTranspose:  # pylint: disable=too-few-public-methods
    method transform_module (line 14) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
  class _DequantizeTransposeFuser (line 20) | class _DequantizeTransposeFuser(PyExprMutator):  # pylint: disable=abstr...
    method __init__ (line 21) | def __init__(
    method transform (line 28) | def transform(self) -> IRModule:
    method visit_call_ (line 37) | def visit_call_(  # pylint: disable=arguments-renamed

FILE: python/mlc_llm/compiler_pass/fuse_ft_dequantize_matmul_epilogue.py
  class FuseFTDequantizeEpilogue (line 13) | class FuseFTDequantizeEpilogue:  # pylint: disable=too-few-public-methods
    method transform_module (line 16) | def transform_module(
  function fuse_bias (line 32) | def fuse_bias(func: relax.Function) -> relax.Function:
  function fuse_activation (line 98) | def fuse_activation(func: relax.Function) -> relax.Function:
  function fuse_residual_binary (line 188) | def fuse_residual_binary(func: relax.Function) -> relax.Function:
  function fuse_residual_unary (line 267) | def fuse_residual_unary(func: relax.Function) -> relax.Function:

FILE: python/mlc_llm/compiler_pass/fuse_transpose_matmul.py
  class FuseTransposeMatmul (line 10) | class FuseTransposeMatmul:  # pylint: disable=too-few-public-methods
    method transform_module (line 13) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
  function _pattern (line 31) | def _pattern():
  class _TransposeMatmulFuser (line 59) | class _TransposeMatmulFuser(PyExprMutator):  # pylint: disable=abstract-...
    method __init__ (line 60) | def __init__(self, mod):
    method visit_call_ (line 63) | def visit_call_(  # pylint: disable=arguments-renamed

FILE: python/mlc_llm/compiler_pass/lift_global_buffer_alloc.py
  class LiftTIRGlobalBufferAlloc (line 13) | class LiftTIRGlobalBufferAlloc:  # pylint: disable=too-few-public-methods
    method transform_module (line 16) | def transform_module(
  class _TIRGlobalAllocRewriter (line 26) | class _TIRGlobalAllocRewriter(PyExprMutator):  # pylint: disable=abstrac...
    method __init__ (line 27) | def __init__(self, mod: IRModule):
    method transform (line 35) | def transform(self) -> IRModule:
    method visit_call_ (line 54) | def visit_call_(self, call: relax.Call):  # pylint: disable=arguments-...
  function remove_global_buf_alloc (line 93) | def remove_global_buf_alloc(
  function _has_symbolic_var (line 148) | def _has_symbolic_var(tensor_sinfo: relax.TensorStructInfo) -> bool:
  function _resolve_tir_var_mapping (line 156) | def _resolve_tir_var_mapping(  # pylint: disable=too-many-locals

FILE: python/mlc_llm/compiler_pass/low_batch_specialization.py
  class LowBatchGemvSpecialize (line 12) | class LowBatchGemvSpecialize:  # pylint: disable=too-few-public-methods
    method transform_module (line 15) | def transform_module(

FILE: python/mlc_llm/compiler_pass/pipeline.py
  class _LogProgress (line 49) | class _LogProgress:  # pylint: disable=too-few-public-methods
    method __init__ (line 52) | def __init__(self, *args):
    method transform_module (line 55) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
  class _DebugDump (line 62) | class _DebugDump:  # pylint: disable=too-few-public-methods
    method __init__ (line 66) | def __init__(self, file_name: str, file_path: Optional[Path], show_met...
    method transform_module (line 71) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
  function _mlc_llm_pipeline (line 82) | def _mlc_llm_pipeline(  # pylint: disable=too-many-arguments

FILE: python/mlc_llm/compiler_pass/pipeline_parallel_rewrite.py
  class PipelineParallelRewrite (line 12) | class PipelineParallelRewrite:  # pylint: disable=too-few-public-methods
    method transform_module (line 15) | def transform_module(
  class _PipelineParallelRewriter (line 25) | class _PipelineParallelRewriter(PyExprMutator):  # pylint: disable=abstr...
    method __init__ (line 26) | def __init__(self, mod: IRModule):
    method transform (line 35) | def transform(self) -> IRModule:  # pylint: disable=too-many-locals
    method _create_stage_func (line 105) | def _create_stage_func(  # pylint: disable=too-many-arguments,too-many...
    method visit_var_binding_ (line 202) | def visit_var_binding_(self, binding: relax.VarBinding) -> None:
    method visit_call_ (line 240) | def visit_call_(self, call: relax.Call) -> relax.Call:  # pylint: disa...
    method _prepare_stage_func_params_and_args (line 249) | def _prepare_stage_func_params_and_args(
    method _update_struct_info (line 261) | def _update_struct_info(
    method _copy_undefined_var (line 291) | def _copy_undefined_var(
    method _update_shape (line 301) | def _update_shape(
  function _extract_pipeline_stages (line 311) | def _extract_pipeline_stages(
  function _analyze_required_func_params (line 363) | def _analyze_required_func_params(
  class _RequiredFuncParamAnalyzer (line 376) | class _RequiredFuncParamAnalyzer(PyExprVisitor):
    method __init__ (line 379) | def __init__(self, func_params: List[relax.Var]) -> None:
    method run (line 383) | def run(self, stage_bindings: List[relax.Binding]) -> List[relax.Var]:
    method visit_var_ (line 390) | def visit_var_(self, var: relax.Var) -> None:  # pylint: disable=argum...

FILE: python/mlc_llm/compiler_pass/scatter_tuple_get_item.py
  class ScatterTupleGetItem (line 14) | class ScatterTupleGetItem:  # pylint: disable=too-few-public-methods
    method transform_module (line 17) | def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassCont...
  class _Scatter (line 23) | class _Scatter(PyExprMutator):  # pylint: disable=abstract-method
    method __init__ (line 24) | def __init__(self, mod: IRModule) -> None:
    method transform (line 29) | def transform(self) -> IRModule:
    method visit_var_binding_ (line 38) | def visit_var_binding_(self, binding: relax.VarBinding):
    method visit_dataflow_var_ (line 43) | def visit_dataflow_var_(  # pylint: disable=arguments-renamed

FILE: python/mlc_llm/contrib/embeddings/embeddings.py
  function _extract_metadata (line 20) | def _extract_metadata(mod: Module):
  function _load_params (line 24) | def _load_params(
  function _get_tvm_module (line 37) | def _get_tvm_module(
  class DefaultDebugInstrument (line 52) | class DefaultDebugInstrument:
    method __init__ (line 61) | def __init__(self, debug_out: Path):
    method reset (line 75) | def reset(self, debug_out: Path):
    method __call__ (line 89) | def __call__(self, func, name, before_run, ret_val, *args):
  class MLCEmbeddings (line 111) | class MLCEmbeddings:  # pylint: disable=too-few-public-methods
    method __init__ (line 137) | def __init__(  # pylint: disable=too-many-arguments
    method embed (line 153) | def embed(self, queries: List[str]) -> tvm.runtime.Tensor:
    method _tokenize_queries (line 173) | def _tokenize_queries(self, queries: List[str]) -> Tuple[np.ndarray, n...

FILE: python/mlc_llm/contrib/embeddings/openai.py
  class MLCEmbeddings (line 18) | class MLCEmbeddings(OpenAIEmbeddings):
    method _chunk_tokens (line 19) | def _chunk_tokens(self, texts: Sequence[str]) -> Tuple[List[List], Lis...
    method _batch_embed (line 59) | def _batch_embed(
    method _abatch_embed (line 82) | async def _abatch_embed(
    method _get_len_safe_embeddings (line 107) | def _get_len_safe_embeddings(  # pylint: disable=too-many-locals,unuse...
    method _aget_len_safe_embeddings (line 142) | async def _aget_len_safe_embeddings(  # pylint: disable=too-many-local...
    method embed_documents (line 178) | def embed_documents(
    method aembed_documents (line 202) | async def aembed_documents(
    method embed_query (line 224) | def embed_query(self, text: str) -> List[float]:
    method aembed_query (line 235) | async def aembed_query(self, text: str) -> List[float]:

FILE: python/mlc_llm/conversation_template/registry.py
  class ConvTemplateRegistry (line 8) | class ConvTemplateRegistry:
    method register_conv_template (line 14) | def register_conv_template(conv_template: Conversation, override: bool...
    method get_conv_template (line 30) | def get_conv_template(name: str) -> Optional[Conversation]:

FILE: python/mlc_llm/interface/calibrate.py
  class CalibrationObserver (line 17) | class CalibrationObserver:
    method get (line 25) | def get():
    method callback (line 33) | def callback(
    method save_params (line 51) | def save_params(self, output: str):
  function sample_requests (line 63) | def sample_requests(
  function send_calibration_requests (line 106) | async def send_calibration_requests(
  function calibrate (line 131) | def calibrate(

FILE: python/mlc_llm/interface/chat.py
  function _print_help_str (line 18) | def _print_help_str():
  function _set_up_key_bindings (line 33) | def _set_up_key_bindings():
  class ChatCompletionOverride (line 48) | class ChatCompletionOverride(ConfigOverrideBase):  # pylint: disable=too...
    method from_str (line 60) | def from_str(source: str) -> "ChatCompletionOverride":
  class ModelConfigOverride (line 83) | class ModelConfigOverride(ConfigOverrideBase):  # pylint: disable=too-ma...
    method from_str (line 95) | def from_str(source: str) -> "ModelConfigOverride":
  class ChatState (line 118) | class ChatState:
    method __init__ (line 156) | def __init__(self, engine: Union[JSONFFIEngine, MLCEngine]):
    method slide_history (line 165) | def slide_history(self):
    method process_system_prompts (line 171) | def process_system_prompts(self):
    method generate (line 183) | def generate(self, prompt: str):
    method stats (line 222) | def stats(self):
    method metrics (line 240) | def metrics(self):
    method reset (line 244) | def reset(self):
    method chat (line 249) | def chat(self):
  function chat (line 285) | def chat(

FILE: python/mlc_llm/interface/compile.py
  class CompileArgs (line 28) | class CompileArgs:  # pylint: disable=too-many-instance-attributes
    method __post_init__ (line 42) | def __post_init__(self) -> None:
    method display (line 45) | def display(self) -> None:
  function _apply_preproc_to_params_and_check_pipeline (line 62) | def _apply_preproc_to_params_and_check_pipeline(
  function _infer_kv_state_kind (line 98) | def _infer_kv_state_kind(model_type) -> str:
  function _compile (line 106) | def _compile(args: CompileArgs, model_config: ConfigBase):
  function compile (line 226) | def compile(  # pylint: disable=too-many-arguments,redefined-builtin

FILE: python/mlc_llm/interface/compiler_flags.py
  class IPCAllReduceStrategyType (line 14) | class IPCAllReduceStrategyType(enum.IntEnum):
  class OptimizationFlags (line 24) | class OptimizationFlags:
    method __repr__ (line 34) | def __repr__(self) -> str:
    method from_str (line 49) | def from_str(source: str) -> "OptimizationFlags":
    method update (line 84) | def update(self, target, quantization) -> None:
  class ModelConfigOverride (line 141) | class ModelConfigOverride(ConfigOverrideBase):  # pylint: disable=too-ma...
    method __repr__ (line 153) | def __repr__(self) -> str:
    method from_str (line 170) | def from_str(source: str) -> "ModelConfigOverride":

FILE: python/mlc_llm/interface/convert_weight.py
  class ConversionArgs (line 30) | class ConversionArgs:  # pylint: disable=too-many-instance-attributes
    method display (line 42) | def display(self) -> None:
  function _resolve_base_model_dir (line 62) | def _resolve_base_model_dir(source: Path) -> Path:
  function _merge_lora_adapter_with_base_model (line 67) | def _merge_lora_adapter_with_base_model(base_source: Path, lora_adapter:...
  function _convert_args (line 102) | def _convert_args(args: ConversionArgs) -> None:  # pylint: disable=too-...
  function convert_weight (line 215) | def convert_weight(  # pylint: disable=too-many-arguments

FILE: python/mlc_llm/interface/gen_config.py
  function apply_system_defaults_for_missing_fields (line 29) | def apply_system_defaults_for_missing_fields(mlc_chat_config: MLCChatCon...
  function check_string (line 36) | def check_string(s: str) -> bool:
  function txt2rwkv_tokenizer (line 48) | def txt2rwkv_tokenizer(vocab: Path, out: Path) -> None:
  function json2rwkv_tokenizer (line 73) | def json2rwkv_tokenizer(vocab: Path, out: Path) -> None:
  function gen_config (line 90) | def gen_config(  # pylint: disable=too-many-locals,too-many-arguments,to...

FILE: python/mlc_llm/interface/jit.py
  class JITResult (line 34) | class JITResult:
  function log_jit_policy (line 41) | def log_jit_policy():
  function jit (line 50) | def jit(  # pylint: disable=too-many-locals,too-many-statements

FILE: python/mlc_llm/interface/package.py
  function build_model_library (line 21) | def build_model_library(  # pylint: disable=too-many-branches,too-many-l...
  function validate_model_lib (line 162) | def validate_model_lib(  # pylint: disable=too-many-locals,too-many-stat...
  function build_android_binding (line 264) | def build_android_binding(mlc_llm_source_dir: Path, output: Path) -> None:
  function build_iphone_binding (line 308) | def build_iphone_binding(mlc_llm_source_dir: Path, output: Path) -> None:
  function build_macabi_binding (line 325) | def build_macabi_binding(mlc_llm_source_dir: Path, output: Path) -> None:
  function package (line 349) | def package(

FILE: python/mlc_llm/interface/router.py
  function serve (line 17) | def serve(

FILE: python/mlc_llm/interface/serve.py
  function serve (line 24) | def serve(

FILE: python/mlc_llm/json_ffi/engine.py
  class EngineState (line 24) | class EngineState:
    method get_request_stream_callback (line 27) | def get_request_stream_callback(self) -> Callable[[str], None]:
    method _sync_request_stream_callback (line 35) | def _sync_request_stream_callback(self, chat_completion_stream_respons...
    method handle_chat_completion (line 39) | def handle_chat_completion(
  class BackgroundLoops (line 76) | class BackgroundLoops:
    method __init__ (line 79) | def __init__(self, ffi: dict):
    method __del__ (line 94) | def __del__(self):
    method terminate (line 97) | def terminate(self):
  class Completions (line 106) | class Completions:
    method __init__ (line 113) | def __init__(self, ffi: dict, state: EngineState, background_loops: Ba...
    method create (line 118) | def create(  # pylint: disable=too-many-arguments,too-many-locals
  class Chat (line 201) | class Chat:
    method __init__ (line 206) | def __init__(self, ffi: dict, state: EngineState, background_loops: Ba...
  class JSONFFIEngine (line 210) | class JSONFFIEngine:
    method __init__ (line 213) | def __init__(  # pylint: disable=too-many-arguments,too-many-locals
    method metrics (line 273) | def metrics(self) -> EngineMetrics:
    method _raw_chat_completion (line 277) | def _raw_chat_completion(
    method terminate (line 285) | def terminate(self):
    method _test_reload (line 289) | def _test_reload(self):
    method _test_reset (line 292) | def _test_reset(self):
    method _test_unload (line 295) | def _test_unload(self):

FILE: python/mlc_llm/libinfo.py
  function get_env_paths (line 11) | def get_env_paths(env_var, splitter):
  function get_dll_directories (line 18) | def get_dll_directories():
  function find_lib_path (line 40) | def find_lib_path(name, optional=False):

FILE: python/mlc_llm/loader/huggingface_loader.py
  class HuggingFaceLoader (line 25) | class HuggingFaceLoader:  # pylint: disable=too-few-public-methods
    method __init__ (line 55) | def __init__(
    method load (line 101) | def load(
    method _load_mlc_param (line 135) | def _load_mlc_param(self, mlc_name: str, device: Optional[Device]) -> ...
    method _load_or_quantize (line 160) | def _load_or_quantize(self, mlc_name, param, device: Device):
    method _load_file (line 184) | def _load_file(self, path: Path) -> None:
    method _unload_file (line 196) | def _unload_file(self, path: Path) -> None:
  function _loading_order (line 205) | def _loading_order(param_map: ExternMapping, torch_to_path: Dict[str, Pa...

FILE: python/mlc_llm/loader/mapping.py
  class ExternMapping (line 19) | class ExternMapping:
    method add_mapping (line 48) | def add_mapping(
    method add_unused (line 58) | def add_unused(self, name: str):
  class QuantizeMapping (line 64) | class QuantizeMapping:

FILE: python/mlc_llm/loader/standard_loader.py
  function _default_export_spec (line 18) | def _default_export_spec(model: nn.Module) -> object:
  function make_standard_hf_loader (line 22) | def make_standard_hf_loader(  # pylint: disable=too-many-arguments,too-m...

FILE: python/mlc_llm/loader/stats.py
  class Stats (line 14) | class Stats:
    method timer (line 51) | def timer(self, attr):
    method mem_add (line 63) | def mem_add(self, nbytes: int):
    method mem_rm (line 70) | def mem_rm(self, nbytes: int):
    method log_time_info (line 75) | def log_time_info(self, weight_format: str):
    method log_mem_usage (line 89) | def log_mem_usage(self):

FILE: python/mlc_llm/loader/utils.py
  function check_parameter_usage (line 20) | def check_parameter_usage(param_map: "ExternMapping", extern_weights: Se...
  function load_torch_shard (line 39) | def load_torch_shard(path: Path) -> Iterator[Tuple[str, np.ndarray]]:
  function load_safetensor_shard (line 55) | def load_safetensor_shard(path: Path) -> Iterator[Tuple[str, np.ndarray]]:

FILE: python/mlc_llm/model/baichuan/baichuan_model.py
  class BaichuanConfig (line 23) | class BaichuanConfig(ConfigBase):  # pylint: disable=too-many-instance-a...
    method __post_init__ (line 45) | def __post_init__(self):
  class BaichuanAttention (line 86) | class BaichuanAttention(nn.Module):  # pylint: disable=too-many-instance...
    method __init__ (line 87) | def __init__(self, config: BaichuanConfig):
    method forward (line 99) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class BaichuanMLP (line 114) | class BaichuanMLP(nn.Module):
    method __init__ (line 115) | def __init__(self, config: BaichuanConfig):
    method forward (line 129) | def forward(self, x):
  class BaichuanDecoderLayer (line 135) | class BaichuanDecoderLayer(nn.Module):
    method __init__ (line 136) | def __init__(self, config: BaichuanConfig):
    method forward (line 169) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 176) | def _apply_residual(self, out, residual):
  class BaichuanModel (line 182) | class BaichuanModel(nn.Module):
    method __init__ (line 183) | def __init__(self, config: BaichuanConfig):
    method forward (line 191) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class BaichuanForCausalLM (line 199) | class BaichuanForCausalLM(nn.Module):  # pylint: disable=too-many-instan...
    method __init__ (line 200) | def __init__(self, config: BaichuanConfig):
    method to (line 213) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 218) | def batch_forward(
    method embed (line 234) | def embed(self, input_ids: Tensor):
    method prefill (line 239) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 253) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 262) | def batch_prefill(
    method batch_decode (line 273) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 277) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 281) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 307) | def get_default_spec(self):

FILE: python/mlc_llm/model/bert/bert_loader.py
  function huggingface (line 17) | def huggingface(
  function huggingface_bge (line 107) | def huggingface_bge(model_config: BertConfig, quantization: Quantization...

FILE: python/mlc_llm/model/bert/bert_model.py
  class BertConfig (line 22) | class BertConfig(ConfigBase):  # pylint: disable=too-many-instance-attri...
    method __post_init__ (line 42) | def __post_init__(self):
  class BertSelfAttention (line 87) | class BertSelfAttention(nn.Module):  # pylint: disable=too-many-instance...
    method __init__ (line 88) | def __init__(self, config: BertConfig):
    method forward (line 103) | def forward(self, hidden_states: Tensor, attention_mask: Tensor):
  class BertSelfOutput (line 116) | class BertSelfOutput(nn.Module):
    method __init__ (line 117) | def __init__(self, config: BertConfig):
    method forward (line 121) | def forward(self, hidden_states: Tensor, input_tensor: Tensor):
  class BertAttention (line 127) | class BertAttention(nn.Module):
    method __init__ (line 128) | def __init__(self, config: BertConfig):
    method forward (line 132) | def forward(self, hidden_states: Tensor, attention_mask: Tensor):
  class BertIntermediate (line 147) | class BertIntermediate(nn.Module):
    method __init__ (line 148) | def __init__(self, config: BertConfig):
    method forward (line 152) | def forward(self, hidden_states: Tensor):
  class BertOutput (line 158) | class BertOutput(nn.Module):
    method __init__ (line 159) | def __init__(self, config: BertConfig):
    method forward (line 163) | def forward(self, hidden_states: Tensor, input_tensor: Tensor):
  class BertLayer (line 169) | class BertLayer(nn.Module):
    method __init__ (line 170) | def __init__(self, config: BertConfig):
    method forward (line 175) | def forward(self, hidden_states: Tensor, attention_mask: Tensor):
  class BertEncoder (line 182) | class BertEncoder(nn.Module):
    method __init__ (line 183) | def __init__(self, config: BertConfig):
    method forward (line 186) | def forward(self, hidden_states: Tensor, attention_mask: Tensor):
  class BertEmbeddings (line 192) | class BertEmbeddings(nn.Module):
    method __init__ (line 193) | def __init__(self, config: BertConfig):
    method forward (line 203) | def forward(self, input_ids: Tensor, token_type_ids: Tensor, position_...
  class BertModel (line 213) | class BertModel(nn.Module):
    method __init__ (line 214) | def __init__(self, config: BertConfig):
    method to (line 219) | def to(self, dtype: Optional[str] = None):
    method forward (line 224) | def forward(self, inputs: Tensor, attention_mask: Tensor):
    method prefill (line 245) | def prefill(self, inputs: Tensor, attention_mask: Tensor):
    method get_default_spec (line 265) | def get_default_spec(self):

FILE: python/mlc_llm/model/chatglm3/chatglm3_loader.py
  function huggingface (line 14) | def huggingface(model_config: GLMConfig, quantization: Quantization) -> ...

FILE: python/mlc_llm/model/chatglm3/chatglm3_model.py
  class GLMConfig (line 23) | class GLMConfig(ConfigBase):  # pylint: disable=too-many-instance-attrib...
    method __post_init__ (line 47) | def __post_init__(self):
  class GLMAttention (line 92) | class GLMAttention(nn.Module):  # pylint: disable=too-many-instance-attr...
    method __init__ (line 93) | def __init__(self, config: GLMConfig):
    method forward (line 119) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class GLMMLP (line 134) | class GLMMLP(nn.Module):
    method __init__ (line 135) | def __init__(self, config: GLMConfig):
    method forward (line 160) | def forward(self, x):
  class GLMBlock (line 167) | class GLMBlock(nn.Module):
    method __init__ (line 168) | def __init__(self, config: GLMConfig):
    method forward (line 226) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 233) | def _apply_residual(self, out, residual):
  class GLMTransformer (line 239) | class GLMTransformer(nn.Module):
    method __init__ (line 242) | def __init__(self, config: GLMConfig):
    method forward (line 259) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class ChatGLMModel (line 267) | class ChatGLMModel(nn.Module):
    method __init__ (line 268) | def __init__(self, config: GLMConfig):
    method forward (line 273) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class ChatGLMForCausalLM (line 279) | class ChatGLMForCausalLM(nn.Module):  # pylint: disable=too-many-instanc...
    method __init__ (line 280) | def __init__(self, config: GLMConfig):
    method to (line 296) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 301) | def batch_forward(
    method embed (line 317) | def embed(self, input_ids: Tensor):
    method prefill (line 322) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 336) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 345) | def batch_prefill(
    method batch_decode (line 356) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 360) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 364) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 390) | def get_default_spec(self):

FILE: python/mlc_llm/model/cohere/cohere_loader.py
  function _cohere_name_transform (line 19) | def _cohere_name_transform(name: str) -> str:
  function awq (line 33) | def awq(model_config: CohereConfig, quantization: Quantization) -> Exter...

FILE: python/mlc_llm/model/cohere/cohere_model.py
  class CohereConfig (line 23) | class CohereConfig(ConfigBase):  # pylint: disable=too-many-instance-att...
    method __post_init__ (line 42) | def __post_init__(self):
  class CohereMLP (line 92) | class CohereMLP(nn.Module):
    method __init__ (line 93) | def __init__(self, config: CohereConfig):
    method forward (line 106) | def forward(self, x):
  class CohereAttention (line 114) | class CohereAttention(nn.Module):
    method __init__ (line 115) | def __init__(self, config: CohereConfig):
    method forward (line 135) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class CohereDecoderLayer (line 151) | class CohereDecoderLayer(nn.Module):
    method __init__ (line 152) | def __init__(self, config: CohereConfig):
    method forward (line 182) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_parallel_residual (line 190) | def _apply_parallel_residual(self, mlp_out, residual):
  class CohereNorm (line 196) | class CohereNorm(nn.Module):
    method __init__ (line 197) | def __init__(
    method forward (line 205) | def forward(self, x: Tensor) -> Tensor:
  class CohereEmbedding (line 215) | class CohereEmbedding(nn.Embedding):
    method lm_head_forward (line 216) | def lm_head_forward(self, x: nn.Tensor):
  class CohereModel (line 224) | class CohereModel(nn.Module):
    method __init__ (line 225) | def __init__(self, config: CohereConfig):
    method forward (line 233) | def forward(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
  class CohereForCausalLM (line 241) | class CohereForCausalLM(nn.Module):
    method __init__ (line 243) | def __init__(self, config: CohereConfig) -> None:
    method to (line 256) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 261) | def batch_forward(
    method prefill (line 277) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 294) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 303) | def batch_prefill(
    method batch_decode (line 314) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 318) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method embed (line 322) | def embed(self, input_ids: Tensor):
    method create_paged_kv_cache (line 328) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 354) | def get_default_spec(self):

FILE: python/mlc_llm/model/deepseek/deepseek_loader.py
  function huggingface (line 16) | def huggingface(model_config: DeepseekConfig, quantization: Quantization...

FILE: python/mlc_llm/model/deepseek/deepseek_model.py
  class DeepseekConfig (line 25) | class DeepseekConfig(ConfigBase):  # pylint: disable=too-many-instance-a...
    method __post_init__ (line 56) | def __post_init__(self):
  class DeepseekAttention (line 97) | class DeepseekAttention(nn.Module):  # pylint: disable=too-many-instance...
    method __init__ (line 98) | def __init__(self, config: DeepseekConfig):
    method forward (line 125) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class DeepseekMLP (line 149) | class DeepseekMLP(nn.Module):
    method __init__ (line 150) | def __init__(self, config: DeepseekConfig, intermediate_size=None):
    method forward (line 165) | def forward(self, x: Tensor):
  class DeepseekMoE (line 171) | class DeepseekMoE(nn.Module):  # pylint: disable=too-many-instance-attri...
    method __init__ (line 172) | def __init__(self, config: DeepseekConfig):
    method forward (line 196) | def forward(self, x: Tensor):  # pylint: disable=too-many-locals
  class DeepseekDecoderLayer (line 245) | class DeepseekDecoderLayer(nn.Module):  # pylint: disable=too-many-insta...
    method __init__ (line 246) | def __init__(self, config: DeepseekConfig, layer_idx: int):
    method forward (line 315) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 324) | def _apply_residual(self, out, residual):
  class DeepseekModel (line 330) | class DeepseekModel(nn.Module):
    method __init__ (line 331) | def __init__(self, config: DeepseekConfig):
    method forward (line 342) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class DeepseekForCausalLM (line 350) | class DeepseekForCausalLM(nn.Module):  # pylint: disable=too-many-instan...
    method __init__ (line 351) | def __init__(self, config: DeepseekConfig):
    method to (line 365) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 370) | def batch_forward(
    method embed (line 386) | def embed(self, input_ids: Tensor):
    method prefill (line 391) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 406) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 415) | def batch_prefill(
    method batch_decode (line 426) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 430) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 434) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 460) | def get_default_spec(self):

FILE: python/mlc_llm/model/deepseek_v2/deepseek_v2_loader.py
  function huggingface (line 17) | def huggingface(  # pylint: disable=too-many-locals,too-many-statements

FILE: python/mlc_llm/model/deepseek_v2/deepseek_v2_model.py
  class DeepseekV2Config (line 27) | class DeepseekV2Config(ConfigBase):  # pylint: disable=too-many-instance...
    method __post_init__ (line 65) | def __post_init__(self):
  class DeepseekV2MLP (line 128) | class DeepseekV2MLP(nn.Module):
    method __init__ (line 129) | def __init__(self, config: DeepseekV2Config, hidden_size=None, interme...
    method forward (line 145) | def forward(self, x: Tensor) -> Tensor:
  function yarn_get_mscale (line 151) | def yarn_get_mscale(scale=1, mscale=1):
  class DeepseekV2YarnRotaryEmbedding (line 157) | class DeepseekV2YarnRotaryEmbedding(nn.Module):
    method __init__ (line 158) | def __init__(self, config: DeepseekV2Config):
    method forward (line 163) | def forward(
  class DeepseekV2Attention (line 212) | class DeepseekV2Attention(nn.Module):  # pylint: disable=too-many-instan...
    method __init__ (line 213) | def __init__(self, config: DeepseekV2Config):
    method forward (line 272) | def forward(  # pylint: disable=too-many-arguments
    method self_attn (line 318) | def self_attn(  # pylint: disable=too-many-arguments
    method cross_attn (line 341) | def cross_attn(
  class DeepseekV2MoE (line 390) | class DeepseekV2MoE(nn.Module):  # pylint: disable=too-many-instance-att...
    method __init__ (line 391) | def __init__(self, config: DeepseekV2Config):
    method forward (line 434) | def forward(self, x: Tensor):
    method to (line 519) | def to(self, dtype: Optional[str] = None):
  class DeepseekV2DecoderLayer (line 526) | class DeepseekV2DecoderLayer(nn.Module):
    method __init__ (line 527) | def __init__(self, config: DeepseekV2Config, layer_idx: int):
    method forward (line 607) | def forward(  # pylint: disable=too-many-arguments
    method _apply_residual (line 625) | def _apply_residual(self, out, residual):
  class DeepseekV2Model (line 631) | class DeepseekV2Model(nn.Module):
    method __init__ (line 632) | def __init__(self, config: DeepseekV2Config):
    method forward (line 642) | def forward(
  class DeepseekV2ForCausalLM (line 658) | class DeepseekV2ForCausalLM(nn.Module):  # pylint: disable=too-many-inst...
    method __init__ (line 659) | def __init__(self, config: DeepseekV2Config):
    method to (line 678) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 683) | def batch_forward(
    method embed (line 700) | def embed(self, input_ids: Tensor):
    method prefill (line 705) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method extend (line 719) | def extend(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 733) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 742) | def batch_prefill(
    method batch_extend (line 755) | def batch_extend(
    method batch_decode (line 768) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 772) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 776) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 804) | def get_default_spec(self):

FILE: python/mlc_llm/model/eagle/eagle_loader.py
  function awq (line 26) | def awq(model_config: EagleConfig, quantization: Quantization) -> Extern...

FILE: python/mlc_llm/model/eagle/eagle_model.py
  class EagleConfig (line 22) | class EagleConfig(LlamaConfig):
  class EagleDecoderLayer (line 31) | class EagleDecoderLayer(nn.Module):
    method __init__ (line 32) | def __init__(self, config: EagleConfig, index: int):
    method forward (line 64) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 73) | def _apply_residual(self, out, residual):
  class EagleForCausalLM (line 79) | class EagleForCausalLM(nn.Module):  # pylint: disable=too-many-instance-...
    method __init__ (line 80) | def __init__(self, config: EagleConfig):
    method fuse_embed_hidden_states (line 103) | def fuse_embed_hidden_states(self, input_embed: Tensor, hidden_states:...
    method forward_to_last_hidden_states (line 108) | def forward_to_last_hidden_states(self, hidden_states: Tensor, paged_k...
    method forward (line 113) | def forward(self, input_embed: Tensor, hidden_states: Tensor, paged_kv...
    method to (line 118) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 123) | def batch_forward(
    method embed (line 136) | def embed(self, input_ids: Tensor):
    method prefill_to_last_hidden_states (line 141) | def prefill_to_last_hidden_states(self, hidden_states: Tensor, paged_k...
    method decode_to_last_hidden_states (line 147) | def decode_to_last_hidden_states(self, hidden_states: Tensor, paged_kv...
    method batch_prefill_to_last_hidden_states (line 153) | def batch_prefill_to_last_hidden_states(
    method batch_decode_to_last_hidden_states (line 161) | def batch_decode_to_last_hidden_states(
    method create_paged_kv_cache (line 167) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 193) | def get_default_spec(self):

FILE: python/mlc_llm/model/gemma/gemma_loader.py
  function huggingface (line 15) | def huggingface(model_config: GemmaConfig, quantization: Quantization) -...

FILE: python/mlc_llm/model/gemma/gemma_model.py
  class GemmaConfig (line 21) | class GemmaConfig(ConfigBase):  # pylint: disable=too-many-instance-attr...
    method __post_init__ (line 41) | def __post_init__(self):
  class GemmaEmbedding (line 91) | class GemmaEmbedding(nn.Embedding):
    method lm_head_forward (line 96) | def lm_head_forward(self, x: nn.Tensor):
  class GemmaMLP (line 104) | class GemmaMLP(nn.Module):
    method __init__ (line 105) | def __init__(self, config: GemmaConfig):
    method forward (line 120) | def forward(self, x: Tensor):
  class GemmaAttention (line 126) | class GemmaAttention(nn.Module):  # pylint: disable=too-many-instance-at...
    method __init__ (line 127) | def __init__(self, config: GemmaConfig):
    method forward (line 148) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class GemmaDecoderLayer (line 164) | class GemmaDecoderLayer(nn.Module):
    method __init__ (line 165) | def __init__(self, config: GemmaConfig):
    method forward (line 196) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 203) | def _apply_residual(self, out, residual):
  class GemmaModel (line 209) | class GemmaModel(nn.Module):
    method __init__ (line 210) | def __init__(self, config: GemmaConfig):
    method forward (line 219) | def forward(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
  class GemmaForCausalLM (line 228) | class GemmaForCausalLM(nn.Module):  # pylint: disable=too-many-instance-...
    method __init__ (line 229) | def __init__(self, config: GemmaConfig):
    method to (line 241) | def to(self, dtype: Optional[str] = None):
    method get_logits (line 246) | def get_logits(self, hidden_states: Tensor):
    method batch_forward (line 252) | def batch_forward(
    method embed (line 266) | def embed(self, input_ids: Tensor):
    method prefill (line 271) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 283) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 290) | def batch_prefill(
    method batch_decode (line 301) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 305) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 309) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 335) | def get_default_spec(self):

FILE: python/mlc_llm/model/gemma2/gemma2_loader.py
  function huggingface (line 15) | def huggingface(model_config: Gemma2Config, quantization: Quantization) ...

FILE: python/mlc_llm/model/gemma2/gemma2_model.py
  class Gemma2Config (line 23) | class Gemma2Config(GemmaConfig):
    method __post_init__ (line 35) | def __post_init__(self):
  class Gemma2Attention (line 45) | class Gemma2Attention(GemmaAttention):
    method __init__ (line 46) | def __init__(self, config: Gemma2Config):
  class Gemma2DecoderLayer (line 51) | class Gemma2DecoderLayer(nn.Module):
    method __init__ (line 52) | def __init__(self, config: Gemma2Config):
    method forward (line 89) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_post_matmul_norm (line 101) | def _apply_post_matmul_norm(self, out: Tensor, norm: nn.Tensor):
  class Gemma2Model (line 107) | class Gemma2Model(GemmaModel):
    method __init__ (line 108) | def __init__(self, config: Gemma2Config):
  class Gemma2ForCausalLM (line 115) | class Gemma2ForCausalLM(GemmaForCausalLM):  # pylint: disable=too-many-i...
    method __init__ (line 116) | def __init__(self, config: Gemma2Config):
    method get_logits (line 121) | def get_logits(self, hidden_states: Tensor):

FILE: python/mlc_llm/model/gemma3/gemma3_loader.py
  function huggingface (line 15) | def huggingface(model_config: Gemma3Config, quantization: Quantization) ...

FILE: python/mlc_llm/model/gemma3/gemma3_model.py
  class Gemma3TextConfig (line 22) | class Gemma3TextConfig(ConfigBase):  # pylint: disable=too-many-instance...
    method __post_init__ (line 46) | def __post_init__(self):
  class Gemma3Config (line 96) | class Gemma3Config(ConfigBase):  # pylint: disable=too-many-instance-att...
    method __post_init__ (line 109) | def __post_init__(self):
  class Gemma3MLP (line 134) | class Gemma3MLP(nn.Module):
    method __init__ (line 135) | def __init__(self, config: Gemma3Config):
    method forward (line 154) | def forward(self, x: Tensor):
  class Gemma3Attention (line 160) | class Gemma3Attention(nn.Module):  # pylint: disable=too-many-instance-a...
    method __init__ (line 161) | def __init__(self, config: Gemma3Config):
    method forward (line 201) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class Gemma3DecoderLayer (line 224) | class Gemma3DecoderLayer(nn.Module):
    method __init__ (line 225) | def __init__(self, config: Gemma3Config):
    method forward (line 263) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_post_matmul_norm (line 275) | def _apply_post_matmul_norm(self, out: Tensor, norm: nn.Tensor):
  class Gemma3TextModel (line 281) | class Gemma3TextModel(nn.Module):
    method __init__ (line 282) | def __init__(self, config: Gemma3Config):
    method forward (line 296) | def forward(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
  class Gemma3LanguageModel (line 305) | class Gemma3LanguageModel(nn.Module):  # pylint: disable=too-many-instan...
    method __init__ (line 306) | def __init__(self, config: Gemma3Config):
    method to (line 320) | def to(self, dtype: Optional[str] = None):
    method get_logits (line 325) | def get_logits(self, hidden_states: Tensor):
    method batch_forward (line 331) | def batch_forward(
    method embed (line 345) | def embed(self, input_ids: Tensor):
    method prefill (line 350) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 362) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 369) | def batch_prefill(
    method batch_decode (line 380) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 384) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 388) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 425) | def get_default_spec(self):
  class Gemma3ForCausalLM (line 490) | class Gemma3ForCausalLM(nn.Module):  # pylint: disable=too-many-instance...
    method __init__ (line 491) | def __init__(self, config: Gemma3Config):
    method to (line 499) | def to(self, dtype: Optional[str] = None):
    method get_logits (line 505) | def get_logits(self, hidden_states: Tensor):
    method batch_forward (line 511) | def batch_forward(
    method embed (line 525) | def embed(self, input_ids: Tensor):
    method prefill (line 530) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 542) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 549) | def batch_prefill(
    method batch_decode (line 560) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 564) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 568) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 607) | def get_default_spec(self):

FILE: python/mlc_llm/model/gpt2/gpt2_loader.py
  function huggingface (line 14) | def huggingface(model_config: GPT2Config, quantization: Quantization) ->...

FILE: python/mlc_llm/model/gpt2/gpt2_model.py
  class GPT2Config (line 23) | class GPT2Config(ConfigBase):  # pylint: disable=too-many-instance-attri...
    method __post_init__ (line 40) | def __post_init__(self):
  class GPT2Attention (line 83) | class GPT2Attention(nn.Module):  # pylint: disable=too-many-instance-att...
    method __init__ (line 84) | def __init__(self, config: GPT2Config):
    method forward (line 102) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class GPT2MLP (line 127) | class GPT2MLP(nn.Module):
    method __init__ (line 128) | def __init__(self, config: GPT2Config):
    method forward (line 139) | def forward(self, hidden_states: Tensor):
  class GPT2Block (line 146) | class GPT2Block(nn.Module):
    method __init__ (line 147) | def __init__(self, config: GPT2Config):
    method forward (line 179) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 192) | def _apply_residual(self, out, residual):
  class GPT2Model (line 198) | class GPT2Model(nn.Module):
    method __init__ (line 199) | def __init__(self, config: GPT2Config):
    method forward (line 206) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class GPT2LMHeadModel (line 221) | class GPT2LMHeadModel(nn.Module):  # pylint: disable=too-many-instance-a...
    method __init__ (line 222) | def __init__(self, config: GPT2Config):
    method to (line 232) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 237) | def batch_forward(
    method embed (line 253) | def embed(self, input_ids: Tensor):
    method prefill (line 258) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 272) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 281) | def batch_prefill(
    method batch_decode (line 292) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 296) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 300) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 326) | def get_default_spec(self):

FILE: python/mlc_llm/model/gpt_bigcode/gpt_bigcode_model.py
  class GPTBigCodeConfig (line 23) | class GPTBigCodeConfig(ConfigBase):  # pylint: disable=too-many-instance...
    method __post_init__ (line 39) | def __post_init__(self):
  class GPTBigCodeMLP (line 75) | class GPTBigCodeMLP(nn.Module):
    method __init__ (line 76) | def __init__(self, config: GPTBigCodeConfig):
    method forward (line 82) | def forward(self, x: Tensor):
  class GPTBigCodeAttention (line 89) | class GPTBigCodeAttention(nn.Module):  # pylint: disable=too-many-instan...
    method __init__ (line 90) | def __init__(self, config: GPTBigCodeConfig):
    method forward (line 109) | def forward(
  class GPTBigCodeBlock (line 131) | class GPTBigCodeBlock(nn.Module):
    method __init__ (line 132) | def __init__(self, config: GPTBigCodeConfig):
    method forward (line 157) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class GPTBigCodeModel (line 165) | class GPTBigCodeModel(nn.Module):
    method __init__ (line 166) | def __init__(self, config: GPTBigCodeConfig):
    method forward (line 173) | def forward(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
  class GPTBigCodeForCausalLM (line 188) | class GPTBigCodeForCausalLM(nn.Module):  # pylint: disable=too-many-inst...
    method __init__ (line 189) | def __init__(self, config: GPTBigCodeConfig):
    method to (line 200) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 205) | def batch_forward(
    method embed (line 221) | def embed(self, input_ids: Tensor):
    method prefill (line 226) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 240) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 249) | def batch_prefill(
    method batch_decode (line 260) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 264) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 268) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 294) | def get_default_spec(self):

FILE: python/mlc_llm/model/gpt_j/gpt_j_model.py
  class GPTJConfig (line 25) | class GPTJConfig(ConfigBase):  # pylint: disable=too-many-instance-attri...
    method __post_init__ (line 44) | def __post_init__(self):
  class GPTJAttention (line 85) | class GPTJAttention(nn.Module):  # pylint: disable=too-many-instance-att...
    method __init__ (line 86) | def __init__(self, config: GPTJConfig):
    method forward (line 100) | def forward(  # pylint: disable=too-many-locals
  class GPTJMLP (line 129) | class GPTJMLP(nn.Module):
    method __init__ (line 130) | def __init__(self, config: GPTJConfig):  # in MLP: intermediate_size= ...
    method forward (line 137) | def forward(self, hidden_states: Tensor):
  class GPTJBlock (line 144) | class GPTJBlock(nn.Module):
    method __init__ (line 145) | def __init__(self, config: GPTJConfig):
    method forward (line 172) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 180) | def _apply_residual(self, out, residual):
  class GPTJModel (line 186) | class GPTJModel(nn.Module):
    method __init__ (line 187) | def __init__(self, config: GPTJConfig):
    method forward (line 194) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class GPTJForCausalLM (line 202) | class GPTJForCausalLM(nn.Module):  # pylint: disable=too-many-instance-a...
    method __init__ (line 203) | def __init__(self, config: GPTJConfig):
    method to (line 218) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 223) | def batch_forward(
    method embed (line 239) | def embed(self, input_ids: Tensor):
    method prefill (line 244) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 258) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 267) | def batch_prefill(
    method batch_decode (line 278) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 282) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 286) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 314) | def get_default_spec(self):

FILE: python/mlc_llm/model/gpt_neox/gpt_neox_loader.py
  function huggingface (line 16) | def huggingface(model_config: GPTNeoXConfig, quantization: Quantization)...

FILE: python/mlc_llm/model/gpt_neox/gpt_neox_model.py
  class GPTNeoXConfig (line 23) | class GPTNeoXConfig(ConfigBase):  # pylint: disable=too-many-instance-at...
    method __post_init__ (line 43) | def __post_init__(self):
  class GPTNeoXAttention (line 90) | class GPTNeoXAttention(nn.Module):  # pylint: disable=too-many-instance-...
    method __init__ (line 93) | def __init__(self, config: GPTNeoXConfig):
    method forward (line 112) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class GPTNeoXMLP (line 131) | class GPTNeoXMLP(nn.Module):
    method __init__ (line 132) | def __init__(self, config: GPTNeoXConfig):
    method forward (line 152) | def forward(self, hidden_states: Tensor):
  class GPTNeoXLayer (line 166) | class GPTNeoXLayer(nn.Module):
    method __init__ (line 167) | def __init__(self, config: GPTNeoXConfig):
    method forward (line 205) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 226) | def _apply_residual(self, out, residual):
  class GPTNeoXModel (line 232) | class GPTNeoXModel(nn.Module):
    method __init__ (line 233) | def __init__(self, config: GPTNeoXConfig):
    method forward (line 238) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class GPTNeoXForCausalLM (line 247) | class GPTNeoXForCausalLM(nn.Module):  # pylint: disable=too-many-instanc...
    method __init__ (line 248) | def __init__(self, config: GPTNeoXConfig):
    method to (line 266) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 271) | def batch_forward(
    method embed (line 287) | def embed(self, input_ids: Tensor):
    method prefill (line 292) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 306) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 315) | def batch_prefill(
    method batch_decode (line 326) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 330) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 334) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 361) | def get_default_spec(self):

FILE: python/mlc_llm/model/internlm/internlm_model.py
  class InternLMConfig (line 23) | class InternLMConfig(ConfigBase):  # pylint: disable=too-many-instance-a...
    method __post_init__ (line 44) | def __post_init__(self):
  class InternLMAttention (line 85) | class InternLMAttention(nn.Module):  # pylint: disable=too-many-instance...
    method __init__ (line 86) | def __init__(self, config: InternLMConfig):
    method forward (line 102) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class InternLMMLP (line 117) | class InternLMMLP(nn.Module):
    method __init__ (line 118) | def __init__(self, config: InternLMConfig):
    method forward (line 133) | def forward(self, x):
  class InternLMDecoderLayer (line 139) | class InternLMDecoderLayer(nn.Module):
    method __init__ (line 140) | def __init__(self, config: InternLMConfig):
    method forward (line 187) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 194) | def _apply_residual(self, out, residual):
  class InternLMModel (line 200) | class InternLMModel(nn.Module):
    method __init__ (line 201) | def __init__(self, config: InternLMConfig):
    method forward (line 208) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class InternLMForCausalLM (line 216) | class InternLMForCausalLM(nn.Module):  # pylint: disable=too-many-instan...
    method __init__ (line 217) | def __init__(self, config: InternLMConfig):
    method to (line 230) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 235) | def batch_forward(
    method embed (line 251) | def embed(self, input_ids: Tensor):
    method prefill (line 256) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 270) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 279) | def batch_prefill(
    method batch_decode (line 290) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 294) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 298) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 324) | def get_default_spec(self):

FILE: python/mlc_llm/model/internlm2/internlm2_loader.py
  function huggingface (line 17) | def huggingface(model_config: InternLM2ForCausalLM, quantization: Quanti...

FILE: python/mlc_llm/model/internlm2/internlm2_model.py
  class InternLM2Config (line 23) | class InternLM2Config(ConfigBase):  # pylint: disable=too-many-instance-...
    method __post_init__ (line 46) | def __post_init__(self):
  class InternLM2Attention (line 87) | class InternLM2Attention(nn.Module):  # pylint: disable=too-many-instanc...
    method __init__ (line 88) | def __init__(self, config: InternLM2Config):
    method forward (line 108) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class InternLM2MLP (line 123) | class InternLM2MLP(nn.Module):
    method __init__ (line 124) | def __init__(self, config: InternLM2Config):
    method forward (line 138) | def forward(self, x: Tensor):
  class InternLM2DecoderLayer (line 144) | class InternLM2DecoderLayer(nn.Module):
    method __init__ (line 145) | def __init__(self, config: InternLM2Config):
    method forward (line 179) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 190) | def _apply_residual(self, out, residual):
  class InternLM2Model (line 196) | class InternLM2Model(nn.Module):
    method __init__ (line 197) | def __init__(self, config: InternLM2Config):
    method forward (line 205) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class InternLM2ForCausalLM (line 213) | class InternLM2ForCausalLM(nn.Module):  # pylint: disable=R0902
    method __init__ (line 214) | def __init__(self, config: InternLM2Config):
    method to (line 227) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 232) | def batch_forward(
    method embed (line 248) | def embed(self, input_ids: Tensor):
    method prefill (line 253) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 267) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 276) | def batch_prefill(
    method batch_decode (line 287) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 291) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 295) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 321) | def get_default_spec(self):

FILE: python/mlc_llm/model/llama/llama_loader.py
  function awq (line 25) | def awq(model_config: LlamaConfig, quantization: Quantization) -> Extern...

FILE: python/mlc_llm/model/llama/llama_model.py
  class LlamaConfig (line 23) | class LlamaConfig(ConfigBase):  # pylint: disable=too-many-instance-attr...
    method __post_init__ (line 45) | def __post_init__(self):  # pylint: disable=too-many-branches
  class LlamaFFN (line 108) | class LlamaFFN(nn.Module):
    method __init__ (line 109) | def __init__(self, config: LlamaConfig):
    method forward (line 124) | def forward(self, x: Tensor):
  class LlamaEmbedding (line 130) | class LlamaEmbedding(nn.Embedding):
    method lm_head_forward (line 133) | def lm_head_forward(self, x: nn.Tensor):
  class LlamaAttention (line 141) | class LlamaAttention(nn.Module):  # pylint: disable=too-many-instance-at...
    method __init__ (line 142) | def __init__(self, config: LlamaConfig):
    method forward (line 159) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class LlamaDecoderLayer (line 175) | class LlamaDecoderLayer(nn.Module):
    method __init__ (line 176) | def __init__(self, config: LlamaConfig):
    method forward (line 206) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 213) | def _apply_residual(self, out, residual):
  class LlamaModel (line 219) | class LlamaModel(nn.Module):
    method __init__ (line 220) | def __init__(self, config: LlamaConfig):
    method forward (line 239) | def forward(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
  class LlamaForCausalLM (line 249) | class LlamaForCausalLM(nn.Module):  # pylint: disable=too-many-instance-...
    method __init__ (line 250) | def __init__(self, config: LlamaConfig):
    method to (line 284) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 289) | def batch_forward(
    method batch_forward_to_last_hidden_states (line 304) | def batch_forward_to_last_hidden_states(
    method embed (line 314) | def embed(self, input_ids: Tensor):
    method get_logits (line 319) | def get_logits(self, hidden_states: Tensor):
    method batch_select_last_hidden_states (line 329) | def batch_select_last_hidden_states(self, hidden_states: Tensor, logit...
    method prefill (line 336) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 348) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method prefill_to_last_hidden_states (line 355) | def prefill_to_last_hidden_states(self, input_embed: Tensor, paged_kv_...
    method decode_to_last_hidden_states (line 361) | def decode_to_last_hidden_states(self, input_embed: Tensor, paged_kv_c...
    method batch_prefill (line 367) | def batch_prefill(
    method batch_decode (line 376) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 380) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_prefill_to_last_hidden_states (line 384) | def batch_prefill_to_last_hidden_states(
    method batch_decode_to_last_hidden_states (line 390) | def batch_decode_to_last_hidden_states(
    method batch_verify_to_last_hidden_states (line 396) | def batch_verify_to_last_hidden_states(
    method create_paged_kv_cache (line 402) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 431) | def get_default_spec(self):

FILE: python/mlc_llm/model/llama4/llama4_loader.py
  function huggingface (line 16) | def huggingface(model_config: Llama4Config, quantization: Quantization) ...

FILE: python/mlc_llm/model/llama4/llama4_model.py
  class Llama4TextConfig (line 26) | class Llama4TextConfig(ConfigBase):  # pylint: disable=too-many-instance...
    method __post_init__ (line 56) | def __post_init__(self):  # pylint: disable=too-many-branches
  class Llama4Config (line 96) | class Llama4Config(ConfigBase):  # pylint: disable=too-many-instance-att...
    method __post_init__ (line 111) | def __post_init__(self) -> None:
  class Llama4TextMLP (line 161) | class Llama4TextMLP(nn.Module):
    method __init__ (line 162) | def __init__(self, config: Llama4Config):
    method forward (line 181) | def forward(self, x: Tensor):
  class LlamaEmbedding (line 189) | class LlamaEmbedding(nn.Embedding):
    method lm_head_forward (line 192) | def lm_head_forward(self, x: nn.Tensor):
  class Llama4TextL2Norm (line 200) | class Llama4TextL2Norm(nn.Module):
    method __init__ (line 201) | def __init__(self, eps, hidden_size):
    method forward (line 205) | def forward(self, x):
  class Llama4TextAttention (line 210) | class Llama4TextAttention(nn.Module):  # pylint: disable=too-many-instan...
    method __init__ (line 211) | def __init__(self, config: Llama4Config, layer_idx):
    method forward (line 264) | def forward(  # pylint: disable=too-many-locals
  class Llama4TextExperts (line 338) | class Llama4TextExperts(nn.Module):
    method __init__ (line 339) | def __init__(self, config: Llama4Config):
    method forward (line 353) | def forward(self, hidden_states):
  class Llama4Router (line 362) | class Llama4Router(nn.Module):
    method __init__ (line 363) | def __init__(self, config: Llama4Config):
    method forward (line 373) | def forward(self, hidden_states):
  class Llama4TextMoe (line 390) | class Llama4TextMoe(nn.Module):
    method __init__ (line 391) | def __init__(self, config: Llama4Config):
    method forward (line 399) | def forward(self, hidden_states):
  class Llama4TextDecoderLayer (line 419) | class Llama4TextDecoderLayer(nn.Module):
    method __init__ (line 420) | def __init__(self, config: Llama4Config, layer_idx):
    method forward (line 488) | def forward(
    method _apply_residual (line 510) | def _apply_residual(self, out, residual):
  class Llama4TextModel (line 516) | class Llama4TextModel(nn.Module):
    method __init__ (line 517) | def __init__(self, config: Llama4Config):
    method forward (line 533) | def forward(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
  class Llama4ForCausalLM (line 545) | class Llama4ForCausalLM(nn.Module):  # pylint: disable=too-many-instance...
    method __init__ (line 546) | def __init__(self, config: Llama4Config):
    method to (line 564) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 569) | def batch_forward(
    method batch_forward_to_last_hidden_states (line 584) | def batch_forward_to_last_hidden_states(
    method embed (line 594) | def embed(self, input_ids: Tensor):
    method get_logits (line 599) | def get_logits(self, hidden_states: Tensor):
    method batch_select_last_hidden_states (line 609) | def batch_select_last_hidden_states(self, hidden_states: Tensor, logit...
    method prefill (line 616) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 628) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method prefill_to_last_hidden_states (line 635) | def prefill_to_last_hidden_states(self, input_embed: Tensor, paged_kv_...
    method decode_to_last_hidden_states (line 641) | def decode_to_last_hidden_states(self, input_embed: Tensor, paged_kv_c...
    method batch_prefill (line 647) | def batch_prefill(
    method batch_decode (line 656) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 660) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_prefill_to_last_hidden_states (line 664) | def batch_prefill_to_last_hidden_states(
    method batch_decode_to_last_hidden_states (line 670) | def batch_decode_to_last_hidden_states(
    method batch_verify_to_last_hidden_states (line 676) | def batch_verify_to_last_hidden_states(
    method create_paged_kv_cache (line 682) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 710) | def get_default_spec(self):

FILE: python/mlc_llm/model/llava/llava_loader.py
  function _num_layers (line 19) | def _num_layers(config: object) -> int:
  function awq (line 31) | def awq(model_config: LlavaConfig, quantization: Quantization) -> Extern...

FILE: python/mlc_llm/model/llava/llava_model.py
  class LlavaConfig (line 36) | class LlavaConfig(ConfigBase):  # pylint: disable=too-many-instance-attr...
    method __post_init__ (line 53) | def __post_init__(self) -> None:
    method get_hf_config (line 90) | def get_hf_config(self, text_config_dict: Dict[str, Any]) -> Dict[str,...
  class LlavaMultiModalProjector (line 121) | class LlavaMultiModalProjector(nn.Module):
    method __init__ (line 122) | def __init__(self, config: LlavaConfig):
    method forward (line 133) | def forward(self, image_features: Tensor) -> Tensor:
  class LlavaForCausalLM (line 140) | class LlavaForCausalLM(Module):
    method __init__ (line 141) | def __init__(self, config: LlavaConfig):
    method to (line 151) | def to(self, dtype: Optional[str] = None):
    method embed (line 157) | def embed(self, input_ids: Tensor) -> Tensor:
    method image_preprocess (line 160) | def image_preprocess(self, pixel_values: Tensor) -> Tensor:
    method image_embed (line 179) | def image_embed(self, pixel_values: Tensor) -> Tensor:
    method batch_forward (line 196) | def batch_forward(
    method prefill (line 206) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 211) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 216) | def batch_prefill(
    method batch_decode (line 224) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 227) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 230) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 258) | def get_default_spec(self):

FILE: python/mlc_llm/model/medusa/medusa_model.py
  class MedusaConfig (line 15) | class MedusaConfig(ConfigBase):  # pylint: disable=too-many-instance-att...
  class ResBlock (line 35) | class ResBlock(nn.Module):
    method __init__ (line 38) | def __init__(self, hidden_size):
    method forward (line 43) | def forward(self, x):
  class MedusaModel (line 47) | class MedusaModel(nn.Module):
    method __init__ (line 50) | def __init__(self, config: MedusaConfig):
    method get_default_spec (line 63) | def get_default_spec(self):
    method get_logits (line 75) | def get_logits(self, hidden_states: nn.Tensor):
    method to (line 81) | def to(self, dtype: Optional[str] = None):

FILE: python/mlc_llm/model/minicpm/minicpm_loader.py
  function huggingface (line 16) | def huggingface(model_config: MiniCPMConfig, quantization: Quantization)...

FILE: python/mlc_llm/model/minicpm/minicpm_model.py
  class MiniCPMConfig (line 26) | class MiniCPMConfig(ConfigBase):  # pylint: disable=too-many-instance-at...
    method __post_init__ (line 54) | def __post_init__(self):
  class MiniCPMAttention (line 95) | class MiniCPMAttention(nn.Module):  # pylint: disable=too-many-instance-...
    method __init__ (line 96) | def __init__(self, config: MiniCPMConfig):
    method forward (line 120) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class MiniCPMEmbedding (line 144) | class MiniCPMEmbedding(nn.Embedding):
    method lm_head_forward (line 149) | def lm_head_forward(self, x: nn.Tensor):
  class MiniCPMMLP (line 157) | class MiniCPMMLP(nn.Module):
    method __init__ (line 158) | def __init__(self, config: MiniCPMConfig):
    method forward (line 171) | def forward(self, x: Tensor):
  class MiniCPMMoE (line 177) | class MiniCPMMoE(nn.Module):
    method __init__ (line 178) | def __init__(self, config: MiniCPMConfig):
    method forward (line 197) | def forward(self, x: Tensor):  # pylint: disable=too-many-locals
  class MiniCPMDecoderLayer (line 255) | class MiniCPMDecoderLayer(nn.Module):  # pylint: disable=too-many-instan...
    method __init__ (line 256) | def __init__(self, config: MiniCPMConfig):
    method forward (line 304) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 321) | def _apply_residual(self, out, residual):
  class MiniCPMModel (line 327) | class MiniCPMModel(nn.Module):
    method __init__ (line 328) | def __init__(self, config: MiniCPMConfig):
    method forward (line 336) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class MiniCPMForCausalLM (line 344) | class MiniCPMForCausalLM(nn.Module):  # pylint: disable=too-many-instanc...
    method __init__ (line 345) | def __init__(self, config: MiniCPMConfig):
    method to (line 363) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 368) | def batch_forward(
    method embed (line 387) | def embed(self, input_ids: Tensor):
    method prefill (line 392) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 409) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 421) | def batch_prefill(
    method batch_decode (line 432) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 436) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 440) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 466) | def get_default_spec(self):

FILE: python/mlc_llm/model/ministral3/ministral3_loader.py
  function _dequantize_block_scale_weight (line 17) | def _dequantize_block_scale_weight(  # pylint: disable=too-many-locals
  function huggingface (line 44) | def huggingface(  # pylint: disable=too-many-locals,too-many-statements

FILE: python/mlc_llm/model/ministral3/ministral3_model.py
  class Ministral3Config (line 25) | class Ministral3Config(ConfigBase):  # pylint: disable=too-many-instance...
    method from_dict (line 52) | def from_dict(  # type: ignore[override]
    method __post_init__ (line 68) | def __post_init__(self):  # pylint: disable=too-many-branches,too-many...
  class Ministral3Embedding (line 178) | class Ministral3Embedding(nn.Embedding):
    method lm_head_forward (line 183) | def lm_head_forward(self, x: nn.Tensor):
  class Ministral3MLP (line 194) | class Ministral3MLP(nn.Module):
    method __init__ (line 197) | def __init__(self, config: Ministral3Config):
    method forward (line 213) | def forward(self, x: Tensor):
  function yarn_get_sm_scale (line 219) | def yarn_get_sm_scale(scale=1, mscale=1):
  class Ministral3Attention (line 225) | class Ministral3Attention(nn.Module):  # pylint: disable=too-many-instan...
    method __init__ (line 228) | def __init__(self, config: Ministral3Config):
    method forward (line 252) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class Ministral3DecoderLayer (line 268) | class Ministral3DecoderLayer(nn.Module):
    method __init__ (line 271) | def __init__(self, config: Ministral3Config):
    method forward (line 301) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 308) | def _apply_residual(self, out, residual):
  class Ministral3Model (line 314) | class Ministral3Model(nn.Module):
    method __init__ (line 317) | def __init__(self, config: Ministral3Config):
    method forward (line 327) | def forward(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
  class Mistral3ForConditionalGeneration (line 335) | class Mistral3ForConditionalGeneration(nn.Module):  # pylint: disable=to...
    method __init__ (line 336) | def __init__(self, config: Ministral3Config):
    method _mark_modules_no_quant (line 357) | def _mark_modules_no_quant(self, modules: Tuple[str, ...]):
    method to (line 371) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 376) | def batch_forward(
    method embed (line 396) | def embed(self, input_ids: Tensor):
    method prefill (line 401) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 419) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 432) | def batch_prefill(
    method batch_decode (line 443) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 447) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 451) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 478) | def get_default_spec(self):

FILE: python/mlc_llm/model/mistral/mistral_loader.py
  function awq (line 25) | def awq(model_config: MistralConfig, quantization: Quantization) -> Exte...

FILE: python/mlc_llm/model/mistral/mistral_model.py
  class MistralConfig (line 23) | class MistralConfig(ConfigBase):  # pylint: disable=too-many-instance-at...
    method __post_init__ (line 43) | def __post_init__(self):  # pylint: disable=too-many-branches
  class MistralMLP (line 98) | class MistralMLP(nn.Module):
    method __init__ (line 101) | def __init__(self, config: MistralConfig):
    method forward (line 116) | def forward(self, x: Tensor):
  class MistralAttention (line 122) | class MistralAttention(nn.Module):  # pylint: disable=too-many-instance-...
    method __init__ (line 125) | def __init__(self, config: MistralConfig):
    method forward (line 141) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class MistralDecoderLayer (line 157) | class MistralDecoderLayer(nn.Module):
    method __init__ (line 160) | def __init__(self, config: MistralConfig):
    method forward (line 190) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 197) | def _apply_residual(self, out, residual):
  class MistralModel (line 203) | class MistralModel(nn.Module):
    method __init__ (line 206) | def __init__(self, config: MistralConfig):
    method forward (line 215) | def forward(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
  class MistralForCausalLM (line 223) | class MistralForCausalLM(nn.Module):  # pylint: disable=too-many-instanc...
    method __init__ (line 226) | def __init__(self, config: MistralConfig):
    method to (line 240) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 245) | def batch_forward(
    method embed (line 261) | def embed(self, input_ids: Tensor):
    method prefill (line 266) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 280) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 289) | def batch_prefill(
    method batch_decode (line 300) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 304) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 308) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 334) | def get_default_spec(self):

FILE: python/mlc_llm/model/mixtral/mixtral_loader.py
  function huggingface (line 16) | def huggingface(model_config: MixtralConfig, quantization: Quantization)...

FILE: python/mlc_llm/model/mixtral/mixtral_model.py
  class MixtralConfig (line 25) | class MixtralConfig(LlamaConfig):  # pylint: disable=too-many-instance-a...
  class MixtralMoE (line 35) | class MixtralMoE(nn.Module):
    method __init__ (line 38) | def __init__(self, config: MixtralConfig):
    method forward (line 67) | def forward(self, x: Tensor):
  class MixtralDecoderLayer (line 125) | class MixtralDecoderLayer(nn.Module):
    method __init__ (line 128) | def __init__(self, config: MixtralConfig):
    method forward (line 155) | def forward(self, hidden_states: Tensor, attention_mask: Tensor, total...
    method batch_forward (line 163) | def batch_forward(self, hidden_states: Tensor, paged_kv_cache: PagedKV...
    method _apply_residual (line 170) | def _apply_residual(self, out, residual):
  class MixtralModel (line 176) | class MixtralModel(LlamaModel):
    method __init__ (line 179) | def __init__(self, config: MixtralConfig):
  class MixtralForCausalLM (line 186) | class MixtralForCausalLM(LlamaForCausalLM):
    method __init__ (line 189) | def __init__(self, config: MixtralConfig):

FILE: python/mlc_llm/model/model.py
  class EmbeddingMetadata (line 65) | class EmbeddingMetadata:
  class Model (line 86) | class Model:
    method __post_init__ (line 123) | def __post_init__(self):

FILE: python/mlc_llm/model/nemotron/nemotron_model.py
  class NemotronConfig (line 23) | class NemotronConfig(ConfigBase):  # pylint: disable=too-many-instance-a...
    method __post_init__ (line 48) | def __post_init__(self):  # pylint: disable=too-many-branches
  class NemotronMLP (line 75) | class NemotronMLP(nn.Module):
    method __init__ (line 78) | def __init__(self, config: NemotronConfig):
    method forward (line 88) | def forward(self, x: Tensor) -> Tensor:
  class NemotronEmbedding (line 96) | class NemotronEmbedding(nn.Embedding):
    method lm_head_forward (line 99) | def lm_head_forward(self, x: Tensor):
  class NemotronLayerNorm1P (line 107) | class NemotronLayerNorm1P(nn.LayerNorm):
    method __init__ (line 110) | def __init__(self, normalized_shape: int, eps: float = 1e-5, elementwi...
    method forward (line 113) | def forward(self, x: Tensor) -> Tensor:
  class NemotronAttention (line 124) | class NemotronAttention(nn.Module):  # pylint: disable=too-many-instance...
    method __init__ (line 125) | def __init__(self, config: NemotronConfig):
    method forward (line 142) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class NemotronDecoderLayer (line 158) | class NemotronDecoderLayer(nn.Module):
    method __init__ (line 159) | def __init__(self, config: NemotronConfig):
    method forward (line 184) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 191) | def _apply_residual(self, out, residual):
  class NemotronModel (line 197) | class NemotronModel(nn.Module):
    method __init__ (line 198) | def __init__(self, config: NemotronConfig):
    method forward (line 217) | def forward(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
  class NemotronForCausalLM (line 227) | class NemotronForCausalLM(nn.Module):  # pylint: disable=too-many-instan...
    method __init__ (line 228) | def __init__(self, config: NemotronConfig):
    method to (line 263) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 268) | def batch_forward(
    method batch_forward_to_last_hidden_states (line 283) | def batch_forward_to_last_hidden_states(
    method embed (line 293) | def embed(self, input_ids: Tensor):
    method get_logits (line 298) | def get_logits(self, hidden_states: Tensor):
    method batch_select_last_hidden_states (line 308) | def batch_select_last_hidden_states(self, hidden_states: Tensor, logit...
    method prefill (line 315) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 327) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method prefill_to_last_hidden_states (line 334) | def prefill_to_last_hidden_states(self, input_embed: Tensor, paged_kv_...
    method decode_to_last_hidden_states (line 340) | def decode_to_last_hidden_states(self, input_embed: Tensor, paged_kv_c...
    method batch_prefill (line 346) | def batch_prefill(
    method batch_decode (line 355) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 359) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_prefill_to_last_hidden_states (line 363) | def batch_prefill_to_last_hidden_states(
    method batch_decode_to_last_hidden_states (line 369) | def batch_decode_to_last_hidden_states(
    method batch_verify_to_last_hidden_states (line 375) | def batch_verify_to_last_hidden_states(
    method create_paged_kv_cache (line 381) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 411) | def get_default_spec(self):

FILE: python/mlc_llm/model/olmo/olmo_loader.py
  function awq (line 25) | def awq(model_config: OLMoConfig, quantization: Quantization) -> ExternM...

FILE: python/mlc_llm/model/olmo/olmo_model.py
  class OLMoConfig (line 25) | class OLMoConfig(ConfigBase):  # pylint: disable=too-many-instance-attri...
    method __post_init__ (line 47) | def __post_init__(self):  # pylint: disable=too-many-branches
  class OLMoEmbedding (line 107) | class OLMoEmbedding(nn.Embedding):
    method lm_head_forward (line 110) | def lm_head_forward(self, x: nn.Tensor):
  class OLMoAttention (line 118) | class OLMoAttention(nn.Module):  # pylint: disable=missing-class-docstring
    method __init__ (line 119) | def __init__(self, config: OLMoConfig):
    method forward (line 141) | def forward(  # pylint: disable=missing-function-docstring
  class OLMoFFN (line 175) | class OLMoFFN(nn.Module):  # pylint: disable=missing-class-docstring
    method __init__ (line 176) | def __init__(self, config: OLMoConfig):
    method forward (line 196) | def forward(self, x: Tensor):  # pylint: disable=missing-function-docs...
  class OLMoDecoderLayer (line 205) | class OLMoDecoderLayer(nn.Module):  # pylint: disable=missing-class-docs...
    method __init__ (line 206) | def __init__(self, config: OLMoConfig):
    method _apply_residual (line 243) | def _apply_residual(self, out, residual):
    method forward (line 248) | def forward(  # pylint: disable=missing-function-docstring
  class OLMoModel (line 258) | class OLMoModel(nn.Module):  # pylint: disable=missing-class-docstring
    method __init__ (line 259) | def __init__(self, config: OLMoConfig):
    method forward (line 282) | def forward(  # pylint: disable=missing-function-docstring
  class OLMoForCausalLM (line 294) | class OLMoForCausalLM(  # pylint: disable=missing-class-docstring,too-ma...
    method __init__ (line 297) | def __init__(self, config: OLMoConfig):
    method to (line 329) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 334) | def batch_forward(  # pylint: disable=missing-function-docstring
    method batch_forward_to_last_hidden_states (line 348) | def batch_forward_to_last_hidden_states(  # pylint: disable=missing-fu...
    method embed (line 357) | def embed(self, input_ids: Tensor):  # pylint: disable=missing-functio...
    method get_logits (line 362) | def get_logits(self, hidden_states: Tensor):  # pylint: disable=missin...
    method batch_select_last_hidden_states (line 372) | def batch_select_last_hidden_states(  # pylint: disable=missing-functi...
    method prefill (line 381) | def prefill(  # pylint: disable=missing-function-docstring
    method decode (line 397) | def decode(  # pylint: disable=missing-function-docstring
    method prefill_to_last_hidden_states (line 405) | def prefill_to_last_hidden_states(  # pylint: disable=missing-function...
    method decode_to_last_hidden_states (line 412) | def decode_to_last_hidden_states(  # pylint: disable=missing-function-...
    method batch_prefill (line 419) | def batch_prefill(  # pylint: disable=missing-function-docstring
    method batch_decode (line 428) | def batch_decode(  # pylint: disable=missing-function-docstring
    method batch_verify (line 434) | def batch_verify(  # pylint: disable=missing-function-docstring
    method batch_prefill_to_last_hidden_states (line 440) | def batch_prefill_to_last_hidden_states(  # pylint: disable=missing-fu...
    method batch_decode_to_last_hidden_states (line 446) | def batch_decode_to_last_hidden_states(  # pylint: disable=missing-fun...
    method batch_verify_to_last_hidden_states (line 452) | def batch_verify_to_last_hidden_states(  # pylint: disable=missing-fun...
    method create_paged_kv_cache (line 458) | def create_paged_kv_cache(  # pylint: disable=missing-function-docstri...
    method get_default_spec (line 486) | def get_default_spec(self):  # pylint: disable=missing-function-docstring

FILE: python/mlc_llm/model/orion/orion_model.py
  class OrionConfig (line 23) | class OrionConfig(ConfigBase):  # pylint: disable=too-many-instance-attr...
    method __post_init__ (line 41) | def __post_init__(self):
  class OrionFFN (line 90) | class OrionFFN(nn.Module):
    method __init__ (line 91) | def __init__(self, config: OrionConfig):
    method forward (line 106) | def forward(self, x: Tensor):
  class OrionAttention (line 112) | class OrionAttention(nn.Module):  # pylint: disable=too-many-instance-at...
    method __init__ (line 113) | def __init__(self, config: OrionConfig):
    method forward (line 130) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class OrionDecoderLayer (line 146) | class OrionDecoderLayer(nn.Module):
    method __init__ (line 147) | def __init__(self, config: OrionConfig):
    method forward (line 177) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 184) | def _apply_residual(self, out, residual):
  class OrionModel (line 190) | class OrionModel(nn.Module):
    method __init__ (line 191) | def __init__(self, config: OrionConfig):
    method forward (line 200) | def forward(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
  class OrionForCausalLM (line 208) | class OrionForCausalLM(nn.Module):  # pylint: disable=too-many-instance-...
    method __init__ (line 209) | def __init__(self, config: OrionConfig):
    method to (line 222) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 227) | def batch_forward(
    method embed (line 243) | def embed(self, input_ids: Tensor):
    method prefill (line 248) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 262) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 271) | def batch_prefill(
    method batch_decode (line 282) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 286) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 290) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 316) | def get_default_spec(self):

FILE: python/mlc_llm/model/phi/phi_loader.py
  function huggingface (line 16) | def huggingface(model_config: PhiConfig, quantization: Quantization) -> ...
  function phi1_huggingface (line 87) | def phi1_huggingface(model_config: Phi1Config, quantization: Quantizatio...

FILE: python/mlc_llm/model/phi/phi_model.py
  class Phi1Config (line 23) | class Phi1Config(ConfigBase):  # pylint: disable=too-many-instance-attri...
    method __post_init__ (line 42) | def __post_init__(self):
  class PhiConfig (line 91) | class PhiConfig(ConfigBase):  # pylint: disable=too-many-instance-attrib...
    method __post_init__ (line 111) | def __post_init__(self):
    method from_phi1 (line 149) | def from_phi1(config: Phi1Config) -> "PhiConfig":
  class PhiMLP (line 174) | class PhiMLP(nn.Module):
    method __init__ (line 175) | def __init__(self, config: PhiConfig):
    method forward (line 186) | def forward(self, hidden_states: Tensor):
  class PhiMHA (line 194) | class PhiMHA(nn.Module):  # pylint: disable=too-many-instance-attributes
    method __init__ (line 195) | def __init__(self, config: PhiConfig):
    method forward (line 211) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class PhiParallelBlock (line 227) | class PhiParallelBlock(nn.Module):
    method __init__ (line 228) | def __init__(self, config: PhiConfig):
    method forward (line 259) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_parallel_residual (line 276) | def _apply_parallel_residual(self, attn_out, mlp_out, residual):
  class PhiCausalLMHead (line 284) | class PhiCausalLMHead(nn.Module):
    method __init__ (line 285) | def __init__(self, config: PhiConfig) -> None:
    method forward (line 291) | def forward(self, hidden_states: Tensor):
  class PhiModel (line 300) | class PhiModel(nn.Module):
    method __init__ (line 301) | def __init__(self, config: PhiConfig) -> None:
    method forward (line 306) | def forward(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
  class PhiForCausalLM (line 314) | class PhiForCausalLM(nn.Module):
    method __init__ (line 316) | def __init__(self, config: Union[PhiConfig, Phi1Config]) -> None:
    method to (line 335) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 340) | def batch_forward(
    method prefill (line 356) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 372) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 381) | def batch_prefill(
    method batch_decode (line 392) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 396) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method embed (line 400) | def embed(self, input_ids: Tensor):
    method create_paged_kv_cache (line 406) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 433) | def get_default_spec(self):

FILE: python/mlc_llm/model/phi3/phi3_loader.py
  function phi3_huggingface (line 14) | def phi3_huggingface(model_config: Phi3Config, quantization: Quantizatio...

FILE: python/mlc_llm/model/phi3/phi3_model.py
  class Phi3Config (line 23) | class Phi3Config(ConfigBase):  # pylint: disable=too-many-instance-attri...
    method __post_init__ (line 47) | def __post_init__(self):
  class Phi3Embedding (line 102) | class Phi3Embedding(nn.Embedding):
    method lm_head_forward (line 105) | def lm_head_forward(self, x: nn.Tensor):
  class Phi3MLP (line 113) | class Phi3MLP(nn.Module):
    method __init__ (line 114) | def __init__(self, config: Phi3Config):
    method forward (line 125) | def forward(self, hidden_states: Tensor):
  class PhiMHA (line 132) | class PhiMHA(nn.Module):  # pylint: disable=too-many-instance-attributes
    method __init__ (line 133) | def __init__(self, config: Phi3Config):
    method forward (line 153) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class Phi3ParallelBlock (line 169) | class Phi3ParallelBlock(nn.Module):
    method __init__ (line 170) | def __init__(self, config: Phi3Config):
    method forward (line 204) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_parallel_residual (line 211) | def _apply_parallel_residual(self, mlp_out, residual):
  class Phi3Model (line 217) | class Phi3Model(nn.Module):
    method __init__ (line 218) | def __init__(self, config: Phi3Config) -> None:
    method forward (line 224) | def forward(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
  class Phi3ForCausalLM (line 232) | class Phi3ForCausalLM(nn.Module):
    method __init__ (line 234) | def __init__(self, config: Phi3Config) -> None:
    method to (line 258) | def to(self, dtype: Optional[str] = None):
    method get_logits (line 263) | def get_logits(self, hidden_states: Tensor):
    method batch_forward (line 273) | def batch_forward(
    method prefill (line 286) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 298) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 305) | def batch_prefill(
    method batch_decode (line 316) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 320) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method embed (line 324) | def embed(self, input_ids: Tensor):
    method create_paged_kv_cache (line 330) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 359) | def get_default_spec(self):

FILE: python/mlc_llm/model/phi3v/phi3v_image.py
  class ImageProjection (line 16) | class ImageProjection(Module):  # pylint: disable=too-many-instance-attr...
    method __init__ (line 17) | def __init__(self, config: ConfigBase):
    method forward (line 25) | def forward(self, image_features: Tensor) -> Tensor:
  class Phi3ImageEmbedding (line 55) | class Phi3ImageEmbedding(Module):
    method __init__ (line 56) | def __init__(self, config: ConfigBase):
    method apply_schedule (line 69) | def apply_schedule(self, sch, block, bdx=32, tile=[32, 32]):
    method dyn_repeat_4d_tensor (line 80) | def dyn_repeat_4d_tensor(self, input_tensor, r0, r1, r2, r3) -> Tensor:
    method dyn_concate_dim_2 (line 119) | def dyn_concate_dim_2(self, input_1, input_2) -> Tensor:
    method dyn_concate_dim_1 (line 158) | def dyn_concate_dim_1(self, input_1, input_2) -> Tensor:
    method get_img_features (line 192) | def get_img_features(self, img_embeds: Tensor) -> Tensor:
    method reshape_hd_patches_2x2merge (line 197) | def reshape_hd_patches_2x2merge(self, image_features, h_crop, w_crop):
    method add_image_newline (line 267) | def add_image_newline(self, image_features_hd):
    method forward (line 283) | def forward(self, pixel_values: Tensor, h_crop, w_crop) -> Tensor:

FILE: python/mlc_llm/model/phi3v/phi3v_loader.py
  function huggingface (line 15) | def huggingface(model_config: Phi3VConfig, quantization: Quantization) -...

FILE: python/mlc_llm/model/phi3v/phi3v_model.py
  class Phi3VConfig (line 38) | class Phi3VConfig(ConfigBase):  # pylint: disable=too-many-instance-attr...
    method __post_init__ (line 63) | def __post_init__(self):
  class Phi3VForCausalLM (line 130) | class Phi3VForCausalLM(nn.Module):
    method __init__ (line 132) | def __init__(self, config: Phi3VConfig) -> None:
    method to (line 161) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 166) | def batch_forward(
    method prefill (line 182) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 198) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 207) | def batch_prefill(
    method batch_decode (line 218) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 222) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method embed (line 226) | def embed(self, input_ids: Tensor):
    method image_preprocess (line 233) | def image_preprocess(
    method image_embed (line 283) | def image_embed(  # pylint: disable=too-many-arguments
    method create_paged_kv_cache (line 296) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 324) | def get_default_spec(self):

FILE: python/mlc_llm/model/qwen/qwen_model.py
  class QWenConfig (line 23) | class QWenConfig(ConfigBase):  # pylint: disable=too-many-instance-attri...
    method __post_init__ (line 42) | def __post_init__(self):
  class QWenAttention (line 83) | class QWenAttention(nn.Module):  # pylint: disable=too-many-instance-att...
    method __init__ (line 84) | def __init__(self, config: QWenConfig):
    method forward (line 98) | def forward(  # pylint: disable=too-many-locals
  class QWenMLP (line 118) | class QWenMLP(nn.Module):
    method __init__ (line 119) | def __init__(self, config: QWenConfig):
    method forward (line 133) | def forward(self, x: Tensor):
  class QWenBlock (line 139) | class QWenBlock(nn.Module):
    method __init__ (line 140) | def __init__(self, config: QWenConfig):
    method forward (line 174) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 181) | def _apply_residual(self, out, residual):
  class QWenModel (line 187) | class QWenModel(nn.Module):
    method __init__ (line 188) | def __init__(self, config: QWenConfig):
    method forward (line 194) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class QWenLMHeadModel (line 202) | class QWenLMHeadModel(nn.Module):  # pylint: disable=too-many-instance-a...
    method __init__ (line 203) | def __init__(self, config: QWenConfig):
    method to (line 215) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 220) | def batch_forward(
    method embed (line 235) | def embed(self, input_ids: Tensor):
    method prefill (line 240) | def prefill(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 258) | def decode(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 267) | def batch_prefill(self, inputs: Tensor, logit_positions: Tensor, paged...
    method batch_decode (line 273) | def batch_decode(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
    method batch_verify (line 277) | def batch_verify(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
    method create_paged_kv_cache (line 281) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 307) | def get_default_spec(self):

FILE: python/mlc_llm/model/qwen2/qwen2_model.py
  class QWen2Config (line 24) | class QWen2Config(ConfigBase):  # pylint: disable=too-many-instance-attr...
    method __post_init__ (line 45) | def __post_init__(self):
  class QWen2Attention (line 86) | class QWen2Attention(nn.Module):  # pylint: disable=too-many-instance-at...
    method __init__ (line 87) | def __init__(self, config: QWen2Config):
    method forward (line 107) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class Qwen2Embedding (line 131) | class Qwen2Embedding(nn.Embedding):
    method lm_head_forward (line 136) | def lm_head_forward(self, x: nn.Tensor):
  class QWen2MLP (line 144) | class QWen2MLP(nn.Module):
    method __init__ (line 145) | def __init__(self, config: QWen2Config):
    method forward (line 156) | def forward(self, x: Tensor):
  class QWen2DecoderLayer (line 162) | class QWen2DecoderLayer(nn.Module):
    method __init__ (line 163) | def __init__(self, config: QWen2Config):
    method forward (line 198) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 207) | def _apply_residual(self, out, residual):
  class QWen2Model (line 213) | class QWen2Model(nn.Module):
    method __init__ (line 214) | def __init__(self, config: QWen2Config):
    method forward (line 221) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class QWen2LMHeadModel (line 229) | class QWen2LMHeadModel(nn.Module):  # pylint: disable=too-many-instance-...
    method __init__ (line 230) | def __init__(self, config: QWen2Config):
    method to (line 247) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 252) | def batch_forward(
    method embed (line 272) | def embed(self, input_ids: Tensor):
    method prefill (line 277) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 294) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 306) | def batch_prefill(
    method batch_decode (line 317) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 321) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 325) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 351) | def get_default_spec(self):

FILE: python/mlc_llm/model/qwen2_5_vl/qwen2_5_vl_model.py
  class Qwen25VLVisionTokenConfig (line 47) | class Qwen25VLVisionTokenConfig:
  class Qwen25VLVisionGridConfig (line 57) | class Qwen25VLVisionGridConfig:
  class Qwen25VLAttentionState (line 66) | class Qwen25VLAttentionState:
  class Qwen25VLConfig (line 77) | class Qwen25VLConfig(ConfigBase):  # pylint: disable=too-many-instance-a...
    method __post_init__ (line 106) | def __post_init__(self):  # pylint: disable=too-many-branches
    method image_token_id (line 160) | def image_token_id(self) -> int:
    method video_token_id (line 164) | def video_token_id(self) -> int:
    method vision_start_token_id (line 168) | def vision_start_token_id(self) -> int:
    method vision_end_token_id (line 172) | def vision_end_token_id(self) -> int:
    method spatial_merge_size (line 176) | def spatial_merge_size(self) -> int:
    method temporal_patch_size (line 180) | def temporal_patch_size(self) -> int:
    method tokens_per_second (line 184) | def tokens_per_second(self) -> float:
    method vision_metadata (line 188) | def vision_metadata(self) -> VisionPositionMetadata:
  class Qwen25VLEmbedding (line 198) | class Qwen25VLEmbedding(nn.Embedding):
    method lm_head_forward (line 201) | def lm_head_forward(self, x: Tensor):
  class Qwen25VLAttention (line 206) | class Qwen25VLAttention(nn.Module):
    method __init__ (line 207) | def __init__(self, config: Qwen25VLConfig):
    method head_dim (line 240) | def head_dim(self) -> int:
    method num_attention_heads (line 244) | def num_attention_heads(self) -> int:
    method num_key_value_heads (line 248) | def num_key_value_heads(self) -> int:
    method forward (line 251) | def forward(  # pylint: disable=too-many-locals
  class Qwen25VLMLP (line 274) | class Qwen25VLMLP(nn.Module):
    method __init__ (line 275) | def __init__(self, config: Qwen25VLConfig):
    method forward (line 286) | def forward(self, x: Tensor):
  class Qwen25VLDecoderLayer (line 292) | class Qwen25VLDecoderLayer(nn.Module):
    method __init__ (line 293) | def __init__(self, config: Qwen25VLConfig):
    method _set_tp (line 304) | def _set_tp(self, config: Qwen25VLConfig):
    method forward (line 328) | def forward(
    method _apply_residual (line 343) | def _apply_residual(self, out: Tensor, residual: Tensor) -> Tensor:
  class Qwen25VLModel (line 349) | class Qwen25VLModel(nn.Module):
    method __init__ (line 350) | def __init__(self, config: Qwen25VLConfig):
    method forward (line 364) | def forward(
  class Qwen25VLLMHeadModel (line 377) | class Qwen25VLLMHeadModel(nn.Module):
    method __init__ (line 378) | def __init__(self, config: Qwen25VLConfig):
    method to (line 386) | def to(self, dtype: Optional[str] = None):
    method _apply_lm_head (line 391) | def _apply_lm_head(self, hidden_states: Tensor):
    method _set_mrope_delta (line 400) | def _set_mrope_delta(self, paged_kv_cache: PagedKVCache, deltas: Tensor):
    method _get_mrope_delta (line 404) | def _get_mrope_delta(self, paged_kv_cache: PagedKVCache, batch: int) -...
    method _build_decode_position_ids (line 411) | def _build_decode_position_ids(
    method prefill (line 425) | def prefill(
    method decode (line 444) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 452) | def batch_prefill(  # pylint: disable=too-many-arguments
    method batch_forward (line 467) | def batch_forward(  # pylint: disable=too-many-arguments
    method batch_decode (line 482) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 490) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method embed (line 493) | def embed(self, input_ids: Tensor):
    method create_paged_kv_cache (line 498) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 526) | def get_default_spec(self):

FILE: python/mlc_llm/model/qwen2_moe/qwen2_moe_loader.py
  function huggingface (line 16) | def huggingface(model_config: Qwen2MoeConfig, quantization: Quantization...

FILE: python/mlc_llm/model/qwen2_moe/qwen2_moe_model.py
  class Qwen2MoeConfig (line 23) | class Qwen2MoeConfig(QWen2Config):  # pylint: disable=too-many-instance-...
  class Qwen2MoeMLP (line 37) | class Qwen2MoeMLP(nn.Module):
    method __init__ (line 38) | def __init__(self, config: Qwen2MoeConfig, intermediate_size: Optional...
    method forward (line 50) | def forward(self, x: Tensor):
  class Qwen2MoeSparseMoeBlock (line 56) | class Qwen2MoeSparseMoeBlock(nn.Module):  # pylint: disable=too-many-ins...
    method __init__ (line 59) | def __init__(self, config: Qwen2MoeConfig):
    method forward (line 90) | def forward(self, x: Tensor):
  class Qwen2MoeDecoderLayer (line 141) | class Qwen2MoeDecoderLayer(nn.Module):
    method __init__ (line 142) | def __init__(self, config: Qwen2MoeConfig):
    method forward (line 193) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 202) | def _apply_residual(self, out, residual):
  class Qwen2MoeModel (line 208) | class Qwen2MoeModel(nn.Module):
    method __init__ (line 209) | def __init__(self, config: Qwen2MoeConfig):
    method forward (line 216) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class Qwen2MoeForCausalLM (line 224) | class Qwen2MoeForCausalLM(nn.Module):  # pylint: disable=too-many-instan...
    method __init__ (line 225) | def __init__(self, config: Qwen2MoeConfig):
    method to (line 240) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 245) | def batch_forward(
    method embed (line 261) | def embed(self, input_ids: Tensor):
    method prefill (line 266) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 280) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 289) | def batch_prefill(
    method batch_decode (line 300) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 304) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 308) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 334) | def get_default_spec(self):

FILE: python/mlc_llm/model/qwen3/qwen3_loader.py
  function huggingface (line 17) | def huggingface(
  function huggingface_embedding (line 150) | def huggingface_embedding(model_config: Qwen3Config, quantization: Quant...

FILE: python/mlc_llm/model/qwen3/qwen3_model.py
  class Qwen3Config (line 24) | class Qwen3Config(ConfigBase):  # pylint: disable=too-many-instance-attr...
    method __post_init__ (line 47) | def __post_init__(self):
  class Qwen3Attention (line 109) | class Qwen3Attention(nn.Module):  # pylint: disable=too-many-instance-at...
    method __init__ (line 110) | def __init__(self, config: Qwen3Config):
    method forward (line 134) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class Qwen3Embedding (line 162) | class Qwen3Embedding(nn.Embedding):
    method lm_head_forward (line 167) | def lm_head_forward(self, x: nn.Tensor):
  class Qwen3MLP (line 175) | class Qwen3MLP(nn.Module):
    method __init__ (line 176) | def __init__(self, config: Qwen3Config):
    method forward (line 187) | def forward(self, x: Tensor):
  class Qwen3DecoderLayer (line 193) | class Qwen3DecoderLayer(nn.Module):
    method __init__ (line 194) | def __init__(self, config: Qwen3Config):
    method forward (line 230) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 239) | def _apply_residual(self, out, residual):
  class Qwen3Model (line 245) | class Qwen3Model(nn.Module):
    method __init__ (line 246) | def __init__(self, config: Qwen3Config):
    method forward (line 253) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class Qwen3LMHeadModel (line 261) | class Qwen3LMHeadModel(nn.Module):  # pylint: disable=too-many-instance-...
    method __init__ (line 262) | def __init__(self, config: Qwen3Config):
    method to (line 280) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 285) | def batch_forward(
    method embed (line 305) | def embed(self, input_ids: Tensor):
    method prefill (line 310) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 327) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 339) | def batch_prefill(
    method batch_decode (line 350) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 354) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 358) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 384) | def get_default_spec(self):
  class Qwen3EmbeddingModel (line 449) | class Qwen3EmbeddingModel(Qwen3LMHeadModel):
    method prefill_to_last_hidden_states (line 457) | def prefill_to_last_hidden_states(self, input_embed: Tensor, paged_kv_...
    method decode_to_last_hidden_states (line 462) | def decode_to_last_hidden_states(self, input_embed: Tensor, paged_kv_c...
    method batch_prefill_to_last_hidden_states (line 467) | def batch_prefill_to_last_hidden_states(
    method batch_decode_to_last_hidden_states (line 474) | def batch_decode_to_last_hidden_states(
    method get_default_spec (line 481) | def get_default_spec(self):

FILE: python/mlc_llm/model/qwen3_moe/qwen3_moe_loader.py
  function huggingface (line 17) | def huggingface(model_config: Qwen3MoeConfig, quantization: Quantization...

FILE: python/mlc_llm/model/qwen3_moe/qwen3_moe_model.py
  class Qwen3MoeConfig (line 23) | class Qwen3MoeConfig(Qwen3Config):  # pylint: disable=too-many-instance-...
  class Qwen3MoeMLP (line 36) | class Qwen3MoeMLP(nn.Module):
    method __init__ (line 37) | def __init__(self, config: Qwen3MoeConfig, intermediate_size: Optional...
    method forward (line 49) | def forward(self, x: Tensor):
  class Qwen3MoeSparseMoeBlock (line 55) | class Qwen3MoeSparseMoeBlock(nn.Module):  # pylint: disable=too-many-ins...
    method __init__ (line 58) | def __init__(self, config: Qwen3MoeConfig):
    method forward (line 88) | def forward(self, x: Tensor):
  class Qwen3MoeDecoderLayer (line 146) | class Qwen3MoeDecoderLayer(nn.Module):
    method __init__ (line 147) | def __init__(self, config: Qwen3MoeConfig):
    method forward (line 190) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 199) | def _apply_residual(self, out, residual):
  class Qwen3MoeModel (line 205) | class Qwen3MoeModel(nn.Module):
    method __init__ (line 206) | def __init__(self, config: Qwen3MoeConfig):
    method forward (line 213) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class Qwen3MoeForCausalLM (line 221) | class Qwen3MoeForCausalLM(nn.Module):  # pylint: disable=too-many-instan...
    method __init__ (line 222) | def __init__(self, config: Qwen3MoeConfig):
    method to (line 238) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 243) | def batch_forward(
    method embed (line 259) | def embed(self, input_ids: Tensor):
    method prefill (line 264) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 278) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 287) | def batch_prefill(
    method batch_decode (line 298) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 302) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 306) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 332) | def get_default_spec(self):

FILE: python/mlc_llm/model/rwkv5/rwkv5_loader.py
  function huggingface (line 15) | def huggingface(model_config: RWKV5Config, quantization: Quantization) -...

FILE: python/mlc_llm/model/rwkv5/rwkv5_model.py
  class StateID (line 19) | class StateID:
  class RWKV5Config (line 28) | class RWKV5Config(ConfigBase):  # pylint: disable=too-many-instance-attr...
    method __post_init__ (line 46) | def __post_init__(self):
  function create_wkv5_func (line 64) | def create_wkv5_func(
  function token_shift (line 132) | def token_shift(state: Tensor, x: Tensor):
  function last_token (line 142) | def last_token(x: Tensor):
  class RWKV5_FNN (line 152) | class RWKV5_FNN(nn.Module):
    method __init__ (line 153) | def __init__(self, config: RWKV5Config, layer_id: int):
    method forward (line 162) | def forward(self, x: Tensor, state: RNNState):
  class RWKV5_Attention (line 175) | class RWKV5_Attention(nn.Module):  # pylint: disable=too-many-instance-a...
    method __init__ (line 178) | def __init__(self, config: RWKV5Config, layer_id: int):
    method forward (line 203) | def forward(self, x: Tensor, state: RNNState):  # pylint: disable=too-...
    method to (line 253) | def to(self, dtype: Optional[str] = None):
  class RWKV5_Layer (line 274) | class RWKV5_Layer(nn.Module):
    method __init__ (line 275) | def __init__(self, config: RWKV5Config, layer_id: int):
    method forward (line 295) | def forward(self, x: Tensor, state: RNNState) -> Tensor:
  class RWKV5_Model (line 307) | class RWKV5_Model(nn.Module):
    method __init__ (line 310) | def __init__(self, config: RWKV5Config):
    method forward (line 321) | def forward(self, input_embed: Tensor, state: RNNState):
  class RWKV5_ForCausalLM (line 329) | class RWKV5_ForCausalLM(nn.Module):  # pylint: disable=too-many-instance...
    method __init__ (line 332) | def __init__(self, config: RWKV5Config):
    method to (line 341) | def to(self, dtype: Optional[str] = None):
    method embed (line 346) | def embed(self, input_ids: Tensor):
    method forward (line 349) | def forward(
    method prefill (line 365) | def prefill(self, input_embed: Tensor, state: RNNState):
    method decode (line 369) | def decode(self, input_embed: Tensor, state: RNNState):
    method batch_prefill (line 373) | def batch_prefill(self, input_embeds: Tensor, logit_positions: Tensor,...
    method batch_decode (line 377) | def batch_decode(self, input_embeds: Tensor, state: RNNState):
    method batch_verify (line 381) | def batch_verify(self, input_embeds: Tensor, state: RNNState):
    method create_rnn_state (line 385) | def create_rnn_state(
    method get_default_spec (line 403) | def get_default_spec(self):

FILE: python/mlc_llm/model/rwkv6/rwkv6_loader.py
  function huggingface (line 13) | def huggingface(model_config: RWKV6Config, quantization: Quantization) -...

FILE: python/mlc_llm/model/rwkv6/rwkv6_model.py
  class StateID (line 19) | class StateID:
  class RWKV6Config (line 28) | class RWKV6Config(ConfigBase):  # pylint: disable=too-many-instance-attr...
    method __post_init__ (line 46) | def __post_init__(self):
  function create_wkv6_func (line 64) | def create_wkv6_func(
  function token_shift (line 129) | def token_shift(state: Tensor, x: Tensor):
  function last_token (line 139) | def last_token(x: Tensor):
  function unbind_to_five (line 148) | def unbind_to_five(x: Tensor) -> Tuple[Tensor, Tensor, Tensor, Tensor, T...
  class RWKV6_FNN (line 163) | class RWKV6_FNN(nn.Module):
    method __init__ (line 164) | def __init__(self, config: RWKV6Config, layer_id: int):
    method forward (line 173) | def forward(self, x: Tensor, state: RNNState):
  class RWKV6_Attention (line 190) | class RWKV6_Attention(nn.Module):  # pylint: disable=too-many-instance-a...
    method __init__ (line 193) | def __init__(self, config: RWKV6Config, layer_id: int):
    method forward (line 229) | def forward(self, x: Tensor, state: RNNState):  # pylint: disable=too-...
    method to (line 298) | def to(self, dtype: Optional[str] = None):
  class RWKV6_Layer (line 325) | class RWKV6_Layer(nn.Module):
    method __init__ (line 326) | def __init__(self, config: RWKV6Config, layer_id: int):
    method forward (line 346) | def forward(self, x: Tensor, state: RNNState) -> Tensor:
  class RWKV6_Model (line 358) | class RWKV6_Model(nn.Module):
    method __init__ (line 361) | def __init__(self, config: RWKV6Config):
    method forward (line 372) | def forward(self, input_embed: Tensor, state: RNNState):
  class RWKV6_ForCausalLM (line 380) | class RWKV6_ForCausalLM(nn.Module):  # pylint: disable=too-many-instance...
    method __init__ (line 383) | def __init__(self, config: RWKV6Config):
    method to (line 393) | def to(self, dtype: Optional[str] = None):
    method embed (line 398) | def embed(self, input_ids: Tensor):
    method forward (line 401) | def forward(
    method prefill (line 417) | def prefill(self, input_embed: Tensor, state: RNNState):
    method decode (line 421) | def decode(self, input_embed: Tensor, state: RNNState):
    method batch_prefill (line 425) | def batch_prefill(self, input_embeds: Tensor, logit_positions: Tensor,...
    method batch_decode (line 429) | def batch_decode(self, input_embeds: Tensor, state: RNNState):
    method batch_verify (line 433) | def batch_verify(self, input_embeds: Tensor, state: RNNState):
    method create_rnn_state (line 437) | def create_rnn_state(
    method get_default_spec (line 455) | def get_default_spec(self):

FILE: python/mlc_llm/model/stable_lm/stablelm_model.py
  class StableLmConfig (line 23) | class StableLmConfig(ConfigBase):  # pylint: disable=too-many-instance-a...
    method __post_init__ (line 43) | def __post_init__(self):
  class StableLmAttention (line 84) | class StableLmAttention(nn.Module):  # pylint: disable=too-many-instance...
    method __init__ (line 85) | def __init__(self, config: StableLmConfig):
    method forward (line 107) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class StableLmMLP (line 122) | class StableLmMLP(nn.Module):
    method __init__ (line 123) | def __init__(self, config: StableLmConfig):
    method forward (line 137) | def forward(self, x: Tensor):
  class StableLmDecoderLayer (line 143) | class StableLmDecoderLayer(nn.Module):
    method __init__ (line 144) | def __init__(self, config: StableLmConfig):
    method forward (line 179) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 186) | def _apply_residual(self, out, residual):
  class StableLmModel (line 192) | class StableLmModel(nn.Module):
    method __init__ (line 193) | def __init__(self, config: StableLmConfig):
    method forward (line 201) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class StableLmForCausalLM (line 209) | class StableLmForCausalLM(nn.Module):  # pylint: disable=too-many-instan...
    method __init__ (line 210) | def __init__(self, config: StableLmConfig):
    method to (line 225) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 230) | def batch_forward(
    method embed (line 246) | def embed(self, input_ids: Tensor):
    method prefill (line 251) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 265) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 274) | def batch_prefill(
    method batch_decode (line 285) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 289) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 293) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 320) | def get_default_spec(self):

FILE: python/mlc_llm/model/starcoder2/starcoder2_loader.py
  function huggingface (line 16) | def huggingface(model_config: Starcoder2Config, quantization: Quantizati...

FILE: python/mlc_llm/model/starcoder2/starcoder2_model.py
  class Starcoder2Config (line 23) | class Starcoder2Config(ConfigBase):  # pylint: disable=too-many-instance...
    method __post_init__ (line 46) | def __post_init__(self):
  class Starcoder2Attention (line 87) | class Starcoder2Attention(nn.Module):  # pylint: disable=too-many-instan...
    method __init__ (line 88) | def __init__(self, config: Starcoder2Config):
    method forward (line 115) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
  class Starcoder2MLP (line 130) | class Starcoder2MLP(nn.Module):
    method __init__ (line 131) | def __init__(self, config: Starcoder2Config):
    method forward (line 147) | def forward(self, hidden_states: Tensor):
  class Starcoder2DecoderLayer (line 154) | class Starcoder2DecoderLayer(nn.Module):
    method __init__ (line 155) | def __init__(self, config: Starcoder2Config):
    method forward (line 200) | def forward(self, hidden_states: Tensor, paged_kv_cache: PagedKVCache,...
    method _apply_residual (line 207) | def _apply_residual(self, out, residual):
  class Starcoder2Model (line 213) | class Starcoder2Model(nn.Module):
    method __init__ (line 214) | def __init__(self, config: Starcoder2Config):
    method forward (line 222) | def forward(self, inputs: Tensor, paged_kv_cache: PagedKVCache):
  class Starcoder2ForCausalLM (line 230) | class Starcoder2ForCausalLM(nn.Module):  # pylint: disable=too-many-inst...
    method __init__ (line 231) | def __init__(self, config: Starcoder2Config):
    method to (line 245) | def to(self, dtype: Optional[str] = None):
    method batch_forward (line 250) | def batch_forward(
    method embed (line 266) | def embed(self, input_ids: Tensor):
    method prefill (line 271) | def prefill(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method decode (line 285) | def decode(self, input_embed: Tensor, paged_kv_cache: PagedKVCache):
    method batch_prefill (line 294) | def batch_prefill(
    method batch_decode (line 305) | def batch_decode(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method batch_verify (line 309) | def batch_verify(self, input_embeds: Tensor, paged_kv_cache: PagedKVCa...
    method create_paged_kv_cache (line 313) | def create_paged_kv_cache(  # pylint: disable=too-many-arguments
    method get_default_spec (line 339) | def get_default_spec(self):

FILE: python/mlc_llm/model/vision/clip_vision.py
  class CLIPVisionConfig (line 30) | class CLIPVisionConfig(ConfigBase):  # pylint: disable=too-many-instance...
  class CLIPVisionEmbeddings (line 49) | class CLIPVisionEmbeddings(Module):  # pylint: disable=too-many-instance...
    method __init__ (line 50) | def __init__(self, config: CLIPVisionConfig):
    method forward (line 69) | def forward(self, pixel_values: Tensor) -> Tensor:
  function sigmoid (line 94) | def sigmoid(x: Tensor, name: str = "sigmoid") -> Tensor:
  class QuickGELU (line 112) | class QuickGELU(Module):
    method forward (line 113) | def forward(self, input_tensor: Tensor) -> Tensor:
  class CLIPMLP (line 117) | class CLIPMLP(Module):
    method __init__ (line 118) | def __init__(self, config: CLIPVisionConfig):
    method forward (line 124) | def forward(self, hidden_states: Tensor) -> Tensor:
  class CLIPAttention (line 131) | class CLIPAttention(Module):  # pylint: disable=too-many-instance-attrib...
    method __init__ (line 132) | def __init__(self, config: CLIPVisionConfig):
    method forward (line 148) | def forward(
  class CLIPEncoderLayer (line 164) | class CLIPEncoderLayer(Module):
    method __init__ (line 165) | def __init__(self, config: CLIPVisionConfig):
    method forward (line 173) | def forward(self, hidden_states: Tensor) -> Tensor:
  class CLIPEncoder (line 187) | class CLIPEncoder(Module):
    method __init__ (line 188) | def __init__(self, config: CLIPVisionConfig):
    method forward (line 194) | def forward(self, inputs_embeds: Tensor) -> Tensor:
  class CLIPVisionTransformer (line 205) | class CLIPVisionTransformer(Module):
    method __init__ (line 206) | def __init__(self, config: CLIPVisionConfig):
    method forward (line 214) | def forward(self, pixel_values: Tensor) -> Tensor:
  class CLIPVisionModel (line 221) | class CLIPVisionModel(Module):
    method __init__ (line 224) | def __init__(self, config: CLIPVisionConfig):
    method forward (line 228) | def forward(self, pixel_values: Tensor) -> Tensor:

FILE: python/mlc_llm/model/vision/image_processing.py
  function _var (line 10) | def _var(dtype, size=1):
  class ImageProcessor (line 15) | class ImageProcessor(Module):
    method __init__ (line 16) | def __init__(self):
    method apply_schedule (line 20) | def apply_schedule(self, sch, block, bdx=32, tile=[32, 32]):
    method resize (line 30) | def resize(self, image: Tensor, params):  # image layout:NCHW
    method crop (line 95) | def crop(self, image: Tensor, crop_size):
    method rescale (line 148) | def rescale(self, image: Tensor, rescale_factor=1 / 255.0, o_dtype="fl...
    method normalize (line 187) | def normalize(self, image: Tensor, o_dtype="float32"):
    method pad (line 238) | def pad(self, image: Tensor, dtype="uint8"):
    method preprocess (line 285) | def preprocess(self, pixel_values):

FILE: python/mlc_llm/nn/expert.py
  class MixtralExperts (line 9) | class MixtralExperts(nn.Module):
    method __init__ (line 12) | def __init__(self, num_local_experts, in_features, out_features, tenso...
    method forward (line 20) | def forward(self, x: Tensor, indptr: Tensor):  # pylint: disable=inval...

FILE: python/mlc_llm/nn/kv_cache.py
  class PagedKVCache (line 14) | class PagedKVCache(TVMPagedKVCache):  # pylint: disable=too-few-public-m...
    method create_generic (line 18) | def create_generic(  # pylint: disable=too-many-locals

FILE: python/mlc_llm/nn/rnn_state.py
  class RNNState (line 11) | class RNNState(Object):
    method create (line 15) | def create(
    method get (line 69) | def get(
    method set (line 111) | def set(self, layer_id: int, state_id: int, value: Tensor) -> "RNNState":
    method create_get_func (line 139) | def create_get_func(
    method create_set_func (line 236) | def create_set_func(

FILE: python/mlc_llm/op/attention.py
  function attention (line 18) | def attention(  # pylint: disable=invalid-name,too-many-locals,too-many-...

FILE: python/mlc_llm/op/batch_matmul.py
  function quantized_bmm (line 11) | def quantized_bmm(

FILE: python/mlc_llm/op/batch_spec_verify.py
  function batch_spec_verify (line 10) | def batch_spec_verify(vocab_size):

FILE: python/mlc_llm/op/cutlass.py
  function group_gemm (line 9) | def group_gemm(
  function fp8_gemm (line 82) | def fp8_gemm(
  function fp8_groupwise_scaled_gemm (line 140) | def fp8_groupwise_scaled_gemm(  # pylint: disable=too-many-arguments
  function fp8_groupwise_scaled_bmm (line 211) | def fp8_groupwise_scaled_bmm(  # pylint: disable=too-many-arguments
  function fp8_groupwise_scaled_group_gemm (line 283) | def fp8_groupwise_scaled_group_gemm(  # pylint: disable=too-many-argumen...

FILE: python/mlc_llm/op/extern.py
  class ExternModuleStore (line 25) | class ExternModuleStore:
  function enable (line 40) | def enable(target: Target, flashinfer: bool, faster_transformer: bool, c...
  function get_store (line 59) | def get_store() -> ExternModuleStore:
  function configure (line 64) | def configure() -> None:

FILE: python/mlc_llm/op/ft_gemm.py
  function faster_transformer_dequantize_gemm (line 11) | def faster_transformer_dequantize_gemm(  # pylint: disable=too-many-argu...
  function faster_transformer_moe_gemm (line 96) | def faster_transformer_moe_gemm(  # pylint: disable=too-many-arguments

FILE: python/mlc_llm/op/moe_matmul.py
  function gemv (line 13) | def gemv(x: Tensor, w: Tensor, indptr: Tensor) -> Tensor:
  function dequantize_gemv (line 78) | def dequantize_gemv(  # pylint: disable=too-many-arguments
  function dequantize_float8_gemv (line 180) | def dequantize_float8_gemv(
  function dequantize_block_scale_float8_gemv (line 299) | def dequantize_block_scale_float8_gemv(
  function group_gemm (line 386) | def group_gemm(x: Tensor, w: Tensor, indptr: Tensor):  # pylint: disable...
  function dequantize_group_gemm (line 565) | def dequantize_group_gemm(

FILE: python/mlc_llm/op/moe_misc.py
  function moe_sum (line 15) | def moe_sum(x: Tensor, dim: int) -> Tensor:
  function _gating_topk_init_local_top_k (line 36) | def _gating_topk_init_local_top_k(k_val, dtype, local_top_k, local_top_k...
  function _gating_topk_process_value (line 43) | def _gating_topk_process_value(  # pylint: disable=too-many-arguments
  function gating_topk (line 66) | def gating_topk(scores: Tensor, k: int) -> Tuple[Tensor, Tensor]:
  function gating_softmax_topk (line 138) | def gating_softmax_topk(  # pylint: disable=too-many-statements
  function group_limited_greedy_topk (line 243) | def group_limited_greedy_topk(  # pylint: disable=too-many-arguments
  function moe_cumsum (line 376) | def moe_cumsum(expert_indices: Tensor, num_local_experts: int) -> Tensor:
  function get_indices (line 458) | def get_indices(cumsum: Tensor, expert_indices: Tensor) -> Tuple[Tensor,...
  function get_indptr (line 533) | def get_indptr(
  function scatter_output (line 613) | def scatter_output(x: Tensor, indices: Tensor) -> Tensor:

FILE: python/mlc_llm/op/mrope.py
  function _rotate_half (line 14) | def _rotate_half(x: Tensor) -> Tensor:
  function _repeat_mrope_section (line 21) | def _repeat_mrope_section(section: Sequence[int]) -> Tuple[int, ...]:
  function _split_indices_from_sizes (line 29) | def _split_indices_from_sizes(sizes: Sequence[int]) -> List[int]:
  function _reorder_cos_sin (line 39) | def _reorder_cos_sin(
  class MultimodalRotaryEmbedding (line 58) | class MultimodalRotaryEmbedding(nn.Module):
    method __init__ (line 61) | def __init__(
    method forward (line 78) | def forward(self, reference: Tensor, position_ids: Tensor) -> Tuple[Te...
  function apply_multimodal_rotary_pos_emb (line 122) | def apply_multimodal_rotary_pos_emb(  # pylint: disable=too-many-arguments
  class VisionPositionMetadata (line 145) | class VisionPositionMetadata:
    method merged_hw (line 154) | def merged_hw(self, height: int, width: int) -> Tuple[int, int]:
  function _text_chunk (line 165) | def _text_chunk(length: int, offset: int) -> np.ndarray:
  function _grid_chunk (line 175) | def _grid_chunk(  # pylint: disable=too-many-arguments
  function _find_token_index (line 197) | def _find_token_index(tokens: Sequence[int], token_id: int, start: int) ...
  function _next_chunk_offset (line 204) | def _next_chunk_offset(chunks: Sequence[np.ndarray]) -> int:
  function _count_vision_items (line 210) | def _count_vision_items(
  function _next_vision_block (line 224) | def _next_vision_block(
  function _load_grid_for_block (line 239) | def _load_grid_for_block(  # pylint: disable=too-many-arguments
  function _build_sequence_position_ids (line 262) | def _build_sequence_position_ids(  # pylint: disable=too-many-arguments,...
  function _text_only_position_ids (line 345) | def _text_only_position_ids(
  function get_mrope_position_ids (line 363) | def get_mrope_position_ids(  # pylint: disable=too-many-arguments,too-ma...

FILE: python/mlc_llm/op/pipeline_parallel.py
  function pipeline_stage_boundary (line 9) | def pipeline_stage_boundary(*tensors: Tensor) -> List[Tensor]:

FILE: python/mlc_llm/op/top_p_pivot.py
  function top_p_pivot (line 13) | def top_p_pivot(pN, target: tvm.target.Target):
  function top_p_renorm (line 270) | def top_p_renorm(target: tvm.target.Target = None):

FILE: python/mlc_llm/op/triton.py
  function _get_triton_w8a8_block_fp8_gemm (line 22) | def _get_triton_w8a8_block_fp8_gemm():
  function _get_triton_w8a8_block_fp8_group_gemm (line 113) | def _get_triton_w8a8_block_fp8_group_gemm():
  function get_tir_w8a8_block_fp8_matmul (line 273) | def get_tir_w8a8_block_fp8_matmul(  # pylint: disable=too-many-arguments...
  function get_tir_w8a8_block_fp8_group_matmul (line 373) | def get_tir_w8a8_block_fp8_group_matmul(  # pylint: disable=too-many-arg...
  function _compute_expert_id_per_block (line 497) | def _compute_expert_id_per_block(
  function fp8_groupwise_scaled_gemm (line 568) | def fp8_groupwise_scaled_gemm(  # pylint: disable=too-many-arguments,too...
  function fp8_groupwise_scaled_group_gemm (line 668) | def fp8_groupwise_scaled_group_gemm(  # pylint: disable=too-many-argumen...

FILE: python/mlc_llm/protocol/conversation_protocol.py
  class MessagePlaceholders (line 10) | class MessagePlaceholders(Enum):
  class Conversation (line 23) | class Conversation(BaseModel):
    method __init__ (line 85) | def __init__(self, role_templates: Optional[Dict[str, str]] = None, **...
    method check_message_seps (line 98) | def check_message_seps(cls, seps: List[str]) -> List[str]:
    method to_json_dict (line 104) | def to_json_dict(self) -> Dict[str, Any]:
    method from_json_dict (line 109) | def from_json_dict(cls: Type[T], json_dict: Dict[str, Any]) -> T:
    method as_prompt (line 114) | def as_prompt(self, config=None) -> List[Any]:
  function _get_url_from_item (line 199) | def _get_url_from_item(item: Dict) -> str:
  function _combine_consecutive_messages (line 217) | def _combine_consecutive_messages(messages: List[Any]) -> List[Any]:

FILE: python/mlc_llm/protocol/debug_protocol.py
  class DisaggConfig (line 8) | class DisaggConfig(BaseModel):
  class DebugConfig (line 29) | class DebugConfig(BaseModel):

FILE: python/mlc_llm/protocol/error_protocol.py
  class BadRequestError (line 10) | class BadRequestError(ValueError):
    method __init__ (line 13) | def __init__(self, *args: object) -> None:
  class ErrorResponse (line 17) | class ErrorResponse(BaseModel):
  function create_error_response (line 25) | def create_error_response(status_code: HTTPStatus, message: str) -> fast...
  function bad_request_error_handler (line 33) | async def bad_request_error_handler(_request: fastapi.Request, e: BadReq...

FILE: python/mlc_llm/protocol/generation_config.py
  class GenerationConfig (line 12) | class GenerationConfig(BaseModel):  # pylint:

FILE: python/mlc_llm/protocol/microserving_protocol.py
  class PrepRecvRequest (line 8) | class PrepRecvRequest(CompletionRequest):
  class PrepRecvResponse (line 22) | class PrepRecvResponse(BaseModel):
  class RemoteSendRequest (line 39) | class RemoteSendRequest(CompletionRequest):
  class StartGenerateRequest (line 63) | class StartGenerateRequest(CompletionRequest):

FILE: python/mlc_llm/protocol/mlc_chat_config.py
  class MLCChatConfig (line 25) | class MLCChatConfig(BaseModel):
    method get_system_defaults_for_missing_fields (line 66) | def get_system_defaults_for_missing_fields(self) -> Dict[str, Any]:

FILE: python/mlc_llm/protocol/openai_api_protocol.py
  class ListResponse (line 27) | class ListResponse(BaseModel):
  class TopLogProbs (line 32) | class TopLogProbs(BaseModel):
  class LogProbsContent (line 38) | class LogProbsContent(BaseModel):
  class LogProbs (line 45) | class LogProbs(BaseModel):
  class CompletionLogProbs (line 49) | class CompletionLogProbs(BaseModel):
  class CompletionUsage (line 58) | class CompletionUsage(BaseModel):
  class StreamOptions (line 67) | class StreamOptions(BaseModel):
  class EmbeddingRequest (line 74) | class EmbeddingRequest(BaseModel):
    method validate_input (line 87) | def validate_input(cls, v):
  class EmbeddingObject (line 98) | class EmbeddingObject(BaseModel):
  class EmbeddingUsage (line 104) | class EmbeddingUsage(BaseModel):
  class EmbeddingResponse (line 109) | class EmbeddingResponse(BaseModel):
  class ModelResponse (line 123) | class ModelResponse(BaseModel):
  class RequestResponseFormat (line 137) | class RequestResponseFormat(BaseModel):
  class CompletionRequest (line 146) | class CompletionRequest(BaseModel):
    method check_penalty_range (line 174) | def check_penalty_range(cls, penalty_value: Optional[float]) -> Option...
    method check_logit_bias (line 182) | def check_logit_bias(
    method check_logprobs (line 197) | def check_logprobs(self) -> "CompletionRequest":
  class CompletionResponseChoice (line 206) | class CompletionResponseChoice(BaseModel):
  class CompletionResponse (line 213) | class CompletionResponse(BaseModel):
  class ChatFunction (line 229) | class ChatFunction(BaseModel):
  class ChatTool (line 235) | class ChatTool(BaseModel):
  class ChatFunctionCall (line 240) | class ChatFunctionCall(BaseModel):
  class ChatToolCall (line 245) | class ChatToolCall(BaseModel):
  class ChatCompletionMessage (line 251) | class ChatCompletionMessage(BaseModel):
  class ChatCompletionRequest (line 259) | class ChatCompletionRequest(BaseModel):
    method check_penalty_range (line 289) | def check_penalty_range(cls, penalty_value: Optional[float]) -> Option...
    method check_logit_bias (line 297) | def check_logit_bias(
    method check_logprobs (line 312) | def check_logprobs(self) -> "ChatCompletionRequest":
    method check_stream_options (line 323) | def check_stream_options(self) -> "ChatCompletionRequest":
    method check_debug_config (line 332) | def check_debug_config(self) -> "ChatCompletionRequest":
    method check_message_validity (line 348) | def check_message_validity(self) -> None:
    method check_function_call_usage (line 366) | def check_function_call_usage(self, conv_template: Conversation) -> None:
  class ChatCompletionResponseChoice (line 415) | class ChatCompletionResponseChoice(BaseModel):
  class ChatCompletionStreamResponseChoice (line 422) | class ChatCompletionStreamResponseChoice(BaseModel):
  class ChatCompletionResponse (line 429) | class ChatCompletionResponse(BaseModel):
  class ChatCompletionStreamResponse (line 443) | class ChatCompletionStreamResponse(BaseModel):
  function openai_api_get_unsupported_fields (line 460) | def openai_api_get_unsupported_fields(

FILE: python/mlc_llm/quantization/awq_quantization.py
  function _make_divisible (line 15) | def _make_divisible(c, divisor):  # pylint: disable=invalid-name
  function _calculate_zeros_width (line 19) | def _calculate_zeros_width(in_features, group_size=128, pack_num=8):
  class AWQQuantize (line 35) | class AWQQuantize:  # pylint: disable=too-many-instance-attributes
    method __post_init__ (line 53) | def __post_init__(self):
    method quantize_model (line 70) | def quantize_model(
    method _dequantize (line 133) | def _dequantize(
  class AWQQuantizeLinear (line 175) | class AWQQuantizeLinear(nn.Module):  # pylint: disable=too-many-instance...
    method __init__ (line 178) | def __init__(  # pylint: disable=too-many-arguments
    method from_linear (line 213) | def from_linear(linear: nn.Linear, config: AWQQuantize) -> "AWQQuantiz...
    method forward (line 238) | def forward(self, x: nn.Tensor) -> nn.Tensor:  # pylint: disable=inval...
    method to (line 271) | def to(self, dtype: Optional[str] = None) -> None:

FILE: python/mlc_llm/quantization/block_scale_quantization.py
  class BlockScaleQuantize (line 23) | class BlockScaleQuantize:  # pylint: disable=too-many-instance-attributes
    method __post_init__ (line 34) | def __post_init__(self):
    method quantize_model (line 47) | def quantize_model(
  class BlockScaleQuantizeLinear (line 181) | class BlockScaleQuantizeLinear(nn.Module):  # pylint: disable=too-many-i...
    method __init__ (line 184) | def __init__(  # pylint: disable=too-many-arguments
    method from_linear (line 214) | def from_linear(
    method forward (line 259) | def forward(self, x: nn.Tensor) -> nn.Tensor:
    method to (line 323) | def to(self, dtype: Optional[str] = None) -> None:
  class BlockScaleQuantizeLinearStaticActivation (line 334) | class BlockScaleQuantizeLinearStaticActivation(BlockScaleQuantizeLinear):
    method __init__ (line 337) | def __init__(  # pylint: disable=too-many-arguments
    method from_linear (line 360) | def from_linear(
    method forward (line 410) | def forward(self, x: nn.Tensor) -> nn.Tensor:
  class BlockScaleQuantizeMixtralExperts (line 460) | class BlockScaleQuantizeMixtralExperts(nn.Module):  # pylint: disable=to...
    method __init__ (line 463) | def __init__(  # pylint: disable=too-many-arguments
    method from_mixtral_experts (line 488) | def from_mixtral_experts(
    method forward (line 533) | def forward(self, x: nn.Tensor, indptr: nn.Tensor) -> nn.Tensor:
    method to (line 594) | def to(self, dtype: Optional[str] = None) -> None:
  function rowwise_group_quant_fp8 (line 603) | def rowwise_group_quant_fp8(  # pylint: disable=too-many-arguments
  function static_activation_group_quant_fp8 (line 701) | def static_activation_group_quant_fp8(
  function broadcast_activation_scale (line 735) | def broadcast_activation_scale(
  function dequantize_float8_groupwise_scaled_gemv (line 752) | def dequantize_float8_groupwise_scaled_gemv(

FILE: python/mlc_llm/quantization/fp8_quantization.py
  class FP8PerTensorQuantizeMixtralExperts (line 14) | class FP8PerTensorQuantizeMixtralExperts(
    method __init__ (line 19) | def __init__(
    method from_mixtral_experts (line 32) | def from_mixtral_experts(
    method forward (line 72) | def forward(self, x: nn.Tensor, indptr: nn.Tensor) -> nn.Tensor:  # py...

FILE: python/mlc_llm/quantization/ft_quantization.py
  class FTQuantize (line 29) | class FTQuantize:  # pylint: disable=too-many-instance-attributes
    method fallback_group_quantize (line 42) | def fallback_group_quantize(self) -> GroupQuantize:
    method __post_init__ (line 61) | def __post_init__(self):
    method quantize_model (line 76) | def quantize_model(
    method quantize_weight (line 171) | def quantize_weight(self, weight: Tensor) -> List[Tensor]:
    method _quantize (line 256) | def _quantize(  # pylint: disable=too-many-locals
  class FTQuantizeLinear (line 325) | class FTQuantizeLinear(nn.Module):  # pylint: disable=too-many-instance-...
    method __init__ (line 328) | def __init__(  # pylint: disable=too-many-arguments
    method from_linear (line 357) | def from_linear(src: nn.Linear, config: FTQuantize) -> "FTQuantizeLine...
    method forward (line 385) | def forward(self, x: nn.Tensor) -> nn.Tensor:  # pylint: disable=inval...
    method to (line 403) | def to(self, dtype: Optional[str] = None) -> None:

FILE: python/mlc_llm/quantization/group_quantization.py
  class GroupQuantize (line 28) | class GroupQuantize:  # pylint: disable=too-many-instance-attributes
    method __post_init__ (line 46) | def __post_init__(self):
    method quantize_model (line 65) | def quantize_model(
    method _dequantize (line 155) | def _dequantize(
    method quantize_weight (line 188) | def quantize_weight(
    method _quantize (line 237) | def _quantize(  # pylint: disable=too-many-locals
  class GroupQuantizeLinear (line 311) | class GroupQuantizeLinear(nn.Module):  # pylint: disable=too-many-instan...
    method __init__ (line 314) | def __init__(  # pylint: disable=too-many-arguments
    method from_linear (line 358) | def from_linear(src: nn.Linear, config: GroupQuantize) -> "GroupQuanti...
    method forward (line 392) | def forward(self, x: nn.Tensor) -> nn.Tensor:  # pylint: disable=inval...
    method to (line 441) | def to(self, dtype: Optional[str] = None) -> None:
  class GroupQuantizeEmbedding (line 454) | class GroupQuantizeEmbedding(nn.Module):
    method __init__ (line 457) | def __init__(self, num: Union[int, tir.Var], dim: int, config: GroupQu...
    method from_embedding (line 468) | def from_embedding(embedding: nn.Embedding, config: GroupQuantize) -> ...
    method forward (line 488) | def forward(self, x: nn.Tensor):  # pylint: disable=invalid-name
    method lm_head_forward (line 526) | def lm_head_forward(self, x: nn.Tensor):
  class GroupQuantizeMixtralExperts (line 561) | class GroupQuantizeMixtralExperts(nn.Module):  # pylint: disable=too-man...
    method __init__ (line 564) | def __init__(
    method from_mixtral_experts (line 590) | def from_mixtral_experts(
    method forward (line 621) | def forward(self, x: nn.Tensor, indptr: nn.Tensor) -> nn.Tensor:  # py...

FILE: python/mlc_llm/quantization/model_quantization.py
  function make_quantization_functions (line 20) | def make_quantization_functions(  # pylint: disable=too-many-arguments, ...
  function make_awq_quant (line 139) | def make_awq_quant(

FILE: python/mlc_llm/quantization/no_quantization.py
  class NoQuantize (line 7) | class NoQuantize:  # pylint: disable=too-many-instance-attributes
    method __post_init__ (line 14) | def __post_init__(self):

FILE: python/mlc_llm/quantization/per_tensor_quantization.py
  class PerTensorQuantize (line 30) | class PerTensorQuantize:  # pylint: disable=too-many-instance-attributes
    method __post_init__ (line 53) | def __post_init__(self):
    method quantize_model (line 61) | def quantize_model(
    method quantize_weight (line 169) | def quantize_weight(self, weight) -> List[Tensor]:
    method quantize_float8 (line 221) | def quantize_float8(  # pylint: disable=too-many-locals
    method _dequantize (line 286) | def _dequantize(
    method dequantize_float8 (line 301) | def dequantize_float8(
  class PerTensorQuantizeLinear (line 326) | class PerTensorQuantizeLinear(nn.Module):  # pylint: disable=too-many-in...
    method __init__ (line 329) | def __init__(  # pylint: disable=too-many-arguments
    method from_linear (line 363) | def from_linear(
    method forward (line 402) | def forward(self, x: nn.Tensor) -> nn.Tensor:  # pylint: disable=inval...
    method to (line 491) | def to(self, dtype: Optional[str] = None) -> None:
  class PerTensorQuantizeEmbedding (line 505) | class PerTensorQuantizeEmbedding(nn.Module):
    method __init__ (line 508) | def __init__(self, num: Union[int, tir.Var], dim: int, config: PerTens...
    method from_embedding (line 521) | def from_embedding(
    method forward (line 543) | def forward(self, x: nn.Tensor):  # pylint: disable=invalid-name
    method lm_head_forward (line 576) | def lm_head_forward(self, x: nn.Tensor):
  class PerTensorQuantizeMixtralExperts (line 606) | class PerTensorQuantizeMixtralExperts(nn.Module):  # pylint: disable=too...
    method __init__ (line 611) | def __init__(
    method from_mixtral_experts (line 641) | def from_mixtral_experts(
    method forward (line 675) | def forward(self, x: nn.Tensor, indptr: nn.Tensor) -> nn.Tensor:  # py...

FILE: python/mlc_llm/quantization/utils.py
  function convert_uint_to_float (line 14) | def convert_uint_to_float(  # pylint: disable=too-many-arguments
  function is_final_fc (line 50) | def is_final_fc(name: str) -> bool:
  function is_moe_gate (line 56) | def is_moe_gate(name: str, node: nn.Linear) -> bool:
  function compile_quantize_func (line 61) | def compile_quantize_func(mod: IRModule, device) -> Callable:
  function apply_sharding (line 86) | def apply_sharding(shard_strategy, name: str, weight: nn.Parameter):
  function convert_uint_packed_fp8_to_float (line 98) | def convert_uint_packed_fp8_to_float(  # pylint: disable=too-many-arguments
  function pack_weight (line 137) | def pack_weight(

FILE: python/mlc_llm/router/router.py
  class Router (line 17) | class Router:  # pylint: disable=too-many-instance-attributes
    method __init__ (line 20) | def __init__(
    method terminate (line 106) | def terminate(self):
    method handle_completion (line 111) | async def handle_completion(
    method translate_request (line 133) | async def translate_request(
    method _pick_endpoint (line 150) | def _pick_endpoint(self, endpoint_ids: Iterable[int]) -> int:
    method _handle_completion_round_robin (line 161) | async def _handle_completion_round_robin(
    method _handle_completion_disagg (line 221) | async def _handle_completion_disagg(  # pylint: disable=too-many-locals
    method send_prepare_receive (line 312) | async def send_prepare_receive(
    method send_remote_send (line 341) | async def send_remote_send(
    method send_start_generate (line 360) | async def send_start_generate(

FILE: python/mlc_llm/serve/config.py
  class EngineConfig (line 9) | class EngineConfig:  # pylint: disable=too-many-instance-attributes
    method asjson (line 162) | def asjson(self) -> str:
    method from_json (line 167) | def from_json(json_str: str) -> "EngineConfig":

FILE: python/mlc_llm/serve/data.py
  class Data (line 14) | class Data(Object):  # pylint: disable=too-few-public-methods
    method __init__ (line 17) | def __init__(self):  # pylint: disable=super-init-not-called
  class TextData (line 22) | class TextData(Data):
    method __init__ (line 31) | def __init__(self, text: str):
    method text (line 35) | def text(self) -> str:
    method __str__ (line 39) | def __str__(self) -> str:
  class TokenData (line 44) | class TokenData(Data):  # pylint: disable=too-few-public-methods
    method __init__ (line 53) | def __init__(self, token_ids: List[int]):
    method token_ids (line 57) | def token_ids(self) -> List[int]:
  class ImageData (line 64) | class ImageData(Data):
    method __init__ (line 73) | def __init__(self, image: Tensor, embed_size: int):
    method image (line 78) | def image(self) -> Tensor:
    method __len__ (line 82) | def __len__(self):
    method from_url (line 87) | def from_url(url: str, config: Dict) -> "ImageData":
    method get_embed_size (line 120) | def get_embed_size(config: Dict) -> int:
    method get_input_size (line 128) | def get_input_size(config: Dict) -> int:
  class SingleRequestStreamOutput (line 135) | class SingleRequestStreamOutput:
  class RequestStreamOutput (line 161) | class RequestStreamOutput(Object):  # pylint: disable=too-few-public-met...
    method unpack (line 178) | def unpack(self) -> Tuple[str, List[SingleRequestStreamOutput]]:

FILE: python/mlc_llm/serve/embedding_engine.py
  class AsyncEmbeddingEngine (line 19) | class AsyncEmbeddingEngine:  # pylint: disable=too-many-instance-attributes
    method __init__ (line 43) | def __init__(  # pylint: disable=too-many-branches
    method _init_encoder (line 89) | def _init_encoder(self, model: str) -> None:
    method _init_decoder (line 115) | def _init_decoder(self, model: str) -> None:
    method embed (line 164) | def embed(self, inputs: List[str]) -> Tuple[List[List[float]], int]:
    method async_embed (line 183) | async def async_embed(self, inputs: List[str]) -> Tuple[List[List[floa...
    method _embed_encoder (line 203) | def _embed_encoder(  # pylint: disable=too-many-locals
    method _embed_decoder (line 275) | def _embed_decoder(self, inputs: List[str]) -> Tuple[List[List[float]]...
    method _build_sub_batches (line 332) | def _build_sub_batches(
    method _batch_embed_decoder (line 362) | def _batch_embed_decoder(  # pylint: disable=too-many-arguments,too-ma...
    method _sequential_embed_decoder (line 422) | def _sequential_embed_decoder(  # pylint: disable=too-many-arguments,t...
    method terminate (line 479) | def terminate(self) -> None:
    method __del__ (line 486) | def __del__(self):

FILE: python/mlc_llm/serve/engine.py
  class AsyncChat (line 37) | class AsyncChat:  # pylint: disable=too-few-public-methods
    method __init__ (line 40) | def __init__(self, engine: weakref.ReferenceType) -> None:
  class Chat (line 45) | class Chat:  # pylint: disable=too-few-public-methods
    method __init__ (line 48) | def __init__(self, engine: weakref.ReferenceType) -> None:
  class AsyncChatCompletion (line 53) | class AsyncChatCompletion:  # pylint: disable=too-few-public-methods
    method __init__ (line 61) | def __init__(self, engine: weakref.ReferenceType) -> None:
    method create (line 65) | async def create(  # pylint: disable=too-many-arguments,too-many-locals
    method create (line 120) | async def create(  # pylint: disable=too-many-arguments,too-many-locals
    method create (line 174) | async def create(  # pylint: disable=too-many-arguments,too-many-locals
  class ChatCompletion (line 250) | class ChatCompletion:  # pylint: disable=too-few-public-methods
    method __init__ (line 258) | def __init__(self, engine: weakref.ReferenceType) -> None:
    method create (line 262) | def create(  # pylint: disable=too-many-arguments,too-many-locals
    method create (line 317) | def create(  # pylint: disable=too-many-arguments,too-many-locals
    method create (line 369) | def create(  # pylint: disable=too-many-arguments,too-many-locals
  class AsyncCompletion (line 445) | class AsyncCompletion:  # pylint: disable=too-few-public-methods
    method __init__ (line 453) | def __init__(self, engine: weakref.ReferenceType) -> None:
    method create (line 457) | async def create(  # pylint: disable=too-many-arguments,too-many-locals
    method create (line 512) | async def create(  # pylint: disable=too-many-arguments,too-many-locals
    method create (line 564) | async def create(  # pylint: disable=too-many-arguments,too-many-locals
  class Completion (line 640) | class Completion:  # pylint: disable=too-few-public-methods
    method __init__ (line 648) | def __init__(self, engine: weakref.ReferenceType) -> None:
    method create (line 652) | def create(  # pylint: disable=too-many-arguments,too-many-locals
    method create (line 707) | def create(  # pylint: disable=too-many-arguments,too-many-locals
    method create (line 759) | def create(  # pylint: disable=too-many-arguments,too-many-locals
  class AsyncMLCEngine (line 835) | class AsyncMLCEngine(engine_base.MLCEngineBase):
    method __init__ (line 885) | def __init__(  # pylint: disable=too-many-arguments,too-many-locals
    method abort (line 907) | async def abort(self, request_id: str) -> None:
    method metrics (line 917) | async def metrics(self) -> engine_base.EngineMetrics:
    method _chat_completion (line 928) | async def _chat_completion(  # pylint: disable=too-many-arguments,too-...
    method _completion (line 1072) | async def _completion(  # pylint: disable=too-many-arguments,too-many-...
    method _handle_chat_completion (line 1190) | async def _handle_chat_completion(
    method _handle_completion (line 1256) | async def _handle_completion(
    method _generate (line 1328) | async def _generate(
    method _abort (line 1404) | def _abort(self, request_id: str):
  class MLCEngine (line 1410) | class MLCEngine(engine_base.MLCEngineBase):
    method __init__ (line 1460) | def __init__(  # pylint: disable=too-many-arguments,too-many-locals
    method abort (line 1482) | def abort(self, request_id: str) -> None:
    method metrics (line 1492) | def metrics(self) -> engine_base.EngineMetrics:
    method _chat_completion (line 1503) | def _chat_completion(  # pylint: disable=too-many-arguments,too-many-l...
    method _completion (line 1637) | def _completion(  # pylint: disable=too-many-arguments,too-many-locals
    method _handle_chat_completion (line 1754) | def _handle_chat_completion(
    method _handle_completion (line 1802) | def _handle_completion(
    method _generate (line 1856) | def _generate(  # pylint: disable=too-many-locals
    method _request_stream_callback_impl (line 1928) | def _request_stream_callback_impl(

FILE: python/mlc_llm/serve/engine_base.py
  class ModelInfo (line 35) | class ModelInfo:
  function _check_engine_config (line 55) | def _check_engine_config(
  function _parse_models (line 95) | def _parse_models(
  function _process_model_args (line 112) | def _process_model_args(
  function _print_engine_mode_logging_msg (line 177) | def _print_engine_mode_logging_msg(
  class EngineMetrics (line 218) | class EngineMetrics:
    method __init__ (line 223) | def __init__(self, metrics):
    method __str__ (line 226) | def __str__(self):
    method __repr__ (line 229) | def __repr__(self):
    method __getitem__ (line 232) | def __getitem__(self, key):
    method prometheus_text (line 235) | def prometheus_text(self) -> str:
  function _query_engine_metrics (line 266) | def _query_engine_metrics(engine):
  function _async_query_engine_metrics (line 281) | async def _async_query_engine_metrics(engine):
  class CallbackStreamOutput (line 302) | class CallbackStreamOutput:
  class AsyncRequestStream (line 328) | class AsyncRequestStream:
    method __init__ (line 351) | def __init__(self) -> None:
    method push (line 355) | def push(self, item_or_exception: Union[List[CallbackStreamOutput], Ex...
    method finish (line 368) | def finish(self) -> None:
    method __aiter__ (line 373) | def __aiter__(self):
    method __anext__ (line 376) | async def __anext__(self) -> List[CallbackStreamOutput]:
  class EngineState (line 385) | class EngineState:
    method __init__ (line 413) | def __init__(self, enable_tracing: bool) -> None:
    method record_event (line 418) | def record_event(self, request_id: str, event: str) -> None:
    method get_request_stream_callback (line 439) | def get_request_stream_callback(
    method async_lazy_init_event_loop (line 462) | def async_lazy_init_event_loop(self) -> None:
    method _async_request_stream_callback (line 469) | def _async_request_stream_callback(self, delta_outputs: List[data.Requ...
    method _async_request_stream_callback_impl (line 487) | def _async_request_stream_callback_impl(
    method _sync_request_stream_callback (line 543) | def _sync_request_stream_callback(self, delta_outputs: List[data.Reque...
  class MLCEngineBase (line 551) | class MLCEngineBase:  # pylint: disable=too-many-instance-attributes,too...
    method __init__ (line 568) | def __init__(  # pylint: disable=too-many-arguments,too-many-locals
    method __del__ (line 654) | def __del__(self):
    method terminate (line 658) | def terminate(self):
    method _debug_call_func_on_all_worker (line 671) | def _debug_call_func_on_all_worker(
    method reset (line 677) | def reset(self):
  function process_chat_completion_request (line 682) | def process_chat_completion_request(  # pylint: disable=too-many-arguments
  function process_chat_completion_stream_output (line 778) | def process_chat_completion_stream_output(  # pylint: disable=too-many-a...
  function process_completion_request (line 887) | def process_completion_request(  # pylint: disable=too-many-arguments
  function get_logprobs_from_delta (line 969) | def get_logprobs_from_delta(
  function process_completion_stream_output (line 1006) | def process_completion_stream_output(  # pylint: disable=too-many-arguments
  function create_completion_suffix_response (line 1104) | def create_completion_suffix_response(
  function convert_function_str_to_json (line 1151) | def convert_function_str_to_json(stringified_calls: str) -> List[Union[D...
  function process_function_call_output (line 1176) | def process_function_call_output(
  function wrap_chat_completion_response (line 1212) | def wrap_chat_completion_response(  # pylint: disable=too-many-arguments
  function wrap_completion_response (line 1252) | def wrap_completion_response(  # pylint: disable=too-many-arguments

FILE: python/mlc_llm/serve/engine_utils.py
  function get_unsupported_fields (line 15) | def get_unsupported_fields(request: RequestProtocol) -> List[str]:
  function openai_api_get_generation_config (line 30) | def openai_api_get_generation_config(request: RequestProtocol) -> Dict[s...
  function get_generation_config (line 63) | def get_generation_config(
  function random_uuid (line 96) | def random_uuid() -> str:
  function check_unsupported_fields (line 101) | def check_unsupported_fields(request: RequestProtocol) -> None:
  function check_and_get_prompts_length (line 111) | def check_and_get_prompts_length(
  function process_prompts (line 129) | def process_prompts(
  function convert_prompts_to_data (line 170) | def convert_prompts_to_data(
  class ErrorCleanupScope (line 185) | class ErrorCleanupScope:
    method __init__ (line 253) | def __init__(self, cleanup: Callable):
    method __enter__ (line 256) | def __enter__(self):
    method __exit__ (line 259) | def __exit__(self, exc_type, exc_value, traceback) -> None:
  function load_embedding_params (line 268) | def load_embedding_params(model_weight_path, device, model_metadata) -> ...
  function get_embedding_metadata (line 293) | def get_embedding_metadata(config: Dict[str, Any]) -> Optional[Dict[str,...
  function detect_embedding_model_type (line 311) | def detect_embedding_model_type(mod) -> Literal["encoder", "decoder"]:

FILE: python/mlc_llm/serve/entrypoints/debug_entrypoints.py
  function debug_dump_event_trace (line 17) | async def debug_dump_event_trace(request: fastapi.Request):
  function debug_cuda_profiler_start (line 61) | async def debug_cuda_profiler_start(_request: fastapi.Request):
  function debug_cuda_profiler_stop (line 74) | async def debug_cuda_profiler_stop(_request: fastapi.Request):
  function debug_dump_engine_metrics (line 87) | async def debug_dump_engine_metrics(request: fastapi.Request):
  function debug_reset_engine_stats (line 110) | async def debug_reset_engine_stats(request: fastapi.Request):

FILE: python/mlc_llm/serve/entrypoints/metrics_entrypoints.py
  function metrics (line 14) | async def metrics(_request: fastapi.Request):

FILE: python/mlc_llm/serve/entrypoints/microserving_entrypoints.py
  function prep_recv (line 23) | async def prep_recv(request: PrepRecvRequest, raw_request: fastapi.Reque...
  function remote_send (line 49) | async def remote_send(request: RemoteSendRequest, raw_request: fastapi.R...
  function start_generate (line 67) | async def start_generate(request: StartGenerateRequest, raw_request: fas...

FILE: python/mlc_llm/serve/entrypoints/openai_entrypoints.py
  function verify_api_key (line 29) | def verify_api_key(request: fastapi.Request):
  function request_embedding (line 46) | async def request_embedding(request: EmbeddingRequest):
  function request_models (line 125) | async def request_models() -> ListResponse:
  function request_completion (line 137) | async def request_completion(request: CompletionRequest, raw_request: fa...
  function request_chat_completion (line 241) | async def request_chat_completion(

FILE: python/mlc_llm/serve/event_trace_recorder.py
  class EventTraceRecorder (line 10) | class EventTraceRecorder(Object):
    method __init__ (line 13) | def __init__(self) -> None:  # pylint: disable=super-init-not-called
    method add_event (line 19) | def add_event(self, request_id: str, event: str) -> None:
    method dump_json (line 39) | def dump_json(self) -> str:

FILE: python/mlc_llm/serve/radix_tree.py
  class PagedRadixTree (line 12) | class PagedRadixTree(Object):
    method __init__ (line 15) | def __init__(self):  # pylint: disable=super-init-not-called
    method match (line 21) | def match(self, tokens: Union[ShapeTuple, List, Tuple]) -> Tuple[int, ...
    method add (line 44) | def add(self, seq_id: int) -> None:
    method remove (line 55) | def remove(self, seq_id: int) -> None:
    method extend (line 66) | def extend(self, seq_id: int, tokens: Union[ShapeTuple, List, Tuple]) ...
    method rollback (line 81) | def rollback(self, seq_id: int, num_tokens: int) -> None:
    method fork (line 94) | def fork(self, seq_id: int, parent_seq_id: int, forked_offset: int) ->...
    method get (line 112) | def get(self, seq_id: int) -> ShapeTuple:
    method get_length (line 128) | def get_length(self, seq_id: int) -> int:
    method free_capacity (line 144) | def free_capacity(self) -> int:

FILE: python/mlc_llm/serve/request.py
  class Request (line 15) | class Request(Object):
    method inputs (line 27) | def inputs(self) -> List[Data]:
    method generation_config (line 32) | def generation_config(self) -> GenerationConfig:

FILE: python/mlc_llm/serve/server/popen_server.py
  class PopenServer (line 18) | class PopenServer:  # pylint: disable=too-many-instance-attributes
    method __init__ (line 25) | def __init__(  # pylint: disable=too-many-arguments
    method start (line 59) | def start(  # pylint: disable=too-many-branches,too-many-statements
    method terminate (line 163) | def terminate(self) -> None:
    method __enter__ (line 197) | def __enter__(self):
    method __exit__ (line 202) | def __exit__(self, exc_type, exc_val, exc_tb):

FILE: python/mlc_llm/serve/server/server_context.py
  class ServerContext (line 11) | class ServerContext:
    method __init__ (line 19) | def __init__(self) -> None:
    method __enter__ (line 24) | def __enter__(self):
    method __exit__ (line 30) | def __exit__(self, exc_type, exc_value, traceback):
    method current (line 40) | def current():
    method add_model (line 44) | def add_model(self, hosted_model: str, engine: AsyncMLCEngine) -> None:
    method get_engine (line 50) | def get_engine(self, model: Optional[str]) -> Optional[AsyncMLCEngine]:
    method get_model_list (line 57) | def get_model_list(self) -> List[str]:
    method add_embedding_engine (line 61) | def add_embedding_engine(self, hosted_model: str, engine: "AsyncEmbedd...
    method get_embedding_engine (line 67) | def get_embedding_engine(self, model: Optional[str]) -> Optional["Asyn...

FILE: python/mlc_llm/serve/sync_engine.py
  function _create_tvm_module (line 35) | def _create_tvm_module(
  class SyncMLCEngine (line 45) | class SyncMLCEngine:
    method __init__ (line 85) | def __init__(  # pylint: disable=too-many-arguments,too-many-locals
    method generate (line 156) | def generate(  # pylint: disable=too-many-locals
    method create_request (line 290) | def create_request(
    method add_request (line 320) | def add_request(self, request: Request) -> None:
    method abort_request (line 330) | def abort_request(self, request_id: str) -> None:
    method step (line 340) | def step(self) -> None:
    method reset (line 354) | def reset(self) -> None:
    method metrics (line 358) | def metrics(self) -> EngineMetrics:

FILE: python/mlc_llm/support/argparse.py
  class ArgumentParser (line 7) | class ArgumentParser(argparse.ArgumentParser):
    method error (line 10) | def error(self, message):

FILE: python/mlc_llm/support/auto_config.py
  function detect_mlc_chat_config (line 21) | def detect_mlc_chat_config(mlc_chat_config: str) -> Path:
  function detect_config (line 74) | def detect_config(config: str) -> Path:
  function detect_model_type (line 120) | def detect_model_type(model_type: str, config: Path) -> "Model":
  function detect_quantization (line 160) | def detect_quantization(quantization_arg: str, config: Path) -> "Quantiz...

FILE: python/mlc_llm/support/auto_device.py
  function detect_device (line 24) | def detect_device(device_hint: str) -> Optional[Device]:
  function device2str (line 47) | def device2str(device: Device) -> str:
  function _device_exists (line 52) | def _device_exists(device: Device) -> bool:

FILE: python/mlc_llm/support/auto_target.py
  function detect_target_and_host (line 31) | def detect_target_and_host(target_hint: str, host_hint: str = "auto") ->...
  function _detect_target_gpu (line 64) | def _detect_target_gpu(hint: str) -> Tuple[Target, BuildFunc]:
  function _detect_target_host (line 105) | def _detect_target_host(hint: str) -> Target:
  function _is_device (line 118) | def _is_device(device: str):
  function _add_system_lib_prefix (line 126) | def _add_system_lib_prefix(mod: IRModule, prefix: str, is_system_lib: bo...
  function _build_metal_x86_64 (line 142) | def _build_metal_x86_64():
  function _build_iphone (line 161) | def _build_iphone():
  function _build_android (line 186) | def _build_android():
  function _build_android_so (line 209) | def _build_android_so():
  function _build_webgpu (line 232) | def _build_webgpu():
  function _build_mali (line 272) | def _build_mali():
  function _build_default (line 291) | def _build_default():
  function detect_cuda_arch_list (line 314) | def detect_cuda_arch_list(target: Target) -> List[int]:
  function _register_cuda_hook (line 332) | def _register_cuda_hook(target: Target):
  function detect_system_lib_prefix (line 365) | def detect_system_lib_prefix(

FILE: python/mlc_llm/support/auto_weight.py
  function detect_weight (line 16) | def detect_weight(
  function _guess_weight_format (line 93) | def _guess_weight_format(weight_path: Path) -> Tuple[Path, str]:
  function _check_pytorch (line 118) | def _check_pytorch(weight_path: Path) -> Optional[Path]:
  function _check_safetensor (line 141) | def _check_safetensor(weight_path: Path) -> Optional[Path]:

FILE: python/mlc_llm/support/config.py
  class ConfigBase (line 28) | class ConfigBase:
    method from_dict (line 35) | def from_dict(cls: Type[ConfigClass], source: Dict[str, Any]) -> Confi...
    method from_file (line 54) | def from_file(cls: Type[ConfigClass], source: Path) -> ConfigClass:
    method asdict (line 73) | def asdict(self):
  class ConfigOverrideBase (line 86) | class ConfigOverrideBase:
    method apply (line 91) | def apply(self, config):

FILE: python/mlc_llm/support/constants.py
  function _check (line 11) | def _check():
  function _get_cache_dir (line 26) | def _get_cache_dir() -> Path:
  function _get_dso_suffix (line 49) | def _get_dso_suffix() -> str:
  function _get_test_model_path (line 59) | def _get_test_model_path() -> List[Path]:
  function _get_read_only_weight_caches (line 74) | def _get_read_only_weight_caches() -> List[Path]:

FILE: python/mlc_llm/support/convert_tiktoken.py
  function bpe (line 13) | def bpe(
  function generate_vocab_and_merges (line 33) | def generate_vocab_and_merges(encoder, mergeable_ranks):
  function convert_tiktoken (line 64) | def convert_tiktoken(model_path, output_dir, context_window_size=None):

FILE: python/mlc_llm/support/download_cache.py
  function log_download_cache_policy (line 27) | def log_download_cache_policy():
  function _ensure_directory_not_exist (line 36) | def _ensure_directory_not_exist(path: Path, force_redo: bool) -> None:
  function git_clone (line 47) | def git_clone(url: str, destination: Path, ignore_lfs: bool) -> None:
  function git_lfs_pull (line 76) | def git_lfs_pull(repo_dir: Path, ignore_extensions: Optional[List[str]] ...
  function download_file (line 102) | def download_file(
  function download_and_cache_mlc_weights (line 127) | def download_and_cache_mlc_weights(  # pylint: disable=too-many-locals
  function get_or_download_model (line 202) | def get_or_download_model(model: str) -> Path:

FILE: python/mlc_llm/support/logging.py
  function enable_logging (line 10) | def enable_logging():
  function getLogger (line 22) | def getLogger(name: str):  # pylint: disable=invalid-name

FILE: python/mlc_llm/support/max_thread_check.py
  function get_max_num_threads_per_block (line 6) | def get_max_num_threads_per_block(target: Target) -> int:
  function check_thread_limits (line 18) | def check_thread_limits(target: Target, bdx: int, bdy: int, bdz: int, gd...

FILE: python/mlc_llm/support/preshard.py
  function _sharded_param_name (line 15) | def _sharded_param_name(param_name, worker_id):
  function _create_shard_func (line 19) | def _create_shard_func(
  function _compile_shard_funcs (line 55) | def _compile_shard_funcs(mod: IRModule, device: Device):
  function apply_preshard (line 71) | def apply_preshard(

FILE: python/mlc_llm/support/random.py
  function set_global_random_seed (line 6) | def set_global_random_seed(seed):

FILE: python/mlc_llm/support/style.py
  class Styles (line 6) | class Styles(Enum):
  function red (line 25) | def red(text: str) -> str:
  function green (line 30) | def green(text: str) -> str:
  function yellow (line 35) | def yellow(text: str) -> str:
  function blue (line 40) | def blue(text: str) -> str:
  function purple (line 45) | def purple(text: str) -> str:
  function cyan (line 50) | def cyan(text: str) -> str:
  function bold (line 55) | def bold(text: str) -> str:
  function underline (line 60) | def underline(text: str) -> str:

FILE: python/mlc_llm/support/tensor_parallel.py
  class ShardSingleDim (line 12) | class ShardSingleDim:
    method gen_tir (line 36) | def gen_tir(self, shards: int, weight: nn.Tensor) -> tir.PrimFunc:
    method gen_shard_info (line 83) | def gen_shard_info(self, shards: int, weight: nn.Tensor) -> Dict[str, ...
    method _compute_in_shape (line 92) | def _compute_in_shape(self, shards: int, weight: nn.Tensor) -> List[int]:
  function shard_bias (line 99) | def shard_bias(linear: nn.Linear, tensor_parallel_shards: int):

FILE: python/mlc_llm/support/tqdm.py
  function _redirect_print (line 12) | def _redirect_print():
  function redirect (line 31) | def redirect():

FILE: python/mlc_llm/testing/debug_chat.py
  function _extract_metadata (line 27) | def _extract_metadata(mod: Module):
  function _load_params (line 31) | def _load_params(
  function _get_tvm_module (line 44) | def _get_tvm_module(
  class DefaultDebugInstrument (line 59) | class DefaultDebugInstrument:
    method __init__ (line 68) | def __init__(self, debug_out: Path):
    method reset (line 82) | def reset(self, debug_out: Path):
    method __call__ (line 96) | def __call__(self, func, name, before_run, ret_val, *args):
  class DebugChat (line 145) | class DebugChat:  # pylint: disable=too-many-instance-attributes, too-fe...
    method __init__ (line 165) | def __init__(  # pylint: disable=too-many-arguments
    method _preprocess_prompts (line 290) | def _preprocess_prompts(
    method _embed (line 323) | def _embed(
    method _prefill (line 358) | def _prefill(self, embedding: tvm.runtime.Tensor, input_len: int):
    method _decode (line 396) | def _decode(self, token: int, kv_caches: Object):
    method _softmax_with_temperature (line 403) | def _softmax_with_temperature(self, logits: np.ndarray, temperature: f...
    method _apply_presence_and_freq_penalty (line 412) | def _apply_presence_and_freq_penalty(
    method _sample_token_from_logits (line 418) | def _sample_token_from_logits(
    method generate (line 440) | def generate(
  function main (line 490) | def main():

FILE: python/mlc_llm/testing/debug_compare.py
  function _print_as_table (line 16) | def _print_as_table(sorted_list):
  class LibCompare (line 40) | class LibCompare(LibCompareVMInstrument):
    method __init__ (line 66) | def __init__(  # pylint: disable=too-many-arguments, unused-argument
    method reset (line 85) | def reset(self, debug_out: Path):  # pylint: disable=unused-argument
    method skip_instrument (line 109) | def skip_instrument(self, func, name, before_run, ret_val, *args):
    method compare (line 124) | def compare(
  function get_instrument (line 146) | def get_instrument(args):
  function main (line 182) | def main():

FILE: python/mlc_llm/testing/pytest_utils.py
  function require_test_model (line 13) | def require_test_model(*models: str):
  function require_test_tokenizers (line 83) | def require_test_tokenizers(*models: str):

FILE: python/mlc_llm/tokenizers/streamer.py
  class TextStreamer (line 13) | class TextStreamer(Object):
    method __init__ (line 18) | def __init__(self, tokenizer: Tokenizer) -> None:  # pylint: disable=s...
    method put (line 25) | def put(self, delta_tokens: Union[List[int], ShapeTuple]) -> str:
    method finish (line 47) | def finish(self) -> str:
  class StopStrHandler (line 53) | class StopStrHandler(Object):
    method __init__ (line 58) | def __init__(  # pylint: disable=super-init-not-called
    method put (line 67) | def put(self, token_id: int) -> List[int]:
    method finish (line 77) | def finish(self) -> List[int]:
    method stop_triggered (line 84) | def stop_triggered(self) -> bool:

FILE: python/mlc_llm/tokenizers/tokenizers.py
  class TokenizerInfo (line 19) | class TokenizerInfo:  # pylint: disable=too-many-instance-attributes
    method asjson (line 48) | def asjson(self) -> str:
    method from_json (line 53) | def from_json(json_str: str) -> "TokenizerInfo":
  class Tokenizer (line 59) | class Tokenizer(Object):
    method __init__ (line 62) | def __init__(self, tokenizer_path: str) -> None:  # pylint: disable=su...
    method encode (line 69) | def encode(self, text: str) -> List[int]:
    method encode_batch (line 84) | def encode_batch(self, texts: List[str]) -> List[List[int]]:
    method decode (line 99) | def decode(self, token_ids: List[int]) -> str:
    method detect_tokenizer_info (line 117) | def detect_tokenizer_info(tokenizer_path: str) -> TokenizerInfo:

FILE: python/setup.py
  function get_lib_path (line 14) | def get_lib_path():
  function git_describe_version (line 35) | def git_describe_version(original_version):
  function parse_requirements (line 50) | def parse_requirements(filename: os.PathLike):
  class BinaryDistribution (line 76) | class BinaryDistribution(Distribution):
    method has_ext_modules (line 79) | def has_ext_modules(self):
    method is_pure (line 83) | def is_pure(self):
  function main (line 88) | def main():

FILE: scripts/check_url_validity.py
  function find_urls_in_file (line 8) | def find_urls_in_file(file_path):
  function main (line 22) | def main():

FILE: tests/cpp/conv_template_unittest.cc
  type mlc (line 5) | namespace mlc {
    type llm (line 6) | namespace llm {
      type json_ffi (line 7) | namespace json_ffi {
        function _TestConvTemplateLoadJSONTextContent (line 9) | void _TestConvTemplateLoadJSONTextContent() {
        function _TestConvTemplateLoadJSONPartsContent (line 64) | void _TestConvTemplateLoadJSONPartsContent() {
        function TEST (line 124) | TEST(JsonFFIConvTest, LoadJSONTextContentTest) { _TestConvTemplate...
        function TEST (line 125) | TEST(JsonFFIConvTest, LoadJSONPartsContentTest) { _TestConvTemplat...

FILE: tests/python/compiler_pass/test_fuse_ft_dequantize_matmul_epilogue.py
  function test_fuse_bias (line 12) | def test_fuse_bias():
  function test_fuse_activation (line 75) | def test_fuse_activation():
  function test_fuse_bias_activation (line 134) | def test_fuse_bias_activation():
  function test_fuse_residual_binary (line 198) | def test_fuse_residual_binary():
  function test_fuse_residual_unary (line 267) | def test_fuse_residual_unary():

FILE: tests/python/conftest.py
  function pytest_configure (line 21) | def pytest_configure(config):

FILE: tests/python/conversation_template/test_conversation_protocol.py
  function get_conv_templates (line 7) | def get_conv_templates():
  function test_json (line 23) | def test_json(conv_template_name):
  function test_prompt (line 31) | def test_prompt(conv_template_name):

FILE: tests/python/conversation_template/test_llama_template.py
  function test_llama3_prompt (line 10) | def test_llama3_prompt():

FILE: tests/python/integration/test_model_compile.py
  function run_command (line 78) | def run_command(log_file, cmd):
  function test_model_compile (line 87) | def test_model_compile():  # pylint: disable=too-many-locals

FILE: tests/python/json_ffi/test_json_ffi_engine.py
  function run_chat_completion (line 55) | def run_chat_completion(
  function run_json_schema_function_calling (line 92) | def run_json_schema_function_calling(
  function test_chat_completion (line 152) | def test_chat_completion(model):
  function test_reload_reset_unload (line 169) | def test_reload_reset_unload(model):
  function test_json_schema_with_system_prompt (line 185) | def test_json_schema_with_system_prompt(model):

FILE: tests/python/json_ffi/test_json_ffi_engine_image.py
  function base64_encode_image (line 10) | def base64_encode_image(url: str) -> str:
  function run_chat_completion (line 35) | def run_chat_completion(
  function test_chat_completion (line 74) | def test_chat_completion():

FILE: tests/python/json_ffi/test_json_ffi_engine_mock.py
  function check_error_handling (line 13) | def check_error_handling(engine, expect_str, **params):
  function test_chat_completion_misuse (line 38) | def test_chat_completion_misuse(model: str):
  function check_normal_param_passing (line 52) | def check_normal_param_passing(engine):
  function check_n_generation (line 82) | def check_n_generation(engine):
  function test_chat_completion_api (line 97) | def test_chat_completion_api(model: str):

FILE: tests/python/loader/test_awq.py
  function test_load_llama (line 23) | def test_load_llama(param_path: Union[str, Path]):

FILE: tests/python/loader/test_huggingface.py
  function test_load_torch_llama (line 23) | def test_load_torch_llama(base_path: Union[str, Path]):
  function test_load_safetensor_llama (line 47) | def test_load_safetensor_llama(base_path: Union[str, Path]):

FILE: tests/python/model/test_gemma3.py
  function test_gemma3_model_registered (line 9) | def test_gemma3_model_registered():
  function test_gemma3_creation (line 21) | def test_gemma3_creation(model_name: str):
  function test_gemma3_config_validation (line 49) | def test_gemma3_config_validation():

FILE: tests/python/model/test_gpt2.py
  function test_gpt2_creation (line 8) | def test_gpt2_creation(model_name: str):

FILE: tests/python/model/test_gptNeox.py
  function test_mistral_creation (line 8) | def test_mistral_creation(model_name: str):

FILE: tests/python/model/test_kv_cache.py
  function test_nn_module_paged_kv_cache (line 15) | def test_nn_module_paged_kv_cache():

FILE: tests/python/model/test_llama.py
  function test_llama2_creation (line 10) | def test_llama2_creation(model_name: str):

FILE: tests/python/model/test_llama_quantization.py
  function test_llama2_group_quantization (line 20) | def test_llama2_group_quantization(model_name: str, quant_name: str):
  function test_llama2_no_quantization (line 62) | def test_llama2_no_quantization(model_name: str, quant_name: str):

FILE: tests/python/model/test_mistral.py
  function test_mistral_creation (line 8) | def test_mistral_creation(model_name: str):

FILE: tests/python/model/test_phi.py
  function test_phi_creation (line 8) | def test_phi_creation(model_name: str):

FILE: tests/python/model/test_qwen3_embedding.py
  function _load_embed_weight (line 51) | def _load_embed_weight(hf_dir):
  function _hf_logits (line 60) | def _hf_logits(text, tokenizer, hf_model, embed_weight):
  function _mlc_logits (line 68) | def _mlc_logits(text, tokenizer, mlc_module, params, metadata, dev, embe...
  function test_mlc_hf_logit_match (line 116) | def test_mlc_hf_logit_match():

FILE: tests/python/op/test_batch_spec_verify.py
  function test_batch_spec_verify (line 15) | def test_batch_spec_verify(nbatch, vocab, plist):

FILE: tests/python/op/test_fp8_block_matmul.py
  function test_fp8_block_matmul_cutlass (line 30) | def test_fp8_block_matmul_cutlass(M: int, N: int, K: int, dtype: str):
  function test_fp8_block_matmul_triton (line 117) | def test_fp8_block_matmul_triton(M: int, N: int, K: int, dtype: str):
  function test_fp8_block_group_matmul_cutlass (line 210) | def test_fp8_block_group_matmul_cutlass(M: int, N: int, K: int, dtype: s...
  function test_fp8_block_group_matmul_triton (line 356) | def test_fp8_block_group_matmul_triton(M: int, N: int, K: int, dtype: str):
  function test_fp8_block_bmm_cutlass (line 489) | def test_fp8_block_bmm_cutlass(M: int, N: int, K: int, H: int, dtype: str):
  function test_fp8_block_gemv_tir (line 562) | def test_fp8_block_gemv_tir(N: int, K: int, up: bool, dtype: str):
  function blockwise_matmul (line 672) | def blockwise_matmul(
  function blockwise_group_matmul (line 704) | def blockwise_group_matmul(
  function blockwise_group_matmul_unquantized (line 742) | def blockwise_group_matmul_unquantized(
  function blockwise_bmm (line 772) | def blockwise_bmm(
  function blockwise_quant_fp8 (line 809) | def blockwise_quant_fp8(
  function rowwise_quant_fp8 (line 879) | def rowwise_quant_fp8(
  function test_cutlass_gemm (line 936) | def test_cutlass_gemm():
  function test_triton_gemm (line 957) | def test_triton_gemm():
  function test_cutlass_group_gemm (line 973) | def test_cutlass_group_gemm():
  function test_triton_group_gemm (line 988) | def test_triton_group_gemm():
  function test_cutlass_bmm (line 1003) | def test_cutlass_bmm():
  function test_tir_moe_gemv (line 1019) | def test_tir_moe_gemv():

FILE: tests/python/op/test_mrope.py
  function _numpy_rotate_half (line 18) | def _numpy_rotate_half(x: np.ndarray) -> np.ndarray:
  function _numpy_apply_mrope (line 23) | def _numpy_apply_mrope(
  function _evaluate_tensor (line 64) | def _evaluate_tensor(expr):
  function _run_mlc_mrope (line 72) | def _run_mlc_mrope(
  function test_apply_mrope_matches_numpy_reference (line 116) | def test_apply_mrope_matches_numpy_reference():
  function test_get_mrope_position_ids_text_only (line 135) | def test_get_mrope_position_ids_text_only():
  function test_get_mrope_position_ids_single_image_block (line 160) | def test_get_mrope_position_ids_single_image_block():
  function test_apply_mrope_accepts_3_batch_seq_layout (line 194) | def test_apply_mrope_accepts_3_batch_seq_layout():
  function test_get_mrope_position_ids_output_is_directly_usable (line 218) | def test_get_mrope_position_ids_output_is_directly_usable():

FILE: tests/python/op/test_top_p_pivot.py
  function test_top_p_renorm (line 16) | def test_top_p_renorm(batch_size, vocab):

FILE: tests/python/op/test_tree_attn.py
  function test_tree_attn (line 18) | def test_tree_attn(nbatch, h_q, h_kv, d, rotary_mode):

FILE: tests/python/op/test_two_stage_softmax.py
  function test_two_stage_softmax (line 11) | def test_two_stage_softmax():

FILE: tests/python/quantization/test_awq_quantization.py
  function dequantize_np (line 16) | def dequantize_np(
  function test_dequantize_weight (line 52) | def test_dequantize_weight(quant_name: str, shape: List[int], dtype: str):

FILE: tests/python/quantization/test_group_quantization.py
  function quantize_np (line 21) | def quantize_np(config: GroupQuantize, weight: np.ndarray):
  function dequantize_np (line 55) | def dequantize_np(
  function test_quantize_weight (line 96) | def test_quantize_weight(quant_name: str, shape: List[int], dtype: str, ...
  function test_dequantize_weight (line 123) | def test_dequantize_weight(quant_name: str, shape: List[int], dtype: str):
  function test_quantize_model (line 160) | def test_quantize_model(quant_name: str, shape: List[int], dtype: str):

FILE: tests/python/router/test_router.py
  function get_router_1tp1 (line 15) | def get_router_1tp1():
  function get_router_2tp1 (line 27) | def get_router_2tp1():
  function get_router_1tp2 (line 41) | def get_router_1tp2():
  function get_router_2tp2 (line 54) | def get_router_2tp2():
  function test_router (line 76) | async def test_router(schedule: str = "round_robin", endpoints_config: s...

FILE: tests/python/serve/evaluate_engine.py
  function _parse_args (line 11) | def _parse_args():
  function generate_requests (line 25) | def generate_requests(
  function benchmark (line 40) | def benchmark(args: argparse.Namespace):

FILE: tests/python/serve/server/conftest.py
  function served_model (line 11) | def served_model() -> Tuple[str, str]:
  function launch_server (line 24) | def launch_server(served_model):  # pylint: disable=redefined-outer-name

FILE: tests/python/serve/server/test_embedding_server.py
  function _skip_if_no_model (line 56) | def _skip_if_no_model():
  function check_embedding_response (line 73) | def check_embedding_response(
  function expect_error (line 114) | def expect_error(response_str: str, msg_prefix: Optional[str] = None):
  function launch_embedding_server (line 129) | def launch_embedding_server():
  function client (line 199) | def client(launch_embedding_server):
  function test_models_endpoint (line 211) | def test_models_endpoint():
  function test_single_string_input (line 224) | def test_single_string_input(client):
  function test_batch_string_input (line 242) | def test_batch_string_input(client):
  function test_batch_index_ordering (line 249) | def test_batch_index_ordering(client):
  function test_cosine_similarity_via_endpoint (line 261) | def test_cosine_similarity_via_endpoint(client):
  function test_dimension_truncation (line 284) | def test_dimension_truncation(client):
  function test_base64_encoding (line 305) | def test_base64_encoding():
  function test_any_model_name_works_with_single_engine (line 330) | def test_any_model_name_works_with_single_engine():

FILE: tests/python/serve/server/test_server.py
  function is_json (line 53) | def is_json(s: str) -> bool:
  function is_json_prefix (line 61) | def is_json_prefix(s: str) -> bool:
  function check_openai_nonstream_response (line 76) | def check_openai_nonstream_response(
  function check_openai_stream_response (line 140) | def check_openai_stream_response(
  function expect_error (line 222) | def expect_error(response_str: str, msg_prefix: Optional[str] = None):
  function test_openai_v1_models (line 230) | def test_openai_v1_models(
  function test_openai_v1_completions (line 251) | def test_openai_v1_completions(
  function test_openai_v1_completions_openai_package (line 298) | def test_openai_v1_completions_openai_package(
  function test_openai_v1_completions_echo (line 341) | def test_openai_v1_completions_echo(
  function test_openai_v1_completions_suffix (line 391) | def test_openai_v1_completions_suffix(
  function test_openai_v1_completions_stop_str (line 442) | def test_openai_v1_completions_stop_str(
  function test_openai_v1_completions_temperature (line 492) | def test_openai_v1_completions_temperature(
  function test_openai_v1_completions_json (line 538) | def test_openai_v1_completions_json(
  function test_openai_v1_completions_json_schema (line 585) | def test_openai_v1_completions_json_schema(
  function test_openai_v1_completions_logit_bias (line 643) | def test_openai_v1_completions_logit_bias(
  function test_openai_v1_completions_presence_frequency_penalty (line 692) | def test_openai_v1_completions_presence_frequency_penalty(
  function test_openai_v1_completions_seed (line 738) | def test_openai_v1_completions_seed(
  function test_openai_v1_completions_prompt_overlong (line 774) | def test_openai_v1_completions_prompt_overlong(
  function test_openai_v1_completions_invalid_logprobs (line 808) | def test_openai_v1_completions_invalid_logprobs(
  function test_openai_v1_chat_completions_invalid_logprobs (line 833) | def test_openai_v1_chat_completions_invalid_logprobs(
  function test_openai_v1_completions_unsupported_args (line 867) | def test_openai_v1_completions_unsupported_args(
  function test_openai_v1_completions_request_cancellation (line 888) | def test_openai_v1_completions_request_cancellation(
  function test_openai_v1_chat_completions (line 947) | def test_openai_v1_chat_completions(
  function test_openai_v1_chat_completions_n (line 990) | def test_openai_v1_chat_completions_n(
  function test_openai_v1_chat_completions_openai_package (line 1036) | def test_openai_v1_chat_completions_openai_package(
  function test_openai_v1_chat_completions_max_tokens (line 1077) | def test_openai_v1_chat_completions_max_tokens(
  function test_openai_v1_chat_completions_json (line 1123) | def test_openai_v1_chat_completions_json(
  function test_openai_v1_chat_completions_json_schema (line 1170) | def test_openai_v1_chat_completions_json_schema(
  function test_openai_v1_chat_completions_ignore_eos (line 1229) | def test_openai_v1_chat_completions_ignore_eos(
  function test_openai_v1_chat_completions_system_prompt_wrong_pos (line 1276) | def test_openai_v1_chat_completions_system_prompt_wrong_pos(
  function test_debug_dump_event_trace (line 1312) | def test_debug_dump_event_trace(
  function test_metrics (line 1324) | def test_metrics(

FILE: tests/python/serve/server/test_server_function_call.py
  function check_openai_nonstream_response (line 19) | def check_openai_nonstream_response(
  function check_openai_stream_response (line 58) | def check_openai_stream_response(
  function test_openai_v1_chat_completion_function_call (line 157) | def test_openai_v1_chat_completion_function_call(

FILE: tests/python/serve/server/test_server_image.py
  function is_json_or_json_prefix (line 19) | def is_json_or_json_prefix(s: str) -> bool:
  function check_openai_nonstream_response (line 34) | def check_openai_nonstream_response(
  function check_openai_stream_response (line 90) | def check_openai_stream_response(
  function test_openai_v1_chat_completions (line 201) | def test_openai_v1_chat_completions(

FILE: tests/python/serve/test_embedding_engine.py
  function _skip_if_no_model (line 39) | def _skip_if_no_model():
  function embedding_engine (line 53) | def embedding_engine():
  function cosine_similarity (line 72) | def cosine_similarity(a, b):
  function test_engine_model_type (line 83) | def test_engine_model_type(embedding_engine):
  function test_engine_pooling_strategy (line 88) | def test_engine_pooling_strategy(embedding_engine):
  function test_single_text_shape (line 101) | def test_single_text_shape(embedding_engine):
  function test_single_text_unit_norm (line 109) | def test_single_text_unit_norm(embedding_engine):
  function test_batch_count (line 127) | def test_batch_count(embedding_engine):
  function test_batch_all_normalized (line 134) | def test_batch_all_normalized(embedding_engine):
  function test_batch_consistent_dimension (line 142) | def test_batch_consistent_dimension(embedding_engine):
  function test_cosine_similarity_ranking (line 160) | def test_cosine_similarity_ranking(embedding_engine):
  function test_deterministic_output (line 176) | def test_deterministic_output(embedding_engine):
  function test_async_embed (line 190) | def test_async_embed(embedding_engine):
  function test_empty_string (line 211) | def test_empty_string(embedding_engine):
  function test_long_text_decoder_chunked_prefill (line 229) | def test_long_text_decoder_chunked_prefill(embedding_engine):
  function _get_encoder_tokens (line 241) | def _get_encoder_tokens(embedding_engine, text):
  function test_long_text_encoder_truncation (line 255) | def test_long_text_encoder_truncation(embedding_engine):  # pylint: disa...
  function test_long_vs_short_semantic_quality (line 306) | def test_long_vs_short_semantic_quality(embedding_engine):
  function test_unicode_text (line 327) | def test_unicode_text(embedding_engine):

FILE: tests/python/serve/test_event_trace_recorder.py
  function test_event_trace_recorder (line 12) | def test_event_trace_recorder():

FILE: tests/python/serve/test_radix_tree.py
  function test_add (line 9) | def test_add():
  function test_remove (line 17) | def test_remove():
  function test_extend (line 52) | def test_extend():
  function test_fork (line 73) | def test_fork():
  function test_fork_2 (line 90) | def test_fork_2():
  function test_rollback (line 102) | def test_rollback():

FILE: tests/python/serve/test_serve_async_engine.py
  function test_engine_generate (line 25) | async def test_engine_generate(model: str):
  function test_chat_completion (line 83) | async def test_chat_completion(model: str):
  function test_chat_completion_non_stream (line 134) | async def test_chat_completion_non_stream(model: str):
  function test_completion (line 184) | async def test_completion(model: str):
  function test_completion_non_stream (line 234) | async def test_completion_non_stream(model: str):

FILE: tests/python/serve/test_serve_async_engine_spec.py
  function test_engine_generate (line 28) | async def test_engine_generate(model: str, small_model: str):

FILE: tests/python/serve/test_serve_engine.py
  function test_engine_generate (line 24) | def test_engine_generate(model: str):
  function test_chat_completion (line 63) | def test_chat_completion(model: str):
  function test_chat_completion_non_stream (line 108) | def test_chat_completion_non_stream(model: str):
  function test_completion (line 152) | def test_completion(model: str):
  function test_completion_non_stream (line 196) | def test_completion_non_stream(model: str):

FILE: tests/python/serve/test_serve_engine_grammar.py
  function test_batch_generation_with_grammar (line 20) | def test_batch_generation_with_grammar(model: str):
  function test_batch_generation_with_schema (line 97) | def test_batch_generation_with_schema(model: str):
  function test_batch_generation_jump_forward (line 204) | def test_batch_generation_jump_forward(model: str, jump_forward: bool = ...
  function run_async_engine (line 263) | async def run_async_engine(
  function test_async_engine (line 342) | def test_async_engine(

FILE: tests/python/serve/test_serve_engine_image.py
  function get_test_image (line 9) | def get_test_image(config) -> data.ImageData:
  function test_engine_generate (line 13) | def test_engine_generate():

FILE: tests/python/serve/test_serve_engine_mock.py
  function test_completion_api (line 20) | def test_completion_api(model: str):

FILE: tests/python/serve/test_serve_engine_prefix_cache.py
  function test_engine_system_prompt (line 20) | def test_engine_system_prompt(engine):
  function test_engine_multi_round (line 56) | def test_engine_multi_round(engine):
  function test_basic_engine_system_prompt (line 75) | def test_basic_engine_system_prompt(model: str):
  function test_basic_engine_multi_round (line 89) | def test_basic_engine_multi_round(model: str):
  function test_engine_spec_multi_round (line 103) | def test_engine_spec_multi_round(model: str, small_model: str):
  function test_engine_eagle_multi_round (line 119) | def test_engine_eagle_multi_round(model: str):

FILE: tests/python/serve/test_serve_engine_rnn.py
  function test_engine_generate (line 22) | def test_engine_generate() -> None:

FILE: tests/python/serve/test_serve_engine_spec.py
  function create_requests (line 26) | def create_requests(
  function test_engine_basic (line 59) | def test_engine_basic(model: str, small_model: str):
  function test_engine_eagle_basic (line 122) | def test_engine_eagle_basic(model: str):
  function test_engine_continuous_batching_1 (line 192) | def test_engine_continuous_batching_1(model: str, small_model: str):
  function test_engine_eagle_continuous_batching_1 (line 274) | def test_engine_eagle_continuous_batching_1(model: str):
  function compare_output_text (line 359) | def compare_output_text(output_text1, output_text2):
  function test_engine_generate (line 375) | def test_engine_generate(model: str, small_model: str, compare_precision...
  function test_engine_eagle_generate (line 433) | def test_engine_eagle_generate(model: str):
  function test_engine_efficiency (line 466) | def test_engine_efficiency(model: str):
  function test_engine_spec_efficiency (line 525) | def test_engine_spec_efficiency(model: str, small_model: str):
  function test_engine_eagle_spec_efficiency (line 591) | def test_engine_eagle_spec_efficiency(model: str):

FILE: tests/python/serve/test_serve_sync_engine.py
  function create_requests (line 26) | def create_requests(
  function test_engine_basic (line 57) | def test_engine_basic(model: str):
  function test_engine_continuous_batching_1 (line 116) | def test_engine_continuous_batching_1(model: str):
  function test_engine_continuous_batching_2 (line 196) | def test_engine_continuous_batching_2(model: str):
  function test_engine_continuous_batching_3 (line 276) | def test_engine_continuous_batching_3(model: str):
  function test_engine_generate (line 364) | def test_engine_generate(model: str):
  function test_engine_hybrid_prefill (line 389) | def test_engine_hybrid_prefill(model: str):

FILE: tests/python/support/test_auto_config.py
  function _create_json_file (line 17) | def _create_json_file(json_path, data):
  function test_detect_config (line 22) | def test_detect_config():
  function test_detect_config_fail (line 32) | def test_detect_config_fail():

FILE: tests/python/support/test_auto_weight.py
  function _create_json_file (line 18) | def _create_json_file(json_path, data):
  function test_detect_weight (line 36) | def test_detect_weight(weight_format, index_filename, result):
  function test_detect_weight_in_config_json (line 61) | def test_detect_weight_in_config_json(weight_format, index_filename, res...
  function test_detect_weight_same_dir_config_json (line 93) | def test_detect_weight_same_dir_config_json(weight_format, index_filenam...
  function test_find_weight_fail (line 107) | def test_find_weight_fail():

FILE: tests/python/support/test_cli_convert_weight.py
  function test_convert_weight_cli_passes_lora_adapter (line 13) | def test_convert_weight_cli_passes_lora_adapter(monkeypatch):

FILE: tests/python/support/test_convert_weight_lora_merge.py
  function test_resolve_base_model_dir (line 14) | def test_resolve_base_model_dir():
  function test_convert_weight_with_lora_uses_merged_source (line 26) | def test_convert_weight_with_lora_uses_merged_source(monkeypatch):
  function test_convert_weight_with_lora_rejects_awq (line 91) | def test_convert_weight_with_lora_rejects_awq():

FILE: tests/python/tokenizers/test_streamer.py
  function test_text_streamer (line 58) | def test_text_streamer(llama_tokenizer_path: str):  # pylint: disable=re...
  function stop_handler_process_tokens (line 68) | def stop_handler_process_tokens(
  function test_stop_str_handler_stop (line 84) | def test_stop_str_handler_stop(llama_tokenizer_path: str):  # pylint: di...
  function test_stop_str_handler_not_stop (line 100) | def test_stop_str_handler_not_stop(
  function test_stop_str_handler_return_cached_tokens (line 112) | def test_stop_str_handler_return_cached_tokens(
  function test_stop_str_handler_throughput (line 130) | def test_stop_str_handler_throughput(
  function test_text_streamer_emojis (line 171) | def test_text_streamer_emojis(

FILE: version.py
  function py_str (line 35) | def py_str(cstr):
  function git_describe_version (line 39) | def git_describe_version():
  function update (line 126) | def update(file_name, pattern, repl, dry_run=False):
  function sync_version (line 153) | def sync_version(pub_ver, local_ver, dry_run):
  function main (line 164) | def main():