SYMBOL INDEX (289 symbols across 28 files) FILE: app.py function load_model (line 50) | async def load_model( function root (line 63) | def root(): function predict (line 73) | async def predict( function main (line 166) | def main(): FILE: lightning_module.py class NougatModelPLModule (line 23) | class NougatModelPLModule(pl.LightningModule): method __init__ (line 24) | def __init__(self, config): method training_step (line 60) | def training_step(self, batch, batch_idx): method validation_step (line 78) | def validation_step(self, batch, batch_idx, dataset_idx=0): method on_validation_epoch_end (line 102) | def on_validation_epoch_end(self): method configure_optimizers (line 110) | def configure_optimizers(self): method cosine_scheduler (line 159) | def cosine_scheduler(optimizer, training_steps, warmup_steps): method exponential_scheduler (line 170) | def exponential_scheduler(optimizer, warmup_steps, lr, min_lr=5e-5, ga... method get_progress_bar_dict (line 182) | def get_progress_bar_dict(self): method on_save_checkpoint (line 190) | def on_save_checkpoint(self, checkpoint): class NougatDataPLModule (line 200) | class NougatDataPLModule(pl.LightningDataModule): method __init__ (line 201) | def __init__(self, config): method train_dataloader (line 211) | def train_dataloader(self): method val_dataloader (line 226) | def val_dataloader(self): method seed_worker (line 239) | def seed_worker(wordker_id): method ignore_none_collate (line 245) | def ignore_none_collate(batch): FILE: nougat/dataset/create_index.py function convert_pt2px (line 30) | def convert_pt2px(pt, dpi=96): function read_metadata (line 39) | def read_metadata(data: Dict) -> List[List[Dict]]: function index_paper (line 58) | def index_paper(directory: Path, args: argparse.Namespace): function create_index (line 102) | def create_index(args): FILE: nougat/dataset/gen_seek.py function get_args (line 13) | def get_args(): FILE: nougat/dataset/parser/document.py class Element (line 35) | class Element(Generic[EL]): method plaintext (line 48) | def plaintext(self): method append (line 51) | def append(self, child: EL) -> EL: method find_parent (line 56) | def find_parent(self, class_or_tuple: Type[T]) -> T: class UnknownElement (line 66) | class UnknownElement(Element): class TextElement (line 71) | class TextElement(Element): method plaintext (line 75) | def plaintext(self): method append (line 78) | def append(self, child: "Element"): class Math (line 83) | class Math(Element): class PlaintextMath (line 88) | class PlaintextMath(Math): class LatexMath (line 93) | class LatexMath(Math): method plaintext (line 98) | def plaintext(self): class Author (line 103) | class Author: class Link (line 110) | class Link(Element): class InlineRef (line 115) | class InlineRef(Element): method as_dict (line 118) | def as_dict(self): class Reference (line 125) | class Reference: method as_dict (line 150) | def as_dict(self): class SpanElement (line 163) | class SpanElement(Element): class Italic (line 168) | class Italic(SpanElement): class Bold (line 173) | class Bold(SpanElement): class Superscript (line 178) | class Superscript(SpanElement): class Subscript (line 183) | class Subscript(SpanElement): class Paragraph (line 188) | class Paragraph(Element): class TableRow (line 193) | class TableRow(Element): method add_cell (line 196) | def add_cell(self, cell: Element): method plaintext (line 202) | def plaintext(self): method add_cell (line 535) | def add_cell(self, cell: TableCell): method __iter__ (line 540) | def __iter__(self): method __len__ (line 543) | def __len__(self) -> int: method __bool__ (line 546) | def __bool__(self) -> bool: method cum_cell_widths (line 550) | def cum_cell_widths(self) -> List[int]: method cell_widths (line 554) | def cell_widths(self) -> List[int]: method width (line 558) | def width(self) -> int: method _hline (line 561) | def _hline(self, orientation: str) -> str: method hline_above (line 592) | def hline_above(self) -> str: method hline_below (line 596) | def hline_below(self) -> str: method plaintext (line 600) | def plaintext(self) -> str: class TableHead (line 207) | class TableHead(TableRow): class Table (line 212) | class Table(Element): method add_row (line 219) | def add_row(self, row: TableRow) -> TableRow: method plaintext (line 225) | def plaintext(self): class Equation (line 230) | class Equation(Element): class EquationList (line 235) | class EquationList(Element): method add_equation (line 238) | def add_equation(self, eqn: Equation) -> Equation: method plaintext (line 244) | def plaintext(self): class Algorithm (line 249) | class Algorithm(Element): method add_line (line 254) | def add_line(self, line: Element) -> Element: method plaintext (line 260) | def plaintext(self): class Definition (line 265) | class Definition(Element): method plaintext (line 270) | def plaintext(self): class DefinitionList (line 280) | class DefinitionList(Element): method add_item (line 295) | def add_item(self, item: Definition) -> Definition: method plaintext (line 301) | def plaintext(self): class Figure (line 310) | class Figure(Element): class Section (line 317) | class Section(Element): class SectionHeader (line 325) | class SectionHeader(Element): class ListItem (line 332) | class ListItem(Element): class ListContainer (line 337) | class ListContainer(Element): method add_item (line 342) | def add_item(self, item: ListItem) -> ListItem: method plaintext (line 348) | def plaintext(self): class Footnote (line 353) | class Footnote(Element): class Document (line 358) | class Document(Element, Reference): method add_reference (line 366) | def add_reference(self, reference): method add_inline_ref (line 369) | def add_inline_ref(self, in_ref): method set_bib (line 372) | def set_bib(self, reference): class Spec (line 377) | class Spec: method __hash__ (line 405) | def __hash__(self) -> int: method __eq__ (line 408) | def __eq__(self, __o: object) -> bool: method set_align (line 411) | def set_align(self, classes: List[str], style: Optional[str] = None) -... method set_border (line 439) | def set_border(self, classes: List[str]) -> None: method set_attrs (line 446) | def set_attrs(self, attrs: Dict[str, Any]) -> None: method __str__ (line 454) | def __str__(self) -> str: class TableCell (line 463) | class TableCell(Element): method __post_init__ (line 486) | def __post_init__(self, *args, **kwargs) -> None: method __hash__ (line 491) | def __hash__(self) -> int: method __eq__ (line 494) | def __eq__(self, __o: object) -> bool: method set_attrs (line 497) | def set_attrs(self, attrs: Dict[str, Any]) -> None: method plaintext (line 505) | def plaintext(self): class TableRow (line 512) | class TableRow(Element): method add_cell (line 196) | def add_cell(self, cell: Element): method plaintext (line 202) | def plaintext(self): method add_cell (line 535) | def add_cell(self, cell: TableCell): method __iter__ (line 540) | def __iter__(self): method __len__ (line 543) | def __len__(self) -> int: method __bool__ (line 546) | def __bool__(self) -> bool: method cum_cell_widths (line 550) | def cum_cell_widths(self) -> List[int]: method cell_widths (line 554) | def cell_widths(self) -> List[int]: method width (line 558) | def width(self) -> int: method _hline (line 561) | def _hline(self, orientation: str) -> str: method hline_above (line 592) | def hline_above(self) -> str: method hline_below (line 596) | def hline_below(self) -> str: method plaintext (line 600) | def plaintext(self) -> str: class Tabular (line 605) | class Tabular(Element): method add_row (line 622) | def add_row(self, row: TableRow) -> TableRow: method width (line 628) | def width(self) -> int: method cols (line 635) | def cols(self) -> List[List[TableCell]]: method _square_table (line 643) | def _square_table(self) -> None: method get_table_spec (line 660) | def get_table_spec(self) -> str: method plaintext (line 696) | def plaintext(self): class Table (line 701) | class Table(Element): method add_row (line 219) | def add_row(self, row: TableRow) -> TableRow: method plaintext (line 225) | def plaintext(self): FILE: nougat/dataset/parser/html2md.py function check_file_path (line 17) | def check_file_path(paths: List[Path], wdir: Optional[Path] = None) -> L... FILE: nougat/dataset/parser/latexml_parser.py function printerr (line 17) | def printerr(*args, **kwargs): function is_wrapper_element (line 43) | def is_wrapper_element(element: BeautifulSoup) -> bool: function ignore_element (line 47) | def ignore_element(element: BeautifulSoup) -> bool: function _get_classes (line 51) | def _get_classes(el: BeautifulSoup) -> Set[str]: function _detach_selected (line 60) | def _detach_selected(element: BeautifulSoup, selector: str) -> None: function parse_latexml_authors (line 65) | def parse_latexml_authors(ltx_authors: BeautifulSoup) -> List[Author]: function parse_latexml_citations (line 71) | def parse_latexml_citations(cite: BeautifulSoup, parent: Element) -> None: function _clean_html_whitespace (line 89) | def _clean_html_whitespace(text: str) -> str: function parse_latexml_children (line 98) | def parse_latexml_children(html: BeautifulSoup, parent: Element) -> None: function parse_latexml_references (line 420) | def parse_latexml_references(html: BeautifulSoup, doc: Document) -> None: function parse_latexml (line 429) | def parse_latexml( FILE: nougat/dataset/parser/markdown.py function remove_trailing_whitespace (line 39) | def remove_trailing_whitespace(parts: List[str]) -> None: function remove_line_breaks (line 48) | def remove_line_breaks(parts: List[str]): function leading_trailing_whitespace (line 55) | def leading_trailing_whitespace( function latex_escape (line 84) | def latex_escape(string: str) -> str: function is_empty (line 88) | def is_empty(content: List) -> bool: function format_element (line 98) | def format_element( function format_iterator (line 330) | def format_iterator( function format_children (line 359) | def format_children( function format_document (line 367) | def format_document( FILE: nougat/dataset/pdffigures.py function call_pdffigures (line 19) | def call_pdffigures( FILE: nougat/dataset/rasterize.py function rasterize_paper (line 18) | def rasterize_paper( FILE: nougat/dataset/split_htmls_to_pages.py function process_paper (line 29) | def process_paper( function process_htmls (line 130) | def process_htmls(args): FILE: nougat/dataset/split_md_to_pages.py function ratio (line 37) | def ratio(*args, **kwargs): class BagOfWords (line 41) | class BagOfWords: method __init__ (line 51) | def __init__( method train (line 60) | def train(self): method __call__ (line 77) | def __call__( function remove_short_seqs (line 90) | def remove_short_seqs(seqs: List[str], minimum: int = 10) -> List[str]: function find_figures (line 99) | def find_figures( function flatten (line 136) | def flatten(l: List) -> List: function get_doc_text (line 140) | def get_doc_text( function clean_pdf_text (line 176) | def clean_pdf_text(pages: List[List[str]], num_words: int = 10) -> List[... function split_markdown (line 239) | def split_markdown( FILE: nougat/dataset/splitter.py function ratio (line 18) | def ratio(*args, **kwargs): function reverse (line 22) | def reverse(lst: List[str]) -> List[str]: function get_first_last (line 37) | def get_first_last( function get_glob_index (line 66) | def get_glob_index( class Splitter (line 84) | class Splitter: method __init__ (line 87) | def __init__(self, paragraphs: List[str]) -> None: method remove_special_chars (line 95) | def remove_special_chars(string: str) -> str: method count_special_chars (line 129) | def count_special_chars(string: str, char_ind: int) -> int: method split_first_last (line 213) | def split_first_last( method split (line 280) | def split( method _find_match (line 315) | def _find_match( method _fuzzy (line 325) | def _fuzzy( method fuzzysearch (line 338) | def fuzzysearch( method evaluate_split (line 350) | def evaluate_split(self, page_num: int, page_content: str) -> float: FILE: nougat/dataset/staircase.py function stair_func (line 17) | def stair_func(x: np.ndarray, thresholds: np.ndarray) -> np.ndarray: function compute_gini (line 21) | def compute_gini(labels: np.ndarray) -> float: function compute_binary_gini (line 29) | def compute_binary_gini(labels: np.ndarray) -> float: function gini_impurity (line 37) | def gini_impurity( function step_impurity (line 87) | def step_impurity( class PaddedArray (line 112) | class PaddedArray: method __init__ (line 121) | def __init__( method __len__ (line 129) | def __len__(self): method _process_index (line 132) | def _process_index(self, index): method __getitem__ (line 147) | def __getitem__(self, index): method __setitem__ (line 151) | def __setitem__(self, index, value): method copy (line 154) | def copy(self): method toarray (line 157) | def toarray(self): class Staircase (line 161) | class Staircase: method __init__ (line 170) | def __init__(self, domain: int, n_classes: int) -> None: method statistic_fit (line 180) | def statistic_fit( method fit (line 216) | def fit( method score (line 299) | def score(self): method predict (line 307) | def predict(self, x: np.ndarray) -> np.ndarray: method __call__ (line 310) | def __call__(self, *args): method get_boundaries (line 313) | def get_boundaries(self) -> np.ndarray: FILE: nougat/dataset/utils/latex_conversion.py function remove_style (line 60) | def remove_style(string: str) -> str: function replace_duplicate_definitions (line 69) | def replace_duplicate_definitions(string: str) -> str: function unicode_to_latex (line 76) | def unicode_to_latex(s: str) -> str: function remove_line_breaks (line 108) | def remove_line_breaks(string: str) -> str: function normalize_tex (line 113) | def normalize_tex(math: str, inline: bool) -> str: FILE: nougat/dataset/utils/pdf_text_extract.py function replace_ligatures (line 18) | def replace_ligatures(text: str) -> str: function remove_hyphens (line 36) | def remove_hyphens(text: str) -> str: function dehyphenate (line 59) | def dehyphenate(lines: List[str], line_no: int) -> List[str]: function get_pages (line 68) | def get_pages(pdf: str) -> List[str]: function get_paragraphs (line 84) | def get_paragraphs(pdf: str) -> List[List[str]]: FILE: nougat/dataset/utils/utils.py function remove_pretty_linebreaks (line 10) | def remove_pretty_linebreaks(string: str) -> str: FILE: nougat/metrics.py function compute_metrics (line 27) | def compute_metrics(pred, gt, minlen=4): function get_parser (line 47) | def get_parser(): function split_text (line 63) | def split_text(pages: List[str]): function get_metrics (line 86) | def get_metrics(gt: List[str], pred: List[str], pool: bool = True): FILE: nougat/model.py class SwinEncoder (line 37) | class SwinEncoder(nn.Module): method __init__ (line 52) | def __init__( method forward (line 116) | def forward(self, x: torch.Tensor) -> torch.Tensor: method crop_margin (line 127) | def crop_margin(img: Image.Image) -> Image.Image: method to_tensor (line 142) | def to_tensor(self): method prepare_input (line 148) | def prepare_input( class BARTDecoder (line 191) | class BARTDecoder(nn.Module): method __init__ (line 207) | def __init__( method add_special_tokens (line 271) | def add_special_tokens(self, list_of_tokens: List[str]): method prepare_inputs_for_inference (line 281) | def prepare_inputs_for_inference( method forward (line 312) | def forward( method resize_bart_abs_pos_emb (line 337) | def resize_bart_abs_pos_emb(weight: torch.Tensor, max_length: int) -> ... class NougatConfig (line 359) | class NougatConfig(PretrainedConfig): method __init__ (line 385) | def __init__( class RunningVarTorch (line 418) | class RunningVarTorch: method __init__ (line 419) | def __init__(self, L=15, norm=False): method push (line 424) | def push(self, x: torch.Tensor): method variance (line 433) | def variance(self): class StoppingCriteriaScores (line 442) | class StoppingCriteriaScores(StoppingCriteria): method __init__ (line 443) | def __init__(self, threshold: float = 0.015, window_size: int = 200): method __call__ (line 454) | def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTen... function batch (line 477) | def batch(l, b=15): function subdiv (line 484) | def subdiv(l, b=10): class NougatModel (line 491) | class NougatModel(PreTrainedModel): method __init__ (line 501) | def __init__(self, config: NougatConfig): method forward (line 521) | def forward( method _init_weights (line 544) | def _init_weights(self, *args, **kwargs): method inference (line 547) | def inference( method from_pretrained (line 671) | def from_pretrained( FILE: nougat/postprocessing.py function ratio (line 18) | def ratio(*args, **kwargs): function markdown_compatible (line 25) | def markdown_compatible(s: str) -> str: function find_next_punctuation (line 70) | def find_next_punctuation(s: str, start_inx=0): function find_last_punctuation (line 86) | def find_last_punctuation(s: str, start_inx=0): function truncate_repetitions (line 102) | def truncate_repetitions(s: str, min_len=30): function close_envs (line 168) | def close_envs(s: str) -> str: function remove_numbers (line 178) | def remove_numbers(lines): function get_slices (line 190) | def get_slices(lines, clean_lines): function remove_slice_from_lines (line 233) | def remove_slice_from_lines(lines, clean_text, sli) -> str: function remove_hallucinated_references (line 301) | def remove_hallucinated_references(text: str) -> str: function postprocess_single (line 332) | def postprocess_single(generation: str, markdown_fix: bool = True) -> str: function postprocess (line 487) | def postprocess( FILE: nougat/transforms.py function alb_wrapper (line 16) | def alb_wrapper(transform): class Erosion (line 23) | class Erosion(alb.ImageOnlyTransform): method __init__ (line 41) | def __init__(self, scale, always_apply=False, p=0.5): method apply (line 49) | def apply(self, img, **params): class Dilation (line 57) | class Dilation(alb.ImageOnlyTransform): method __init__ (line 75) | def __init__(self, scale, always_apply=False, p=0.5): method apply (line 83) | def apply(self, img, **params): class Bitmap (line 91) | class Bitmap(alb.ImageOnlyTransform): method __init__ (line 107) | def __init__(self, value=0, lower=200, always_apply=False, p=0.5): method apply (line 112) | def apply(self, img, **params): FILE: nougat/utils/checkpoint.py function download_as_bytes_with_progress (line 20) | def download_as_bytes_with_progress(url: str, name: str = None) -> bytes: function download_checkpoint (line 49) | def download_checkpoint(checkpoint: Path, model_tag: str = MODEL_TAG): function torch_hub (line 74) | def torch_hub(model_tag: Optional[str] = MODEL_TAG) -> Path: function get_checkpoint (line 85) | def get_checkpoint( FILE: nougat/utils/dataset.py class ImageDataset (line 25) | class ImageDataset(torch.utils.data.Dataset): method __init__ (line 40) | def __init__(self, img_list, prepare: Callable): method __len__ (line 45) | def __len__(self): method ignore_none_collate (line 49) | def ignore_none_collate(batch): method __getitem__ (line 60) | def __getitem__(self, idx): class LazyDataset (line 68) | class LazyDataset(Dataset): method __init__ (line 83) | def __init__(self, pdf, prepare: Callable, pages: Optional[List[int]] ... method __len__ (line 91) | def __len__(self): method __getitem__ (line 94) | def __getitem__(self, i): method ignore_none_collate (line 103) | def ignore_none_collate(batch): class SciPDFDataset (line 125) | class SciPDFDataset(Dataset): method __init__ (line 144) | def __init__( method __len__ (line 172) | def __len__(self) -> int: method __getitem__ (line 175) | def __getitem__(self, index: int) -> Dict: method __iter__ (line 203) | def __iter__(self): class NougatDataset (line 208) | class NougatDataset(Dataset): method __init__ (line 214) | def __init__( method __len__ (line 234) | def __len__(self) -> int: method __getitem__ (line 237) | def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]: FILE: nougat/utils/device.py function default_batch_size (line 11) | def default_batch_size(): function move_to_device (line 28) | def move_to_device(model, bf16: bool = True, cuda: bool = True): FILE: predict.py function get_args (line 28) | def get_args(): function main (line 125) | def main(): FILE: setup.py function read_version (line 14) | def read_version(): function read_long_description (line 22) | def read_long_description(): FILE: test.py function test (line 27) | def test(args): FILE: train.py class CustomCheckpointIO (line 42) | class CustomCheckpointIO(CheckpointIO): method save_checkpoint (line 62) | def save_checkpoint(self, checkpoint, path, storage_options=None): method load_checkpoint (line 73) | def load_checkpoint(self, path, storage_options=None): method remove_checkpoint (line 101) | def remove_checkpoint(self, path) -> None: class GradNormCallback (line 105) | class GradNormCallback(Callback): method gradient_norm (line 111) | def gradient_norm(model): method on_after_backward (line 120) | def on_after_backward(self, trainer, model): function save_config_file (line 125) | def save_config_file(config, path): function train (line 135) | def train(config):