Full Code of Possum/LumaLocaleSwitcher for AI

master 556cfd184e74 cached
44 files
394.5 KB
122.5k tokens
356 symbols
1 requests
Download .txt
Showing preview only (411K chars total). Download the full file or copy to clipboard to get everything.
Repository: Possum/LumaLocaleSwitcher
Branch: master
Commit: 556cfd184e74
Files: 44
Total size: 394.5 KB

Directory structure:
gitextract_a0o9p4yg/

├── .gitignore
├── .gitmodules
├── LICENSE.txt
├── Makefile
├── README.md
├── meta/
│   ├── banner.xcf
│   └── icon.xcf
├── romfs/
│   ├── logo.xcf
│   └── textcolor.cfg
└── source/
    ├── core/
    │   ├── default.v.pica
    │   ├── screen.c
    │   ├── screen.h
    │   ├── util.c
    │   └── util.h
    ├── locale.c
    ├── locale.h
    ├── main.c
    ├── stb_image/
    │   ├── stb_image.c
    │   └── stb_image.h
    └── ui/
        ├── error.c
        ├── error.h
        ├── info.c
        ├── info.h
        ├── list.c
        ├── list.h
        ├── mainmenu.c
        ├── mainmenu.h
        ├── prompt.c
        ├── prompt.h
        ├── section/
        │   ├── action/
        │   │   ├── action.h
        │   │   ├── change_language.c
        │   │   ├── change_locale_dir.c
        │   │   ├── change_region.c
        │   │   ├── pick_locale_dir.c
        │   │   └── use_system_default.c
        │   ├── config.c
        │   ├── nuke.c
        │   ├── section.h
        │   ├── task/
        │   │   ├── listtitles.c
        │   │   ├── task.c
        │   │   └── task.h
        │   └── titles.c
        ├── ui.c
        └── ui.h

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
.idea
CMakeLists.txt

# Vim swap files
.*.sw*

build
output


================================================
FILE: .gitmodules
================================================
[submodule "buildtools"]
	path = buildtools
	url = git://github.com/Steveice10/buildtools


================================================
FILE: LICENSE.txt
================================================
Copyright (C) 2016 Possum
Copyright (C) 2015 Steveice10

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


================================================
FILE: Makefile
================================================
# TARGET #

TARGET := 3DS
LIBRARY := 0

ifeq ($(TARGET),3DS)
    ifeq ($(strip $(DEVKITPRO)),)
        $(error "Please set DEVKITPRO in your environment. export DEVKITPRO=<path to>devkitPro")
    endif

    ifeq ($(strip $(DEVKITARM)),)
        $(error "Please set DEVKITARM in your environment. export DEVKITARM=<path to>devkitARM")
    endif
endif

# COMMON CONFIGURATION #

NAME := Luma Locale Switcher

BUILD_DIR := build
OUTPUT_DIR := output
INCLUDE_DIRS := include
SOURCE_DIRS := source

BUILD_FILTER := source/svchax/test/test.c

EXTRA_OUTPUT_FILES :=

LIBRARY_DIRS := $(DEVKITPRO)/libctru
LIBRARIES := citro3d ctru m

VERSION := $(shell git describe --tags --abbrev=0)
BUILD_FLAGS := -DLIBKHAX_AS_LIB -DVERSION_STRING="\"$(VERSION)\""
RUN_FLAGS :=

ifeq ($(LUMA_NIGHTLY),1)
    BUILD_FLAGS += -DLUMA_NIGHTLY
endif

# 3DS CONFIGURATION #

TITLE := $(NAME)
DESCRIPTION := Locale Switcher
AUTHOR := Possum
PRODUCT_CODE := LumaLocale
UNIQUE_ID := 0xA0CA1

SYSTEM_MODE := 64MB
SYSTEM_MODE_EXT := Legacy

ICON_FLAGS :=

ROMFS_DIR := romfs
BANNER_AUDIO := meta/audio.wav
BANNER_IMAGE := meta/banner.png
ICON := meta/icon.png

# INTERNAL #

include buildtools/make_base

3DS_IP     := CHANGEME
3DS_PORT   := 5000
SERVEFILES := servefiles.py

network_install: output/LumaLocaleSwitcher-$(VERSION).cia
	$(SERVEFILES) $(3DS_IP) output/LumaLocaleSwitcher-$(VERSION).cia

QRENCODE     := qrencode
PROJECT_NAME := LumaLocaleSwitcher
qrencode:
	$(QRENCODE) -s 5 -o qr/$(VERSION).png https://github.com/$(AUTHOR)/$(PROJECT_NAME)/releases/download/$(VERSION)/LumaLocaleSwitcher-$(VERSION).cia
	rm qr/latest.png
	ln -s $(VERSION).png qr/latest.png


================================================
FILE: README.md
================================================
# LumaLocaleSwitcher

LumaLocaleSwitcher can be used to manage per-title locales for Luma3ds (and
compatible forks such as SaltFW).

## Status: Legacy / Archived.

This project is no longer maintained. It remains here as a historical reference for the 3DS homebrew community.

## Installation

Install the .cia file using a CIA installer such as
[FBI](https://github.com/Steveice10/FBI/releases).

If you are using the latest *stable* version of Luma3DS (6.6 at this time), scan
the QR code below for the latest release (0.04):

![QR](https://raw.githubusercontent.com/Possum/LumaLocaleSwitcher/master/qr/0.04.png)

If you are using the *nightly* version of Luma3DS (or any version newer than
6.6), scan the QR code below for the latest nightly compatible release
(0.04-NIGHTLY):

![QR](https://raw.githubusercontent.com/Possum/LumaLocaleSwitcher/master/qr/0.04-NIGHTLY.png)


## Compiling

Requires [devkitARM](http://sourceforge.net/projects/devkitpro/files/devkitARM/)
and [citro3d](https://github.com/fincs/citro3d) to build. You also need
[ctrulib](https://github.com/smealum/ctrulib).

To build, just call `git submodule sync` (to pull in
[svchax](https://github.com/aliaspider/svchax) and
[buildtools](git://github.com/Steveice10/buildtools)) and `make` and you should
be good.

If you are running nightly Luma3DS, you can run `make LUMA_NIGHTLY=1` to get
sane defaults.

##  Set up

If you use Luma3DS and downloaded the appropriate verison (*nightly* or
*stable*) you can just select "Titles" and make your changes.  Otherwise, you
can choose from the list in the app or write your own custom path to
/locales.conf

## Known Limitations

Currently requires the parent path of the locales directory to exist (i.e., it
will not create the directory for you). If it fails to set the region for a
title, that is the most likely reason. If you are using Luma, and everything is
properly set up, this shouldn't affect you, since the "/luma" directory should
exist anyway.

There is no file chooser built in, so if you need to use a custom directory for
some reason, you will have to write the path to /locales.conf manually.

Changing the region emulation is not always a magic bullet, due to the way the
3ds services are set up. In particular, online play may not function even when
the region emulation is set up properly. If you are receiving 003-0399, either
give up or use a search engine to look for workarounds.

## Bugs

Please report bugs at https://github.com/Possum/LumaLocaleSwitcher/

## Credits

This is largely based on [FBI](https://github.com/Steveice10/FBI) by
[Steveice10](https://github.com/Steveice10).

This also would not be possible without
[AuroraWright](https://github.com/AuroraWright)'s work on the region emulation
feature in [Luma3DS](https://github.com/AuroraWright/Luma3DS/).

### Contributors

* [Possum](https://github.com/Possum) - project maintainer
* CouldBeWolf - beta tester, bug reports
* ericjwg - patches, testing, and building

As always, big thanks to the 3DS homebrew/CFW community!

## License

This software is provided under the MIT license. Please see LICENSE.txt for full
details for the license.


================================================
FILE: romfs/textcolor.cfg
================================================
text=FF000000
nand=FF0000FF
sd=FF00FF00
gamecard=FFFF0000
dstitle=FF82004B
important=FF0000FF
outdated=FF0000FF


================================================
FILE: source/core/default.v.pica
================================================
; Uniforms
.fvec projection[4]

; Constants
.constf myconst(0.0, 1.0, -1.0, 0.1)
.constf myconst2(0.3, 0.0, 0.0, 0.0)
.alias  zeros myconst.xxxx ; Vector full of zeros
.alias  ones  myconst.yyyy ; Vector full of ones

; Outputs
.out outpos position
.out outtc0 texcoord0

; Inputs (defined as aliases for convenience)
.alias inpos v0
.alias intex v1

.bool test

.proc main
	; Force the w component of inpos to be 1.0
	mov r0.xyz, inpos
	mov r0.w,   ones

	; outpos = projectionMatrix * inpos
	dp4 outpos.x, projection[0], r0
	dp4 outpos.y, projection[1], r0
	dp4 outpos.z, projection[2], r0
	dp4 outpos.w, projection[3], r0

 	mov outtc0, intex

	end
.end

================================================
FILE: source/core/screen.c
================================================
#include <errno.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

#include <3ds.h>
#include <citro3d.h>

#include "../stb_image/stb_image.h"
#include "screen.h"
#include "util.h"

#include "default_shbin.h"

static GX_TRANSFER_FORMAT gpu_to_gx_format[13] = {
        GX_TRANSFER_FMT_RGBA8,
        GX_TRANSFER_FMT_RGB8,
        GX_TRANSFER_FMT_RGB5A1,
        GX_TRANSFER_FMT_RGB565,
        GX_TRANSFER_FMT_RGBA4,
        GX_TRANSFER_FMT_RGBA8, // Unsupported
        GX_TRANSFER_FMT_RGBA8, // Unsupported
        GX_TRANSFER_FMT_RGBA8, // Unsupported
        GX_TRANSFER_FMT_RGBA8, // Unsupported
        GX_TRANSFER_FMT_RGBA8, // Unsupported
        GX_TRANSFER_FMT_RGBA8, // Unsupported
        GX_TRANSFER_FMT_RGBA8, // Unsupported
        GX_TRANSFER_FMT_RGBA8  // Unsupported
};

static bool c3d_initialized;

static bool shader_initialized;
static DVLB_s* dvlb;
static shaderProgram_s program;

static C3D_RenderTarget* target_top;
static C3D_RenderTarget* target_bottom;
static C3D_Mtx projection_top;
static C3D_Mtx projection_bottom;

static C3D_Tex* glyph_sheets;

static u8 base_alpha = 0xFF;

static u32 color_config[MAX_COLORS] = {0xFF000000};

static struct {
    bool allocated;
    C3D_Tex tex;
    u32 width;
    u32 height;
} textures[MAX_TEXTURES];

static FILE* screen_open_resource(const char* path) {
    u32 realPathSize = strlen(path) + 17;
    char realPath[realPathSize];

    snprintf(realPath, realPathSize, "sdmc:/fbi/theme/%s", path);
    FILE* fd = fopen(realPath, "rb");

    if(fd != NULL) {
        return fd;
    } else {
        snprintf(realPath, realPathSize, "romfs:/%s", path);

        return fopen(realPath, "rb");
    }
}

static void screen_set_blend(u32 color, bool rgb, bool alpha) {
    C3D_TexEnv* env = C3D_GetTexEnv(0);
    if(env == NULL) {
        util_panic("Failed to retrieve combiner settings.");
        return;
    }

    if(rgb) {
        C3D_TexEnvSrc(env, C3D_RGB, GPU_CONSTANT, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR);
        C3D_TexEnvFunc(env, C3D_RGB, GPU_REPLACE);
    } else {
        C3D_TexEnvSrc(env, C3D_RGB, GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR);
        C3D_TexEnvFunc(env, C3D_RGB, GPU_REPLACE);
    }

    if(alpha) {
        C3D_TexEnvSrc(env, C3D_Alpha, GPU_TEXTURE0, GPU_CONSTANT, GPU_PRIMARY_COLOR);
        C3D_TexEnvFunc(env, C3D_Alpha, GPU_MODULATE);
    } else {
        C3D_TexEnvSrc(env, C3D_Alpha, GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR);
        C3D_TexEnvFunc(env, C3D_Alpha, GPU_REPLACE);
    }

    C3D_TexEnvColor(env, color);
}

void screen_init() {
    if(!C3D_Init(C3D_DEFAULT_CMDBUF_SIZE * 4)) {
        util_panic("Failed to initialize the GPU.");
        return;
    }

    c3d_initialized = true;

    u32 displayFlags = GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGB8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO);

    target_top = C3D_RenderTargetCreate(TOP_SCREEN_HEIGHT, TOP_SCREEN_WIDTH, GPU_RB_RGB8, 0);
    if(target_top == NULL) {
        util_panic("Failed to initialize the top screen target.");
        return;
    }

    C3D_RenderTargetSetOutput(target_top, GFX_TOP, GFX_LEFT, displayFlags);
    C3D_RenderTargetSetClear(target_top, C3D_CLEAR_ALL, 0, 0);

    target_bottom = C3D_RenderTargetCreate(BOTTOM_SCREEN_HEIGHT, BOTTOM_SCREEN_WIDTH, GPU_RB_RGB8, 0);
    if(target_bottom == NULL) {
        util_panic("Failed to initialize the bottom screen target.");
        return;
    }

    C3D_RenderTargetSetOutput(target_bottom, GFX_BOTTOM, GFX_LEFT, displayFlags);
    C3D_RenderTargetSetClear(target_bottom, C3D_CLEAR_ALL, 0, 0);

    Mtx_OrthoTilt(&projection_top, 0.0, TOP_SCREEN_WIDTH, TOP_SCREEN_HEIGHT, 0.0, 0.0, 1.0, true);
    Mtx_OrthoTilt(&projection_bottom, 0.0, BOTTOM_SCREEN_WIDTH, BOTTOM_SCREEN_HEIGHT, 0.0, 0.0, 1.0, true);

    dvlb = DVLB_ParseFile((u32*) default_shbin, default_shbin_len);
    if(dvlb == NULL) {
        util_panic("Failed to parse shader.");
        return;
    }

    Result progInitRes = shaderProgramInit(&program);
    if(R_FAILED(progInitRes)) {
        util_panic("Failed to initialize shader program: 0x%08lX", progInitRes);
        return;
    }

    shader_initialized = true;

    Result progSetVshRes = shaderProgramSetVsh(&program, &dvlb->DVLE[0]);
    if(R_FAILED(progSetVshRes)) {
        util_panic("Failed to set up vertex shader: 0x%08lX", progInitRes);
        return;
    }

    C3D_BindProgram(&program);

    C3D_AttrInfo* attrInfo = C3D_GetAttrInfo();
    if(attrInfo == NULL) {
        util_panic("Failed to retrieve attribute info.");
        return;
    }

    AttrInfo_Init(attrInfo);
    AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3);
    AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 2);

    C3D_DepthTest(true, GPU_GEQUAL, GPU_WRITE_ALL);

    screen_set_blend(0, false, false);

    Result fontMapRes = fontEnsureMapped();
    if(R_FAILED(fontMapRes)) {
        util_panic("Failed to map system font: 0x%08lX", fontMapRes);
        return;
    }

    TGLP_s* glyphInfo = fontGetGlyphInfo();
    glyph_sheets = calloc(glyphInfo->nSheets, sizeof(C3D_Tex));
    if(glyph_sheets == NULL) {
        util_panic("Failed to allocate font glyph texture data.");
        return;
    }

    for(int i = 0; i < glyphInfo->nSheets; i++) {
        C3D_Tex* tex = &glyph_sheets[i];
        tex->data = fontGetGlyphSheetTex(i);
        tex->fmt = (GPU_TEXCOLOR) glyphInfo->sheetFmt;
        tex->size = glyphInfo->sheetSize;
        tex->width = glyphInfo->sheetWidth;
        tex->height = glyphInfo->sheetHeight;
        tex->param = GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_MIN_FILTER(GPU_LINEAR) | GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_EDGE) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_EDGE);
    }

    FILE* fd = screen_open_resource("textcolor.cfg");
    if(fd == NULL) {
        util_panic("Failed to open text color config: %s\n", strerror(errno));
        return;
    }

    char line[128];
    while(fgets(line, sizeof(line), fd) != NULL) {
        char* newline = strchr(line, '\n');
        if(newline != NULL) {
            *newline = '\0';
        }

        char* equals = strchr(line, '=');
        if(equals != NULL) {
            char key[64] = {'\0'};
            char value[64] = {'\0'};

            strncpy(key, line, equals - line);
            strncpy(value, equals + 1, strlen(equals) - 1);

            u32 color = strtoul(value, NULL, 16);

            if(strcasecmp(key, "text") == 0) {
                color_config[COLOR_TEXT] = color;
            } else if(strcasecmp(key, "nand") == 0) {
                color_config[COLOR_NAND] = color;
            } else if(strcasecmp(key, "sd") == 0) {
                color_config[COLOR_SD] = color;
            } else if(strcasecmp(key, "gamecard") == 0) {
                color_config[COLOR_GAME_CARD] = color;
            } else if(strcasecmp(key, "dstitle") == 0) {
                color_config[COLOR_DS_TITLE] = color;
            } else if(strcasecmp(key, "important") == 0) {
                color_config[COLOR_IMPORTANT] = color;
            } else if(strcasecmp(key, "outdated") == 0) {
                color_config[COLOR_OUTDATED] = color;
            }
        }
    }

    fclose(fd);

    screen_load_texture_file(TEXTURE_BOTTOM_SCREEN_BG, "bottom_screen_bg.png", true);
    screen_load_texture_file(TEXTURE_BOTTOM_SCREEN_TOP_BAR, "bottom_screen_top_bar.png", true);
    screen_load_texture_file(TEXTURE_BOTTOM_SCREEN_TOP_BAR_SHADOW, "bottom_screen_top_bar_shadow.png", true);
    screen_load_texture_file(TEXTURE_BOTTOM_SCREEN_BOTTOM_BAR, "bottom_screen_bottom_bar.png", true);
    screen_load_texture_file(TEXTURE_BOTTOM_SCREEN_BOTTOM_BAR_SHADOW, "bottom_screen_bottom_bar_shadow.png", true);
    screen_load_texture_file(TEXTURE_TOP_SCREEN_BG, "top_screen_bg.png", true);
    screen_load_texture_file(TEXTURE_TOP_SCREEN_TOP_BAR, "top_screen_top_bar.png", true);
    screen_load_texture_file(TEXTURE_TOP_SCREEN_TOP_BAR_SHADOW, "top_screen_top_bar_shadow.png", true);
    screen_load_texture_file(TEXTURE_TOP_SCREEN_BOTTOM_BAR, "top_screen_bottom_bar.png", true);
    screen_load_texture_file(TEXTURE_TOP_SCREEN_BOTTOM_BAR_SHADOW, "top_screen_bottom_bar_shadow.png", true);
    screen_load_texture_file(TEXTURE_LOGO, "logo.png", true);
    screen_load_texture_file(TEXTURE_SELECTION_OVERLAY, "selection_overlay.png", true);
    screen_load_texture_file(TEXTURE_SCROLL_BAR, "scroll_bar.png", true);
    screen_load_texture_file(TEXTURE_BUTTON_SMALL, "button_small.png", true);
    screen_load_texture_file(TEXTURE_BUTTON_LARGE, "button_large.png", true);
    screen_load_texture_file(TEXTURE_PROGRESS_BAR_BG, "progress_bar_bg.png", true);
    screen_load_texture_file(TEXTURE_PROGRESS_BAR_CONTENT, "progress_bar_content.png", true);
    screen_load_texture_file(TEXTURE_META_INFO_BOX, "meta_info_box.png", true);
    screen_load_texture_file(TEXTURE_META_INFO_BOX_SHADOW, "meta_info_box_shadow.png", true);
    screen_load_texture_file(TEXTURE_BATTERY_CHARGING, "battery_charging.png", true);
    screen_load_texture_file(TEXTURE_BATTERY_0, "battery0.png", true);
    screen_load_texture_file(TEXTURE_BATTERY_1, "battery1.png", true);
    screen_load_texture_file(TEXTURE_BATTERY_2, "battery2.png", true);
    screen_load_texture_file(TEXTURE_BATTERY_3, "battery3.png", true);
    screen_load_texture_file(TEXTURE_BATTERY_4, "battery4.png", true);
    screen_load_texture_file(TEXTURE_BATTERY_5, "battery5.png", true);
    screen_load_texture_file(TEXTURE_WIFI_DISCONNECTED, "wifi_disconnected.png", true);
    screen_load_texture_file(TEXTURE_WIFI_0, "wifi0.png", true);
    screen_load_texture_file(TEXTURE_WIFI_1, "wifi1.png", true);
    screen_load_texture_file(TEXTURE_WIFI_2, "wifi2.png", true);
    screen_load_texture_file(TEXTURE_WIFI_3, "wifi3.png", true);
}

void screen_exit() {
    for(u32 id = 0; id < MAX_TEXTURES; id++) {
        screen_unload_texture(id);
    }

    if(glyph_sheets != NULL) {
        free(glyph_sheets);
        glyph_sheets = NULL;
    }

    if(shader_initialized) {
        shaderProgramFree(&program);
        shader_initialized = false;
    }

    if(dvlb != NULL) {
        DVLB_Free(dvlb);
        dvlb = NULL;
    }

    if(target_top != NULL) {
        C3D_RenderTargetDelete(target_top);
        target_top = NULL;
    }

    if(target_bottom != NULL) {
        C3D_RenderTargetDelete(target_bottom);
        target_bottom = NULL;
    }

    if(c3d_initialized) {
        C3D_Fini();
        c3d_initialized = false;
    }
}

void screen_set_base_alpha(u8 alpha) {
    base_alpha = alpha;
}

static u32 screen_next_pow_2(u32 i) {
    i--;
    i |= i >> 1;
    i |= i >> 2;
    i |= i >> 4;
    i |= i >> 8;
    i |= i >> 16;
    i++;

    return i;
}

u32 screen_allocate_free_texture() {
    u32 id = 0;
    for(u32 i = 1; i < MAX_TEXTURES; i++) {
        if(!textures[i].allocated) {
            textures[i].allocated = true;

            id = i;
            break;
        }
    }

    if(id == 0) {
        util_panic("Out of free textures.");
        return 0;
    }

    return id;
}

void screen_load_texture(u32 id, void* data, u32 size, u32 width, u32 height, GPU_TEXCOLOR format, bool linearFilter) {
    if(id >= MAX_TEXTURES) {
        util_panic("Attempted to load buffer to invalid texture ID \"%lu\".", id);
        return;
    }

    u32 pow2Width = screen_next_pow_2(width);
    if(pow2Width < 64) {
        pow2Width = 64;
    }

    u32 pow2Height = screen_next_pow_2(height);
    if(pow2Height < 64) {
        pow2Height = 64;
    }

    u32 pixelSize = size / width / height;

    u8* pow2Tex = linearAlloc(pow2Width * pow2Height * pixelSize);
    if(pow2Tex == NULL) {
        util_panic("Failed to allocate temporary texture buffer.");
        return;
    }

    memset(pow2Tex, 0, pow2Width * pow2Height * pixelSize);

    for(u32 x = 0; x < width; x++) {
        for(u32 y = 0; y < height; y++) {
            u32 dataPos = (y * width + x) * pixelSize;
            u32 pow2TexPos = (y * pow2Width + x) * pixelSize;

            for(u32 i = 0; i < pixelSize; i++) {
                pow2Tex[pow2TexPos + i] = ((u8*) data)[dataPos + i];
            }
        }
    }

    if(textures[id].tex.data != NULL && (textures[id].tex.size != size || textures[id].tex.width != pow2Width || textures[id].tex.height != pow2Height || textures[id].tex.fmt != format)) {
        C3D_TexDelete(&textures[id].tex);
    }

    if(textures[id].tex.data == NULL && !C3D_TexInit(&textures[id].tex, (int) pow2Width, (int) pow2Height, format)) {
        util_panic("Failed to initialize texture with ID \"%lu\".", id);
        return;
    }

    C3D_TexSetFilter(&textures[id].tex, linearFilter ? GPU_LINEAR : GPU_NEAREST, GPU_NEAREST);

    Result flushRes = GSPGPU_FlushDataCache(pow2Tex, pow2Width * pow2Height * pixelSize);
    if(R_FAILED(flushRes)) {
        util_panic("Failed to flush buffer for texture ID \"%lu\": 0x%08lX", id, flushRes);
        return;
    }

    C3D_SafeDisplayTransfer((u32*) pow2Tex, GX_BUFFER_DIM(pow2Width, pow2Height), (u32*) textures[id].tex.data, GX_BUFFER_DIM(pow2Width, pow2Height), GX_TRANSFER_FLIP_VERT(1) | GX_TRANSFER_OUT_TILED(1) | GX_TRANSFER_RAW_COPY(0) | GX_TRANSFER_IN_FORMAT((u32) gpu_to_gx_format[format]) | GX_TRANSFER_OUT_FORMAT((u32) gpu_to_gx_format[format]) | GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO));
    gspWaitForPPF();

    textures[id].allocated = true;
    textures[id].width = width;
    textures[id].height = height;

    linearFree(pow2Tex);
}

void screen_load_texture_file(u32 id, const char* path, bool linearFilter) {
    if(id >= MAX_TEXTURES) {
        util_panic("Attempted to load path \"%s\" to invalid texture ID \"%lu\".", path, id);
        return;
    }

    FILE* fd = screen_open_resource(path);
    if(fd == NULL) {
        util_panic("Failed to load PNG file \"%s\": %s", path, strerror(errno));
        return;
    }

    int width;
    int height;
    int depth;
    u8* image = stbi_load_from_file(fd, &width, &height, &depth, STBI_rgb_alpha);
    fclose(fd);

    if(image == NULL || depth != STBI_rgb_alpha) {
        util_panic("Failed to load PNG file \"%s\".", path);
        return;
    }

    for(u32 x = 0; x < width; x++) {
        for(u32 y = 0; y < height; y++) {
            u32 pos = (y * width + x) * 4;

            u8 c1 = image[pos + 0];
            u8 c2 = image[pos + 1];
            u8 c3 = image[pos + 2];
            u8 c4 = image[pos + 3];

            image[pos + 0] = c4;
            image[pos + 1] = c3;
            image[pos + 2] = c2;
            image[pos + 3] = c1;
        }
    }

    screen_load_texture(id, image, (u32) (width * height * 4), (u32) width, (u32) height, GPU_RGBA8, linearFilter);

    free(image);
}

static u32 screen_tiled_texture_index(u32 x, u32 y, u32 w, u32 h) {
    return (((y >> 3) * (w >> 3) + (x >> 3)) << 6) + ((x & 1) | ((y & 1) << 1) | ((x & 2) << 1) | ((y & 2) << 2) | ((x & 4) << 2) | ((y & 4) << 3));
}

void screen_load_texture_tiled(u32 id, void* tiledData, u32 size, u32 width, u32 height, GPU_TEXCOLOR format, bool linearFilter) {
    if(id >= MAX_TEXTURES) {
        util_panic("Attempted to load tiled data to invalid texture ID \"%lu\".", id);
        return;
    }

    u8* untiledData = (u8*) calloc(size, sizeof(u8));
    if(untiledData == NULL) {
        util_panic("Failed to allocate buffer for texture untiling.");
        return;
    }

    u32 pixelSize = size / width / height;

    for(u32 x = 0; x < width; x++) {
        for(u32 y = 0; y < height; y++) {
            u32 tiledDataPos = screen_tiled_texture_index(x, y, width, height) * pixelSize;
            u32 untiledDataPos = (y * width + x) * pixelSize;

            for(u32 i = 0; i < pixelSize; i++) {
                untiledData[untiledDataPos + i] = ((u8*) tiledData)[tiledDataPos + i];
            }
        }
    }

    screen_load_texture(id, untiledData, size, width, height, format, linearFilter);

    free(untiledData);
}

void screen_unload_texture(u32 id) {
    if(id >= MAX_TEXTURES) {
        util_panic("Attempted to unload invalid texture ID \"%lu\".", id);
        return;
    }

    C3D_TexDelete(&textures[id].tex);

    textures[id].allocated = false;
    textures[id].width = 0;
    textures[id].height = 0;
}

void screen_get_texture_size(u32* width, u32* height, u32 id) {
    if(id >= MAX_TEXTURES) {
        util_panic("Attempted to get size of invalid texture ID \"%lu\".", id);
        return;
    }

    if(width) {
        *width = textures[id].width;
    }

    if(height) {
        *height = textures[id].height;
    }
}

static void screen_draw_quad(float x1, float y1, float x2, float y2, float tx1, float ty1, float tx2, float ty2) {
    C3D_ImmDrawBegin(GPU_TRIANGLES);

    C3D_ImmSendAttrib(x1, y1, 0.5f, 0.0f);
    C3D_ImmSendAttrib(tx1, ty1, 0.0f, 0.0f);

    C3D_ImmSendAttrib(x2, y2, 0.5f, 0.0f);
    C3D_ImmSendAttrib(tx2, ty2, 0.0f, 0.0f);

    C3D_ImmSendAttrib(x2, y1, 0.5f, 0.0f);
    C3D_ImmSendAttrib(tx2, ty1, 0.0f, 0.0f);

    C3D_ImmSendAttrib(x1, y1, 0.5f, 0.0f);
    C3D_ImmSendAttrib(tx1, ty1, 0.0f, 0.0f);

    C3D_ImmSendAttrib(x1, y2, 0.5f, 0.0f);
    C3D_ImmSendAttrib(tx1, ty2, 0.0f, 0.0f);

    C3D_ImmSendAttrib(x2, y2, 0.5f, 0.0f);
    C3D_ImmSendAttrib(tx2, ty2, 0.0f, 0.0f);

    C3D_ImmDrawEnd();
}

void screen_begin_frame() {
    if(!C3D_FrameBegin(C3D_FRAME_SYNCDRAW)) {
        util_panic("Failed to begin frame.");
        return;
    }
}

void screen_end_frame() {
    C3D_FrameEnd(0);
}

void screen_select(gfxScreen_t screen) {
    if(!C3D_FrameDrawOn(screen == GFX_TOP ? target_top : target_bottom)) {
        util_panic("Failed to select render target.");
        return;
    }

    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(program.vertexShader, "projection"), screen == GFX_TOP ? &projection_top : &projection_bottom);
}

void screen_draw_texture(u32 id, float x, float y, float width, float height) {
    if(id >= MAX_TEXTURES) {
        util_panic("Attempted to draw invalid texture ID \"%lu\".", id);
        return;
    }

    if(textures[id].tex.data == NULL) {
        return;
    }

    if(base_alpha != 0xFF) {
        screen_set_blend(base_alpha << 24, false, true);
    }

    C3D_TexBind(0, &textures[id].tex);
    screen_draw_quad(x, y, x + width, y + height, 0, 0, (float) textures[id].width / (float) textures[id].tex.width, (float) textures[id].height / (float) textures[id].tex.height);

    if(base_alpha != 0xFF) {
        screen_set_blend(0, false, false);
    }
}

void screen_draw_texture_crop(u32 id, float x, float y, float width, float height) {
    if(id >= MAX_TEXTURES) {
        util_panic("Attempted to draw invalid texture ID \"%lu\".", id);
        return;
    }

    if(textures[id].tex.data == NULL) {
        return;
    }

    if(base_alpha != 0xFF) {
        screen_set_blend(base_alpha << 24, false, true);
    }

    C3D_TexBind(0, &textures[id].tex);
    screen_draw_quad(x, y, x + width, y + height, 0, 0, width / (float) textures[id].tex.width, height / (float) textures[id].tex.height);

    if(base_alpha != 0xFF) {
        screen_set_blend(0, false, false);
    }
}

float screen_get_font_height(float scaleY) {
    return scaleY * fontGetInfo()->lineFeed;
}

static void screen_get_string_size_internal(float* width, float* height, const char* text, float scaleX, float scaleY, bool oneLine, bool wrap, float wrapX) {
    float w = 0;
    float h = 0;
    float lineWidth = 0;

    if(text != NULL) {
        h = scaleY * fontGetInfo()->lineFeed;

        const uint8_t* p = (const uint8_t*) text;
        const uint8_t* lastAlign = p;
        u32 code = 0;
        ssize_t units = -1;
        while(*p && (units = decode_utf8(&code, p)) != -1 && code > 0) {
            p += units;

            if(code == '\n' || (wrap && lineWidth + scaleX * fontGetCharWidthInfo(fontGlyphIndexFromCodePoint(code))->charWidth >= wrapX)) {
                lastAlign = p;

                if(lineWidth > w) {
                    w = lineWidth;
                }

                lineWidth = 0;

                if(oneLine) {
                    break;
                }

                h += scaleY * fontGetInfo()->lineFeed;
            }

            if(code != '\n') {
                u32 num = 1;
                if(code == '\t') {
                    code = ' ';
                    num = 4 - (p - units - lastAlign) % 4;

                    lastAlign = p;
                }

                lineWidth += (scaleX * fontGetCharWidthInfo(fontGlyphIndexFromCodePoint(code))->charWidth) * num;
            }
        }
    }

    if(width) {
        *width = lineWidth > w ? lineWidth : w;
    }

    if(height) {
        *height = h;
    }
}

void screen_get_string_size(float* width, float* height, const char* text, float scaleX, float scaleY) {
    screen_get_string_size_internal(width, height, text, scaleX, scaleY, false, false, 0);
}

void screen_get_string_size_wrap(float* width, float* height, const char* text, float scaleX, float scaleY, float wrapX) {
    screen_get_string_size_internal(width, height, text, scaleX, scaleY, false, true, wrapX);
}

static void screen_draw_string_internal(const char* text, float x, float y, float scaleX, float scaleY, u32 colorId, bool centerLines, bool wrap, float wrapX) {
    if(text == NULL) {
        return;
    }

    if(colorId >= MAX_COLORS) {
        util_panic("Attempted to draw string with invalid color ID \"%lu\".", colorId);
        return;
    }

    u32 blendColor = color_config[colorId];
    if(base_alpha != 0xFF) {
        float alpha1 = ((blendColor >> 24) & 0xFF) / 255.0f;
        float alpha2 = base_alpha / 255.0f;
        float blendedAlpha = alpha1 * alpha2;

        blendColor = (((u32) (blendedAlpha * 0xFF)) << 24) | (blendColor & 0x00FFFFFF);
    }

    screen_set_blend(blendColor, true, true);

    float stringWidth;
    screen_get_string_size_internal(&stringWidth, NULL, text, scaleX, scaleY, false, wrap, wrapX);

    float lineWidth;
    screen_get_string_size_internal(&lineWidth, NULL, text, scaleX, scaleY, true, wrap, wrapX);

    float currX = x;
    if(centerLines) {
        currX += (stringWidth - lineWidth) / 2;
    }

    int lastSheet = -1;

    const uint8_t* p = (const uint8_t*) text;
    const uint8_t* lastAlign = p;
    u32 code = 0;
    ssize_t units = -1;
    while(*p && (units = decode_utf8(&code, p)) != -1 && code > 0) {
        p += units;

        if(code == '\n' || (wrap && currX + scaleX * fontGetCharWidthInfo(fontGlyphIndexFromCodePoint(code))->charWidth >= wrapX)) {
            lastAlign = p;

            screen_get_string_size_internal(&lineWidth, NULL, (const char*) p, scaleX, scaleY, true, wrap, wrapX);

            currX = x;
            if(centerLines) {
                currX += (stringWidth - lineWidth) / 2;
            }

            y += scaleY * fontGetInfo()->lineFeed;
        }

        if(code != '\n') {
            u32 num = 1;
            if(code == '\t') {
                code = ' ';
                num = 4 - (p - units - lastAlign) % 4;

                lastAlign = p;
            }

            fontGlyphPos_s data;
            fontCalcGlyphPos(&data, fontGlyphIndexFromCodePoint(code), GLYPH_POS_CALC_VTXCOORD, scaleX, scaleY);

            if(data.sheetIndex != lastSheet) {
                lastSheet = data.sheetIndex;
                C3D_TexBind(0, &glyph_sheets[lastSheet]);
            }

            for(u32 i = 0; i < num; i++) {
                screen_draw_quad(currX + data.vtxcoord.left, y + data.vtxcoord.top, currX + data.vtxcoord.right, y + data.vtxcoord.bottom, data.texcoord.left, data.texcoord.top, data.texcoord.right, data.texcoord.bottom);

                currX += data.xAdvance;
            }
        }
    }

    screen_set_blend(0, false, false);
}

void screen_draw_string(const char* text, float x, float y, float scaleX, float scaleY, u32 colorId, bool centerLines) {
    screen_draw_string_internal(text, x, y, scaleX, scaleY, colorId, centerLines, false, 0);
}

void screen_draw_string_wrap(const char* text, float x, float y, float scaleX, float scaleY, u32 colorId, bool centerLines, float wrapX) {
    screen_draw_string_internal(text, x, y, scaleX, scaleY, colorId, centerLines, true, wrapX);
}


================================================
FILE: source/core/screen.h
================================================
#pragma once

#define TOP_SCREEN_WIDTH 400
#define TOP_SCREEN_HEIGHT 240

#define BOTTOM_SCREEN_WIDTH 320
#define BOTTOM_SCREEN_HEIGHT 240

#define MAX_TEXTURES 1024

#define TEXTURE_BOTTOM_SCREEN_BG 1
#define TEXTURE_BOTTOM_SCREEN_TOP_BAR 2
#define TEXTURE_BOTTOM_SCREEN_TOP_BAR_SHADOW 3
#define TEXTURE_BOTTOM_SCREEN_BOTTOM_BAR 4
#define TEXTURE_BOTTOM_SCREEN_BOTTOM_BAR_SHADOW 5
#define TEXTURE_TOP_SCREEN_BG 6
#define TEXTURE_TOP_SCREEN_TOP_BAR 7
#define TEXTURE_TOP_SCREEN_TOP_BAR_SHADOW 8
#define TEXTURE_TOP_SCREEN_BOTTOM_BAR 9
#define TEXTURE_TOP_SCREEN_BOTTOM_BAR_SHADOW 10
#define TEXTURE_LOGO 11
#define TEXTURE_SELECTION_OVERLAY 12
#define TEXTURE_SCROLL_BAR 13
#define TEXTURE_BUTTON_SMALL 14
#define TEXTURE_BUTTON_LARGE 15
#define TEXTURE_PROGRESS_BAR_BG 16
#define TEXTURE_PROGRESS_BAR_CONTENT 17
#define TEXTURE_META_INFO_BOX 18
#define TEXTURE_META_INFO_BOX_SHADOW 19
#define TEXTURE_BATTERY_CHARGING 20
#define TEXTURE_BATTERY_0 21
#define TEXTURE_BATTERY_1 22
#define TEXTURE_BATTERY_2 23
#define TEXTURE_BATTERY_3 24
#define TEXTURE_BATTERY_4 25
#define TEXTURE_BATTERY_5 26
#define TEXTURE_WIFI_DISCONNECTED 27
#define TEXTURE_WIFI_0 28
#define TEXTURE_WIFI_1 29
#define TEXTURE_WIFI_2 30
#define TEXTURE_WIFI_3 31

enum {
    COLOR_TEXT = 0,
    COLOR_NAND,
    COLOR_SD,
    COLOR_GAME_CARD,
    COLOR_DS_TITLE,
    COLOR_IMPORTANT,
    COLOR_OUTDATED,
    MAX_COLORS
};

void screen_init();
void screen_exit();
void screen_set_base_alpha(u8 alpha);
u32 screen_allocate_free_texture();
void screen_load_texture(u32 id, void* data, u32 size, u32 width, u32 height, GPU_TEXCOLOR format, bool linearFilter);
void screen_load_texture_file(u32 id, const char* path, bool linearFilter);
void screen_load_texture_tiled(u32 id, void* tiledData, u32 size, u32 width, u32 height, GPU_TEXCOLOR format, bool linearFilter);
void screen_load_texture_screenshot(u32 id, gfxScreen_t screen);
void screen_unload_texture(u32 id);
void screen_get_texture_size(u32* width, u32* height, u32 id);
void screen_begin_frame();
void screen_end_frame();
void screen_select(gfxScreen_t screen);
void screen_draw_texture(u32 id, float x, float y, float width, float height);
void screen_draw_texture_crop(u32 id, float x, float y, float width, float height);
float screen_get_font_height(float scaleY);
void screen_get_string_size(float* width, float* height, const char* text, float scaleX, float scaleY);
void screen_get_string_size_wrap(float* width, float* height, const char* text, float scaleX, float scaleY, float wrapX);
void screen_draw_string(const char* text, float x, float y, float scaleX, float scaleY, u32 colorId, bool centerLines);
void screen_draw_string_wrap(const char* text, float x, float y, float scaleX, float scaleY, u32 colorId, bool centerLines, float wrapX);


================================================
FILE: source/core/util.c
================================================
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <3ds.h>
#include <3ds/services/fs.h>

#include "util.h"
#include "../ui/section/task/task.h"

extern void cleanup();

static int util_get_line_length(PrintConsole* console, const char* str) {
    int lineLength = 0;
    while(*str != 0) {
        if(*str == '\n') {
            break;
        }

        lineLength++;
        if(lineLength >= console->consoleWidth - 1) {
            break;
        }

        str++;
    }

    return lineLength;
}

static int util_get_lines(PrintConsole* console, const char* str) {
    int lines = 1;
    int lineLength = 0;
    while(*str != 0) {
        if(*str == '\n') {
            lines++;
            lineLength = 0;
        } else {
            lineLength++;
            if(lineLength >= console->consoleWidth - 1) {
                lines++;
                lineLength = 0;
            }
        }

        str++;
    }

    return lines;
}

void util_panic(const char* s, ...) {
    va_list list;
    va_start(list, s);

    char buf[1024];
    vsnprintf(buf, 1024, s, list);

    va_end(list);

    gspWaitForVBlank();

    u16 width;
    u16 height;
    for(int i = 0; i < 2; i++) {
        memset(gfxGetFramebuffer(GFX_TOP, GFX_LEFT, &width, &height), 0, (size_t) (width * height * 3));
        memset(gfxGetFramebuffer(GFX_TOP, GFX_RIGHT, &width, &height), 0, (size_t) (width * height * 3));
        memset(gfxGetFramebuffer(GFX_BOTTOM, GFX_LEFT, &width, &height), 0, (size_t) (width * height * 3));

        gfxSwapBuffers();
    }

    PrintConsole* console = consoleInit(GFX_TOP, NULL);

    const char* header = "Application has encountered a fatal error!";
    const char* footer = "Press any button to exit.";

    printf("\x1b[0;0H");
    for(int i = 0; i < console->consoleWidth; i++) {
        printf("-");
    }

    printf("\x1b[%d;0H", console->consoleHeight - 1);
    for(int i = 0; i < console->consoleWidth; i++) {
        printf("-");
    }

    printf("\x1b[0;%dH%s", (console->consoleWidth - util_get_line_length(console, header)) / 2, header);
    printf("\x1b[%d;%dH%s", console->consoleHeight - 1, (console->consoleWidth - util_get_line_length(console, footer)) / 2, footer);

    int bufRow = (console->consoleHeight - util_get_lines(console, buf)) / 2;
    char* str = buf;
    while(*str != 0) {
        if(*str == '\n') {
            bufRow++;
            str++;
            continue;
        } else {
            int lineLength = util_get_line_length(console, str);

            char old = *(str + lineLength);
            *(str + lineLength) = '\0';
            printf("\x1b[%d;%dH%s", bufRow, (console->consoleWidth - lineLength) / 2, str);
            *(str + lineLength) = old;

            bufRow++;
            str += lineLength;
        }
    }

    gfxFlushBuffers();
    gspWaitForVBlank();

    while(aptMainLoop()) {
        hidScanInput();
        if(hidKeysDown() & ~KEY_TOUCH) {
            break;
        }

        gspWaitForVBlank();
    }

    cleanup();
    exit(1);
}

bool util_is_dir(FS_Archive* archive, const char* path) {
    Result res = 0;

    FS_Path* fsPath = util_make_path_utf8(path);
    if(fsPath != NULL) {
        Handle dirHandle = 0;
        if(R_SUCCEEDED(res = FSUSER_OpenDirectory(&dirHandle, *archive, *fsPath))) {
            FSDIR_Close(dirHandle);
        }

        util_free_path_utf8(fsPath);
    } else {
        res = R_OUT_OF_MEMORY;
    }

    return R_SUCCEEDED(res);
}

typedef struct {
    u32* count;
    void* data;
    bool (*filter)(void* data, FS_Archive* archive, const char* path, u32 attributes);
} count_data;

typedef struct {
    char*** contents;
    u32 index;
    void* data;
    bool (*filter)(void* data, FS_Archive* archive, const char* path, u32 attributes);
} populate_data;

void util_get_path_file(char* out, const char* path, u32 size) {
    const char* start = NULL;
    const char* end = NULL;
    const char* curr = path - 1;
    while((curr = strchr(curr + 1, '/')) != NULL) {
        start = end != NULL ? end : path;
        end = curr;
    }

    if(end - start == 0) {
        strncpy(out, "/", size);
    } else {
        u32 terminatorPos = end - start - 1 < size - 1 ? end - start - 1 : size - 1;
        strncpy(out, start + 1, terminatorPos);
        out[terminatorPos] = '\0';
    }
}

void util_get_parent_path(char* out, const char* path, u32 size) {
    size_t pathLen = strlen(path);

    const char* start = NULL;
    const char* end = NULL;
    const char* curr = path - 1;
    while((curr = strchr(curr + 1, '/')) != NULL && (start == NULL || curr != path + pathLen - 1)) {
        start = end != NULL ? end : path;
        end = curr;
    }

    u32 terminatorPos = end - path + 1 < size - 1 ? end - path + 1 : size - 1;
    strncpy(out, path, terminatorPos);
    out[terminatorPos] = '\0';
}

// Returns whether directory exists
Result util_get_locale_path(char* out, size_t size) {
    FILE* config_file = util_open_resource("/locales.conf");
    if (config_file != NULL) {
        char *buffer = (char*) calloc(size, sizeof(char));

        while(fgets(buffer, size, config_file) != NULL) {
            char* newline = strchr(buffer, '\n');
            buffer[size-1] = '\0';
            if(newline != NULL) {
                *newline = '\0';
            }

            // Some backwards compatibility stuff here..
            if (strstr(buffer, "%s") == NULL) {
                if (buffer[size-2] == '/')
                    strcat(buffer, "%s.txt");
                else
                    strcat(buffer, "/%s.txt");
            }

            strncpy(out, buffer, size);
            out[strlen(buffer)] = '\0';
            free(buffer);
            return 1;
        }
    }

    // Default to Luma
#ifdef LUMA_NIGHTLY
    char* fallback = "/luma/titles/%s/locale.txt";
#else
    char* fallback = "/luma/locales/%s.txt";
#endif
    strncpy(out, fallback, strlen(fallback));
    out[strlen(fallback)] = '\0';
    return 0;
}

Result util_ensure_dir(FS_Archive* archive, const char* path) {
    Result res = 0;

    if(!util_is_dir(archive, path)) {
        FS_Path* fsPath = util_make_path_utf8(path);
        if(fsPath != NULL) {
            FSUSER_DeleteFile(*archive, *fsPath);
            if (!R_SUCCEEDED(res = FSUSER_CreateDirectory(*archive, *fsPath, 0))) {
                // Try recursive
                int pathlen = strlen(path);

                for (int i = 0; i < pathlen; i++) {
                    if (path[i] == '/') {
                        char* parent_path = (char*) malloc(sizeof(char) * (i + 1));
                        strncpy(parent_path, path, i);
                        parent_path[i] = '\0';
                        FS_Path* fsParentPath = util_make_path_utf8(parent_path);
                        if (!R_SUCCEEDED(res = FSUSER_CreateDirectory(*archive, *fsParentPath, 0))) {
                            return res; // :(
                        }
                        util_free_path_utf8(fsParentPath);
                        free(parent_path);
                    }
                }

            }

            util_free_path_utf8(fsPath);
        } else {
            res = R_OUT_OF_MEMORY;
        }
    }

    return res;
}

// Populates `out` with the directory that is the most common ancestor
Result util_get_locale_dir(char* out, size_t size) {
    Result res = 0;

    // Probably not very efficient...
    if (R_SUCCEEDED(res = util_get_locale_path(out, size))) {
        char* substr = strstr(out, "%s"); // Finds the first instance of "%s"
        if (substr != NULL)
            out[strlen(out) - strlen(substr)] = '\0'; // Chops it off using "\0"
    }
    return res;
}

FS_Path* util_make_path_utf8(const char* path) {
    size_t len = strlen(path);

    u16* utf16 = (u16*) calloc(len + 1, sizeof(u16));
    if(utf16 == NULL) {
        return NULL;
    }

    ssize_t utf16Len = utf8_to_utf16(utf16, (const uint8_t*) path, len);

    FS_Path* fsPath = (FS_Path*) calloc(1, sizeof(FS_Path));
    if(fsPath == NULL) {
        free(utf16);
        return NULL;
    }

    fsPath->type = PATH_UTF16;
    fsPath->size = (utf16Len + 1) * sizeof(u16);
    fsPath->data = utf16;

    return fsPath;
}

void util_free_path_utf8(FS_Path* path) {
    free((void*) path->data);
    free(path);
}

FS_Path util_make_binary_path(const void* data, u32 size) {
    FS_Path path = {PATH_BINARY, size, data};
    return path;
}

int util_compare_u32(const void* e1, const void* e2) {
    u32 id1 = *(u32*) e1;
    u32 id2 = *(u32*) e2;

    return id1 > id2 ? 1 : id1 < id2 ? -1 : 0;
}

int util_compare_u64(const void* e1, const void* e2) {
    u64 id1 = *(u64*) e1;
    u64 id2 = *(u64*) e2;

    return id1 > id2 ? 1 : id1 < id2 ? -1 : 0;
}

FILE* util_open_resource(const char* path) {
    u32 realPathSize = strlen(path) + 16;
    char realPath[realPathSize];

    snprintf(realPath, realPathSize, "sdmc:/%s", path);
    FILE* fd = fopen(realPath, "rb");

    if(fd != NULL) {
        return fd;
    } else {
        snprintf(realPath, realPathSize, "romfs:/%s", path);

        return fopen(realPath, "rb");
    }
}


================================================
FILE: source/core/util.h
================================================
#pragma once

#include <stdio.h>

typedef struct {
    u16 shortDescription[0x40];
    u16 longDescription[0x80];
    u16 publisher[0x40];
} SMDH_title;

typedef struct {
    char magic[0x04];
    u16 version;
    u16 reserved1;
    SMDH_title titles[0x10];
    u8 ratings[0x10];
    u32 region;
    u32 matchMakerId;
    u64 matchMakerBitId;
    u32 flags;
    u16 eulaVersion;
    u16 reserved;
    u32 optimalBannerFrame;
    u32 streetpassId;
    u64 reserved2;
    u8 smallIcon[0x480];
    u8 largeIcon[0x1200];
} SMDH;

typedef struct {
    u8 version;
    bool animated;
    u16 crc16[4];
    u8 reserved[0x16];
    u8 mainIconBitmap[0x200];
    u16 mainIconPalette[0x10];
    u16 titles[16][0x80];
    u8 animatedFrameBitmaps[8][0x200];
    u16 animatedFramePalettes[8][0x10];
    u16 animationSequence[0x40];
} BNR;

void util_panic(const char* s, ...);

bool util_is_dir(FS_Archive* archive, const char* path);
void util_get_path_file(char* out, const char* path, u32 size);
void util_get_parent_path(char* out, const char* path, u32 size);
Result util_get_locale_path(char* out, size_t size);
Result util_get_locale_dir(char* out, size_t size);
Result util_ensure_dir(FS_Archive* archive, const char* path);

FS_Path* util_make_path_utf8(const char* path);
void util_free_path_utf8(FS_Path* path);
FS_Path util_make_binary_path(const void* data, u32 size);

int util_compare_u32(const void* e1, const void* e2);
int util_compare_u64(const void* e1, const void* e2);

FILE* util_open_resource(const char* path);
FS_Archive* util_get_sdmc_archive();


================================================
FILE: source/locale.c
================================================
#include <3ds.h>
#include <sys/syslimits.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "locale.h"
#include "core/util.h"

Region region_from_string(char* string) {
    return
        strcmp(string, "JPN") == 0 ? JPN :
        strcmp(string, "USA") == 0 ? USA :
        strcmp(string, "EUR") == 0 ? EUR :
        strcmp(string, "AUS") == 0 ? AUS :
        strcmp(string, "CHN") == 0 ? CHN :
        strcmp(string, "KOR") == 0 ? KOR :
        strcmp(string, "TWN") == 0 ? TWN :
        RGN_NONE;
}

Language language_from_string(char* string) {
    return
        strcmp(string, "JP") == 0 ? JP :
        strcmp(string, "EN") == 0 ? EN :
        strcmp(string, "FR") == 0 ? FR :
        strcmp(string, "DE") == 0 ? DE :
        strcmp(string, "IT") == 0 ? IT :
        strcmp(string, "ES") == 0 ? ES :
        strcmp(string, "ZH") == 0 ? ZH :
        strcmp(string, "KO") == 0 ? KO :
        strcmp(string, "NL") == 0 ? NL :
        strcmp(string, "PT") == 0 ? PT :
        strcmp(string, "RU") == 0 ? RU :
        strcmp(string, "TW") == 0 ? TW :
        LNG_NONE;
}

static const char* _Region_Strings[] = { "JPN", "USA", "EUR", "AUS", "CHN", "KOR", "TWN" };

const char* region_to_string(Region region) {
    if (region == RGN_NONE || region >= RGN_MAX)
        return "System Default";
    return _Region_Strings[region];
}

static const char* _Language_Strings[] = { "JP", "EN", "FR", "DE", "IT", "ES", "ZH", "KO", "NL", "PT", "RU", "TW"};

const char* language_to_string(Language language) {
    if (language == LNG_NONE || language >= LNG_MAX)
        return "System Default";
    return _Language_Strings[language];
}

char* locale_path_for_title(u64 titleId) {
    char* path = calloc(PATH_MAX, sizeof(char));
    char* cfg_path = calloc(PATH_MAX, sizeof(char));
    util_get_locale_path(cfg_path, PATH_MAX);

    char title_id_str[17];
    snprintf(title_id_str, 17, "%016llX", titleId);
    title_id_str[16] = '\0';

    snprintf(path, PATH_MAX, cfg_path, title_id_str);
    free(cfg_path);

    return path;
}

Locale* locale_for_title(u64 titleId) {

    FS_Archive sdmc_archive;

    Locale *locale_info = (Locale*) calloc(1, sizeof(Locale));

    // Defaults
    locale_info->region = RGN_NONE;
    locale_info->language = LNG_NONE;

    Result res;
    if (R_FAILED(res = FSUSER_OpenArchive(&sdmc_archive, ARCHIVE_SDMC, fsMakePath(PATH_EMPTY,"")))) {
        return locale_info;
    }

    Handle handle;
    char* path = locale_path_for_title(titleId);
    FS_Path* fs_path = util_make_path_utf8(path);
    if(R_FAILED(res = FSUSER_OpenFileDirectly(&handle, ARCHIVE_SDMC, fsMakePath(PATH_EMPTY,""), *fs_path, FS_OPEN_READ, 0))) {
        free(fs_path);
        free(path);
        return locale_info;
    }

    char* buffer = (char*) calloc(7, sizeof(char)); // ex., "JPN JP\0"
    u32 bytes_read;
    FSFILE_Read(handle, &bytes_read, 0, buffer, 6);
    FSFILE_Close(handle);

    util_free_path_utf8(fs_path);
    free(path);

    FSUSER_CloseArchive(sdmc_archive);

    if (bytes_read < 6) { // we need at least "JPN JP"
        locale_info->region = RGN_NONE;
        locale_info->language = LNG_NONE;
        return locale_info;
    }

    buffer[bytes_read] = '\0';

    char* region_str = (char*) calloc(4, sizeof(char));
    char* lang_str = (char*) calloc(3, sizeof(char));
    if (sscanf(buffer, "%3s %2s", region_str, lang_str) < 2) {
        locale_info->region = RGN_NONE;
        locale_info->language = LNG_NONE;
    }
    else {
        locale_info->region = region_from_string(region_str);
        locale_info->language = language_from_string(lang_str);
    }

    return locale_info;
}

Region region_for_title(u64 titleId) {
    Locale* locale = locale_for_title(titleId);
    return locale->region;
}

Language language_for_title(u64 titleId) {
    Locale* locale = locale_for_title(titleId);
    return locale->language;
}

Result _set_locale_for_title(u64 titleId, Locale* locale) {
    char* locale_dir = (char*) calloc(PATH_MAX, sizeof(char));
    util_get_locale_dir(locale_dir, PATH_MAX);
    FS_Archive sdmc_archive;

    Result res;
    if (R_FAILED(res = FSUSER_OpenArchive(&sdmc_archive, ARCHIVE_SDMC, fsMakePath(PATH_EMPTY,"")))) return res;

    // Create the locale directory if it doesn't exist
    // XXX This probably doesn't work if more than one path in the hierarchy DNE
    util_ensure_dir(&sdmc_archive, locale_dir);
    free(locale_dir);

    char* locale_path = locale_path_for_title(titleId);
    FS_Path* fs_path = util_make_path_utf8(locale_path);

    // Make sure all directories exist
    int pathlen = strlen(locale_path);
    int last_virgule = pathlen;

    // Find the deepest directory
    for (int i = 0; i < pathlen; i++)
        if (locale_path[i] == '/')
            last_virgule = i;

    char* deepest_path = (char*) malloc(sizeof(char) * (last_virgule + 1));
    strncpy(deepest_path, locale_path, last_virgule);
    deepest_path[last_virgule] = '\0';
    util_ensure_dir(&sdmc_archive, deepest_path);

    Handle handle;
    // If this fails, probably means locale directory does not exist
    // TODO create locale directory if not exist
    if(R_SUCCEEDED(
        FSUSER_OpenFileDirectly(&handle, ARCHIVE_SDMC, fsMakePath(PATH_EMPTY,""), *fs_path, FS_OPEN_WRITE | FS_OPEN_CREATE, 0)
    )) {
        char* buffer = (char*) calloc(8, sizeof(char)); // ex: "JPN JP\0"
        snprintf(buffer, 7, "%s %s\n", region_to_string(locale->region),
                language_to_string(locale->language));
        buffer[7] = '\0';

        u32 bytes_written;
        FSFILE_Write(handle, &bytes_written, 0, buffer, 6, FS_WRITE_FLUSH);
        FSFILE_Close(handle);

        util_free_path_utf8(fs_path);
        FSUSER_CloseArchive(sdmc_archive);

        return bytes_written == 6 ? true : -1;
    }
    return -1;
}

Result set_region_and_language_for_title(u64 titleId, Region region, Language language) {
    Locale* locale = locale_for_title(titleId);

    Result result;

    // Default to system region/language
    if (region == RGN_NONE) {
        result = CFGU_SecureInfoGetRegion((u8*)&region);
        if (region <= RGN_NONE || region >= RGN_MAX)
            return result;
    }
    if (language == LNG_NONE) {
        result = CFGU_GetSystemLanguage((u8*)&language);
        if (language <= LNG_NONE || language >= LNG_MAX)
            return result;
    }

    locale->region = region;
    locale->language = language;
    return _set_locale_for_title(titleId, locale);
}

Result set_region_for_title(u64 titleId, Region region) {
    Locale* locale = locale_for_title(titleId);

    return set_region_and_language_for_title(titleId, region, locale->language);
}

Result set_language_for_title(u64 titleId, Language language) {
    Locale* locale = locale_for_title(titleId);

    return set_region_and_language_for_title(titleId, locale->region, language);
}


================================================
FILE: source/locale.h
================================================
#pragma once
#include <3ds/services/cfgu.h>

// These align with CFG_Region in 3ds/services/cfgu.h
typedef enum {
    JPN = 0,
    USA,
    EUR,
    AUS,
    CHN,
    KOR,
    TWN,
    RGN_MAX,
    RGN_NONE = -1
} Region;

// These align with CFG_Language in 3ds/services/cfgu.h
typedef enum {
    JP = 0,
    EN,
    FR,
    DE,
    IT,
    ES,
    ZH,
    KO,
    NL,
    PT,
    RU,
    TW,
    LNG_MAX,
    LNG_NONE = -1
} Language;

typedef struct {
    u64 title_id;
    char* title_id_str;
    Region region;
    Language language;
} Locale;

Region region_from_string(char* string);
Language language_from_string(char* string);
const char* region_to_string(Region region);
const char* language_to_string(Language language);

char* locale_path_for_title(u64 titleId);
Locale* locale_for_title(u64 titleId);
Region region_for_title(u64 titleId);
Language language_for_title(u64 titleId);

Result set_region_and_language_for_title(u64 titleId, Region region, Language language);
Result set_region_for_title(u64 titleId, Region region);
Result set_language_for_title(u64 titleId, Language language);


================================================
FILE: source/main.c
================================================
#include <malloc.h>

#include <3ds.h>

#include "core/screen.h"
#include "core/util.h"
#include "ui/mainmenu.h"
#include "ui/section/task/task.h"

static void* soc_buffer;

void cleanup() {
    task_quit_all();

    ui_exit();
    screen_exit();

    socExit();
    if(soc_buffer != NULL) {
        free(soc_buffer);
        soc_buffer = NULL;
    }

    amExit();
    httpcExit();
    ptmuExit();
    acExit();
    cfguExit();
    romfsExit();
    gfxExit();
}

int main(int argc, const char* argv[]) {
    gfxInitDefault();

    Result setCpuTimeRes = APT_SetAppCpuTimeLimit(30);

    if(R_FAILED(setCpuTimeRes)) {
        util_panic("Failed to set syscore CPU time limit: %08lX", setCpuTimeRes);
        return 1;
    }

    romfsInit();
    cfguInit();
    acInit();
    ptmuInit();
    httpcInit(0);

    amInit();
    AM_InitializeExternalTitleDatabase(false);

    soc_buffer = memalign(0x1000, 0x100000);
    if(soc_buffer != NULL) {
        socInit(soc_buffer, 0x100000);
    }

    screen_init();
    ui_init();

    mainmenu_open();

    while(aptMainLoop() && ui_update());

    cleanup();
    return 0;
}


================================================
FILE: source/stb_image/stb_image.c
================================================
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"

================================================
FILE: source/stb_image/stb_image.h
================================================
/* stb_image - v2.14 - public domain image loader - http://nothings.org/stb_image.h
                                     no warranty implied; use at your own risk

   Do this:
      #define STB_IMAGE_IMPLEMENTATION
   before you include this file in *one* C or C++ file to create the implementation.

   // i.e. it should look like this:
   #include ...
   #include ...
   #include ...
   #define STB_IMAGE_IMPLEMENTATION
   #include "stb_image.h"

   You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
   And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free


   QUICK NOTES:
      Primarily of interest to game developers and other people who can
          avoid problematic images and only need the trivial interface

      JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
      PNG 1/2/4/8/16-bit-per-channel

      TGA (not sure what subset, if a subset)
      BMP non-1bpp, non-RLE
      PSD (composited view only, no extra channels, 8/16 bit-per-channel)

      GIF (*comp always reports as 4-channel)
      HDR (radiance rgbE format)
      PIC (Softimage PIC)
      PNM (PPM and PGM binary only)

      Animated GIF still needs a proper API, but here's one way to do it:
          http://gist.github.com/urraka/685d9a6340b26b830d49

      - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
      - decode from arbitrary I/O callbacks
      - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)

   Full documentation under "DOCUMENTATION" below.


LICENSE

  See end of file for license information.

RECENT REVISION HISTORY:

      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
      2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
      2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
                         RGB-format JPEG; remove white matting in PSD;
                         allocate large structures on the stack; 
                         correct channel count for PNG & BMP
      2.10  (2016-01-22) avoid warning introduced in 2.09
      2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
      2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
      2.07  (2015-09-13) partial animated GIF support
                         limited 16-bit PSD support
                         minor bugs, code cleanup, and compiler warnings

   See end of file for full revision history.


 ============================    Contributors    =========================

 Image formats                          Extensions, features
    Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
    Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
    Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
    Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
    Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
    Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
    Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
    github:urraka (animated gif)           Junggon Kim (PNM comments)
                                           Daniel Gibson (16-bit TGA)
                                           socks-the-fox (16-bit TGA)
                                           Jeremy Sawicki (handle all ImageNet JPGs)
 Optimizations & bugfixes
    Fabian "ryg" Giesen
    Arseny Kapoulkine

 Bug & warning fixes
    Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
    Christpher Lloyd        Martin Golini      Jerry Jansson      Joseph Thomson
    Dave Moore              Roy Eltham         Hayaki Saito       Phil Jordan
    Won Chun                Luke Graham        Johan Duparc       Nathan Reed
    the Horde3D community   Thomas Ruf         Ronny Chevalier    Nick Verigakis
    Janez Zemva             John Bartholomew   Michal Cichon      github:svdijk
    Jonathan Blow           Ken Hamada         Tero Hanninen      Baldur Karlsson
    Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:romigrou
    Aruelien Pocheville     Thibault Reuille   Cass Everitt       Matthew Gregan
    Ryamond Barbiero        Paul Du Bois       Engin Manap        github:snagar
    Michaelangel007@github  Oriol Ferrer Mesia Dale Weiler        github:Zelex
    Philipp Wiesemann       Josh Tobin         github:rlyeh       github:grim210@github
    Blazej Dariusz Roszkowski                  github:sammyhw

*/

#ifndef STBI_INCLUDE_STB_IMAGE_H
#define STBI_INCLUDE_STB_IMAGE_H

// DOCUMENTATION
//
// Limitations:
//    - no 16-bit-per-channel PNG
//    - no 12-bit-per-channel JPEG
//    - no JPEGs with arithmetic coding
//    - no 1-bit BMP
//    - GIF always returns *comp=4
//
// Basic usage (see HDR discussion below for HDR usage):
//    int x,y,n;
//    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
//    // ... process data if not NULL ...
//    // ... x = width, y = height, n = # 8-bit components per pixel ...
//    // ... replace '0' with '1'..'4' to force that many components per pixel
//    // ... but 'n' will always be the number that it would have been if you said 0
//    stbi_image_free(data)
//
// Standard parameters:
//    int *x                 -- outputs image width in pixels
//    int *y                 -- outputs image height in pixels
//    int *channels_in_file  -- outputs # of image components in image file
//    int desired_channels   -- if non-zero, # of image components requested in result
//
// The return value from an image loader is an 'unsigned char *' which points
// to the pixel data, or NULL on an allocation failure or if the image is
// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
// with each pixel consisting of N interleaved 8-bit components; the first
// pixel pointed to is top-left-most in the image. There is no padding between
// image scanlines or between pixels, regardless of format. The number of
// components N is 'req_comp' if req_comp is non-zero, or *comp otherwise.
// If req_comp is non-zero, *comp has the number of components that _would_
// have been output otherwise. E.g. if you set req_comp to 4, you will always
// get RGBA output, but you can check *comp to see if it's trivially opaque
// because e.g. there were only 3 channels in the source image.
//
// An output image with N components has the following components interleaved
// in this order in each pixel:
//
//     N=#comp     components
//       1           grey
//       2           grey, alpha
//       3           red, green, blue
//       4           red, green, blue, alpha
//
// If image loading fails for any reason, the return value will be NULL,
// and *x, *y, *comp will be unchanged. The function stbi_failure_reason()
// can be queried for an extremely brief, end-user unfriendly explanation
// of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid
// compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
// more user-friendly ones.
//
// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
//
// ===========================================================================
//
// Philosophy
//
// stb libraries are designed with the following priorities:
//
//    1. easy to use
//    2. easy to maintain
//    3. good performance
//
// Sometimes I let "good performance" creep up in priority over "easy to maintain",
// and for best performance I may provide less-easy-to-use APIs that give higher
// performance, in addition to the easy to use ones. Nevertheless, it's important
// to keep in mind that from the standpoint of you, a client of this library,
// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
//
// Some secondary priorities arise directly from the first two, some of which
// make more explicit reasons why performance can't be emphasized.
//
//    - Portable ("ease of use")
//    - Small source code footprint ("easy to maintain")
//    - No dependencies ("ease of use")
//
// ===========================================================================
//
// I/O callbacks
//
// I/O callbacks allow you to read from arbitrary sources, like packaged
// files or some other source. Data read from callbacks are processed
// through a small internal buffer (currently 128 bytes) to try to reduce
// overhead.
//
// The three functions you must define are "read" (reads some bytes of data),
// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
//
// ===========================================================================
//
// SIMD support
//
// The JPEG decoder will try to automatically use SIMD kernels on x86 when
// supported by the compiler. For ARM Neon support, you must explicitly
// request it.
//
// (The old do-it-yourself SIMD API is no longer supported in the current
// code.)
//
// On x86, SSE2 will automatically be used when available based on a run-time
// test; if not, the generic C versions are used as a fall-back. On ARM targets,
// the typical path is to have separate builds for NEON and non-NEON devices
// (at least this is true for iOS and Android). Therefore, the NEON support is
// toggled by a build flag: define STBI_NEON to get NEON loops.
//
// If for some reason you do not want to use any of SIMD code, or if
// you have issues compiling it, you can disable it entirely by
// defining STBI_NO_SIMD.
//
// ===========================================================================
//
// HDR image support   (disable by defining STBI_NO_HDR)
//
// stb_image now supports loading HDR images in general, and currently
// the Radiance .HDR file format, although the support is provided
// generically. You can still load any file through the existing interface;
// if you attempt to load an HDR file, it will be automatically remapped to
// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
// both of these constants can be reconfigured through this interface:
//
//     stbi_hdr_to_ldr_gamma(2.2f);
//     stbi_hdr_to_ldr_scale(1.0f);
//
// (note, do not use _inverse_ constants; stbi_image will invert them
// appropriately).
//
// Additionally, there is a new, parallel interface for loading files as
// (linear) floats to preserve the full dynamic range:
//
//    float *data = stbi_loadf(filename, &x, &y, &n, 0);
//
// If you load LDR images through this interface, those images will
// be promoted to floating point values, run through the inverse of
// constants corresponding to the above:
//
//     stbi_ldr_to_hdr_scale(1.0f);
//     stbi_ldr_to_hdr_gamma(2.2f);
//
// Finally, given a filename (or an open file or memory block--see header
// file for details) containing image data, you can query for the "most
// appropriate" interface to use (that is, whether the image is HDR or
// not), using:
//
//     stbi_is_hdr(char *filename);
//
// ===========================================================================
//
// iPhone PNG support:
//
// By default we convert iphone-formatted PNGs back to RGB, even though
// they are internally encoded differently. You can disable this conversion
// by by calling stbi_convert_iphone_png_to_rgb(0), in which case
// you will always just get the native iphone "format" through (which
// is BGR stored in RGB).
//
// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
// pixel to remove any premultiplied alpha *only* if the image file explicitly
// says there's premultiplied data (currently only happens in iPhone images,
// and only if iPhone convert-to-rgb processing is on).
//
// ===========================================================================
//
// ADDITIONAL CONFIGURATION
// 
//  - You can suppress implementation of any of the decoders to reduce
//    your code footprint by #defining one or more of the following
//    symbols before creating the implementation.
// 
//        STBI_NO_JPEG
//        STBI_NO_PNG
//        STBI_NO_BMP
//        STBI_NO_PSD
//        STBI_NO_TGA
//        STBI_NO_GIF
//        STBI_NO_HDR
//        STBI_NO_PIC
//        STBI_NO_PNM   (.ppm and .pgm)
// 
//  - You can request *only* certain decoders and suppress all other ones
//    (this will be more forward-compatible, as addition of new decoders
//    doesn't require you to disable them explicitly):
// 
//        STBI_ONLY_JPEG
//        STBI_ONLY_PNG
//        STBI_ONLY_BMP
//        STBI_ONLY_PSD
//        STBI_ONLY_TGA
//        STBI_ONLY_GIF
//        STBI_ONLY_HDR
//        STBI_ONLY_PIC
//        STBI_ONLY_PNM   (.ppm and .pgm)
// 
//   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
//     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
//


#ifndef STBI_NO_STDIO
#include <stdio.h>
#endif // STBI_NO_STDIO

#define STBI_VERSION 1

enum
{
   STBI_default = 0, // only used for req_comp

   STBI_grey       = 1,
   STBI_grey_alpha = 2,
   STBI_rgb        = 3,
   STBI_rgb_alpha  = 4
};

typedef unsigned char stbi_uc;
typedef unsigned short stbi_us;

#ifdef __cplusplus
extern "C" {
#endif

#ifdef STB_IMAGE_STATIC
#define STBIDEF static
#else
#define STBIDEF extern
#endif

//////////////////////////////////////////////////////////////////////////////
//
// PRIMARY API - works on images of any type
//

//
// load image by filename, open file, or memory buffer
//

typedef struct
{
   int      (*read)  (void *user,char *data,int size);   // fill 'data' with 'size' bytes.  return number of bytes actually read
   void     (*skip)  (void *user,int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
   int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data
} stbi_io_callbacks;

////////////////////////////////////
//
// 8-bits-per-channel interface
//

STBIDEF stbi_uc *stbi_load               (char              const *filename,           int *x, int *y, int *channels_in_file, int desired_channels);
STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);

#ifndef STBI_NO_STDIO
STBIDEF stbi_uc *stbi_load_from_file   (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
// for stbi_load_from_file, file pointer is left pointing immediately after image
#endif

////////////////////////////////////
//
// 16-bits-per-channel interface
//

STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
#ifndef STBI_NO_STDIO
STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
#endif
// @TODO the other variants

////////////////////////////////////
//
// float-per-channel interface
//
#ifndef STBI_NO_LINEAR
   STBIDEF float *stbi_loadf                 (char const *filename,           int *x, int *y, int *channels_in_file, int desired_channels);
   STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
   STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);

   #ifndef STBI_NO_STDIO
   STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
   #endif
#endif

#ifndef STBI_NO_HDR
   STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
   STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
#endif // STBI_NO_HDR

#ifndef STBI_NO_LINEAR
   STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
   STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
#endif // STBI_NO_LINEAR

// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
#ifndef STBI_NO_STDIO
STBIDEF int      stbi_is_hdr          (char const *filename);
STBIDEF int      stbi_is_hdr_from_file(FILE *f);
#endif // STBI_NO_STDIO


// get a VERY brief reason for failure
// NOT THREADSAFE
STBIDEF const char *stbi_failure_reason  (void);

// free the loaded image -- this is just free()
STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);

// get image dimensions & components without fully decoding
STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);

#ifndef STBI_NO_STDIO
STBIDEF int      stbi_info            (char const *filename,     int *x, int *y, int *comp);
STBIDEF int      stbi_info_from_file  (FILE *f,                  int *x, int *y, int *comp);

#endif



// for image formats that explicitly notate that they have premultiplied alpha,
// we just return the colors as stored in the file. set this flag to force
// unpremultiplication. results are undefined if the unpremultiply overflow.
STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);

// indicate whether we should process iphone images back to canonical format,
// or just pass them through "as-is"
STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);

// flip the image vertically, so the first pixel in the output array is the bottom left
STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);

// ZLIB client - used by PNG, available for other purposes

STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);

STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);


#ifdef __cplusplus
}
#endif

//
//
////   end header file   /////////////////////////////////////////////////////
#endif // STBI_INCLUDE_STB_IMAGE_H

#ifdef STB_IMAGE_IMPLEMENTATION

#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
  || defined(STBI_ONLY_ZLIB)
   #ifndef STBI_ONLY_JPEG
   #define STBI_NO_JPEG
   #endif
   #ifndef STBI_ONLY_PNG
   #define STBI_NO_PNG
   #endif
   #ifndef STBI_ONLY_BMP
   #define STBI_NO_BMP
   #endif
   #ifndef STBI_ONLY_PSD
   #define STBI_NO_PSD
   #endif
   #ifndef STBI_ONLY_TGA
   #define STBI_NO_TGA
   #endif
   #ifndef STBI_ONLY_GIF
   #define STBI_NO_GIF
   #endif
   #ifndef STBI_ONLY_HDR
   #define STBI_NO_HDR
   #endif
   #ifndef STBI_ONLY_PIC
   #define STBI_NO_PIC
   #endif
   #ifndef STBI_ONLY_PNM
   #define STBI_NO_PNM
   #endif
#endif

#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
#define STBI_NO_ZLIB
#endif


#include <stdarg.h>
#include <stddef.h> // ptrdiff_t on osx
#include <stdlib.h>
#include <string.h>
#include <limits.h>

#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
#include <math.h>  // ldexp
#endif

#ifndef STBI_NO_STDIO
#include <stdio.h>
#endif

#ifndef STBI_ASSERT
#include <assert.h>
#define STBI_ASSERT(x) assert(x)
#endif


#ifndef _MSC_VER
   #ifdef __cplusplus
   #define stbi_inline inline
   #else
   #define stbi_inline
   #endif
#else
   #define stbi_inline __forceinline
#endif


#ifdef _MSC_VER
typedef unsigned short stbi__uint16;
typedef   signed short stbi__int16;
typedef unsigned int   stbi__uint32;
typedef   signed int   stbi__int32;
#else
#include <stdint.h>
typedef uint16_t stbi__uint16;
typedef int16_t  stbi__int16;
typedef uint32_t stbi__uint32;
typedef int32_t  stbi__int32;
#endif

// should produce compiler error if size is wrong
typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];

#ifdef _MSC_VER
#define STBI_NOTUSED(v)  (void)(v)
#else
#define STBI_NOTUSED(v)  (void)sizeof(v)
#endif

#ifdef _MSC_VER
#define STBI_HAS_LROTL
#endif

#ifdef STBI_HAS_LROTL
   #define stbi_lrot(x,y)  _lrotl(x,y)
#else
   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (32 - (y))))
#endif

#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
// ok
#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
// ok
#else
#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
#endif

#ifndef STBI_MALLOC
#define STBI_MALLOC(sz)           malloc(sz)
#define STBI_REALLOC(p,newsz)     realloc(p,newsz)
#define STBI_FREE(p)              free(p)
#endif

#ifndef STBI_REALLOC_SIZED
#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
#endif

// x86/x64 detection
#if defined(__x86_64__) || defined(_M_X64)
#define STBI__X64_TARGET
#elif defined(__i386) || defined(_M_IX86)
#define STBI__X86_TARGET
#endif

#if defined(__GNUC__) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
// NOTE: not clear do we actually need this for the 64-bit path?
// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
// (but compiling with -msse2 allows the compiler to use SSE2 everywhere;
// this is just broken and gcc are jerks for not fixing it properly
// http://www.virtualdub.org/blog/pivot/entry.php?id=363 )
#define STBI_NO_SIMD
#endif

#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
//
// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
// simultaneously enabling "-mstackrealign".
//
// See https://github.com/nothings/stb/issues/81 for more information.
//
// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
#define STBI_NO_SIMD
#endif

#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
#define STBI_SSE2
#include <emmintrin.h>

#ifdef _MSC_VER

#if _MSC_VER >= 1400  // not VC6
#include <intrin.h> // __cpuid
static int stbi__cpuid3(void)
{
   int info[4];
   __cpuid(info,1);
   return info[3];
}
#else
static int stbi__cpuid3(void)
{
   int res;
   __asm {
      mov  eax,1
      cpuid
      mov  res,edx
   }
   return res;
}
#endif

#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name

static int stbi__sse2_available()
{
   int info3 = stbi__cpuid3();
   return ((info3 >> 26) & 1) != 0;
}
#else // assume GCC-style if not VC++
#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))

static int stbi__sse2_available()
{
#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 // GCC 4.8 or later
   // GCC 4.8+ has a nice way to do this
   return __builtin_cpu_supports("sse2");
#else
   // portable way to do this, preferably without using GCC inline ASM?
   // just bail for now.
   return 0;
#endif
}
#endif
#endif

// ARM NEON
#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
#undef STBI_NEON
#endif

#ifdef STBI_NEON
#include <arm_neon.h>
// assume GCC or Clang on ARM targets
#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
#endif

#ifndef STBI_SIMD_ALIGN
#define STBI_SIMD_ALIGN(type, name) type name
#endif

///////////////////////////////////////////////
//
//  stbi__context struct and start_xxx functions

// stbi__context structure is our basic context used by all images, so it
// contains all the IO context, plus some basic image information
typedef struct
{
   stbi__uint32 img_x, img_y;
   int img_n, img_out_n;

   stbi_io_callbacks io;
   void *io_user_data;

   int read_from_callbacks;
   int buflen;
   stbi_uc buffer_start[128];

   stbi_uc *img_buffer, *img_buffer_end;
   stbi_uc *img_buffer_original, *img_buffer_original_end;
} stbi__context;


static void stbi__refill_buffer(stbi__context *s);

// initialize a memory-decode context
static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
{
   s->io.read = NULL;
   s->read_from_callbacks = 0;
   s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
   s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
}

// initialize a callback-based context
static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
{
   s->io = *c;
   s->io_user_data = user;
   s->buflen = sizeof(s->buffer_start);
   s->read_from_callbacks = 1;
   s->img_buffer_original = s->buffer_start;
   stbi__refill_buffer(s);
   s->img_buffer_original_end = s->img_buffer_end;
}

#ifndef STBI_NO_STDIO

static int stbi__stdio_read(void *user, char *data, int size)
{
   return (int) fread(data,1,size,(FILE*) user);
}

static void stbi__stdio_skip(void *user, int n)
{
   fseek((FILE*) user, n, SEEK_CUR);
}

static int stbi__stdio_eof(void *user)
{
   return feof((FILE*) user);
}

static stbi_io_callbacks stbi__stdio_callbacks =
{
   stbi__stdio_read,
   stbi__stdio_skip,
   stbi__stdio_eof,
};

static void stbi__start_file(stbi__context *s, FILE *f)
{
   stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
}

//static void stop_file(stbi__context *s) { }

#endif // !STBI_NO_STDIO

static void stbi__rewind(stbi__context *s)
{
   // conceptually rewind SHOULD rewind to the beginning of the stream,
   // but we just rewind to the beginning of the initial buffer, because
   // we only use it after doing 'test', which only ever looks at at most 92 bytes
   s->img_buffer = s->img_buffer_original;
   s->img_buffer_end = s->img_buffer_original_end;
}

enum
{
   STBI_ORDER_RGB,
   STBI_ORDER_BGR
};

typedef struct
{
   int bits_per_channel;
   int num_channels;
   int channel_order;
} stbi__result_info;

#ifndef STBI_NO_JPEG
static int      stbi__jpeg_test(stbi__context *s);
static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PNG
static int      stbi__png_test(stbi__context *s);
static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_BMP
static int      stbi__bmp_test(stbi__context *s);
static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_TGA
static int      stbi__tga_test(stbi__context *s);
static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PSD
static int      stbi__psd_test(stbi__context *s);
static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_HDR
static int      stbi__hdr_test(stbi__context *s);
static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PIC
static int      stbi__pic_test(stbi__context *s);
static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_GIF
static int      stbi__gif_test(stbi__context *s);
static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PNM
static int      stbi__pnm_test(stbi__context *s);
static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
#endif

// this is not threadsafe
static const char *stbi__g_failure_reason;

STBIDEF const char *stbi_failure_reason(void)
{
   return stbi__g_failure_reason;
}

static int stbi__err(const char *str)
{
   stbi__g_failure_reason = str;
   return 0;
}

static void *stbi__malloc(size_t size)
{
    return STBI_MALLOC(size);
}

// stb_image uses ints pervasively, including for offset calculations.
// therefore the largest decoded image size we can support with the
// current code, even on 64-bit targets, is INT_MAX. this is not a
// significant limitation for the intended use case.
//
// we do, however, need to make sure our size calculations don't
// overflow. hence a few helper functions for size calculations that
// multiply integers together, making sure that they're non-negative
// and no overflow occurs.

// return 1 if the sum is valid, 0 on overflow.
// negative terms are considered invalid.
static int stbi__addsizes_valid(int a, int b)
{
   if (b < 0) return 0;
   // now 0 <= b <= INT_MAX, hence also
   // 0 <= INT_MAX - b <= INTMAX.
   // And "a + b <= INT_MAX" (which might overflow) is the
   // same as a <= INT_MAX - b (no overflow)
   return a <= INT_MAX - b;
}

// returns 1 if the product is valid, 0 on overflow.
// negative factors are considered invalid.
static int stbi__mul2sizes_valid(int a, int b)
{
   if (a < 0 || b < 0) return 0;
   if (b == 0) return 1; // mul-by-0 is always safe
   // portable way to check for no overflows in a*b
   return a <= INT_MAX/b;
}

// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add);
}

// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
      stbi__addsizes_valid(a*b*c, add);
}

// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
      stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add);
}

// mallocs with size overflow checking
static void *stbi__malloc_mad2(int a, int b, int add)
{
   if (!stbi__mad2sizes_valid(a, b, add)) return NULL;
   return stbi__malloc(a*b + add);
}

static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL;
   return stbi__malloc(a*b*c + add);
}

static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
   return stbi__malloc(a*b*c*d + add);
}

// stbi__err - error
// stbi__errpf - error returning pointer to float
// stbi__errpuc - error returning pointer to unsigned char

#ifdef STBI_NO_FAILURE_STRINGS
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))

STBIDEF void stbi_image_free(void *retval_from_stbi_load)
{
   STBI_FREE(retval_from_stbi_load);
}

#ifndef STBI_NO_LINEAR
static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
#endif

#ifndef STBI_NO_HDR
static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
#endif

static int stbi__vertically_flip_on_load = 0;

STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
{
    stbi__vertically_flip_on_load = flag_true_if_should_flip;
}

static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
   ri->num_channels = 0;

   #ifndef STBI_NO_JPEG
   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNG
   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_BMP
   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_GIF
   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PSD
   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
   #endif
   #ifndef STBI_NO_PIC
   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNM
   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
   #endif

   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
   }
   #endif

   #ifndef STBI_NO_TGA
   // test tga last because it's a crappy test!
   if (stbi__tga_test(s))
      return stbi__tga_load(s,x,y,comp,req_comp, ri);
   #endif

   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
}

static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
{
   int i;
   int img_len = w * h * channels;
   stbi_uc *reduced;

   reduced = (stbi_uc *) stbi__malloc(img_len);
   if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");

   for (i = 0; i < img_len; ++i)
      reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling

   STBI_FREE(orig);
   return reduced;
}

static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
{
   int i;
   int img_len = w * h * channels;
   stbi__uint16 *enlarged;

   enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
   if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");

   for (i = 0; i < img_len; ++i)
      enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff

   STBI_FREE(orig);
   return enlarged;
}

static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   stbi__result_info ri;
   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);

   if (result == NULL)
      return NULL;

   if (ri.bits_per_channel != 8) {
      STBI_ASSERT(ri.bits_per_channel == 16);
      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
      ri.bits_per_channel = 8;
   }

   // @TODO: move stbi__convert_format to here

   if (stbi__vertically_flip_on_load) {
      int w = *x, h = *y;
      int channels = req_comp ? req_comp : *comp;
      int row,col,z;
      stbi_uc *image = (stbi_uc *) result;

      // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
      for (row = 0; row < (h>>1); row++) {
         for (col = 0; col < w; col++) {
            for (z = 0; z < channels; z++) {
               stbi_uc temp = image[(row * w + col) * channels + z];
               image[(row * w + col) * channels + z] = image[((h - row - 1) * w + col) * channels + z];
               image[((h - row - 1) * w + col) * channels + z] = temp;
            }
         }
      }
   }

   return (unsigned char *) result;
}

static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   stbi__result_info ri;
   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);

   if (result == NULL)
      return NULL;

   if (ri.bits_per_channel != 16) {
      STBI_ASSERT(ri.bits_per_channel == 8);
      result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
      ri.bits_per_channel = 16;
   }

   // @TODO: move stbi__convert_format16 to here
   // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision

   if (stbi__vertically_flip_on_load) {
      int w = *x, h = *y;
      int channels = req_comp ? req_comp : *comp;
      int row,col,z;
      stbi__uint16 *image = (stbi__uint16 *) result;

      // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
      for (row = 0; row < (h>>1); row++) {
         for (col = 0; col < w; col++) {
            for (z = 0; z < channels; z++) {
               stbi__uint16 temp = image[(row * w + col) * channels + z];
               image[(row * w + col) * channels + z] = image[((h - row - 1) * w + col) * channels + z];
               image[((h - row - 1) * w + col) * channels + z] = temp;
            }
         }
      }
   }

   return (stbi__uint16 *) result;
}

#ifndef STBI_NO_HDR
static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
{
   if (stbi__vertically_flip_on_load && result != NULL) {
      int w = *x, h = *y;
      int depth = req_comp ? req_comp : *comp;
      int row,col,z;
      float temp;

      // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
      for (row = 0; row < (h>>1); row++) {
         for (col = 0; col < w; col++) {
            for (z = 0; z < depth; z++) {
               temp = result[(row * w + col) * depth + z];
               result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z];
               result[((h - row - 1) * w + col) * depth + z] = temp;
            }
         }
      }
   }
}
#endif

#ifndef STBI_NO_STDIO

static FILE *stbi__fopen(char const *filename, char const *mode)
{
   FILE *f;
#if defined(_MSC_VER) && _MSC_VER >= 1400
   if (0 != fopen_s(&f, filename, mode))
      f=0;
#else
   f = fopen(filename, mode);
#endif
   return f;
}


STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   FILE *f = stbi__fopen(filename, "rb");
   unsigned char *result;
   if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
   result = stbi_load_from_file(f,x,y,comp,req_comp);
   fclose(f);
   return result;
}

STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *result;
   stbi__context s;
   stbi__start_file(&s,f);
   result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
   if (result) {
      // need to 'unget' all the characters in the IO buffer
      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
   }
   return result;
}

STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
{
   stbi__uint16 *result;
   stbi__context s;
   stbi__start_file(&s,f);
   result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
   if (result) {
      // need to 'unget' all the characters in the IO buffer
      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
   }
   return result;
}

STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   FILE *f = stbi__fopen(filename, "rb");
   stbi__uint16 *result;
   if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
   result = stbi_load_from_file_16(f,x,y,comp,req_comp);
   fclose(f);
   return result;
}


#endif //!STBI_NO_STDIO

STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_mem(&s,buffer,len);
   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
}

STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
}

#ifndef STBI_NO_LINEAR
static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *data;
   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      stbi__result_info ri;
      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
      if (hdr_data)
         stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
      return hdr_data;
   }
   #endif
   data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
   if (data)
      return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
   return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
}

STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_mem(&s,buffer,len);
   return stbi__loadf_main(&s,x,y,comp,req_comp);
}

STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
   return stbi__loadf_main(&s,x,y,comp,req_comp);
}

#ifndef STBI_NO_STDIO
STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
{
   float *result;
   FILE *f = stbi__fopen(filename, "rb");
   if (!f) return stbi__errpf("can't fopen", "Unable to open file");
   result = stbi_loadf_from_file(f,x,y,comp,req_comp);
   fclose(f);
   return result;
}

STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_file(&s,f);
   return stbi__loadf_main(&s,x,y,comp,req_comp);
}
#endif // !STBI_NO_STDIO

#endif // !STBI_NO_LINEAR

// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is
// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always
// reports false!

STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
{
   #ifndef STBI_NO_HDR
   stbi__context s;
   stbi__start_mem(&s,buffer,len);
   return stbi__hdr_test(&s);
   #else
   STBI_NOTUSED(buffer);
   STBI_NOTUSED(len);
   return 0;
   #endif
}

#ifndef STBI_NO_STDIO
STBIDEF int      stbi_is_hdr          (char const *filename)
{
   FILE *f = stbi__fopen(filename, "rb");
   int result=0;
   if (f) {
      result = stbi_is_hdr_from_file(f);
      fclose(f);
   }
   return result;
}

STBIDEF int      stbi_is_hdr_from_file(FILE *f)
{
   #ifndef STBI_NO_HDR
   stbi__context s;
   stbi__start_file(&s,f);
   return stbi__hdr_test(&s);
   #else
   STBI_NOTUSED(f);
   return 0;
   #endif
}
#endif // !STBI_NO_STDIO

STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
{
   #ifndef STBI_NO_HDR
   stbi__context s;
   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
   return stbi__hdr_test(&s);
   #else
   STBI_NOTUSED(clbk);
   STBI_NOTUSED(user);
   return 0;
   #endif
}

#ifndef STBI_NO_LINEAR
static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;

STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
#endif

static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;

STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }


//////////////////////////////////////////////////////////////////////////////
//
// Common code used by all image loaders
//

enum
{
   STBI__SCAN_load=0,
   STBI__SCAN_type,
   STBI__SCAN_header
};

static void stbi__refill_buffer(stbi__context *s)
{
   int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
   if (n == 0) {
      // at end of file, treat same as if from memory, but need to handle case
      // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
      s->read_from_callbacks = 0;
      s->img_buffer = s->buffer_start;
      s->img_buffer_end = s->buffer_start+1;
      *s->img_buffer = 0;
   } else {
      s->img_buffer = s->buffer_start;
      s->img_buffer_end = s->buffer_start + n;
   }
}

stbi_inline static stbi_uc stbi__get8(stbi__context *s)
{
   if (s->img_buffer < s->img_buffer_end)
      return *s->img_buffer++;
   if (s->read_from_callbacks) {
      stbi__refill_buffer(s);
      return *s->img_buffer++;
   }
   return 0;
}

stbi_inline static int stbi__at_eof(stbi__context *s)
{
   if (s->io.read) {
      if (!(s->io.eof)(s->io_user_data)) return 0;
      // if feof() is true, check if buffer = end
      // special case: we've only got the special 0 character at the end
      if (s->read_from_callbacks == 0) return 1;
   }

   return s->img_buffer >= s->img_buffer_end;
}

static void stbi__skip(stbi__context *s, int n)
{
   if (n < 0) {
      s->img_buffer = s->img_buffer_end;
      return;
   }
   if (s->io.read) {
      int blen = (int) (s->img_buffer_end - s->img_buffer);
      if (blen < n) {
         s->img_buffer = s->img_buffer_end;
         (s->io.skip)(s->io_user_data, n - blen);
         return;
      }
   }
   s->img_buffer += n;
}

static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
{
   if (s->io.read) {
      int blen = (int) (s->img_buffer_end - s->img_buffer);
      if (blen < n) {
         int res, count;

         memcpy(buffer, s->img_buffer, blen);

         count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
         res = (count == (n-blen));
         s->img_buffer = s->img_buffer_end;
         return res;
      }
   }

   if (s->img_buffer+n <= s->img_buffer_end) {
      memcpy(buffer, s->img_buffer, n);
      s->img_buffer += n;
      return 1;
   } else
      return 0;
}

static int stbi__get16be(stbi__context *s)
{
   int z = stbi__get8(s);
   return (z << 8) + stbi__get8(s);
}

static stbi__uint32 stbi__get32be(stbi__context *s)
{
   stbi__uint32 z = stbi__get16be(s);
   return (z << 16) + stbi__get16be(s);
}

#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
// nothing
#else
static int stbi__get16le(stbi__context *s)
{
   int z = stbi__get8(s);
   return z + (stbi__get8(s) << 8);
}
#endif

#ifndef STBI_NO_BMP
static stbi__uint32 stbi__get32le(stbi__context *s)
{
   stbi__uint32 z = stbi__get16le(s);
   return z + (stbi__get16le(s) << 16);
}
#endif

#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings


//////////////////////////////////////////////////////////////////////////////
//
//  generic converter from built-in img_n to req_comp
//    individual types do this automatically as much as possible (e.g. jpeg
//    does all cases internally since it needs to colorspace convert anyway,
//    and it never has alpha, so very few cases ). png can automatically
//    interleave an alpha=255 channel, but falls back to this for other cases
//
//  assume data buffer is malloced, so malloc a new one and free that one
//  only failure mode is malloc failing

static stbi_uc stbi__compute_y(int r, int g, int b)
{
   return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8);
}

static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   unsigned char *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      unsigned char *src  = data + j * x * img_n   ;
      unsigned char *dest = good + j * x * req_comp;

      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                  } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255;    } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                    } break;
         default: STBI_ASSERT(0);
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}

static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
{
   return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8);
}

static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   stbi__uint16 *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2);
   if (good == NULL) {
      STBI_FREE(data);
      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      stbi__uint16 *src  = data + j * x * img_n   ;
      stbi__uint16 *dest = good + j * x * req_comp;

      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                     } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                       } break;
         default: STBI_ASSERT(0);
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}

#ifndef STBI_NO_LINEAR
static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
{
   int i,k,n;
   float *output;
   if (!data) return NULL;
   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
   if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
   // compute number of non-alpha components
   if (comp & 1) n = comp; else n = comp-1;
   for (i=0; i < x*y; ++i) {
      for (k=0; k < n; ++k) {
         output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
      }
      if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f;
   }
   STBI_FREE(data);
   return output;
}
#endif

#ifndef STBI_NO_HDR
#define stbi__float2int(x)   ((int) (x))
static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
{
   int i,k,n;
   stbi_uc *output;
   if (!data) return NULL;
   output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
   if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
   // compute number of non-alpha components
   if (comp & 1) n = comp; else n = comp-1;
   for (i=0; i < x*y; ++i) {
      for (k=0; k < n; ++k) {
         float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
         if (z < 0) z = 0;
         if (z > 255) z = 255;
         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
      }
      if (k < comp) {
         float z = data[i*comp+k] * 255 + 0.5f;
         if (z < 0) z = 0;
         if (z > 255) z = 255;
         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
      }
   }
   STBI_FREE(data);
   return output;
}
#endif

//////////////////////////////////////////////////////////////////////////////
//
//  "baseline" JPEG/JFIF decoder
//
//    simple implementation
//      - doesn't support delayed output of y-dimension
//      - simple interface (only one output format: 8-bit interleaved RGB)
//      - doesn't try to recover corrupt jpegs
//      - doesn't allow partial loading, loading multiple at once
//      - still fast on x86 (copying globals into locals doesn't help x86)
//      - allocates lots of intermediate memory (full size of all components)
//        - non-interleaved case requires this anyway
//        - allows good upsampling (see next)
//    high-quality
//      - upsampled channels are bilinearly interpolated, even across blocks
//      - quality integer IDCT derived from IJG's 'slow'
//    performance
//      - fast huffman; reasonable integer IDCT
//      - some SIMD kernels for common paths on targets with SSE2/NEON
//      - uses a lot of intermediate memory, could cache poorly

#ifndef STBI_NO_JPEG

// huffman decoding acceleration
#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache

typedef struct
{
   stbi_uc  fast[1 << FAST_BITS];
   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
   stbi__uint16 code[256];
   stbi_uc  values[256];
   stbi_uc  size[257];
   unsigned int maxcode[18];
   int    delta[17];   // old 'firstsymbol' - old 'firstcode'
} stbi__huffman;

typedef struct
{
   stbi__context *s;
   stbi__huffman huff_dc[4];
   stbi__huffman huff_ac[4];
   stbi__uint16 dequant[4][64];
   stbi__int16 fast_ac[4][1 << FAST_BITS];

// sizes for components, interleaved MCUs
   int img_h_max, img_v_max;
   int img_mcu_x, img_mcu_y;
   int img_mcu_w, img_mcu_h;

// definition of jpeg image component
   struct
   {
      int id;
      int h,v;
      int tq;
      int hd,ha;
      int dc_pred;

      int x,y,w2,h2;
      stbi_uc *data;
      void *raw_data, *raw_coeff;
      stbi_uc *linebuf;
      short   *coeff;   // progressive only
      int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
   } img_comp[4];

   stbi__uint32   code_buffer; // jpeg entropy-coded buffer
   int            code_bits;   // number of valid bits
   unsigned char  marker;      // marker seen while filling entropy buffer
   int            nomore;      // flag if we saw a marker so must stop

   int            progressive;
   int            spec_start;
   int            spec_end;
   int            succ_high;
   int            succ_low;
   int            eob_run;
   int            rgb;

   int scan_n, order[4];
   int restart_interval, todo;

// kernels
   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
} stbi__jpeg;

static int stbi__build_huffman(stbi__huffman *h, int *count)
{
   int i,j,k=0,code;
   // build size list for each symbol (from JPEG spec)
   for (i=0; i < 16; ++i)
      for (j=0; j < count[i]; ++j)
         h->size[k++] = (stbi_uc) (i+1);
   h->size[k] = 0;

   // compute actual symbols (from jpeg spec)
   code = 0;
   k = 0;
   for(j=1; j <= 16; ++j) {
      // compute delta to add to code to compute symbol id
      h->delta[j] = k - code;
      if (h->size[k] == j) {
         while (h->size[k] == j)
            h->code[k++] = (stbi__uint16) (code++);
         if (code-1 >= (1 << j)) return stbi__err("bad code lengths","Corrupt JPEG");
      }
      // compute largest code + 1 for this size, preshifted as needed later
      h->maxcode[j] = code << (16-j);
      code <<= 1;
   }
   h->maxcode[j] = 0xffffffff;

   // build non-spec acceleration table; 255 is flag for not-accelerated
   memset(h->fast, 255, 1 << FAST_BITS);
   for (i=0; i < k; ++i) {
      int s = h->size[i];
      if (s <= FAST_BITS) {
         int c = h->code[i] << (FAST_BITS-s);
         int m = 1 << (FAST_BITS-s);
         for (j=0; j < m; ++j) {
            h->fast[c+j] = (stbi_uc) i;
         }
      }
   }
   return 1;
}

// build a table that decodes both magnitude and value of small ACs in
// one go.
static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
{
   int i;
   for (i=0; i < (1 << FAST_BITS); ++i) {
      stbi_uc fast = h->fast[i];
      fast_ac[i] = 0;
      if (fast < 255) {
         int rs = h->values[fast];
         int run = (rs >> 4) & 15;
         int magbits = rs & 15;
         int len = h->size[fast];

         if (magbits && len + magbits <= FAST_BITS) {
            // magnitude code followed by receive_extend code
            int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
            int m = 1 << (magbits - 1);
            if (k < m) k += (-1 << magbits) + 1;
            // if the result is small enough, we can fit it in fast_ac table
            if (k >= -128 && k <= 127)
               fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits));
         }
      }
   }
}

static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
{
   do {
      int b = j->nomore ? 0 : stbi__get8(j->s);
      if (b == 0xff) {
         int c = stbi__get8(j->s);
         while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
         if (c != 0) {
            j->marker = (unsigned char) c;
            j->nomore = 1;
            return;
         }
      }
      j->code_buffer |= b << (24 - j->code_bits);
      j->code_bits += 8;
   } while (j->code_bits <= 24);
}

// (1 << n) - 1
static stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};

// decode a jpeg huffman value from the bitstream
stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
{
   unsigned int temp;
   int c,k;

   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);

   // look at the top FAST_BITS and determine what symbol ID it is,
   // if the code is <= FAST_BITS
   c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
   k = h->fast[c];
   if (k < 255) {
      int s = h->size[k];
      if (s > j->code_bits)
         return -1;
      j->code_buffer <<= s;
      j->code_bits -= s;
      return h->values[k];
   }

   // naive test is to shift the code_buffer down so k bits are
   // valid, then test against maxcode. To speed this up, we've
   // preshifted maxcode left so that it has (16-k) 0s at the
   // end; in other words, regardless of the number of bits, it
   // wants to be compared against something shifted to have 16;
   // that way we don't need to shift inside the loop.
   temp = j->code_buffer >> 16;
   for (k=FAST_BITS+1 ; ; ++k)
      if (temp < h->maxcode[k])
         break;
   if (k == 17) {
      // error! code not found
      j->code_bits -= 16;
      return -1;
   }

   if (k > j->code_bits)
      return -1;

   // convert the huffman code to the symbol id
   c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
   STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);

   // convert the id to a symbol
   j->code_bits -= k;
   j->code_buffer <<= k;
   return h->values[c];
}

// bias[n] = (-1<<n) + 1
static int const stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};

// combined JPEG 'receive' and JPEG 'extend', since baseline
// always extends everything it receives.
stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
{
   unsigned int k;
   int sgn;
   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);

   sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
   k = stbi_lrot(j->code_buffer, n);
   STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
   j->code_buffer = k & ~stbi__bmask[n];
   k &= stbi__bmask[n];
   j->code_bits -= n;
   return k + (stbi__jbias[n] & ~sgn);
}

// get some unsigned bits
stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
{
   unsigned int k;
   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
   k = stbi_lrot(j->code_buffer, n);
   j->code_buffer = k & ~stbi__bmask[n];
   k &= stbi__bmask[n];
   j->code_bits -= n;
   return k;
}

stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
{
   unsigned int k;
   if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
   k = j->code_buffer;
   j->code_buffer <<= 1;
   --j->code_bits;
   return k & 0x80000000;
}

// given a value that's at position X in the zigzag stream,
// where does it appear in the 8x8 matrix coded as row-major?
static stbi_uc stbi__jpeg_dezigzag[64+15] =
{
    0,  1,  8, 16,  9,  2,  3, 10,
   17, 24, 32, 25, 18, 11,  4,  5,
   12, 19, 26, 33, 40, 48, 41, 34,
   27, 20, 13,  6,  7, 14, 21, 28,
   35, 42, 49, 56, 57, 50, 43, 36,
   29, 22, 15, 23, 30, 37, 44, 51,
   58, 59, 52, 45, 38, 31, 39, 46,
   53, 60, 61, 54, 47, 55, 62, 63,
   // let corrupt input sample past end
   63, 63, 63, 63, 63, 63, 63, 63,
   63, 63, 63, 63, 63, 63, 63
};

// decode one 64-entry block--
static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
{
   int diff,dc,k;
   int t;

   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
   t = stbi__jpeg_huff_decode(j, hdc);
   if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");

   // 0 all the ac values now so we can do it 32-bits at a time
   memset(data,0,64*sizeof(data[0]));

   diff = t ? stbi__extend_receive(j, t) : 0;
   dc = j->img_comp[b].dc_pred + diff;
   j->img_comp[b].dc_pred = dc;
   data[0] = (short) (dc * dequant[0]);

   // decode AC components, see JPEG spec
   k = 1;
   do {
      unsigned int zig;
      int c,r,s;
      if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
      c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
      r = fac[c];
      if (r) { // fast-AC path
         k += (r >> 4) & 15; // run
         s = r & 15; // combined length
         j->code_buffer <<= s;
         j->code_bits -= s;
         // decode into unzigzag'd location
         zig = stbi__jpeg_dezigzag[k++];
         data[zig] = (short) ((r >> 8) * dequant[zig]);
      } else {
         int rs = stbi__jpeg_huff_decode(j, hac);
         if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
         s = rs & 15;
         r = rs >> 4;
         if (s == 0) {
            if (rs != 0xf0) break; // end block
            k += 16;
         } else {
            k += r;
            // decode into unzigzag'd location
            zig = stbi__jpeg_dezigzag[k++];
            data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
         }
      }
   } while (k < 64);
   return 1;
}

static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
{
   int diff,dc;
   int t;
   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");

   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);

   if (j->succ_high == 0) {
      // first scan for DC coefficient, must be first
      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
      t = stbi__jpeg_huff_decode(j, hdc);
      diff = t ? stbi__extend_receive(j, t) : 0;

      dc = j->img_comp[b].dc_pred + diff;
      j->img_comp[b].dc_pred = dc;
      data[0] = (short) (dc << j->succ_low);
   } else {
      // refinement scan for DC coefficient
      if (stbi__jpeg_get_bit(j))
         data[0] += (short) (1 << j->succ_low);
   }
   return 1;
}

// @OPTIMIZE: store non-zigzagged during the decode passes,
// and only de-zigzag when dequantizing
static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
{
   int k;
   if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");

   if (j->succ_high == 0) {
      int shift = j->succ_low;

      if (j->eob_run) {
         --j->eob_run;
         return 1;
      }

      k = j->spec_start;
      do {
         unsigned int zig;
         int c,r,s;
         if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
         c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
         r = fac[c];
         if (r) { // fast-AC path
            k += (r >> 4) & 15; // run
            s = r & 15; // combined length
            j->code_buffer <<= s;
            j->code_bits -= s;
            zig = stbi__jpeg_dezigzag[k++];
            data[zig] = (short) ((r >> 8) << shift);
         } else {
            int rs = stbi__jpeg_huff_decode(j, hac);
            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
            s = rs & 15;
            r = rs >> 4;
            if (s == 0) {
               if (r < 15) {
                  j->eob_run = (1 << r);
                  if (r)
                     j->eob_run += stbi__jpeg_get_bits(j, r);
                  --j->eob_run;
                  break;
               }
               k += 16;
            } else {
               k += r;
               zig = stbi__jpeg_dezigzag[k++];
               data[zig] = (short) (stbi__extend_receive(j,s) << shift);
            }
         }
      } while (k <= j->spec_end);
   } else {
      // refinement scan for these AC coefficients

      short bit = (short) (1 << j->succ_low);

      if (j->eob_run) {
         --j->eob_run;
         for (k = j->spec_start; k <= j->spec_end; ++k) {
            short *p = &data[stbi__jpeg_dezigzag[k]];
            if (*p != 0)
               if (stbi__jpeg_get_bit(j))
                  if ((*p & bit)==0) {
                     if (*p > 0)
                        *p += bit;
                     else
                        *p -= bit;
                  }
         }
      } else {
         k = j->spec_start;
         do {
            int r,s;
            int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
            s = rs & 15;
            r = rs >> 4;
            if (s == 0) {
               if (r < 15) {
                  j->eob_run = (1 << r) - 1;
                  if (r)
                     j->eob_run += stbi__jpeg_get_bits(j, r);
                  r = 64; // force end of block
               } else {
                  // r=15 s=0 should write 16 0s, so we just do
                  // a run of 15 0s and then write s (which is 0),
                  // so we don't have to do anything special here
               }
            } else {
               if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
               // sign bit
               if (stbi__jpeg_get_bit(j))
                  s = bit;
               else
                  s = -bit;
            }

            // advance by r
            while (k <= j->spec_end) {
               short *p = &data[stbi__jpeg_dezigzag[k++]];
               if (*p != 0) {
                  if (stbi__jpeg_get_bit(j))
                     if ((*p & bit)==0) {
                        if (*p > 0)
                           *p += bit;
                        else
                           *p -= bit;
                     }
               } else {
                  if (r == 0) {
                     *p = (short) s;
                     break;
                  }
                  --r;
               }
            }
         } while (k <= j->spec_end);
      }
   }
   return 1;
}

// take a -128..127 value and stbi__clamp it and convert to 0..255
stbi_inline static stbi_uc stbi__clamp(int x)
{
   // trick to use a single test to catch both cases
   if ((unsigned int) x > 255) {
      if (x < 0) return 0;
      if (x > 255) return 255;
   }
   return (stbi_uc) x;
}

#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
#define stbi__fsh(x)  ((x) << 12)

// derived from jidctint -- DCT_ISLOW
#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
   p2 = s2;                                    \
   p3 = s6;                                    \
   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
   p2 = s0;                                    \
   p3 = s4;                                    \
   t0 = stbi__fsh(p2+p3);                      \
   t1 = stbi__fsh(p2-p3);                      \
   x0 = t0+t3;                                 \
   x3 = t0-t3;                                 \
   x1 = t1+t2;                                 \
   x2 = t1-t2;                                 \
   t0 = s7;                                    \
   t1 = s5;                                    \
   t2 = s3;                                    \
   t3 = s1;                                    \
   p3 = t0+t2;                                 \
   p4 = t1+t3;                                 \
   p1 = t0+t3;                                 \
   p2 = t1+t2;                                 \
   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
   t0 = t0*stbi__f2f( 0.298631336f);           \
   t1 = t1*stbi__f2f( 2.053119869f);           \
   t2 = t2*stbi__f2f( 3.072711026f);           \
   t3 = t3*stbi__f2f( 1.501321110f);           \
   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
   p3 = p3*stbi__f2f(-1.961570560f);           \
   p4 = p4*stbi__f2f(-0.390180644f);           \
   t3 += p1+p4;                                \
   t2 += p2+p3;                                \
   t1 += p2+p4;                                \
   t0 += p1+p3;

static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
{
   int i,val[64],*v=val;
   stbi_uc *o;
   short *d = data;

   // columns
   for (i=0; i < 8; ++i,++d, ++v) {
      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
           && d[40]==0 && d[48]==0 && d[56]==0) {
         //    no shortcut                 0     seconds
         //    (1|2|3|4|5|6|7)==0          0     seconds
         //    all separate               -0.047 seconds
         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
         int dcterm = d[0] << 2;
         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
      } else {
         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
         // constants scaled things up by 1<<12; let's bring them back
         // down, but keep 2 extra bits of precision
         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
         v[ 0] = (x0+t3) >> 10;
         v[56] = (x0-t3) >> 10;
         v[ 8] = (x1+t2) >> 10;
         v[48] = (x1-t2) >> 10;
         v[16] = (x2+t1) >> 10;
         v[40] = (x2-t1) >> 10;
         v[24] = (x3+t0) >> 10;
         v[32] = (x3-t0) >> 10;
      }
   }

   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
      // no fast case since the first 1D IDCT spread components out
      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
      // constants scaled things up by 1<<12, plus we had 1<<2 from first
      // loop, plus horizontal and vertical each scale by sqrt(8) so together
      // we've got an extra 1<<3, so 1<<17 total we need to remove.
      // so we want to round that, which means adding 0.5 * 1<<17,
      // aka 65536. Also, we'll end up with -128 to 127 that we want
      // to encode as 0..255 by adding 128, so we'll add that before the shift
      x0 += 65536 + (128<<17);
      x1 += 65536 + (128<<17);
      x2 += 65536 + (128<<17);
      x3 += 65536 + (128<<17);
      // tried computing the shifts into temps, or'ing the temps to see
      // if any were out of range, but that was slower
      o[0] = stbi__clamp((x0+t3) >> 17);
      o[7] = stbi__clamp((x0-t3) >> 17);
      o[1] = stbi__clamp((x1+t2) >> 17);
      o[6] = stbi__clamp((x1-t2) >> 17);
      o[2] = stbi__clamp((x2+t1) >> 17);
      o[5] = stbi__clamp((x2-t1) >> 17);
      o[3] = stbi__clamp((x3+t0) >> 17);
      o[4] = stbi__clamp((x3-t0) >> 17);
   }
}

#ifdef STBI_SSE2
// sse2 integer IDCT. not the fastest possible implementation but it
// produces bit-identical results to the generic C version so it's
// fully "transparent".
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   // This is constructed to match our regular (generic) integer IDCT exactly.
   __m128i row0, row1, row2, row3, row4, row5, row6, row7;
   __m128i tmp;

   // dot product constant: even elems=x, odd elems=y
   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))

   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
   // out(1) = c1[even]*x + c1[odd]*y
   #define dct_rot(out0,out1, x,y,c0,c1) \
      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)

   // out = in << 12  (in 16-bit, out 32-bit)
   #define dct_widen(out, in) \
      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)

   // wide add
   #define dct_wadd(out, a, b) \
      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_add_epi32(a##_h, b##_h)

   // wide sub
   #define dct_wsub(out, a, b) \
      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)

   // butterfly a/b, add bias, then shift by "s" and pack
   #define dct_bfly32o(out0, out1, a,b,bias,s) \
      { \
         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
         dct_wadd(sum, abiased, b); \
         dct_wsub(dif, abiased, b); \
         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
      }

   // 8-bit interleave step (for transposes)
   #define dct_interleave8(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi8(a, b); \
      b = _mm_unpackhi_epi8(tmp, b)

   // 16-bit interleave step (for transposes)
   #define dct_interleave16(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi16(a, b); \
      b = _mm_unpackhi_epi16(tmp, b)

   #define dct_pass(bias,shift) \
      { \
         /* even part */ \
         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
         __m128i sum04 = _mm_add_epi16(row0, row4); \
         __m128i dif04 = _mm_sub_epi16(row0, row4); \
         dct_widen(t0e, sum04); \
         dct_widen(t1e, dif04); \
         dct_wadd(x0, t0e, t3e); \
         dct_wsub(x3, t0e, t3e); \
         dct_wadd(x1, t1e, t2e); \
         dct_wsub(x2, t1e, t2e); \
         /* odd part */ \
         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
         __m128i sum17 = _mm_add_epi16(row1, row7); \
         __m128i sum35 = _mm_add_epi16(row3, row5); \
         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
         dct_wadd(x4, y0o, y4o); \
         dct_wadd(x5, y1o, y5o); \
         dct_wadd(x6, y2o, y5o); \
         dct_wadd(x7, y3o, y4o); \
         dct_bfly32o(row0,row7, x0,x7,bias,shift); \
         dct_bfly32o(row1,row6, x1,x6,bias,shift); \
         dct_bfly32o(row2,row5, x2,x5,bias,shift); \
         dct_bfly32o(row3,row4, x3,x4,bias,shift); \
      }

   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));

   // rounding biases in column/row passes, see stbi__idct_block for explanation.
   __m128i bias_0 = _mm_set1_epi32(512);
   __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));

   // load
   row0 = _mm_load_si128((const __m128i *) (data + 0*8));
   row1 = _mm_load_si128((const __m128i *) (data + 1*8));
   row2 = _mm_load_si128((const __m128i *) (data + 2*8));
   row3 = _mm_load_si128((const __m128i *) (data + 3*8));
   row4 = _mm_load_si128((const __m128i *) (data + 4*8));
   row5 = _mm_load_si128((const __m128i *) (data + 5*8));
   row6 = _mm_load_si128((const __m128i *) (data + 6*8));
   row7 = _mm_load_si128((const __m128i *) (data + 7*8));

   // column pass
   dct_pass(bias_0, 10);

   {
      // 16bit 8x8 transpose pass 1
      dct_interleave16(row0, row4);
      dct_interleave16(row1, row5);
      dct_interleave16(row2, row6);
      dct_interleave16(row3, row7);

      // transpose pass 2
      dct_interleave16(row0, row2);
      dct_interleave16(row1, row3);
      dct_interleave16(row4, row6);
      dct_interleave16(row5, row7);

      // transpose pass 3
      dct_interleave16(row0, row1);
      dct_interleave16(row2, row3);
      dct_interleave16(row4, row5);
      dct_interleave16(row6, row7);
   }

   // row pass
   dct_pass(bias_1, 17);

   {
      // pack
      __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
      __m128i p1 = _mm_packus_epi16(row2, row3);
      __m128i p2 = _mm_packus_epi16(row4, row5);
      __m128i p3 = _mm_packus_epi16(row6, row7);

      // 8bit 8x8 transpose pass 1
      dct_interleave8(p0, p2); // a0e0a1e1...
      dct_interleave8(p1, p3); // c0g0c1g1...

      // transpose pass 2
      dct_interleave8(p0, p1); // a0c0e0g0...
      dct_interleave8(p2, p3); // b0d0f0h0...

      // transpose pass 3
      dct_interleave8(p0, p2); // a0b0c0d0...
      dct_interleave8(p1, p3); // a4b4c4d4...

      // store
      _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
   }

#undef dct_const
#undef dct_rot
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_interleave8
#undef dct_interleave16
#undef dct_pass
}

#endif // STBI_SSE2

#ifdef STBI_NEON

// NEON integer IDCT. should produce bit-identical
// results to the generic C version.
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;

   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));

#define dct_long_mul(out, inq, coeff) \
   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)

#define dct_long_mac(out, acc, inq, coeff) \
   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)

#define dct_widen(out, inq) \
   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)

// wide add
#define dct_wadd(out, a, b) \
   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)

// wide sub
#define dct_wsub(out, a, b) \
   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)

// butterfly a/b, then shift using "shiftop" by "s" and pack
#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
   { \
      dct_wadd(sum, a, b); \
      dct_wsub(dif, a, b); \
      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
   }

#define dct_pass(shiftop, shift) \
   { \
      /* even part */ \
      int16x8_t sum26 = vaddq_s16(row2, row6); \
      dct_long_mul(p1e, sum26, rot0_0); \
      dct_long_mac(t2e, p1e, row6, rot0_1); \
      dct_long_mac(t3e, p1e, row2, rot0_2); \
      int16x8_t sum04 = vaddq_s16(row0, row4); \
      int16x8_t dif04 = vsubq_s16(row0, row4); \
      dct_widen(t0e, sum04); \
      dct_widen(t1e, dif04); \
      dct_wadd(x0, t0e, t3e); \
      dct_wsub(x3, t0e, t3e); \
      dct_wadd(x1, t1e, t2e); \
      dct_wsub(x2, t1e, t2e); \
      /* odd part */ \
      int16x8_t sum15 = vaddq_s16(row1, row5); \
      int16x8_t sum17 = vaddq_s16(row1, row7); \
      int16x8_t sum35 = vaddq_s16(row3, row5); \
      int16x8_t sum37 = vaddq_s16(row3, row7); \
      int16x8_t sumodd = vaddq_s16(sum17, sum35); \
      dct_long_mul(p5o, sumodd, rot1_0); \
      dct_long_mac(p1o, p5o, sum17, rot1_1); \
      dct_long_mac(p2o, p5o, sum35, rot1_2); \
      dct_long_mul(p3o, sum37, rot2_0); \
      dct_long_mul(p4o, sum15, rot2_1); \
      dct_wadd(sump13o, p1o, p3o); \
      dct_wadd(sump24o, p2o, p4o); \
      dct_wadd(sump23o, p2o, p3o); \
      dct_wadd(sump14o, p1o, p4o); \
      dct_long_mac(x4, sump13o, row7, rot3_0); \
      dct_long_mac(x5, sump24o, row5, rot3_1); \
      dct_long_mac(x6, sump23o, row3, rot3_2); \
      dct_long_mac(x7, sump14o, row1, rot3_3); \
      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
      dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
   }

   // load
   row0 = vld1q_s16(data + 0*8);
   row1 = vld1q_s16(data + 1*8);
   row2 = vld1q_s16(data + 2*8);
   row3 = vld1q_s16(data + 3*8);
   row4 = vld1q_s16(data + 4*8);
   row5 = vld1q_s16(data + 5*8);
   row6 = vld1q_s16(data + 6*8);
   row7 = vld1q_s16(data + 7*8);

   // add DC bias
   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));

   // column pass
   dct_pass(vrshrn_n_s32, 10);

   // 16bit 8x8 transpose
   {
// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
// whether compilers actually get this is another story, sadly.
#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }

      // pass 1
      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
      dct_trn16(row2, row3);
      dct_trn16(row4, row5);
      dct_trn16(row6, row7);

      // pass 2
      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
      dct_trn32(row1, row3);
      dct_trn32(row4, row6);
      dct_trn32(row5, row7);

      // pass 3
      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
      dct_trn64(row1, row5);
      dct_trn64(row2, row6);
      dct_trn64(row3, row7);

#undef dct_trn16
#undef dct_trn32
#undef dct_trn64
   }

   // row pass
   // vrshrn_n_s32 only supports shifts up to 16, we need
   // 17. so do a non-rounding shift of 16 first then follow
   // up with a rounding shift by 1.
   dct_pass(vshrn_n_s32, 16);

   {
      // pack and round
      uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
      uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
      uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
      uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
      uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
      uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
      uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
      uint8x8_t p7 = vqrshrun_n_s16(row7, 1);

      // again, these can translate into one instruction, but often don't.
#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }

      // sadly can't use interleaved stores here since we only write
      // 8 bytes to each scan line!

      // 8x8 8-bit transpose pass 1
      dct_trn8_8(p0, p1);
      dct_trn8_8(p2, p3);
      dct_trn8_8(p4, p5);
      dct_trn8_8(p6, p7);

      // pass 2
      dct_trn8_16(p0, p2);
      dct_trn8_16(p1, p3);
      dct_trn8_16(p4, p6);
      dct_trn8_16(p5, p7);

      // pass 3
      dct_trn8_32(p0, p4);
      dct_trn8_32(p1, p5);
      dct_trn8_32(p2, p6);
      dct_trn8_32(p3, p7);

      // store
      vst1_u8(out, p0); out += out_stride;
      vst1_u8(out, p1); out += out_stride;
      vst1_u8(out, p2); out += out_stride;
      vst1_u8(out, p3); out += out_stride;
      vst1_u8(out, p4); out += out_stride;
      vst1_u8(out, p5); out += out_stride;
      vst1_u8(out, p6); out += out_stride;
      vst1_u8(out, p7);

#undef dct_trn8_8
#undef dct_trn8_16
#undef dct_trn8_32
   }

#undef dct_long_mul
#undef dct_long_mac
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_pass
}

#endif // STBI_NEON

#define STBI__MARKER_none  0xff
// if there's a pending marker from the entropy stream, return that
// otherwise, fetch from the stream and get a marker. if there's no
// marker, return 0xff, which is never a valid marker value
static stbi_uc stbi__get_marker(stbi__jpeg *j)
{
   stbi_uc x;
   if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
   x = stbi__get8(j->s);
   if (x != 0xff) return STBI__MARKER_none;
   while (x == 0xff)
      x = stbi__get8(j->s); // consume repeated 0xff fill bytes
   return x;
}

// in each scan, we'll have scan_n components, and the order
// of the components is specified by order[]
#define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)

// after a restart interval, stbi__jpeg_reset the entropy decoder and
// the dc prediction
static void stbi__jpeg_reset(stbi__jpeg *j)
{
   j->code_bits = 0;
   j->code_buffer = 0;
   j->nomore = 0;
   j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0;
   j->marker = STBI__MARKER_none;
   j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
   j->eob_run = 0;
   // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
   // since we don't even allow 1<<30 pixels
}

static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
{
   stbi__jpeg_reset(z);
   if (!z->progressive) {
      if (z->scan_n == 1) {
         int i,j;
         STBI_SIMD_ALIGN(short, data[64]);
         int n = z->order[0];
         // non-interleaved data, we just need to process one block at a time,
         // in trivial scanline order
         // number of blocks to do just depends on how many actual "pixels" this
         // component has, independent of interleaved MCU blocking and such
         int w = (z->img_comp[n].x+7) >> 3;
         int h = (z->img_comp[n].y+7) >> 3;
         for (j=0; j < h; ++j) {
            for (i=0; i < w; ++i) {
               int ha = z->img_comp[n].ha;
               if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
               // every data block is an MCU, so countdown the restart interval
               if (--z->todo <= 0) {
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  // if it's NOT a restart, then just bail, so we get corrupt data
                  // rather than no data
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      } else { // interleaved
         int i,j,k,x,y;
         STBI_SIMD_ALIGN(short, data[64]);
         for (j=0; j < z->img_mcu_y; ++j) {
            for (i=0; i < z->img_mcu_x; ++i) {
               // scan an interleaved mcu... process scan_n components in order
               for (k=0; k < z->scan_n; ++k) {
                  int n = z->order[k];
                  // scan out an mcu's worth of this component; that's just determined
                  // by the basic H and V specified for the component
                  for (y=0; y < z->img_comp[n].v; ++y) {
                     for (x=0; x < z->img_comp[n].h; ++x) {
                        int x2 = (i*z->img_comp[n].h + x)*8;
                        int y2 = (j*z->img_comp[n].v + y)*8;
                        int ha = z->img_comp[n].ha;
                        if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
                        z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
                     }
                  }
               }
               // after all interleaved components, that's an interleaved MCU,
               // so now count down the restart interval
               if (--z->todo <= 0) {
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      }
   } else {
      if (z->scan_n == 1) {
         int i,j;
         int n = z->order[0];
         // non-interleaved data, we just need to process one block at a time,
         // in trivial scanline order
         // number of blocks to do just depends on how many actual "pixels" this
         // component has, independent of interleaved MCU blocking and such
         int w = (z->img_comp[n].x+7) >> 3;
         int h = (z->img_comp[n].y+7) >> 3;
         for (j=0; j < h; ++j) {
            for (i=0; i < w; ++i) {
               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
               if (z->spec_start == 0) {
                  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
                     return 0;
               } else {
                  int ha = z->img_comp[n].ha;
                  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
                     return 0;
               }
               // every data block is an MCU, so countdown the restart interval
               if (--z->todo <= 0) {
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      } else { // interleaved
         int i,j,k,x,y;
         for (j=0; j < z->img_mcu_y; ++j) {
            for (i=0; i < z->img_mcu_x; ++i) {
               // scan an interleaved mcu... process scan_n components in order
               for (k=0; k < z->scan_n; ++k) {
                  int n = z->order[k];
                  // scan out an mcu's worth of this component; that's just determined
                  // by the basic H and V specified for the component
                  for (y=0; y < z->img_comp[n].v; ++y) {
                     for (x=0; x < z->img_comp[n].h; ++x) {
                        int x2 = (i*z->img_comp[n].h + x);
                        int y2 = (j*z->img_comp[n].v + y);
                        short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
                        if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
                           return 0;
                     }
                  }
               }
               // after all interleaved components, that's an interleaved MCU,
               // so now count down the restart interval
               if (--z->todo <= 0) {
                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
                  if (!STBI__RESTART(z->marker)) return 1;
                  stbi__jpeg_reset(z);
               }
            }
         }
         return 1;
      }
   }
}

static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
{
   int i;
   for (i=0; i < 64; ++i)
      data[i] *= dequant[i];
}

static void stbi__jpeg_finish(stbi__jpeg *z)
{
   if (z->progressive) {
      // dequantize and idct the data
      int i,j,n;
      for (n=0; n < z->s->img_n; ++n) {
         int w = (z->img_comp[n].x+7) >> 3;
         int h = (z->img_comp[n].y+7) >> 3;
         for (j=0; j < h; ++j) {
            for (i=0; i < w; ++i) {
               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
               stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
            }
         }
      }
   }
}

static int stbi__process_marker(stbi__jpeg *z, int m)
{
   int L;
   switch (m) {
      case STBI__MARKER_none: // no marker found
         return stbi__err("expected marker","Corrupt JPEG");

      case 0xDD: // DRI - specify restart interval
         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
         z->restart_interval = stbi__get16be(z->s);
         return 1;

      case 0xDB: // DQT - define quantization table
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            int q = stbi__get8(z->s);
            int p = q >> 4, sixteen = (p != 0);
            int t = q & 15,i;
            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");

            for (i=0; i < 64; ++i)
               z->dequant[t][stbi__jpeg_dezigzag[i]] = sixteen ? stbi__get16be(z->s) : stbi__get8(z->s);
            L -= (sixteen ? 129 : 65);
         }
         return L==0;

      case 0xC4: // DHT - define huffman table
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            stbi_uc *v;
            int sizes[16],i,n=0;
            int q = stbi__get8(z->s);
            int tc = q >> 4;
            int th = q & 15;
            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
            for (i=0; i < 16; ++i) {
               sizes[i] = stbi__get8(z->s);
               n += sizes[i];
            }
            L -= 17;
            if (tc == 0) {
               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
               v = z->huff_dc[th].values;
            } else {
               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
               v = z->huff_ac[th].values;
            }
            for (i=0; i < n; ++i)
               v[i] = stbi__get8(z->s);
            if (tc != 0)
               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
            L -= n;
         }
         return L==0;
   }
   // check for comment block or APP blocks
   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
      stbi__skip(z->s, stbi__get16be(z->s)-2);
      return 1;
   }
   return stbi__err("unknown marker","Corrupt JPEG");
}

// after we see SOS
static int stbi__process_scan_header(stbi__jpeg *z)
{
   int i;
   int Ls = stbi__get16be(z->s);
   z->scan_n = stbi__get8(z->s);
   if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
   if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
   for (i=0; i < z->scan_n; ++i) {
      int id = stbi__get8(z->s), which;
      int q = stbi__get8(z->s);
      for (which = 0; which < z->s->img_n; ++which)
         if (z->img_comp[which].id == id)
            break;
      if (which == z->s->img_n) return 0; // no match
      z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
      z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
      z->order[i] = which;
   }

   {
      int aa;
      z->spec_start = stbi__get8(z->s);
      z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
      aa = stbi__get8(z->s);
      z->succ_high = (aa >> 4);
      z->succ_low  = (aa & 15);
      if (z->progressive) {
         if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
            return stbi__err("bad SOS", "Corrupt JPEG");
      } else {
         if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
         if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
         z->spec_end = 63;
      }
   }

   return 1;
}

static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
{
   int i;
   for (i=0; i < ncomp; ++i) {
      if (z->img_comp[i].raw_data) {
         STBI_FREE(z->img_comp[i].raw_data);
         z->img_comp[i].raw_data = NULL;
         z->img_comp[i].data = NULL;
      }
      if (z->img_comp[i].raw_coeff) {
         STBI_FREE(z->img_comp[i].raw_coeff);
         z->img_comp[i].raw_coeff = 0;
         z->img_comp[i].coeff = 0;
      }
      if (z->img_comp[i].linebuf) {
         STBI_FREE(z->img_comp[i].linebuf);
         z->img_comp[i].linebuf = NULL;
      }
   }
   return why;
}

static int stbi__process_frame_header(stbi__jpeg *z, int scan)
{
   stbi__context *s = z->s;
   int Lf,p,i,q, h_max=1,v_max=1,c;
   Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
   p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
   s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
   s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
   c = stbi__get8(s);
   if (c != 3 && c != 1) return stbi__err("bad component count","Corrupt JPEG");    // JFIF requires
   s->img_n = c;
   for (i=0; i < c; ++i) {
      z->img_comp[i].data = NULL;
      z->img_comp[i].linebuf = NULL;
   }

   if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");

   z->rgb = 0;
   for (i=0; i < s->img_n; ++i) {
      static unsigned char rgb[3] = { 'R', 'G', 'B' };
      z->img_comp[i].id = stbi__get8(s);
      if (z->img_comp[i].id == rgb[i])
         ++z->rgb;
      q = stbi__get8(s);
      z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
      z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
      z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
   }

   if (scan != STBI__SCAN_load) return 1;

   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");

   for (i=0; i < s->img_n; ++i) {
      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
   }

   // compute interleaved mcu info
   z->img_h_max = h_max;
   z->img_v_max = v_max;
   z->img_mcu_w = h_max * 8;
   z->img_mcu_h = v_max * 8;
   // these sizes can't be more than 17 bits
   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;

   for (i=0; i < s->img_n; ++i) {
      // number of effective pixels (e.g. for non-interleaved MCU)
      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
      // to simplify generation, we'll allocate enough memory to decode
      // the bogus oversized data from using interleaved MCUs and their
      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
      // discard the extra data until colorspace conversion
      //
      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
      // so these muls can't overflow with 32-bit ints (which we require)
      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
      z->img_comp[i].coeff = 0;
      z->img_comp[i].raw_coeff = 0;
      z->img_comp[i].linebuf = NULL;
      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
      if (z->img_comp[i].raw_data == NULL)
         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
      // align blocks for idct using mmx/sse
      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
      if (z->progressive) {
         // w2, h2 are multiples of 8 (see above)
         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
         if (z->img_comp[i].raw_coeff == NULL)
            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
         z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
      }
   }

   return 1;
}

// use comparisons since in some cases we handle more than one case (e.g. SOF)
#define stbi__DNL(x)         ((x) == 0xdc)
#define stbi__SOI(x)         ((x) == 0xd8)
#define stbi__EOI(x)         ((x) == 0xd9)
#define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
#define stbi__SOS(x)         ((x) == 0xda)

#define stbi__SOF_progressive(x)   ((x) == 0xc2)

static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
{
   int m;
   z->marker = STBI__MARKER_none; // initialize cached marker to empty
   m = stbi__get_marker(z);
   if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
   if (scan == STBI__SCAN_type) return 1;
   m = stbi__get_marker(z);
   while (!stbi__SOF(m)) {
      if (!stbi__process_marker(z,m)) return 0;
      m = stbi__get_marker(z);
      while (m == STBI__MARKER_none) {
         // some files have extra padding after their blocks, so ok, we'll scan
         if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
         m = stbi__get_marker(z);
      }
   }
   z->progressive = stbi__SOF_progressive(m);
   if (!stbi__process_frame_header(z, scan)) return 0;
   return 1;
}

// decode image to YCbCr format
static int stbi__decode_jpeg_image(stbi__jpeg *j)
{
   int m;
   for (m = 0; m < 4; m++) {
      j->img_comp[m].raw_data = NULL;
      j->img_comp[m].raw_coeff = NULL;
   }
   j->restart_interval = 0;
   if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
   m = stbi__get_marker(j);
   while (!stbi__EOI(m)) {
      if (stbi__SOS(m)) {
         if (!stbi__process_scan_header(j)) return 0;
         if (!stbi__parse_entropy_coded_data(j)) return 0;
         if (j->marker == STBI__MARKER_none ) {
            // handle 0s at the end of image data from IP Kamera 9060
            while (!stbi__at_eof(j->s)) {
               int x = stbi__get8(j->s);
               if (x == 255) {
                  j->marker = stbi__get8(j->s);
                  break;
               }
            }
            // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
         }
      } else if (stbi__DNL(m)) {
         int Ld = stbi__get16be(j->s);
         stbi__uint32 NL = stbi__get16be(j->s);
         if (Ld != 4) stbi__err("bad DNL len", "Corrupt JPEG");
         if (NL != j->s->img_y) stbi__err("bad DNL height", "Corrupt JPEG");
      } else {
         if (!stbi__process_marker(j, m)) return 0;
      }
      m = stbi__get_marker(j);
   }
   if (j->progressive)
      stbi__jpeg_finish(j);
   return 1;
}

// static jfif-centered resampling (across block boundaries)

typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
                                    int w, int hs);

#define stbi__div4(x) ((stbi_uc) ((x) >> 2))

static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   STBI_NOTUSED(out);
   STBI_NOTUSED(in_far);
   STBI_NOTUSED(w);
   STBI_NOTUSED(hs);
   return in_near;
}

static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate two samples vertically for every one in input
   int i;
   STBI_NOTUSED(hs);
   for (i=0; i < w; ++i)
      out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2);
   return out;
}

static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate two samples horizontally for every one in input
   int i;
   stbi_uc *input = in_near;

   if (w == 1) {
      // if only one sample, can't do any interpolation
      out[0] = out[1] = input[0];
      return out;
   }

   out[0] = input[0];
   out[1] = stbi__div4(input[0]*3 + input[1] + 2);
   for (i=1; i < w-1; ++i) {
      int n = 3*input[i]+2;
      out[i*2+0] = stbi__div4(n+input[i-1]);
      out[i*2+1] = stbi__div4(n+input[i+1]);
   }
   out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2);
   out[i*2+1] = input[w-1];

   STBI_NOTUSED(in_far);
   STBI_NOTUSED(hs);

   return out;
}

#define stbi__div16(x) ((stbi_uc) ((x) >> 4))

static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate 2x2 samples for every one in input
   int i,t0,t1;
   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   t1 = 3*in_near[0] + in_far[0];
   out[0] = stbi__div4(t1+2);
   for (i=1; i < w; ++i) {
      t0 = t1;
      t1 = 3*in_near[i]+in_far[i];
      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
   }
   out[w*2-1] = stbi__div4(t1+2);

   STBI_NOTUSED(hs);

   return out;
}

#if defined(STBI_SSE2) || defined(STBI_NEON)
static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate 2x2 samples for every one in input
   int i=0,t0,t1;

   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   t1 = 3*in_near[0] + in_far[0];
   // process groups of 8 pixels for as long as we can.
   // note we can't handle the last pixel in a row in this loop
   // because we need to handle the filter boundary conditions.
   for (; i < ((w-1) & ~7); i += 8) {
#if defined(STBI_SSE2)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      __m128i zero  = _mm_setzero_si128();
      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
      __m128i farw  = _mm_unpacklo_epi8(farb, zero);
      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
      __m128i diff  = _mm_sub_epi16(farw, nearw);
      __m128i nears = _mm_slli_epi16(nearw, 2);
      __m128i curr  = _mm_add_epi16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      __m128i prv0 = _mm_slli_si128(curr, 2);
      __m128i nxt0 = _mm_srli_si128(curr, 2);
      __m128i prev = _mm_insert_epi16(prv0, t1, 0);
      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      __m128i bias  = _mm_set1_epi16(8);
      __m128i curs = _mm_slli_epi16(curr, 2);
      __m128i prvd = _mm_sub_epi16(prev, curr);
      __m128i nxtd = _mm_sub_epi16(next, curr);
      __m128i curb = _mm_add_epi16(curs, bias);
      __m128i even = _mm_add_epi16(prvd, curb);
      __m128i odd  = _mm_add_epi16(nxtd, curb);

      // interleave even and odd pixels, then undo scaling.
      __m128i int0 = _mm_unpacklo_epi16(even, odd);
      __m128i int1 = _mm_unpackhi_epi16(even, odd);
      __m128i de0  = _mm_srli_epi16(int0, 4);
      __m128i de1  = _mm_srli_epi16(int1, 4);

      // pack and write output
      __m128i outv = _mm_packus_epi16(de0, de1);
      _mm_storeu_si128((__m128i *) (out + i*2), outv);
#elif defined(STBI_NEON)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      uint8x8_t farb  = vld1_u8(in_far + i);
      uint8x8_t nearb = vld1_u8(in_near + i);
      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
      int16x8_t curr  = vaddq_s16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      int16x8_t prv0 = vextq_s16(curr, curr, 7);
      int16x8_t nxt0 = vextq_s16(curr, curr, 1);
      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      int16x8_t curs = vshlq_n_s16(curr, 2);
      int16x8_t prvd = vsubq_s16(prev, curr);
      int16x8_t nxtd = vsubq_s16(next, curr);
      int16x8_t even = vaddq_s16(curs, prvd);
      int16x8_t odd  = vaddq_s16(curs, nxtd);

      // undo scaling and round, then store with even/odd phases interleaved
      uint8x8x2_t o;
      o.val[0] = vqrshrun_n_s16(even, 4);
      o.val[1] = vqrshrun_n_s16(odd,  4);
      vst2_u8(out + i*2, o);
#endif

      // "previous" value for next iter
      t1 = 3*in_near[i+7] + in_far[i+7];
   }

   t0 = t1;
   t1 = 3*in_near[i] + in_far[i];
   out[i*2] = stbi__div16(3*t1 + t0 + 8);

   for (++i; i < w; ++i) {
      t0 = t1;
      t1 = 3*in_near[i]+in_far[i];
      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
   }
   out[w*2-1] = stbi__div4(t1+2);

   STBI_NOTUSED(hs);

   return out;
}
#endif

static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // resample with nearest-neighbor
   int i,j;
   STBI_NOTUSED(in_far);
   for (i=0; i < w; ++i)
      for (j=0; j < hs; ++j)
         out[i*hs+j] = in_near[i];
   return out;
}

// this is a reduced-precision calculation of YCbCr-to-RGB introduced
// to make sure the code produces the same results in both SIMD and scalar
#define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
{
   int i;
   for (i=0; i < count; ++i) {
      int y_fixed = (y[i] << 20) + (1<<19); // rounding
      int r,g,b;
      int cr = pcr[i] - 128;
      int cb = pcb[i] - 128;
      r = y_fixed +  cr* stbi__float2fixed(1.40200f);
      g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      b = y_fixed                                     +   cb* stbi__float2fixed(1.77200f);
      r >>= 20;
      g >>= 20;
      b >>= 20;
      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
      out += step;
   }
}

#if defined(STBI_SSE2) || defined(STBI_NEON)
static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
{
   int i = 0;

#ifdef STBI_SSE2
   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
   // it's useful in practice (you wouldn't use it for textures, for example).
   // so just accelerate step == 4 case.
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      __m128i signflip  = _mm_set1_epi8(-0x80);
      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
      __m128i xw = _mm_set1_epi16(255); // alpha channel

      for (; i+7 < count; i += 8) {
         // load
         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128

         // unpack to short (and left-shift cr, cb by 8)
         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);

         // color transform
         __m128i yws = _mm_srli_epi16(yw, 4);
         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
         __m128i rws = _mm_add_epi16(cr0, yws);
         __m128i gwt = _mm_add_epi16(cb0, yws);
         __m128i bws = _mm_add_epi16(yws, cb1);
         __m128i gws = _mm_add_epi16(gwt, cr1);

         // descale
         __m128i rw = _mm_srai_epi16(rws, 4);
         __m128i bw = _mm_srai_epi16(bws, 4);
         __m128i gw = _mm_srai_epi16(gws, 4);

         // back to byte, set up for transpose
         __m128i brb = _mm_packus_epi16(rw, bw);
         __m128i gxb = _mm_packus_epi16(gw, xw);

         // transpose to interleave channels
         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
         __m128i o1 = _mm_unpackhi_epi16(t0, t1);

         // store
         _mm_storeu_si128((__m128i *) (out + 0), o0);
         _mm_storeu_si128((__m128i *) (out + 16), o1);
         out += 32;
      }
   }
#endif

#ifdef STBI_NEON
   // in this version, step=3 support would be easy to add. but is there demand?
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      uint8x8_t signflip = vdup_n_u8(0x80);
      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));

      for (; i+7 < count; i += 8) {
         // load
         uint8x8_t y_bytes  = vld1_u8(y + i);
         uint8x8_t cr_bytes = vld1_u8(pcr + i);
         uint8x8_t cb_bytes = vld1_u8(pcb + i);
         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));

         // expand to s16
         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
         int16x8_t crw = vshll_n_s8(cr_biased, 7);
         int16x8_t cbw = vshll_n_s8(cb_biased, 7);

         // color transform
         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
         int16x8_t rws = vaddq_s16(yws, cr0);
         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
         int16x8_t bws = vaddq_s16(yws, cb1);

         // undo scaling, round, convert to byte
         uint8x8x4_t o;
         o.val[0] = vqrshrun_n_s16(rws, 4);
         o.val[1] = vqrshrun_n_s16(gws, 4);
         o.val[2] = vqrshrun_n_s16(bws, 4);
         o.val[3] = vdup_n_u8(255);

         // store, interleaving r/g/b/a
         vst4_u8(out, o);
         out += 8*4;
      }
   }
#endif

   for (; i < count; ++i) {
      int y_fixed = (y[i] << 20) + (1<<19); // rounding
      int r,g,b;
      int cr = pcr[i] - 128;
      int cb = pcb[i] - 128;
      r = y_fixed + cr* stbi__float2fixed(1.40200f);
      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      b = y_fixed                                   +   cb* stbi__float2fixed(1.77200f);
      r >>= 20;
      g >>= 20;
      b >>= 20;
      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
      out += step;
   }
}
#endif

// set up the kernels
static void stbi__setup_jpeg(stbi__jpeg *j)
{
   j->idct_block_kernel = stbi__idct_block;
   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;

#ifdef STBI_SSE2
   if (stbi__sse2_available()) {
      j->idct_block_kernel = stbi__idct_simd;
      j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
      j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
   }
#endif

#ifdef STBI_NEON
   j->idct_block_kernel = stbi__idct_simd;
   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
#endif
}

// clean up the temporary component buffers
static void stbi__cleanup_jpeg(stbi__jpeg *j)
{
   stbi__free_jpeg_components(j, j->s->img_n, 0);
}

typedef struct
{
   resample_row_func resample;
   stbi_uc *line0,*line1;
   int hs,vs;   // expansion factor in each axis
   int w_lores; // horizontal pixels pre-expansion
   int ystep;   // how far through vertical expansion we are
   int ypos;    // which pre-expansion row we're on
} stbi__resample;

static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
{
   int n, decode_n;
   z->s->img_n = 0; // make stbi__cleanup_jpeg safe

   // validate req_comp
   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");

   // load a jpeg image from whichever source, but leave in YCbCr format
   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }

   // determine actual number of components to generate
   n = req_comp ? req_comp : z->s->img_n;

   if (z->s->img_n == 3 && n < 3 && z->rgb != 3)
      decode_n = 1;
   else
      decode_n = z->s->img_n;

   // resample and color-convert
   {
      int k;
      unsigned int i,j;
      stbi_uc *output;
      stbi_uc *coutput[4];

      stbi__resample res_comp[4];

      for (k=0; k < decode_n; ++k) {
         stbi__resample *r = &res_comp[k];

         // allocate line buffer big enough for upsampling off the edges
         // with upsample factor of 4
         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

         r->hs      = z->img_h_max / z->img_comp[k].h;
         r->vs      = z->img_v_max / z->img_comp[k].v;
         r->ystep   = r->vs >> 1;
         r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
         r->ypos    = 0;
         r->line0   = r->line1 = z->img_comp[k].data;

         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
         else                               r->resample = stbi__resample_row_generic;
      }

      // can't error after this so, this is safe
      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

      // now go ahead and resample
      for (j=0; j < z->s->img_y; ++j) {
         stbi_uc *out = output + n * z->s->img_x * j;
         for (k=0; k < decode_n; ++k) {
            stbi__resample *r = &res_comp[k];
            int y_bot = r->ystep >= (r->vs >> 1);
            coutput[k] = r->resample(z->img_comp[k].linebuf,
                                     y_bot ? r->line1 : r->line0,
                                     y_bot ? r->line0 : r->line1,
                                     r->w_lores, r->hs);
            if (++r->ystep >= r->vs) {
               r->ystep = 0;
               r->line0 = r->line1;
               if (++r->ypos < z->img_comp[k].y)
                  r->line1 += z->img_comp[k].w2;
            }
         }
         if (n >= 3) {
            stbi_uc *y = coutput[0];
            if (z->s->img_n == 3) {
               if (z->rgb == 3) {
                  for (i=0; i < z->s->img_x; ++i) {
                     out[0] = y[i];
                     out[1] = coutput[1][i];
                     out[2] = coutput[2][i];
                     out[3] = 255;
                     out += n;
                  }
               } else {
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = out[1] = out[2] = y[i];
                  out[3] = 255; // not used if n==3
                  out += n;
               }
         } else {
            if (z->rgb == 3) {
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i)
                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
               else {
                  for (i=0; i < z->s->img_x; ++i, out += 2) {
                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
                     out[1] = 255;
                  }
               }
            } else {
               stbi_uc *y = coutput[0];
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
               else
                  for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
            }
         }
      }
      stbi__cleanup_jpeg(z);
      *out_x = z->s->img_x;
      *out_y = z->s->img_y;
      if (comp) *comp  = z->s->img_n; // report original components, not output
      return output;
   }
}

static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   unsigned char* result;
   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
   j->s = s;
   stbi__setup_jpeg(j);
   result = load_jpeg_image(j, x,y,comp,req_comp);
   STBI_FREE(j);
   return result;
}

static int stbi__jpeg_test(stbi__context *s)
{
   int r;
   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
   j->s = s;
   stbi__setup_jpeg(j);
   r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
   stbi__rewind(s);
   STBI_FREE(j);
   return r;
}

static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
{
   if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
      stbi__rewind( j->s );
      return 0;
   }
   if (x) *x = j->s->img_x;
   if (y) *y = j->s->img_y;
   if (comp) *comp = j->s->img_n;
   return 1;
}

static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
{
   int result;
   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
   j->s = s;
   result = stbi__jpeg_info_raw(j, x, y, comp);
   STBI_FREE(j);
   return result;
}
#endif

// public domain zlib decode    v0.2  Sean Barrett 2006-11-18
//    simple implementation
//      - all input must be provided in an upfront buffer
//      - all output is written to a single output buffer (can malloc/realloc)
//    performance
//      - fast huffman

#ifndef STBI_NO_ZLIB

// fast-way is faster to check than jpeg huffman, but slow way is slower
#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)

// zlib-style huffman encoding
// (jpegs packs from left, zlib from right, so can't share code)
typedef struct
{
   stbi__uint16 fast[1 << STBI__ZFAST_BITS];
   stbi__uint16 firstcode[16];
   int maxcode[17];
   stbi__uint16 firstsymbol[16];
   stbi_uc  size[288];
   stbi__uint16 value[288];
} stbi__zhuffman;

stbi_inline static int stbi__bitreverse16(int n)
{
  n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
  n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
  n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
  n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
  return n;
}

stbi_inline static int stbi__bit_reverse(int v, int bits)
{
   STBI_ASSERT(bits <= 16);
   // to bit reverse n bits, reverse 16 and shift
   // e.g. 11 bits, bit reverse and shift away 5
   return stbi__bitreverse16(v) >> (16-bits);
}

static int stbi__zbuild_huffman(stbi__zhuffman *z, stbi_uc *sizelist, int num)
{
   int i,k=0;
   int code, next_code[16], sizes[17];

   // DEFLATE spec for generating codes
   memset(sizes, 0, sizeof(sizes));
   memset(z->fast, 0, sizeof(z->fast));
   for (i=0; i < num; ++i)
      ++sizes[sizelist[i]];
   sizes[0] = 0;
   for (i=1; i < 16; ++i)
      if (sizes[i] > (1 << i))
         return stbi__err("bad sizes", "Corrupt PNG");
   code = 0;
   for (i=1; i < 16; ++i) {
      next_code[i] = code;
      z->firstcode[i] = (stbi__uint16) code;
      z->firstsymbol[i] = (stbi__uint16) k;
      code = (code + sizes[i]);
      if (sizes[i])
         if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
      z->maxcode[i] = code << (16-i); // preshift for inner loop
      code <<= 1;
      k += sizes[i];
   }
   z->maxcode[16] = 0x10000; // sentinel
   for (i=0; i < num; ++i) {
      int s = sizelist[i];
      if (s) {
         int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
         stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
         z->size [c] = (stbi_uc     ) s;
         z->value[c] = (stbi__uint16) i;
         if (s <= STBI__ZFAST_BITS) {
            int j = stbi__bit_reverse(next_code[s],s);
            while (j < (1 << STBI__ZFAST_BITS)) {
               z->fast[j] = fastv;
               j += (1 << s);
            }
         }
         ++next_code[s];
      }
   }
   return 1;
}

// zlib-from-memory implementation for PNG reading
//    because PNG allows splitting the zlib stream arbitrarily,
//    and it's annoying structurally to have PNG call ZLIB call PNG,
//    we require PNG read all the IDATs and combine them into a single
//    memory buffer

typedef struct
{
   stbi_uc *zbuffer, *zbuffer_end;
   int num_bits;
   stbi__uint32 code_buffer;

   char *zout;
   char *zout_start;
   char *zout_end;
   int   z_expandable;

   stbi__zhuffman z_length, z_distance;
} stbi__zbuf;

stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
{
   if (z->zbuffer >= z->zbuffer_end) return 0;
   return *z->zbuffer++;
}

static void stbi__fill_bits(stbi__zbuf *z)
{
   do {
      STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
      z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
      z->num_bits += 8;
   } while (z->num_bits <= 24);
}

stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
{
   unsigned int k;
   if (z->num_bits < n) stbi__fill_bits(z);
   k = z->code_buffer & ((1 << n) - 1);
   z->code_buffer >>= n;
   z->num_bits -= n;
   return k;
}

static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
{
   int b,s,k;
   // not resolved by fast table, so compute it the slow way
   // use jpeg approach, which requires MSbits at top
   k = stbi__bit_reverse(a->code_buffer, 16);
   for (s=STBI__ZFAST_BITS+1; ; ++s)
      if (k < z->maxcode[s])
         break;
   if (s == 16) return -1; // invalid code!
   // code size is s, so:
   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
   STBI_ASSERT(z->size[b] == s);
   a->code_buffer >>= s;
   a->num_bits -= s;
   return z->value[b];
}

stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
{
   int b,s;
   if (a->num_bits < 16) stbi__fill_bits(a);
   b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
   if (b) {
      s = b >> 9;
      a->code_buffer >>= s;
      a->num_bits -= s;
      return b & 511;
   }
   return stbi__zhuffman_decode_slowpath(a, z);
}

static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
{
   char *q;
   int cur, limit, old_limit;
   z->zout = zout;
   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
   cur   = (int) (z->zout     - z->zout_start);
   limit = old_limit = (int) (z->zout_end - z->zout_start);
   while (cur + n > limit)
      limit *= 2;
   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
   STBI_NOTUSED(old_limit);
   if (q == NULL) return stbi__err("outofmem", "Out of memory");
   z->zout_start = q;
   z->zout       = q + cur;
   z->zout_end   = q + limit;
   return 1;
}

static int stbi__zlength_base[31] = {
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 };

static int stbi__zlength_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

static int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};

static int stbi__zdist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};

static int stbi__parse_huffman_block(stbi__zbuf *a)
{
   char *zout = a->zout;
   for(;;) {
      int z = stbi__zhuffman_decode(a, &a->z_length);
      if (z < 256) {
         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
         if (zout >= a->zout_end) {
            if (!stbi__zexpand(a, zout, 1)) return 0;
            zout = a->zout;
         }
         *zout++ = (char) z;
      } else {
         stbi_uc *p;
         int len,dist;
         if (z == 256) {
            a->zout = zout;
            return 1;
         }
         z -= 257;
         len = stbi__zlength_base[z];
         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
         z = stbi__zhuffman_decode(a, &a->z_distance);
         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG");
         dist = stbi__zdist_base[z];
         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
         if (zout + len > a->zout_end) {
            if (!stbi__zexpand(a, zout, len)) return 0;
            zout = a->zout;
         }
         p = (stbi_uc *) (zout - dist);
         if (dist == 1) { // run of one byte; common in images.
            stbi_uc v = *p;
            if (len) { do *zout++ = v; while (--len); }
         } else {
            if (len) { do *zout++ = *p++; while (--len); }
         }
      }
   }
}

static int stbi__compute_huffman_codes(stbi__zbuf *a)
{
   static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
   stbi__zhuffman z_codelength;
   stbi_uc lencodes[286+32+137];//padding for maximum single op
   stbi_uc codelength_sizes[19];
   int i,n;

   int hlit  = stbi__zreceive(a,5) + 257;
   int hdist = stbi__zreceive(a,5) + 1;
   int hclen = stbi__zreceive(a,4) + 4;
   int ntot  = hlit + hdist;

   memset(codelength_sizes, 0, sizeof(codelength_sizes));
   for (i=0; i < hclen; ++i) {
      int s = stbi__zreceive(a,3);
      codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
   }
   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;

   n = 0;
   while (n < ntot) {
      int c = stbi__zhuffman_decode(a, &z_codelength);
      if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
      if (c < 16)
         lencodes[n++] = (stbi_uc) c;
      else {
         stbi_uc fill = 0;
         if (c == 16) {
            c = stbi__zreceive(a,2)+3;
            if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
            fill = lencodes[n-1];
         } else if (c == 17)
            c = stbi__zreceive(a,3)+3;
         else {
            STBI_ASSERT(c == 18);
            c = stbi__zreceive(a,7)+11;
         }
         if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
         memset(lencodes+n, fill, c);
         n += c;
      }
   }
   if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
   return 1;
}

static int stbi__parse_uncompressed_block(stbi__zbuf *a)
{
   stbi_uc header[4];
   int len,nlen,k;
   if (a->num_bits & 7)
      stbi__zreceive(a, a->num_bits & 7); // discard
   // drain the bit-packed data into header
   k = 0;
   while (a->num_bits > 0) {
      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
      a->code_buffer >>= 8;
      a->num_bits -= 8;
   }
   STBI_ASSERT(a->num_bits == 0);
   // now fill header the normal way
   while (k < 4)
      header[k++] = stbi__zget8(a);
   len  = header[1] * 256 + header[0];
   nlen = header[3] * 256 + header[2];
   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
   if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
   if (a->zout + len > a->zout_end)
      if (!stbi__zexpand(a, a->zout, len)) return 0;
   memcpy(a->zout, a->zbuffer, len);
   a->zbuffer += len;
   a->zout += len;
   return 1;
}

static int stbi__parse_zlib_header(stbi__zbuf *a)
{
   int cmf   = stbi__zget8(a);
   int cm    = cmf & 15;
   /* int cinfo = cmf >> 4; */
   int flg   = stbi__zget8(a);
   if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
   if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
   if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
   return 1;
}

// @TODO: should statically initialize these for optimal thread safety
static stbi_uc stbi__zdefault_length[288], stbi__zdefault_distance[32];
static void stbi__init_zdefaults(void)
{
   int i;   // use <= to match clearly with spec
   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;

   for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
}

static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
{
   int final, type;
   if (parse_header)
      if (!stbi__parse_zlib_header(a)) return 0;
   a->num_bits = 0;
   a->code_buffer = 0;
   do {
      final = stbi__zreceive(a,1);
      type = stbi__zreceive(a,2);
      if (type == 0) {
         if (!stbi__parse_uncompressed_block(a)) return 0;
      } else if (type == 3) {
         return 0;
      } else {
         if (type == 1) {
            // use fixed code lengths
            if (!stbi__zdefault_distance[31]) stbi__init_zdefaults();
            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
         } else {
            if (!stbi__compute_huffman_codes(a)) return 0;
         }
         if (!stbi__parse_huffman_block(a)) return 0;
      }
   } while (!final);
   return 1;
}

static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
{
   a->zout_start = obuf;
   a->zout       = obuf;
   a->zout_end   = obuf + olen;
   a->z_expandable = exp;

   return stbi__parse_zlib(a, parse_header);
}

STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
{
   stbi__zbuf a;
   char *p = (char *) stbi__malloc(initial_size);
   if (p == NULL) return NULL;
   a.zbuffer = (stbi_uc *) buffer;
   a.zbuffer_end = (stbi_uc *) buffer + len;
   if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
      if (outlen) *outlen = (int) (a.zout - a.zout_start);
      return a.zout_start;
   } else {
      STBI_FREE(a.zout_start);
      return NULL;
   }
}

STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
{
   return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
}

STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
{
   stbi__zbuf a;
   char *p = (char *) stbi__malloc(initial_size);
   if (p == NULL) return NULL;
   a.zbuffer = (stbi_uc *) buffer;
   a.zbuffer_end = (stbi_uc *) buffer + len;
   if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
      if (outlen) *outlen = (int) (a.zout - a.zout_start);
      return a.zout_start;
   } else {
      STBI_FREE(a.zout_start);
      return NULL;
   }
}

STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
{
   stbi__zbuf a;
   a.zbuffer = (stbi_uc *) ibuffer;
   a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
   if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
      return (int) (a.zout - a.zout_start);
   else
      return -1;
}

STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
{
   stbi__zbuf a;
   char *p = (char *) stbi__malloc(16384);
   if (p == NULL) return NULL;
   a.zbuffer = (stbi_uc *) buffer;
   a.zbuffer_end = (stbi_uc *) buffer+len;
   if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
      if (outlen) *outlen = (int) (a.zout - a.zout_start);
      return a.zout_start;
   } else {
      STBI_FREE(a.zout_start);
      return NULL;
   }
}

STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
{
   stbi__zbuf a;
   a.zbuffer = (stbi_uc *) ibuffer;
   a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
   if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
      return (int) (a.zout - a.zout_start);
   else
      return -1;
}
#endif

// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
//    simple implementation
//      - only 8-bit samples
//      - no CRC checking
//      - allocates lots of intermediate memory
//        - avoids problem of streaming data between subsystems
//        - avoids explicit window management
//    performance
//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding

#ifndef STBI_NO_PNG
typedef struct
{
   stbi__uint32 length;
   stbi__uint32 type;
} stbi__pngchunk;

static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
{
   stbi__pngchunk c;
   c.length = stbi__get32be(s);
   c.type   = stbi__get32be(s);
   return c;
}

static int stbi__check_png_header(stbi__context *s)
{
   static stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
   int i;
   for (i=0; i < 8; ++i)
      if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
   return 1;
}

typedef struct
{
   stbi__context *s;
   stbi_uc *idata, *expanded, *out;
   int depth;
} stbi__png;


enum {
   STBI__F_none=0,
   STBI__F_sub=1,
   STBI__F_up=2,
   STBI__F_avg=3,
   STBI__F_paeth=4,
   // synthetic f
Download .txt
gitextract_a0o9p4yg/

├── .gitignore
├── .gitmodules
├── LICENSE.txt
├── Makefile
├── README.md
├── meta/
│   ├── banner.xcf
│   └── icon.xcf
├── romfs/
│   ├── logo.xcf
│   └── textcolor.cfg
└── source/
    ├── core/
    │   ├── default.v.pica
    │   ├── screen.c
    │   ├── screen.h
    │   ├── util.c
    │   └── util.h
    ├── locale.c
    ├── locale.h
    ├── main.c
    ├── stb_image/
    │   ├── stb_image.c
    │   └── stb_image.h
    └── ui/
        ├── error.c
        ├── error.h
        ├── info.c
        ├── info.h
        ├── list.c
        ├── list.h
        ├── mainmenu.c
        ├── mainmenu.h
        ├── prompt.c
        ├── prompt.h
        ├── section/
        │   ├── action/
        │   │   ├── action.h
        │   │   ├── change_language.c
        │   │   ├── change_locale_dir.c
        │   │   ├── change_region.c
        │   │   ├── pick_locale_dir.c
        │   │   └── use_system_default.c
        │   ├── config.c
        │   ├── nuke.c
        │   ├── section.h
        │   ├── task/
        │   │   ├── listtitles.c
        │   │   ├── task.c
        │   │   └── task.h
        │   └── titles.c
        ├── ui.c
        └── ui.h
Download .txt
SYMBOL INDEX (356 symbols across 26 files)

FILE: source/core/screen.c
  function FILE (line 55) | static FILE* screen_open_resource(const char* path) {
  function screen_set_blend (line 71) | static void screen_set_blend(u32 color, bool rgb, bool alpha) {
  function screen_init (line 97) | void screen_init() {
  function screen_exit (line 263) | void screen_exit() {
  function screen_set_base_alpha (line 299) | void screen_set_base_alpha(u8 alpha) {
  function u32 (line 303) | static u32 screen_next_pow_2(u32 i) {
  function u32 (line 315) | u32 screen_allocate_free_texture() {
  function screen_load_texture (line 334) | void screen_load_texture(u32 id, void* data, u32 size, u32 width, u32 he...
  function screen_load_texture_file (line 398) | void screen_load_texture_file(u32 id, const char* path, bool linearFilte...
  function u32 (line 442) | static u32 screen_tiled_texture_index(u32 x, u32 y, u32 w, u32 h) {
  function screen_load_texture_tiled (line 446) | void screen_load_texture_tiled(u32 id, void* tiledData, u32 size, u32 wi...
  function screen_unload_texture (line 476) | void screen_unload_texture(u32 id) {
  function screen_get_texture_size (line 489) | void screen_get_texture_size(u32* width, u32* height, u32 id) {
  function screen_draw_quad (line 504) | static void screen_draw_quad(float x1, float y1, float x2, float y2, flo...
  function screen_begin_frame (line 528) | void screen_begin_frame() {
  function screen_end_frame (line 535) | void screen_end_frame() {
  function screen_select (line 539) | void screen_select(gfxScreen_t screen) {
  function screen_draw_texture (line 548) | void screen_draw_texture(u32 id, float x, float y, float width, float he...
  function screen_draw_texture_crop (line 570) | void screen_draw_texture_crop(u32 id, float x, float y, float width, flo...
  function screen_get_font_height (line 592) | float screen_get_font_height(float scaleY) {
  function screen_get_string_size_internal (line 596) | static void screen_get_string_size_internal(float* width, float* height,...
  function screen_get_string_size (line 650) | void screen_get_string_size(float* width, float* height, const char* tex...
  function screen_get_string_size_wrap (line 654) | void screen_get_string_size_wrap(float* width, float* height, const char...
  function screen_draw_string_internal (line 658) | static void screen_draw_string_internal(const char* text, float x, float...
  function screen_draw_string (line 740) | void screen_draw_string(const char* text, float x, float y, float scaleX...
  function screen_draw_string_wrap (line 744) | void screen_draw_string_wrap(const char* text, float x, float y, float s...

FILE: source/core/util.c
  function util_get_line_length (line 14) | static int util_get_line_length(PrintConsole* console, const char* str) {
  function util_get_lines (line 32) | static int util_get_lines(PrintConsole* console, const char* str) {
  function util_panic (line 53) | void util_panic(const char* s, ...) {
  function util_is_dir (line 128) | bool util_is_dir(FS_Archive* archive, const char* path) {
  type count_data (line 146) | typedef struct {
  type populate_data (line 152) | typedef struct {
  function util_get_path_file (line 159) | void util_get_path_file(char* out, const char* path, u32 size) {
  function util_get_parent_path (line 177) | void util_get_parent_path(char* out, const char* path, u32 size) {
  function Result (line 194) | Result util_get_locale_path(char* out, size_t size) {
  function Result (line 232) | Result util_ensure_dir(FS_Archive* archive, const char* path) {
  function Result (line 269) | Result util_get_locale_dir(char* out, size_t size) {
  function FS_Path (line 281) | FS_Path* util_make_path_utf8(const char* path) {
  function util_free_path_utf8 (line 304) | void util_free_path_utf8(FS_Path* path) {
  function FS_Path (line 309) | FS_Path util_make_binary_path(const void* data, u32 size) {
  function util_compare_u32 (line 314) | int util_compare_u32(const void* e1, const void* e2) {
  function util_compare_u64 (line 321) | int util_compare_u64(const void* e1, const void* e2) {
  function FILE (line 328) | FILE* util_open_resource(const char* path) {

FILE: source/core/util.h
  type SMDH_title (line 5) | typedef struct {
  type SMDH (line 11) | typedef struct {
  type BNR (line 30) | typedef struct {

FILE: source/locale.c
  function Region (line 9) | Region region_from_string(char* string) {
  function Language (line 21) | Language language_from_string(char* string) {
  function Locale (line 69) | Locale* locale_for_title(u64 titleId) {
  function Region (line 125) | Region region_for_title(u64 titleId) {
  function Language (line 130) | Language language_for_title(u64 titleId) {
  function Result (line 135) | Result _set_locale_for_title(u64 titleId, Locale* locale) {
  function Result (line 188) | Result set_region_and_language_for_title(u64 titleId, Region region, Lan...
  function Result (line 210) | Result set_region_for_title(u64 titleId, Region region) {
  function Result (line 216) | Result set_language_for_title(u64 titleId, Language language) {

FILE: source/locale.h
  type Region (line 5) | typedef enum {
  type Language (line 18) | typedef enum {
  type Locale (line 35) | typedef struct {

FILE: source/main.c
  function cleanup (line 12) | void cleanup() {
  function main (line 33) | int main(int argc, const char* argv[]) {

FILE: source/stb_image/stb_image.h
  type stbi_uc (line 320) | typedef unsigned char stbi_uc;
  type stbi_us (line 321) | typedef unsigned short stbi_us;
  type stbi_io_callbacks (line 342) | typedef struct
  type stbi__uint16 (line 530) | typedef unsigned short stbi__uint16;
  type stbi__int16 (line 531) | typedef   signed short stbi__int16;
  type stbi__uint32 (line 532) | typedef unsigned int   stbi__uint32;
  type stbi__int32 (line 533) | typedef   signed int   stbi__int32;
  type stbi__uint16 (line 536) | typedef uint16_t stbi__uint16;
  type stbi__int16 (line 537) | typedef int16_t  stbi__int16;
  type stbi__uint32 (line 538) | typedef uint32_t stbi__uint32;
  type stbi__int32 (line 539) | typedef int32_t  stbi__int32;
  function stbi__cpuid3 (line 618) | static int stbi__cpuid3(void)
  function stbi__sse2_available (line 639) | static int stbi__sse2_available()
  function stbi__sse2_available (line 647) | static int stbi__sse2_available()
  type stbi__context (line 682) | typedef struct
  function stbi__start_mem (line 702) | static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int...
  function stbi__start_callbacks (line 711) | static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c...
  function stbi__stdio_read (line 724) | static int stbi__stdio_read(void *user, char *data, int size)
  function stbi__stdio_skip (line 729) | static void stbi__stdio_skip(void *user, int n)
  function stbi__stdio_eof (line 734) | static int stbi__stdio_eof(void *user)
  function stbi__start_file (line 746) | static void stbi__start_file(stbi__context *s, FILE *f)
  function stbi__rewind (line 755) | static void stbi__rewind(stbi__context *s)
  type stbi__result_info (line 770) | typedef struct
  function STBIDEF (line 834) | STBIDEF const char *stbi_failure_reason(void)
  function stbi__err (line 839) | static int stbi__err(const char *str)
  function stbi__addsizes_valid (line 862) | static int stbi__addsizes_valid(int a, int b)
  function stbi__mul2sizes_valid (line 874) | static int stbi__mul2sizes_valid(int a, int b)
  function stbi__mad2sizes_valid (line 883) | static int stbi__mad2sizes_valid(int a, int b, int add)
  function stbi__mad3sizes_valid (line 889) | static int stbi__mad3sizes_valid(int a, int b, int c, int add)
  function stbi__mad4sizes_valid (line 896) | static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
  function STBIDEF (line 936) | STBIDEF void stbi_image_free(void *retval_from_stbi_load)
  function STBIDEF (line 951) | STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
  function stbi_uc (line 1001) | static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, ...
  function stbi__uint16 (line 1017) | static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, ...
  function stbi__uint16 (line 1070) | static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, ...
  function stbi__float_postprocess (line 1109) | static void stbi__float_postprocess(float *result, int *x, int *y, int *...
  function FILE (line 1133) | static FILE *stbi__fopen(char const *filename, char const *mode)
  function STBIDEF (line 1146) | STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *co...
  function STBIDEF (line 1156) | STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp,...
  function STBIDEF (line 1169) | STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, in...
  function STBIDEF (line 1182) | STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int ...
  function STBIDEF (line 1195) | STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, i...
  function STBIDEF (line 1202) | STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk,...
  function STBIDEF (line 1228) | STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, in...
  function STBIDEF (line 1235) | STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, ...
  function STBIDEF (line 1243) | STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *com...
  function STBIDEF (line 1253) | STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, ...
  function STBIDEF (line 1267) | STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
  function STBIDEF (line 1281) | STBIDEF int      stbi_is_hdr          (char const *filename)
  function STBIDEF (line 1292) | STBIDEF int      stbi_is_hdr_from_file(FILE *f)
  function STBIDEF (line 1305) | STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clb...
  function STBIDEF (line 1321) | STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = ga...
  function STBIDEF (line 1322) | STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = sc...
  function STBIDEF (line 1327) | STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = ...
  function STBIDEF (line 1328) | STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = ...
  function stbi__refill_buffer (line 1343) | static void stbi__refill_buffer(stbi__context *s)
  function stbi_inline (line 1359) | stbi_inline static stbi_uc stbi__get8(stbi__context *s)
  function stbi_inline (line 1370) | stbi_inline static int stbi__at_eof(stbi__context *s)
  function stbi__skip (line 1382) | static void stbi__skip(stbi__context *s, int n)
  function stbi__getn (line 1399) | static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
  function stbi__get16be (line 1423) | static int stbi__get16be(stbi__context *s)
  function stbi__uint32 (line 1429) | static stbi__uint32 stbi__get32be(stbi__context *s)
  function stbi__get16le (line 1438) | static int stbi__get16le(stbi__context *s)
  function stbi__uint32 (line 1446) | static stbi__uint32 stbi__get32le(stbi__context *s)
  function stbi_uc (line 1467) | static stbi_uc stbi__compute_y(int r, int g, int b)
  function stbi__uint16 (line 1516) | static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
  function stbi__uint16 (line 1521) | static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_...
  function stbi_uc (line 1588) | static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
  type stbi__huffman (line 1642) | typedef struct
  type stbi__jpeg (line 1653) | typedef struct
  function stbi__build_huffman (line 1705) | static int stbi__build_huffman(stbi__huffman *h, int *count)
  function stbi__build_fast_ac (line 1748) | static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
  function stbi__grow_buffer_unsafe (line 1773) | static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
  function stbi_inline (line 1795) | stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffm...
  function stbi_inline (line 1849) | stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
  function stbi_inline (line 1865) | stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
  function stbi_inline (line 1876) | stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
  function stbi__jpeg_decode_block (line 1904) | static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__...
  function stbi__jpeg_decode_block_prog_dc (line 1956) | static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64]...
  function stbi__jpeg_decode_block_prog_ac (line 1983) | static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64]...
  function stbi_inline (line 2103) | stbi_inline static stbi_uc stbi__clamp(int x)
  function stbi__idct_block (line 2154) | static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
  function stbi__idct_simd (line 2217) | static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
  function stbi__idct_simd (line 2398) | static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
  function stbi_uc (line 2606) | static stbi_uc stbi__get_marker(stbi__jpeg *j)
  function stbi__jpeg_reset (line 2623) | static void stbi__jpeg_reset(stbi__jpeg *j)
  function stbi__parse_entropy_coded_data (line 2636) | static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
  function stbi__jpeg_dequantize (line 2760) | static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
  function stbi__jpeg_finish (line 2767) | static void stbi__jpeg_finish(stbi__jpeg *z)
  function stbi__process_marker (line 2786) | static int stbi__process_marker(stbi__jpeg *z, int m)
  function stbi__process_scan_header (line 2851) | static int stbi__process_scan_header(stbi__jpeg *z)
  function stbi__free_jpeg_components (line 2890) | static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
  function stbi__process_frame_header (line 2912) | static int stbi__process_frame_header(stbi__jpeg *z, int scan)
  function stbi__decode_jpeg_header (line 3004) | static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
  function stbi__decode_jpeg_image (line 3027) | static int stbi__decode_jpeg_image(stbi__jpeg *j)
  type stbi_uc (line 3069) | typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_u...
  function stbi_uc (line 3074) | static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *...
  function stbi_uc (line 3083) | static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, s...
  function stbi_uc (line 3093) | static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, ...
  function stbi_uc (line 3123) | static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, ...
  function stbi_uc (line 3148) | static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_n...
  function stbi_uc (line 3264) | static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_nea...
  function stbi__YCbCr_to_RGB_row (line 3278) | static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const...
  function stbi__YCbCr_to_RGB_simd (line 3304) | static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi...
  function stbi__setup_jpeg (line 3439) | static void stbi__setup_jpeg(stbi__jpeg *j)
  function stbi__cleanup_jpeg (line 3461) | static void stbi__cleanup_jpeg(stbi__jpeg *j)
  type stbi__resample (line 3466) | typedef struct
  function stbi_uc (line 3476) | static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, i...
  function stbi__jpeg_test (line 3606) | static int stbi__jpeg_test(stbi__context *s)
  function stbi__jpeg_info_raw (line 3618) | static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
  function stbi__jpeg_info (line 3630) | static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
  type stbi__zhuffman (line 3656) | typedef struct
  function stbi_inline (line 3666) | stbi_inline static int stbi__bitreverse16(int n)
  function stbi_inline (line 3675) | stbi_inline static int stbi__bit_reverse(int v, int bits)
  function stbi__zbuild_huffman (line 3683) | static int stbi__zbuild_huffman(stbi__zhuffman *z, stbi_uc *sizelist, in...
  type stbi__zbuf (line 3736) | typedef struct
  function stbi_inline (line 3750) | stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
  function stbi__fill_bits (line 3756) | static void stbi__fill_bits(stbi__zbuf *z)
  function stbi__zreceive (line 3765) | int stbi__zreceive(stbi__zbuf *z, int n)
  function stbi__zhuffman_decode_slowpath (line 3775) | static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
  function stbi_inline (line 3793) | stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffm...
  function stbi__zexpand (line 3807) | static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to m...
  function stbi__parse_huffman_block (line 3840) | static int stbi__parse_huffman_block(stbi__zbuf *a)
  function stbi__compute_huffman_codes (line 3882) | static int stbi__compute_huffman_codes(stbi__zbuf *a)
  function stbi__parse_uncompressed_block (line 3931) | static int stbi__parse_uncompressed_block(stbi__zbuf *a)
  function stbi__parse_zlib_header (line 3960) | static int stbi__parse_zlib_header(stbi__zbuf *a)
  function stbi__init_zdefaults (line 3975) | static void stbi__init_zdefaults(void)
  function stbi__parse_zlib (line 3986) | static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
  function stbi__do_zlib (line 4015) | static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, i...
  function STBIDEF (line 4025) | STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int ...
  function STBIDEF (line 4041) | STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *...
  function STBIDEF (line 4046) | STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *b...
  function STBIDEF (line 4062) | STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const ...
  function STBIDEF (line 4073) | STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int l...
  function STBIDEF (line 4089) | STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, co...
  type stbi__pngchunk (line 4112) | typedef struct
  function stbi__pngchunk (line 4118) | static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
  function stbi__check_png_header (line 4126) | static int stbi__check_png_header(stbi__context *s)
  type stbi__png (line 4135) | typedef struct
  function stbi__paeth (line 4163) | static int stbi__paeth(int a, int b, int c)
  function stbi__create_png_image_raw (line 4177) | static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__...
  function stbi__create_png_image (line 4387) | static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stb...
  function stbi__compute_transparency (line 4431) | static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int o...
  function stbi__compute_transparency16 (line 4456) | static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3]...
  function stbi__expand_png_palette (line 4481) | static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int ...
  function STBIDEF (line 4521) | STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpr...
  function STBIDEF (line 4526) | STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_conv...
  function stbi__de_iphone (line 4531) | static void stbi__de_iphone(stbi__png *z)
  function stbi__parse_png_file (line 4575) | static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
  function stbi__png_test (line 4781) | static int stbi__png_test(stbi__context *s)
  function stbi__png_info_raw (line 4789) | static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
  function stbi__png_info (line 4801) | static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
  function stbi__bmp_test_raw (line 4812) | static int stbi__bmp_test_raw(stbi__context *s)
  function stbi__bmp_test (line 4827) | static int stbi__bmp_test(stbi__context *s)
  function stbi__high_bit (line 4836) | static int stbi__high_bit(unsigned int z)
  function stbi__bitcount (line 4848) | static int stbi__bitcount(unsigned int a)
  function stbi__shiftsigned (line 4858) | static int stbi__shiftsigned(int v, int shift, int bits)
  type stbi__bmp_data (line 4875) | typedef struct
  function stbi__tga_get_comp (line 5128) | static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_r...
  function stbi__tga_info (line 5144) | static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
  function stbi__tga_test (line 5209) | static int stbi__tga_test(stbi__context *s)
  function stbi__tga_read_rgb16 (line 5241) | static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
  function stbi__psd_test (line 5459) | static int stbi__psd_test(stbi__context *s)
  function stbi__psd_decode_rle (line 5466) | static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelC...
  function stbi__pic_is4 (line 5711) | static int stbi__pic_is4(stbi__context *s,const char *str)
  function stbi__pic_test_core (line 5721) | static int stbi__pic_test_core(stbi__context *s)
  type stbi__pic_packet (line 5737) | typedef struct
  function stbi_uc (line 5742) | static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
  function stbi__copyval (line 5756) | static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
  function stbi_uc (line 5765) | static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int heigh...
  function stbi__pic_test (line 5909) | static int stbi__pic_test(stbi__context *s)
  type stbi__gif_lzw (line 5921) | typedef struct
  type stbi__gif (line 5928) | typedef struct
  function stbi__gif_test_raw (line 5945) | static int stbi__gif_test_raw(stbi__context *s)
  function stbi__gif_test (line 5955) | static int stbi__gif_test(stbi__context *s)
  function stbi__gif_parse_colortable (line 5962) | static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256...
  function stbi__gif_header (line 5973) | static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, i...
  function stbi__gif_info_raw (line 6001) | static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
  function stbi__out_gif_code (line 6015) | static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
  function stbi_uc (line 6049) | static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
  function stbi__fill_gif_background (line 6129) | static void stbi__fill_gif_background(stbi__gif *g, int x0, int y0, int ...
  function stbi_uc (line 6145) | static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int ...
  function stbi__gif_info (line 6285) | static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
  function stbi__hdr_test_core (line 6295) | static int stbi__hdr_test_core(stbi__context *s, const char *signature)
  function stbi__hdr_test (line 6305) | static int stbi__hdr_test(stbi__context* s)
  function stbi__hdr_convert (line 6339) | static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
  function stbi__hdr_info (line 6494) | static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
  function stbi__bmp_info (line 6535) | static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
  function stbi__psd_info (line 6553) | static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
  function stbi__pic_info (line 6586) | static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
  function stbi__pnm_test (line 6654) | static int      stbi__pnm_test(stbi__context *s)
  function stbi__pnm_isspace (line 6692) | static int      stbi__pnm_isspace(char c)
  function stbi__pnm_skip_whitespace (line 6697) | static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
  function stbi__pnm_isdigit (line 6711) | static int      stbi__pnm_isdigit(char c)
  function stbi__pnm_getinteger (line 6716) | static int      stbi__pnm_getinteger(stbi__context *s, char *c)
  function stbi__pnm_info (line 6728) | static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
  function stbi__info_main (line 6763) | static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
  function STBIDEF (line 6806) | STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
  function STBIDEF (line 6816) | STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
  function STBIDEF (line 6828) | STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x...
  function STBIDEF (line 6835) | STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *u...

FILE: source/ui/error.c
  type error_data (line 543) | typedef struct {
  function error_draw_top (line 550) | static void error_draw_top(ui_view* view, void* data, float x1, float y1...
  function error_onresponse (line 558) | static void error_onresponse(ui_view* view, void* data, bool response) {
  function error_display (line 568) | void error_display(volatile bool* dismissed, void* data, void (*drawTop)...
  function error_display_res (line 587) | void error_display_res(volatile bool* dismissed, void* data, void (*draw...
  function error_display_errno (line 613) | void error_display_errno(volatile bool* dismissed, void* data, void (*dr...

FILE: source/ui/info.c
  type info_data (line 10) | typedef struct {
  function info_update (line 19) | static void info_update(ui_view* view, void* data, float bx1, float by1,...
  function info_draw_top (line 27) | static void info_draw_top(ui_view* view, void* data, float x1, float y1,...
  function info_draw_bottom (line 35) | static void info_draw_bottom(ui_view* view, void* data, float x1, float ...
  function info_display (line 69) | void info_display(const char* name, const char* info, bool bar, void* da...
  function info_destroy (line 102) | void info_destroy(ui_view* view) {

FILE: source/ui/list.c
  type list_data (line 9) | typedef struct {
  function list_get_item_screen_y (line 23) | static float list_get_item_screen_y(list_data* listData, u32 index) {
  function list_get_item_at (line 34) | static int list_get_item_at(list_data* listData, float screenY) {
  function list_validate_pos (line 50) | static void list_validate_pos(list_data* listData, float by1, float by2) {
  function list_update (line 76) | static void list_update(ui_view* view, void* data, float bx1, float by1,...
  function list_draw_top (line 174) | static void list_draw_top(ui_view* view, void* data, float x1, float y1,...
  function list_draw_bottom (line 182) | static void list_draw_bottom(ui_view* view, void* data, float x1, float ...
  function list_display (line 214) | void list_display(const char* name, const char* info, void* data, void (...
  function list_destroy (line 251) | void list_destroy(ui_view* view) {

FILE: source/ui/list.h
  type list_item (line 7) | typedef struct {

FILE: source/ui/mainmenu.c
  function mainmenu_draw_top (line 20) | static void mainmenu_draw_top(ui_view* view, void* data, float x1, float...
  function mainmenu_update (line 41) | static void mainmenu_update(ui_view* view, void* data, list_item** items...
  function mainmenu_open (line 60) | void mainmenu_open() {

FILE: source/ui/prompt.c
  type prompt_data (line 9) | typedef struct {
  function prompt_notify_response (line 19) | static void prompt_notify_response(ui_view* view, prompt_data* promptDat...
  function prompt_update (line 30) | static void prompt_update(ui_view* view, void* data, float bx1, float by...
  function prompt_draw_top (line 84) | static void prompt_draw_top(ui_view* view, void* data, float x1, float y...
  function prompt_draw_bottom (line 92) | static void prompt_draw_bottom(ui_view* view, void* data, float x1, floa...
  function prompt_display (line 141) | void prompt_display(const char* name, const char* text, u32 rgba, bool o...

FILE: source/ui/section/action/change_language.c
  type language_data (line 13) | typedef struct {
  function action_set_language (line 21) | static void action_set_language(language_data* data, char* name, bool po...
  function language_draw_top (line 36) | static void language_draw_top(ui_view* view, void* data, float x1, float...
  function language_update (line 40) | static void language_update(ui_view* view, void* data, list_item** items...
  function action_change_language (line 82) | void action_change_language(title_info* info, bool* populated) {

FILE: source/ui/section/action/change_locale_dir.c
  function action_write_locale_dir_config (line 9) | static void action_write_locale_dir_config(ui_view* view, void* data, bo...
  function action_change_locale_dir (line 27) | void action_change_locale_dir(config_info* info, bool* populated) {

FILE: source/ui/section/action/change_region.c
  type region_data (line 13) | typedef struct {
  function action_set_region (line 21) | static void action_set_region(region_data* data, char* name, bool popula...
  function region_draw_top (line 37) | static void region_draw_top(ui_view* view, void* data, float x1, float y...
  function region_update (line 41) | static void region_update(ui_view* view, void* data, list_item** items, ...
  function action_change_region (line 84) | void action_change_region(title_info* info, bool* populated) {

FILE: source/ui/section/action/pick_locale_dir.c
  function action_pick_locale_dir (line 7) | void action_pick_locale_dir(config_info* info, bool* populated) {

FILE: source/ui/section/action/use_system_default.c
  function action_remove_locale_file (line 13) | static void action_remove_locale_file(ui_view* view, void* data, bool re...
  function action_use_system_default (line 42) | void action_use_system_default(title_info* info, bool* populated) {

FILE: source/ui/section/config.c
  type locale_data (line 16) | typedef struct {
  type config_action_data (line 23) | typedef struct {
  function config_draw_top (line 42) | static void config_draw_top(ui_view* view, void* data, float x1, float y...
  function config_update (line 52) | static void config_update(ui_view* view, void* data, list_item** items, ...
  function config_open (line 108) | void config_open() {

FILE: source/ui/section/nuke.c
  function really_nuke (line 16) | static void really_nuke(ui_view* view, void* data, bool response) {
  function nuke (line 75) | void nuke() {

FILE: source/ui/section/task/listtitles.c
  type populate_titles_data (line 18) | typedef struct {
  type locale_entry_list (line 27) | typedef struct locale_entry_list {
  function locale_entry_list (line 42) | locale_entry_list* find_entry_for_title_id(u64 titleId) {
  function Result (line 51) | Result populate_locales() {
  function Result (line 117) | static Result task_populate_titles_add_ctr(populate_titles_data* data, F...
  function Result (line 218) | static Result task_populate_titles_add_twl(populate_titles_data* data, F...
  function Result (line 363) | static Result task_populate_titles_from(populate_titles_data* data, FS_M...
  function task_populate_titles_thread (line 408) | static void task_populate_titles_thread(void* arg) {
  function task_clear_titles (line 420) | void task_clear_titles(list_item* items, u32* count) {
  function Handle (line 444) | Handle task_populate_titles(list_item* items, u32* count, u32 max, bool ...

FILE: source/ui/section/task/task.c
  function task_is_quit_all (line 7) | bool task_is_quit_all() {
  function task_quit_all (line 11) | void task_quit_all() {

FILE: source/ui/section/task/task.h
  type meta_info (line 9) | typedef struct meta_info_s {
  type title_info (line 17) | typedef struct {
  type config_info (line 29) | typedef struct {
  type pending_title_info (line 34) | typedef struct {
  type ticket_info (line 40) | typedef struct {
  type ext_save_data_info (line 44) | typedef struct {
  type system_save_data_info (line 52) | typedef struct {
  type cia_info (line 56) | typedef struct {
  type file_info (line 64) | typedef struct {
  type DataOp (line 82) | typedef enum {
  type data_op_info (line 87) | typedef struct {

FILE: source/ui/section/titles.c
  type titles_data (line 13) | typedef struct {
  type titles_action_data (line 56) | typedef struct {
  function titles_action_draw_top (line 61) | static void titles_action_draw_top(ui_view* view, void* data, float x1, ...
  function titles_action_update (line 65) | static void titles_action_update(ui_view* view, void* data, list_item** ...
  function titles_action_open (line 91) | static void titles_action_open(title_info* info, bool* populated) {
  function titles_draw_top (line 105) | static void titles_draw_top(ui_view* view, void* data, float x1, float y...
  function titles_update (line 111) | static void titles_update(ui_view* view, void* data, list_item** items, ...
  function titles_open (line 176) | void titles_open() {

FILE: source/ui/ui.c
  function ui_init (line 19) | void ui_init() {
  function ui_exit (line 25) | void ui_exit() {
  function ui_view (line 32) | ui_view* ui_top() {
  function ui_push (line 45) | bool ui_push(ui_view* view) {
  function ui_pop (line 62) | void ui_pop() {
  function ui_draw_top (line 72) | static void ui_draw_top(ui_view* ui) {
  function ui_draw_bottom (line 168) | static void ui_draw_bottom(ui_view* ui) {
  function ui_update (line 221) | bool ui_update() {
  function ui_draw_ext_save_data_info (line 248) | void ui_draw_ext_save_data_info(ui_view* view, void* data, float x1, flo...
  function ui_draw_title_info (line 320) | void ui_draw_title_info(ui_view* view, void* data, float x1, float y1, f...

FILE: source/ui/ui.h
  type ui_view (line 5) | typedef struct ui_view_s {
Condensed preview — 44 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (420K chars).
[
  {
    "path": ".gitignore",
    "chars": 60,
    "preview": ".idea\nCMakeLists.txt\n\n# Vim swap files\n.*.sw*\n\nbuild\noutput\n"
  },
  {
    "path": ".gitmodules",
    "chars": 90,
    "preview": "[submodule \"buildtools\"]\n\tpath = buildtools\n\turl = git://github.com/Steveice10/buildtools\n"
  },
  {
    "path": "LICENSE.txt",
    "chars": 1080,
    "preview": "Copyright (C) 2016 Possum\nCopyright (C) 2015 Steveice10\n\nPermission is hereby granted, free of charge, to any person obt"
  },
  {
    "path": "Makefile",
    "chars": 1638,
    "preview": "# TARGET #\n\nTARGET := 3DS\nLIBRARY := 0\n\nifeq ($(TARGET),3DS)\n    ifeq ($(strip $(DEVKITPRO)),)\n        $(error \"Please s"
  },
  {
    "path": "README.md",
    "chars": 3150,
    "preview": "# LumaLocaleSwitcher\n\nLumaLocaleSwitcher can be used to manage per-title locales for Luma3ds (and\ncompatible forks such "
  },
  {
    "path": "romfs/textcolor.cfg",
    "chars": 112,
    "preview": "text=FF000000\nnand=FF0000FF\nsd=FF00FF00\ngamecard=FFFF0000\ndstitle=FF82004B\nimportant=FF0000FF\noutdated=FF0000FF\n"
  },
  {
    "path": "source/core/default.v.pica",
    "chars": 656,
    "preview": "; Uniforms\n.fvec projection[4]\n\n; Constants\n.constf myconst(0.0, 1.0, -1.0, 0.1)\n.constf myconst2(0.3, 0.0, 0.0, 0.0)\n.a"
  },
  {
    "path": "source/core/screen.c",
    "chars": 24280,
    "preview": "#include <errno.h>\n#include <malloc.h>\n#include <stdio.h>\n#include <stdlib.h>\n\n#include <3ds.h>\n#include <citro3d.h>\n\n#i"
  },
  {
    "path": "source/core/screen.h",
    "chars": 2782,
    "preview": "#pragma once\n\n#define TOP_SCREEN_WIDTH 400\n#define TOP_SCREEN_HEIGHT 240\n\n#define BOTTOM_SCREEN_WIDTH 320\n#define BOTTOM"
  },
  {
    "path": "source/core/util.c",
    "chars": 9127,
    "preview": "#include <stdarg.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n\n#include <3ds.h>\n#include <3ds/services/"
  },
  {
    "path": "source/core/util.h",
    "chars": 1559,
    "preview": "#pragma once\n\n#include <stdio.h>\n\ntypedef struct {\n    u16 shortDescription[0x40];\n    u16 longDescription[0x80];\n    u1"
  },
  {
    "path": "source/locale.c",
    "chars": 6869,
    "preview": "#include <3ds.h>\n#include <sys/syslimits.h>\n#include <stdlib.h>\n#include <string.h>\n#include <stdio.h>\n#include \"locale."
  },
  {
    "path": "source/locale.h",
    "chars": 1104,
    "preview": "#pragma once\n#include <3ds/services/cfgu.h>\n\n// These align with CFG_Region in 3ds/services/cfgu.h\ntypedef enum {\n    JP"
  },
  {
    "path": "source/main.c",
    "chars": 1118,
    "preview": "#include <malloc.h>\n\n#include <3ds.h>\n\n#include \"core/screen.h\"\n#include \"core/util.h\"\n#include \"ui/mainmenu.h\"\n#include"
  },
  {
    "path": "source/stb_image/stb_image.c",
    "chars": 55,
    "preview": "#define STB_IMAGE_IMPLEMENTATION\n#include \"stb_image.h\""
  },
  {
    "path": "source/stb_image/stb_image.h",
    "chars": 244960,
    "preview": "/* stb_image - v2.14 - public domain image loader - http://nothings.org/stb_image.h\n                                    "
  },
  {
    "path": "source/ui/error.c",
    "chars": 18578,
    "preview": "#include <malloc.h>\n#include <stdarg.h>\n#include <stdio.h>\n#include <string.h>\n\n#include <3ds.h>\n\n#include \"error.h\"\n#in"
  },
  {
    "path": "source/ui/error.h",
    "chars": 560,
    "preview": "#pragma once\n\n#include \"ui.h\"\n\nvoid error_display(volatile bool* dismissed, void* data, void (*drawTop)(ui_view* view, v"
  },
  {
    "path": "source/ui/info.c",
    "chars": 3695,
    "preview": "#include <malloc.h>\n#include <stdio.h>\n\n#include <3ds.h>\n\n#include \"error.h\"\n#include \"info.h\"\n#include \"../core/screen."
  },
  {
    "path": "source/ui/info.h",
    "chars": 404,
    "preview": "#pragma once\n\n#include \"ui.h\"\n\n#define PROGRESS_TEXT_MAX 512\n\nvoid info_display(const char* name, const char* info, bool"
  },
  {
    "path": "source/ui/list.c",
    "chars": 9789,
    "preview": "#include <malloc.h>\n\n#include <3ds.h>\n\n#include \"error.h\"\n#include \"list.h\"\n#include \"../core/screen.h\"\n\ntypedef struct "
  },
  {
    "path": "source/ui/list.h",
    "chars": 541,
    "preview": "#pragma once\n\n#include <sys/syslimits.h>\n\n#include \"ui.h\"\n\ntypedef struct {\n    char name[NAME_MAX];\n    u32 rgba;\n    v"
  },
  {
    "path": "source/ui/mainmenu.c",
    "chars": 2011,
    "preview": "#include <malloc.h>\n#include <stdio.h>\n\n#include <3ds.h>\n\n#include \"list.h\"\n#include \"mainmenu.h\"\n#include \"section/sect"
  },
  {
    "path": "source/ui/mainmenu.h",
    "chars": 53,
    "preview": "#pragma once\n\n#include \"ui.h\"\n\nvoid mainmenu_open();\n"
  },
  {
    "path": "source/ui/prompt.c",
    "chars": 6675,
    "preview": "#include <malloc.h>\n\n#include <3ds.h>\n\n#include \"error.h\"\n#include \"prompt.h\"\n#include \"../core/screen.h\"\n\ntypedef struc"
  },
  {
    "path": "source/ui/prompt.h",
    "chars": 501,
    "preview": "#pragma once\n\n#include \"ui.h\"\n\nvoid prompt_display(const char* name, const char* text, u32 rgba, bool option, void* data"
  },
  {
    "path": "source/ui/section/action/action.h",
    "chars": 424,
    "preview": "#pragma once\n\n#include \"../task/task.h\"\n\nvoid action_change_region(title_info* info, bool* populated);\nvoid action_chang"
  },
  {
    "path": "source/ui/section/action/change_language.c",
    "chars": 3201,
    "preview": "#include <3ds.h>\n#include <string.h>\n#include <stdlib.h>\n\n#include \"action.h\"\n#include \"../task/task.h\"\n#include \"../../"
  },
  {
    "path": "source/ui/section/action/change_locale_dir.c",
    "chars": 1305,
    "preview": "#include <malloc.h>\n#include <string.h>\n#include <3ds.h>\n#include \"action.h\"\n#include \"../../prompt.h\"\n#include \"../../e"
  },
  {
    "path": "source/ui/section/action/change_region.c",
    "chars": 3269,
    "preview": "#include <3ds.h>\n#include <string.h>\n#include <stdlib.h>\n\n#include \"action.h\"\n#include \"../task/task.h\"\n#include \"../../"
  },
  {
    "path": "source/ui/section/action/pick_locale_dir.c",
    "chars": 324,
    "preview": "#include <3ds.h>\n#include \"action.h\"\n#include \"../../prompt.h\"\n#include \"../../../core/screen.h\"\n\n// TODO Add a file cho"
  },
  {
    "path": "source/ui/section/action/use_system_default.c",
    "chars": 1507,
    "preview": "#include <3ds.h>\n#include <stdlib.h>\n#include <string.h>\n\n#include \"action.h\"\n#include \"../task/task.h\"\n#include \"../../"
  },
  {
    "path": "source/ui/section/config.c",
    "chars": 3897,
    "preview": "#include <malloc.h>\n#include <stdio.h>\n#include <string.h>\n#include <sys/syslimits.h>\n\n#include <3ds.h>\n\n#include \"actio"
  },
  {
    "path": "source/ui/section/nuke.c",
    "chars": 2790,
    "preview": "#include <malloc.h>\n#include <stdio.h>\n#include <string.h>\n\n#include <3ds.h>\n#include <3ds/services/fs.h>\n\n#include \"act"
  },
  {
    "path": "source/ui/section/section.h",
    "chars": 124,
    "preview": "#pragma once\n\n#include \"../ui.h\"\n\nvoid files_open(FS_Archive archive);\nvoid titles_open();\nvoid config_open();\nvoid nuke"
  },
  {
    "path": "source/ui/section/task/listtitles.c",
    "chars": 17371,
    "preview": "#include <sys/syslimits.h>\n#include <malloc.h>\n#include <stdio.h>\n#include <stdlib.h>\n#include <string.h>\n#include <ctyp"
  },
  {
    "path": "source/ui/section/task/task.c",
    "chars": 158,
    "preview": "#include <3ds.h>\n\n#include \"task.h\"\n\nstatic bool task_quit;\n\nbool task_is_quit_all() {\n    return task_quit;\n}\n\nvoid tas"
  },
  {
    "path": "source/ui/section/task/task.h",
    "chars": 3563,
    "preview": "#pragma once\n\n#include <sys/syslimits.h>\n#include <stdio.h>\n\n#include \"../../list.h\"\n#include \"../../../locale.h\"\n\ntyped"
  },
  {
    "path": "source/ui/section/titles.c",
    "chars": 5893,
    "preview": "#include <malloc.h>\n#include <stdio.h>\n\n#include <3ds.h>\n\n#include \"action/action.h\"\n#include \"section.h\"\n#include \"../e"
  },
  {
    "path": "source/ui/ui.c",
    "chars": 17681,
    "preview": "#include <stdio.h>\n#include <string.h>\n#include <time.h>\n\n#include <3ds.h>\n\n#include \"section/task/task.h\"\n#include \"ui."
  },
  {
    "path": "source/ui/ui.h",
    "chars": 1026,
    "preview": "#pragma once\n\n#include <stdbool.h>\n\ntypedef struct ui_view_s {\n    const char* name;\n    const char* info;\n    void* dat"
  }
]

// ... and 3 more files (download for full content)

About this extraction

This page contains the full source code of the Possum/LumaLocaleSwitcher GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 44 files (394.5 KB), approximately 122.5k tokens, and a symbol index with 356 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!