Full Code of gpakosz/PackedArray for AI

master a4cffc21a76f cached
39 files
862.6 KB
390.9k tokens
44 symbols
1 requests
Download .txt
Showing preview only (895K chars total). Download the full file or copy to clipboard to get everything.
Repository: gpakosz/PackedArray
Branch: master
Commit: a4cffc21a76f
Files: 39
Total size: 862.6 KB

Directory structure:
gitextract_wmqt4bm2/

├── .gitattributes
├── .gitignore
├── .travis.yml
├── LICENSE
├── PackedArray.c
├── PackedArray.h
├── PackedArraySIMD.c
├── README.md
├── _gnu-make/
│   └── Makefile
├── _ios-xcode/
│   ├── .gitignore
│   ├── PackedArray-Info.plist
│   └── PackedArray.xcodeproj/
│       ├── project.pbxproj
│       └── project.xcworkspace/
│           └── contents.xcworkspacedata
├── _mac-xcode/
│   ├── .gitignore
│   └── PackedArray.xcodeproj/
│       ├── project.pbxproj
│       └── project.xcworkspace/
│           └── contents.xcworkspacedata
├── _win-vs11/
│   ├── .gitignore
│   ├── Common.props
│   ├── Debug.props
│   ├── PackedArray.sln
│   ├── PackedArraySIMDSelfBench.vcxproj
│   ├── PackedArraySIMDSelfTest.vcxproj
│   ├── PackedArraySelfBench.vcxproj
│   ├── PackedArraySelfTest.vcxproj
│   ├── Release.props
│   ├── x64.props
│   └── x86.props
└── benchmark/
    ├── PackedArraySIMDSelfBench-unrolled-galaxy-note-cortex-a9-1.4GHz.txt
    ├── PackedArraySIMDSelfBench-unrolled-ipad2-cortex-a9-1GHz.txt
    ├── PackedArraySIMDSelfBench-unrolled-iphone5-a6-1.3GHz.txt
    ├── PackedArraySIMDSelfBench-unrolled-mbp-corei7-M620-2.67GHz.txt
    ├── PackedArraySelfBench-reference-galaxy-note-cortex-a9-1.4GHz.txt
    ├── PackedArraySelfBench-reference-ipad2-cortex-a9-1GHz.txt
    ├── PackedArraySelfBench-reference-iphone5-a6-1.3GHz.txt
    ├── PackedArraySelfBench-reference-mbp-corei7-M620-2.67GHz.txt
    ├── PackedArraySelfBench-unrolled-galaxy-note-cortex-a9-1.4GHz.txt
    ├── PackedArraySelfBench-unrolled-ipad2-cortex-a9-1GHz.txt
    ├── PackedArraySelfBench-unrolled-iphone5-a6-1.3GHz.txt
    └── PackedArraySelfBench-unrolled-mbp-corei7-M620-2.67GHz.txt

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitattributes
================================================
* text=auto

# sources
*.h text diff=cpp
*.c text diff=cpp
*.cpp text diff=cpp
*.rb text diff=ruby
*.html text diff=html
*.m text diff=objc

# shell scripts
*.sh eol=lf

# GNU Makefile
Makefile text eol=lf

# Autotools
*.am text eol=lf

# Android
*.mk text eol=lf

# Xcode files
*.pbxproj text eol=lf merge=union

# Visual Studio files
*.sln text eol=crlf merge=union
*.vcxproj text eol=crlf merge=union
*.vcxproj.filters text eol=crlf merge=union
*.props text eol=crlf


================================================
FILE: .gitignore
================================================
.DS_Store
*~
*.swp

/bin
PackedArray.pp.c
PackedArray.cut.c
PackedArraySIMD.pp.c
PackedArraySIMD.cut.c


================================================
FILE: .travis.yml
================================================
language: cpp
compiler:
  - clang
  - gcc
env:
  - TARGET=build
  - TARGET=test
  - TARGET=preprocess
  - TARGET=cut
  - TARGET=assembly
script: make -j -C ./_gnu-make $TARGET


================================================
FILE: LICENSE
================================================
        DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 
                    Version 2, December 2004 

 Copyright (C) 2004 Sam Hocevar <sam@hocevar.net> 

 Everyone is permitted to copy and distribute verbatim or modified 
 copies of this license document, and changing it is allowed as long 
 as the name is changed. 

            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 
   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 

  0. You just DO WHAT THE FUCK YOU WANT TO.
  1. Bla bla bla
  2. Montesqieu et camembert, vive la France, zut alors!


================================================
FILE: PackedArray.c
================================================
// see README.md for usage instructions.
// (‑●‑●)> released under the WTFPL v2 license, by Gregory Pakosz (@gpakosz)

#ifndef PACKEDARRAY_SELF
#define PACKEDARRAY_SELF "PackedArray.c"
#endif

#ifdef PACKEDARRAY_IMPL

#ifndef PACKEDARRAY_JOIN
#define PACKEDARRAY_JOIN(lhs, rhs)    PACKEDARRAY_JOIN_(lhs, rhs)
#define PACKEDARRAY_JOIN_(lhs, rhs)   PACKEDARRAY_JOIN__(lhs, rhs)
#define PACKEDARRAY_JOIN__(lhs, rhs)  lhs##rhs
#endif // #ifndef PACKEDARRAY_JOIN

#ifndef PACKEDARRAY_IMPL_BITS_PER_ITEM
#error PACKEDARRAY_IMPL_BITS_PER_ITEM undefined
#endif // #ifndef PACKEDARRAY_IMPL_BITS_PER_ITEM

#if defined(PACKEDARRAY_IMPL_PACK_CASES) || defined(PACKEDARRAY_IMPL_UNPACK_CASES)

// Preprocessor counter for the case generator below.
// The first time this section is reached PACKEDARRAY_IMPL_CASE_I is defined
// as 0; every further self-inclusion of this file bumps it by exactly one.
// A macro cannot be redefined in terms of its own previous value, hence the
// explicit 0 -> 1 -> ... -> 32 ladder.
#ifndef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 0
#elif PACKEDARRAY_IMPL_CASE_I == 0
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 1
#elif PACKEDARRAY_IMPL_CASE_I == 1
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 2
#elif PACKEDARRAY_IMPL_CASE_I == 2
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 3
#elif PACKEDARRAY_IMPL_CASE_I == 3
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 4
#elif PACKEDARRAY_IMPL_CASE_I == 4
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 5
#elif PACKEDARRAY_IMPL_CASE_I == 5
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 6
#elif PACKEDARRAY_IMPL_CASE_I == 6
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 7
#elif PACKEDARRAY_IMPL_CASE_I == 7
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 8
#elif PACKEDARRAY_IMPL_CASE_I == 8
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 9
#elif PACKEDARRAY_IMPL_CASE_I == 9
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 10
#elif PACKEDARRAY_IMPL_CASE_I == 10
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 11
#elif PACKEDARRAY_IMPL_CASE_I == 11
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 12
#elif PACKEDARRAY_IMPL_CASE_I == 12
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 13
#elif PACKEDARRAY_IMPL_CASE_I == 13
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 14
#elif PACKEDARRAY_IMPL_CASE_I == 14
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 15
#elif PACKEDARRAY_IMPL_CASE_I == 15
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 16
#elif PACKEDARRAY_IMPL_CASE_I == 16
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 17
#elif PACKEDARRAY_IMPL_CASE_I == 17
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 18
#elif PACKEDARRAY_IMPL_CASE_I == 18
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 19
#elif PACKEDARRAY_IMPL_CASE_I == 19
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 20
#elif PACKEDARRAY_IMPL_CASE_I == 20
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 21
#elif PACKEDARRAY_IMPL_CASE_I == 21
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 22
#elif PACKEDARRAY_IMPL_CASE_I == 22
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 23
#elif PACKEDARRAY_IMPL_CASE_I == 23
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 24
#elif PACKEDARRAY_IMPL_CASE_I == 24
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 25
#elif PACKEDARRAY_IMPL_CASE_I == 25
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 26
#elif PACKEDARRAY_IMPL_CASE_I == 26
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 27
#elif PACKEDARRAY_IMPL_CASE_I == 27
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 28
#elif PACKEDARRAY_IMPL_CASE_I == 28
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 29
#elif PACKEDARRAY_IMPL_CASE_I == 29
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 30
#elif PACKEDARRAY_IMPL_CASE_I == 30
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 31
#elif PACKEDARRAY_IMPL_CASE_I == 31
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 32
#endif // #ifndef PACKEDARRAY_IMPL_CASE_I

// Bit layout of item number PACKEDARRAY_IMPL_CASE_I inside a run of 32 items.
// These macros are defined once but expand at each use, so they always follow
// the current values of PACKEDARRAY_IMPL_CASE_I and
// PACKEDARRAY_IMPL_BITS_PER_ITEM.

// number of bits left in the current 32-bit cell, counted from where the item starts
#ifndef PACKEDARRAY_IMPL_BITS_AVAILABLE
#define PACKEDARRAY_IMPL_BITS_AVAILABLE (32 - ((PACKEDARRAY_IMPL_CASE_I * PACKEDARRAY_IMPL_BITS_PER_ITEM) % 32))
#endif
// bit position, within the current 32-bit cell, at which the item starts
#ifndef PACKEDARRAY_IMPL_START_BIT
#define PACKEDARRAY_IMPL_START_BIT ((PACKEDARRAY_IMPL_CASE_I * PACKEDARRAY_IMPL_BITS_PER_ITEM) % 32)
#endif
// value with the low PACKEDARRAY_IMPL_BITS_PER_ITEM bits set; the shift is
// done on a 64-bit constant so that a width of 32 is well defined
#ifndef PACKEDARRAY_IMPL_MASK
#define PACKEDARRAY_IMPL_MASK (uint32_t)((1ULL << PACKEDARRAY_IMPL_BITS_PER_ITEM) - 1)
#endif

#if defined(PACKEDARRAY_IMPL_PACK_CASES)

// One `case` of the unrolled pack switch. This fragment is emitted once per
// self-inclusion, for case labels 0 to 31, into the switch statement of the
// including function, which provides the locals `in`, `out` and `packed`.

// The including function may predefine PACKEDARRAY_IMPL_PACK_CASE_BREAK to
// inject a statement after every case; by default nothing is injected and the
// cases fall through.
#ifndef PACKEDARRAY_IMPL_PACK_CASE_BREAK
#define PACKEDARRAY_IMPL_PACK_CASE_BREAK
#endif

      case PACKEDARRAY_IMPL_CASE_I:
#if (PACKEDARRAY_IMPL_BITS_PER_ITEM <= PACKEDARRAY_IMPL_BITS_AVAILABLE)
        // the item fits in the current cell: merge it into the accumulator
        packed |= *in++ << PACKEDARRAY_IMPL_START_BIT;
#if (PACKEDARRAY_IMPL_BITS_PER_ITEM == PACKEDARRAY_IMPL_BITS_AVAILABLE)
        // the item ends exactly on the cell boundary: flush and start a new cell
        *out++ = packed;
        packed = 0;
#endif
#else
        // the item straddles two cells: its low bits complete the current
        // cell, its remaining high bits seed the next one
        packed |= *in << PACKEDARRAY_IMPL_START_BIT;
        *out++ = packed;
        packed = *in++ >> PACKEDARRAY_IMPL_BITS_AVAILABLE;
#endif
        PACKEDARRAY_IMPL_PACK_CASE_BREAK

#if PACKEDARRAY_IMPL_CASE_I < 31
#include PACKEDARRAY_SELF
#else
#undef PACKEDARRAY_IMPL_CASE_I
#undef PACKEDARRAY_IMPL_PACK_CASE_BREAK
#undef PACKEDARRAY_IMPL_PACK_CASES
#endif

#elif defined(PACKEDARRAY_IMPL_UNPACK_CASES) // #if defined(PACKEDARRAY_IMPL_PACK_CASES)

// One `case` of the unrolled unpack switch. This fragment is emitted once per
// self-inclusion, for case labels 0 to 31, into the switch statement of the
// including function, which provides the locals `in`, `out` and `packed`
// (`packed` holds the cell `in` currently points at).

// The including function may predefine PACKEDARRAY_IMPL_UNPACK_CASE_BREAK to
// inject a statement after every extracted item; by default nothing is
// injected and the cases fall through.
#ifndef PACKEDARRAY_IMPL_UNPACK_CASE_BREAK
#define PACKEDARRAY_IMPL_UNPACK_CASE_BREAK
#endif

      case PACKEDARRAY_IMPL_CASE_I:
#if (PACKEDARRAY_IMPL_BITS_PER_ITEM <= PACKEDARRAY_IMPL_BITS_AVAILABLE)
        // the item lies entirely in the current cell
        *out++ = (packed >> PACKEDARRAY_IMPL_START_BIT) & PACKEDARRAY_IMPL_MASK;
        PACKEDARRAY_IMPL_UNPACK_CASE_BREAK
#if (PACKEDARRAY_IMPL_CASE_I < 31) && (PACKEDARRAY_IMPL_BITS_PER_ITEM == PACKEDARRAY_IMPL_BITS_AVAILABLE)
        // the item ended exactly on the cell boundary: load the next cell.
        // This comes after the optional break so the next cell is only read
        // when another item follows; it is skipped for case 31.
        packed = *++in;
#endif
#else
        {
          // the item straddles two cells: low bits come from the current
          // cell, high bits from the next one
          uint32_t low, high;
          low = packed >> PACKEDARRAY_IMPL_START_BIT;
          packed = *++in;
          high = packed << PACKEDARRAY_IMPL_BITS_AVAILABLE;

          *out++ = (low | high) & PACKEDARRAY_IMPL_MASK;
        }
        PACKEDARRAY_IMPL_UNPACK_CASE_BREAK
#endif

#if PACKEDARRAY_IMPL_CASE_I < 31
#include PACKEDARRAY_SELF
#else
#undef PACKEDARRAY_IMPL_CASE_I
#undef PACKEDARRAY_IMPL_UNPACK_CASE_BREAK
#undef PACKEDARRAY_IMPL_UNPACK_CASES
#endif

#endif // #elif defined(PACKEDARRAY_IMPL_UNPACK_CASES)

#else // #if defined(PACKEDARRAY_IMPL_PACK_CASES) || defined(PACKEDARRAY_IMPL_UNPACK_CASES)

// Template for __PackedArray_pack_N, N being PACKEDARRAY_IMPL_BITS_PER_ITEM.
//
// Packs `count` values read from `in` into the packed buffer `out`, starting
// at item index `offset`. Bits of the first cell that lie below the first
// written item, and bits of the last cell that lie above the last written
// item, are preserved.
//
// The work is split in two phases, both built from the 32 generated cases:
//  - a loop over whole runs that end on an item index multiple of 32; the
//    switch jumps into the middle of the do/while body so that the first run
//    starts at item `offset % 32`,
//  - a tail of fewer than 32 items, where every case is followed by an
//    `in == end` test.
//
// NOTE(review): count == 0 does not appear to be handled: the tail switch
// consumes at least one item before `in == end` is tested.
void PACKEDARRAY_JOIN(__PackedArray_pack_, PACKEDARRAY_IMPL_BITS_PER_ITEM)(uint32_t* __restrict out, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t startBit;
  uint32_t packed;
  const uint32_t* __restrict end;

  // locate the first cell touched and the bit at which the first item starts;
  // 64-bit math keeps offset * bitsPerItem from overflowing
  out += ((uint64_t)offset * (uint64_t)PACKEDARRAY_IMPL_BITS_PER_ITEM) / 32;
  startBit = ((uint64_t)offset * (uint64_t)PACKEDARRAY_IMPL_BITS_PER_ITEM) % 32;
  // keep the bits already stored below startBit in the first cell
  packed = *out & (uint32_t)((1ULL << startBit) - 1);

  // from here on `offset` is the index of the case to start from
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    int32_t n;

    // n = number of passes through the unrolled body (the first one may be
    // partial); count becomes the number of items left for the tail
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
#define PACKEDARRAY_IMPL_PACK_CASES
#include PACKEDARRAY_SELF
      } while (--n > 0);
    }

    if (count == 0)
      return;

    // the tail starts on a cell boundary
    offset = 0;
    startBit = 0;
  }

  end = in + count;
  switch (offset)
  {
#define PACKEDARRAY_IMPL_PACK_CASES
#define PACKEDARRAY_IMPL_PACK_CASE_BREAK \
    if (in == end)\
      break;
#include PACKEDARRAY_SELF
  }
  PACKEDARRAY_ASSERT(in == end);
  // the last cell is only partially covered: merge the accumulator with the
  // bits already stored above the last written item
  if ((count * PACKEDARRAY_IMPL_BITS_PER_ITEM + startBit) % 32)
  {
    packed |= *out & ~((uint32_t)(1ULL << ((((uint64_t)count * (uint64_t)PACKEDARRAY_IMPL_BITS_PER_ITEM + startBit - 1) % 32) + 1)) - 1);
    *out = packed;
  }
}

// Template for __PackedArray_unpack_N, N being PACKEDARRAY_IMPL_BITS_PER_ITEM.
//
// Extracts `count` items from the packed buffer `in`, starting at item index
// `offset`, and writes them to `out` as one uint32_t per item.
//
// Same two-phase structure as the pack template: a loop over whole runs that
// end on an item index multiple of 32 (entered mid-body through the switch),
// then a tail of fewer than 32 items where every extracted item is followed
// by an `out == end` test.
//
// NOTE(review): count == 0 does not appear to be handled: the tail switch
// writes at least one item before `out == end` is tested.
void PACKEDARRAY_JOIN(__PackedArray_unpack_, PACKEDARRAY_IMPL_BITS_PER_ITEM)(const uint32_t* __restrict in, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t packed;
  const uint32_t* __restrict end;

  // locate and load the first cell touched; 64-bit math keeps
  // offset * bitsPerItem from overflowing
  in += ((uint64_t)offset * (uint64_t)PACKEDARRAY_IMPL_BITS_PER_ITEM) / 32;
  packed = *in;

  // from here on `offset` is the index of the case to start from
  offset = offset % 32;
  if (count >= 32 - offset)
  {
    int32_t n;

    // n = number of passes through the unrolled body (the first one may be
    // partial); count becomes the number of items left for the tail
    n = (count + offset) / 32;
    count -= 32 * n - offset;
    switch (offset)
    {
      do
      {
        // executed on every pass except the first, which enters the body
        // directly at a case label: load the cell that starts the new run
        packed = *++in;
#define PACKEDARRAY_IMPL_UNPACK_CASES
#include PACKEDARRAY_SELF
      } while (--n > 0);
    }

    if (count == 0)
      return;

    // the tail starts on a cell boundary: load its first cell
    packed = *++in;
    offset = 0;
  }

  end = out + count;
  switch (offset)
  {
#define PACKEDARRAY_IMPL_UNPACK_CASES
#define PACKEDARRAY_IMPL_UNPACK_CASE_BREAK \
    if (out == end)\
      break;
#include PACKEDARRAY_SELF
  }
  PACKEDARRAY_ASSERT(out == end);
}

// Reset the per-instantiation macros so that the next bit width can be
// instantiated with a fresh #define PACKEDARRAY_IMPL_BITS_PER_ITEM.
// NOTE(review): PACKEDARRAY_IMPL_START_MASK is not defined anywhere in the
// visible code, so that #undef has no effect; PACKEDARRAY_IMPL_MASK may have
// been intended -- confirm.
#undef PACKEDARRAY_IMPL_BITS_PER_ITEM
#undef PACKEDARRAY_IMPL_BITS_AVAILABLE
#undef PACKEDARRAY_IMPL_START_BIT
#undef PACKEDARRAY_IMPL_START_MASK

#endif // #if defined(PACKEDARRAY_IMPL_PACK_CASES) || defined(PACKEDARRAY_IMPL_UNPACK_CASES)

#else

#include "PackedArray.h"

#if !defined(PACKEDARRAY_ASSERT)
#include <assert.h>
#define PACKEDARRAY_ASSERT(expression) assert(expression)
#endif

// Instantiate the pack / unpack templates once per bit width, 1 to 32.
// With PACKEDARRAY_IMPL defined, each self-inclusion emits
// __PackedArray_pack_N and __PackedArray_unpack_N for the current
// PACKEDARRAY_IMPL_BITS_PER_ITEM, then #undefs that macro, which is why it can
// simply be #defined again before the next inclusion.
#define PACKEDARRAY_IMPL
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 1
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 2
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 3
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 4
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 5
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 6
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 7
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 8
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 9
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 10
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 11
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 12
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 13
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 14
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 15
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 16
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 17
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 18
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 19
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 20
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 21
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 22
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 23
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 24
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 25
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 26
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 27
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 28
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 29
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 30
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 31
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 32
#include PACKEDARRAY_SELF
#undef PACKEDARRAY_IMPL


#if !defined(PACKEDARRAY_MALLOC) || !defined(PACKEDARRAY_FREE)
#include <stdlib.h>
#endif

#if !defined(PACKEDARRAY_MALLOC)
#define PACKEDARRAY_MALLOC(size) malloc(size)
#endif

#if !defined(PACKEDARRAY_FREE)
#define PACKEDARRAY_FREE(p) free(p)
#endif

#include <stddef.h>

// Allocates a PackedArray able to hold `count` items of `bitsPerItem` bits
// each (1 <= bitsPerItem <= 32, asserted).
//
// Returns NULL when the allocation fails or when the required size cannot be
// represented in a size_t. The packed data is left uninitialized, except for
// the last 32-bit cell which is zeroed (presumably so that the bits lying past
// the last item have a defined value).
//
// The array must be released with PackedArray_destroy.
PackedArray* PackedArray_create(uint32_t bitsPerItem, uint32_t count)
{
  PackedArray* a;
  uint64_t cellCount;
  size_t bufferSize;

  PACKEDARRAY_ASSERT(bitsPerItem > 0);
  PACKEDARRAY_ASSERT(bitsPerItem <= 32);

  // number of 32-bit cells needed for count items, rounded up; computed in
  // 64 bits so that bitsPerItem * count cannot overflow
  cellCount = ((uint64_t)bitsPerItem * (uint64_t)count + 31) / 32;

  // refuse sizes that would wrap around when converted to size_t (only
  // possible where size_t is narrower than 64 bits)
  if (cellCount > ((size_t)-1 - sizeof(PackedArray)) / sizeof(uint32_t))
    return NULL;

  bufferSize = sizeof(uint32_t) * (size_t)cellCount;
  a = (PackedArray*)PACKEDARRAY_MALLOC(sizeof(PackedArray) + bufferSize);

  if (a != NULL)
  {
    // an empty array has no last cell: indexing with cellCount - 1 would wrap
    // around and write outside of the buffer
    if (cellCount > 0)
      a->buffer[cellCount - 1] = 0;
    a->bitsPerItem = bitsPerItem;
    a->count = count;
  }

  return a;
}

// Releases an array through PACKEDARRAY_FREE.
// `a` must not be NULL (asserted).
void PackedArray_destroy(PackedArray* a)
{
  PACKEDARRAY_ASSERT(a);
  PACKEDARRAY_FREE(a);
}

// Packs `count` values read from `in` into `a`, starting at item index
// `offset`, by dispatching to the routine generated for a->bitsPerItem.
//
// a, in: must not be NULL (asserted).
// count: number of items to pack; 0 is a no-op.
//
// Nothing is done when a->bitsPerItem is outside of 1..32.
void PackedArray_pack(PackedArray* a, const uint32_t offset, const uint32_t* in, uint32_t count)
{
  PACKEDARRAY_ASSERT(a != NULL);
  PACKEDARRAY_ASSERT(in != NULL);

  // the unrolled routines consume an item before they test for the end of the
  // input, so an empty request would read past `in` and scribble over the
  // buffer: handle it here
  if (count == 0)
    return;

  switch (a->bitsPerItem)
  {
    case 1:   __PackedArray_pack_1(a->buffer, offset, in, count); break;
    case 2:   __PackedArray_pack_2(a->buffer, offset, in, count); break;
    case 3:   __PackedArray_pack_3(a->buffer, offset, in, count); break;
    case 4:   __PackedArray_pack_4(a->buffer, offset, in, count); break;
    case 5:   __PackedArray_pack_5(a->buffer, offset, in, count); break;
    case 6:   __PackedArray_pack_6(a->buffer, offset, in, count); break;
    case 7:   __PackedArray_pack_7(a->buffer, offset, in, count); break;
    case 8:   __PackedArray_pack_8(a->buffer, offset, in, count); break;
    case 9:   __PackedArray_pack_9(a->buffer, offset, in, count); break;
    case 10:  __PackedArray_pack_10(a->buffer, offset, in, count); break;
    case 11:  __PackedArray_pack_11(a->buffer, offset, in, count); break;
    case 12:  __PackedArray_pack_12(a->buffer, offset, in, count); break;
    case 13:  __PackedArray_pack_13(a->buffer, offset, in, count); break;
    case 14:  __PackedArray_pack_14(a->buffer, offset, in, count); break;
    case 15:  __PackedArray_pack_15(a->buffer, offset, in, count); break;
    case 16:  __PackedArray_pack_16(a->buffer, offset, in, count); break;
    case 17:  __PackedArray_pack_17(a->buffer, offset, in, count); break;
    case 18:  __PackedArray_pack_18(a->buffer, offset, in, count); break;
    case 19:  __PackedArray_pack_19(a->buffer, offset, in, count); break;
    case 20:  __PackedArray_pack_20(a->buffer, offset, in, count); break;
    case 21:  __PackedArray_pack_21(a->buffer, offset, in, count); break;
    case 22:  __PackedArray_pack_22(a->buffer, offset, in, count); break;
    case 23:  __PackedArray_pack_23(a->buffer, offset, in, count); break;
    case 24:  __PackedArray_pack_24(a->buffer, offset, in, count); break;
    case 25:  __PackedArray_pack_25(a->buffer, offset, in, count); break;
    case 26:  __PackedArray_pack_26(a->buffer, offset, in, count); break;
    case 27:  __PackedArray_pack_27(a->buffer, offset, in, count); break;
    case 28:  __PackedArray_pack_28(a->buffer, offset, in, count); break;
    case 29:  __PackedArray_pack_29(a->buffer, offset, in, count); break;
    case 30:  __PackedArray_pack_30(a->buffer, offset, in, count); break;
    case 31:  __PackedArray_pack_31(a->buffer, offset, in, count); break;
    case 32:  __PackedArray_pack_32(a->buffer, offset, in, count); break;
  }
}

// Unpacks `count` items of `a`, starting at item index `offset`, into `out`
// (one uint32_t per item), by dispatching to the routine generated for
// a->bitsPerItem.
//
// a, out: must not be NULL (asserted).
// count:  number of items to unpack; 0 is a no-op.
//
// Nothing is done when a->bitsPerItem is outside of 1..32.
void PackedArray_unpack(const PackedArray* a, const uint32_t offset, uint32_t* out, uint32_t count)
{
  PACKEDARRAY_ASSERT(a != NULL);
  PACKEDARRAY_ASSERT(out != NULL);

  // the unrolled routines store an item before they test for the end of the
  // output, so an empty request would write past `out`: handle it here
  if (count == 0)
    return;

  switch (a->bitsPerItem)
  {
    case 1:   __PackedArray_unpack_1(a->buffer, offset, out, count); break;
    case 2:   __PackedArray_unpack_2(a->buffer, offset, out, count); break;
    case 3:   __PackedArray_unpack_3(a->buffer, offset, out, count); break;
    case 4:   __PackedArray_unpack_4(a->buffer, offset, out, count); break;
    case 5:   __PackedArray_unpack_5(a->buffer, offset, out, count); break;
    case 6:   __PackedArray_unpack_6(a->buffer, offset, out, count); break;
    case 7:   __PackedArray_unpack_7(a->buffer, offset, out, count); break;
    case 8:   __PackedArray_unpack_8(a->buffer, offset, out, count); break;
    case 9:   __PackedArray_unpack_9(a->buffer, offset, out, count); break;
    case 10:  __PackedArray_unpack_10(a->buffer, offset, out, count); break;
    case 11:  __PackedArray_unpack_11(a->buffer, offset, out, count); break;
    case 12:  __PackedArray_unpack_12(a->buffer, offset, out, count); break;
    case 13:  __PackedArray_unpack_13(a->buffer, offset, out, count); break;
    case 14:  __PackedArray_unpack_14(a->buffer, offset, out, count); break;
    case 15:  __PackedArray_unpack_15(a->buffer, offset, out, count); break;
    case 16:  __PackedArray_unpack_16(a->buffer, offset, out, count); break;
    case 17:  __PackedArray_unpack_17(a->buffer, offset, out, count); break;
    case 18:  __PackedArray_unpack_18(a->buffer, offset, out, count); break;
    case 19:  __PackedArray_unpack_19(a->buffer, offset, out, count); break;
    case 20:  __PackedArray_unpack_20(a->buffer, offset, out, count); break;
    case 21:  __PackedArray_unpack_21(a->buffer, offset, out, count); break;
    case 22:  __PackedArray_unpack_22(a->buffer, offset, out, count); break;
    case 23:  __PackedArray_unpack_23(a->buffer, offset, out, count); break;
    case 24:  __PackedArray_unpack_24(a->buffer, offset, out, count); break;
    case 25:  __PackedArray_unpack_25(a->buffer, offset, out, count); break;
    case 26:  __PackedArray_unpack_26(a->buffer, offset, out, count); break;
    case 27:  __PackedArray_unpack_27(a->buffer, offset, out, count); break;
    case 28:  __PackedArray_unpack_28(a->buffer, offset, out, count); break;
    case 29:  __PackedArray_unpack_29(a->buffer, offset, out, count); break;
    case 30:  __PackedArray_unpack_30(a->buffer, offset, out, count); break;
    case 31:  __PackedArray_unpack_31(a->buffer, offset, out, count); break;
    case 32:  __PackedArray_unpack_32(a->buffer, offset, out, count); break;
  }
}

// Stores `in` as the item at index `offset` of `a`, leaving every other bit
// of the buffer untouched.
//
// a:  must not be NULL (asserted).
// in: must fit in a->bitsPerItem bits (asserted).
void PackedArray_set(PackedArray* a, const uint32_t offset, const uint32_t in)
{
  uint64_t bitPosition;
  uint32_t* __restrict cell;
  uint32_t width;
  uint32_t shift;
  uint32_t room;
  uint32_t mask;

  PACKEDARRAY_ASSERT(a != NULL);

  width = a->bitsPerItem;

  // absolute position of the item's first bit, in 64 bits to avoid overflow
  bitPosition = (uint64_t)offset * (uint64_t)width;

  cell = &a->buffer[bitPosition / 32];
  shift = (uint32_t)(bitPosition % 32);
  room = 32 - shift; // bits left in the first cell, starting at the item

  mask = (uint32_t)(1ULL << width) - 1;
  PACKEDARRAY_ASSERT(0 == (~mask & in));

  // first cell: receives the whole item, or only its low bits when the item
  // straddles two cells
  cell[0] = (cell[0] & ~(mask << shift)) | (in << shift);

  if (width > room)
  {
    // second cell: receives the bits that did not fit in the first one
    cell[1] = (cell[1] & ~(mask >> room)) | (in >> room);
  }
}

// Returns the item stored at index `offset` of `a`.
// `a` must not be NULL (asserted).
uint32_t PackedArray_get(const PackedArray* a, const uint32_t offset)
{
  uint64_t bitPosition;
  const uint32_t* __restrict cell;
  uint32_t width;
  uint32_t shift;
  uint32_t room;
  uint32_t mask;
  uint32_t low, high;

  PACKEDARRAY_ASSERT(a != NULL);

  width = a->bitsPerItem;

  // absolute position of the item's first bit, in 64 bits to avoid overflow
  bitPosition = (uint64_t)offset * (uint64_t)width;

  cell = &a->buffer[bitPosition / 32];
  shift = (uint32_t)(bitPosition % 32);
  room = 32 - shift; // bits left in the first cell, starting at the item

  mask = (uint32_t)(1ULL << width) - 1;

  // the item lies entirely in one cell
  if (width <= room)
    return (cell[0] >> shift) & mask;

  // the item straddles two cells: `low` only has bits below `room` and `high`
  // only has bits from `room` upwards, so OR-ing them and masking to `width`
  // bits rebuilds the item
  low = cell[0] >> shift;
  high = cell[1] << room;

  return (low | high) & mask;
}

// Returns the number of 32-bit cells needed to hold a->count items of
// a->bitsPerItem bits each, rounded up.
// `a` must not be NULL (asserted).
uint32_t PackedArray_bufferSize(const PackedArray* a)
{
  uint64_t totalBits;

  PACKEDARRAY_ASSERT(a != NULL);

  // 64-bit product: bitsPerItem * count may not fit in 32 bits
  totalBits = (uint64_t)a->bitsPerItem * (uint64_t)a->count;

  return (uint32_t)((totalBits + 31) / 32);
}

#if !(defined(_MSC_VER) && _MSC_VER >= 1400) && !defined(__GNUC__)
// Integer base-2 logarithm, i.e. the zero-based index of the highest bit set
// in v. Returns 0 for v == 0.
//
// references
// http://aggregate.org/MAGIC
// http://graphics.stanford.edu/~seander/bithacks.html
static uint32_t __PackedArray_log2(uint32_t v)
{
  static const uint32_t deBruijnBitIndex[32] =
  {
    0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
    8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
  };
  uint32_t shift;

  // copy the highest set bit into every lower position (shifts 1, 2, 4, 8, 16)
  for (shift = 1; shift <= 16; shift <<= 1)
    v |= v >> shift;

  // the top 5 bits of the product index the lookup table
  return deBruijnBitIndex[(uint32_t)(v * 0x7C4ACDDU) >> 27];
}
#endif

// Returns the zero-based position of the highest bit set in v,
// or -1 when v has no bit set.
static int __PackedArray_highestBitSet(uint32_t v)
{
  if (v == 0)
    return -1;

#if defined(_MSC_VER) && _MSC_VER >= 1400
  {
    unsigned long index;

    _BitScanReverse(&index, v);
    return (int)index;
  }
#elif defined(__GNUC__)
  // __builtin_clz counts the leading zero bits of a non-zero argument
  return 31 - __builtin_clz(v);
#else
  return (int)__PackedArray_log2(v);
#endif
}

// Returns the smallest number of bits per item able to represent every one of
// the `count` values of `in`. The result is never less than 1, including when
// all values are 0 or when count is 0.
uint32_t PackedArray_computeBitsPerItem(const uint32_t* in, uint32_t count)
{
  uint32_t largest = 0;
  uint32_t index;
  int highestBit;

  // the largest value decides how many bits are needed
  for (index = 0; index < count; ++index)
  {
    if (in[index] > largest)
      largest = in[index];
  }

  highestBit = __PackedArray_highestBitSet(largest);
  if (highestBit < 0)
    return 1;

  return (uint32_t)highestBit + 1;
}


// - 8< ------------------------------------------------------------------------

#if defined(PACKEDARRAY_SELF_TEST) && defined(PACKEDARRAY_SELF_BENCH)
  #error choose either PACKEDARRAY_SELF_TEST or PACKEDARRAY_SELF_BENCH
#endif

#if defined(PACKEDARRAY_SELF_TEST)

#undef NDEBUG // we want asserts
#include <assert.h>

#include <stdio.h>
#include <string.h> // memcmp

// Straightforward, one item at a time implementation of packing, used by the
// self test as the reference that PackedArray_pack and PackedArray_set are
// compared against.
//
// Packs `count` values read from `in` into `a`, starting at item index
// `offset`. a and in must not be NULL, count must not be 0, and every value
// must fit in a->bitsPerItem bits (all asserted).
static void PackedArray_pack_reference(PackedArray* a, const uint32_t offset, const uint32_t* in, uint32_t count)
{
  uint32_t* __restrict out;
  uint32_t bitsPerItem;
  uint32_t startBit;
  uint32_t bitsAvailable;
  uint32_t mask;
  uint32_t packed;

  assert(a != NULL);
  assert(in != NULL);
  assert(count != 0);

  bitsPerItem = a->bitsPerItem;

  // first cell touched and position of the first item inside of it
  out = &a->buffer[(uint64_t)offset * (uint64_t)bitsPerItem / 32];
  startBit = ((uint64_t)offset * (uint64_t)bitsPerItem) % 32;

  bitsAvailable = 32 - startBit;

  mask = (uint32_t)(1ULL << bitsPerItem) - 1;

  // `packed` is a working copy of the current cell, written back when moving
  // on to the next cell and once more at the very end
  packed = *out;

  while (count--)
  {
    uint32_t value = *in++;

    assert(0 == (~mask & value));

    if (bitsPerItem <= bitsAvailable)
    {
      // value fits in the current cell
      packed = (packed & ~(mask << startBit)) | (value << startBit);

      startBit += bitsPerItem;
      bitsAvailable -= bitsPerItem;
    }
    else if (bitsAvailable == 0)
    {
      // the current cell is full: flush it and start at bit 0 of the next one
      *out++ = packed;
      packed = *out;

      startBit = 0;
      bitsAvailable = 32;

      packed = (packed & ~mask) | value;

      startBit += bitsPerItem;
      bitsAvailable -= bitsPerItem;
    }
    else
    {
      // value spans 2 buffer cells
      uint32_t low, high;

      low = value << startBit;
      high = value >> bitsAvailable;

      packed = (packed & ~(mask << startBit)) | low;
      *out++ = packed;

      packed = *out;
      packed = (packed & ~(mask >> (32 - startBit))) | high;

      startBit = (startBit + bitsPerItem) % 32;
      bitsAvailable = 32 - startBit;
    }
  }
  // write back the cell holding the last item
  *out = packed;
}

// Straightforward, one item at a time implementation of unpacking, used by the
// self test to cross-check PackedArray_unpack.
//
// Extracts `count` items of `a`, starting at item index `offset`, into `out`
// (one uint32_t per item). a and out must not be NULL and count must not be 0
// (all asserted).
static void PackedArray_unpack_reference(const PackedArray* a, const uint32_t offset, uint32_t* out, uint32_t count)
{
  const uint32_t* __restrict in;
  uint32_t bitsPerItem;
  uint32_t startBit;
  uint32_t bitsAvailable;
  uint32_t mask;
  uint32_t packed;

  assert(a != NULL);
  assert(out != NULL);
  assert(count != 0);

  bitsPerItem = a->bitsPerItem;

  // first cell touched and position of the first item inside of it
  in = &a->buffer[(uint64_t)offset * (uint64_t)bitsPerItem / 32];
  startBit = ((uint64_t)offset * (uint64_t)bitsPerItem) % 32;

  bitsAvailable = 32 - startBit;

  mask = (uint32_t)(1ULL << bitsPerItem) - 1;

  // `packed` caches the cell `in` currently points at
  packed = *in;

  while (count--)
  {
    uint32_t value;

    if (bitsPerItem <= bitsAvailable)
    {
      // value lies entirely in the current cell
      value = (packed >> startBit) & mask;
      *out++ = value;

      startBit += bitsPerItem;
      bitsAvailable -= bitsPerItem;
    }
    else if (bitsAvailable == 0)
    {
      // the current cell is exhausted: the value starts at bit 0 of the next one
      packed = *++in;
      value = packed & mask;
      *out++ = value;

      startBit = bitsPerItem;
      bitsAvailable = 32 - bitsPerItem;
    }
    else
    {
      // value spans 2 buffer cells
      uint32_t low, high;

      low = packed >> startBit;
      packed = *++in;
      high = packed << (32 - startBit);

      // take the bits below bitsAvailable from low and the remaining bits of
      // the item from high
      value = low ^ ((low ^ high) & (mask >> bitsAvailable << bitsAvailable));
      *out++ = value;

      startBit = (startBit + bitsPerItem) % 32;
      bitsAvailable = 32 - startBit;
    }
  }
}

int main(void)
{
  uint32_t bitsPerItem;

  printf("-- PackedArray self test -------------------------------------------------------\n");
  printf("\n");

  printf("sizeof(PackedArray) = %d\n", (int)sizeof(PackedArray));
  printf("\n");

  printf("1 by 1 packing / unpacking:\n");
  for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem)
  {
    uint32_t mask = (uint32_t)(1ULL << bitsPerItem) - 1;
    int count;

    for (count = 1; count <= 1024; ++count)
    {
      PackedArray* a1 = PackedArray_create(bitsPerItem, count);
      PackedArray* a2 = PackedArray_create(bitsPerItem, count);
      int i;

      assert(a1->count == (uint32_t)count);
      assert(a2->count == (uint32_t)count);
      assert(a1->bitsPerItem == bitsPerItem);
      assert(a2->bitsPerItem == bitsPerItem);
      assert(PackedArray_bufferSize(a1) == PackedArray_bufferSize(a2));

      for (i = 0; i < (int)PackedArray_bufferSize(a1); ++i)
        a1->buffer[i] = a2->buffer[i] = rand();

      for (i = 0; i < count; ++i)
      {
        uint32_t v1, v2;

        v1 = rand() & mask;
        v2 = v1 + 1;
        PackedArray_pack(a1, i, &v1, 1);
        PackedArray_pack_reference(a2, i, &v1, 1);
        assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0);

        PackedArray_set(a1, i, v1);
        assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0);

        PackedArray_unpack(a1, i, &v2, 1);
        assert(v1 == v2);
        PackedArray_unpack_reference(a2, i, &v2, 1);
        assert(v1 == v2);
        v2 = PackedArray_get(a2, i);
        assert(v1 == v2);
      }

      for (i = count - 1; i >= 0; --i)
      {
        uint32_t v1, v2;

        v1 = rand() & mask;
        v2 = v1 + 1;
        PackedArray_pack(a1, i, &v1, 1);
        PackedArray_pack_reference(a2, i, &v1, 1);
        assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0);

        PackedArray_set(a1, i, v1);
        assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0);

        PackedArray_unpack(a1, i, &v2, 1);
        assert(v1 == v2);
        PackedArray_unpack_reference(a2, i, &v2, 1);
        assert(v1 == v2);
        v2 = PackedArray_get(a2, i);
        assert(v1 == v2);
      }

      PackedArray_destroy(a1);
      PackedArray_destroy(a2);
    }
    printf("  %2d bits per item -- success.\n", bitsPerItem);
  }

  printf("\n");
  printf("bulk packing / unpacking:\n");
  for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem)
  {
    uint32_t mask = (uint32_t)(1ULL << bitsPerItem) - 1;
    int count;

    for (count = 1; count <= 128; ++count)
    {
      uint32_t* v1;
      uint32_t* v2;
      PackedArray* a1;
      PackedArray* a2;
      int i, j;

      v1 = (uint32_t*)malloc(sizeof(uint32_t) * count);
      assert(v1 != NULL);
      v2 = (uint32_t*)malloc(sizeof(uint32_t) * count);
      assert(v2 != NULL);

      a1 = PackedArray_create(bitsPerItem, count);
      assert(a1 != NULL);
      a2 = PackedArray_create(bitsPerItem, count);
      assert(a2 != NULL);
      
      for (i = 0; i < (int)PackedArray_bufferSize(a1); ++i)
        a1->buffer[i] = a2->buffer[i] = rand();

      for (i = 0; i < count; ++i)
        v1[i] = rand() & mask;

      assert(bitsPerItem >= PackedArray_computeBitsPerItem(v1, count));

      for (i = 0; i < count; ++i)
      {
        for (j = 1; j <= count - i; ++j)
        {
          PackedArray_pack(a1, i, v1, j);
          PackedArray_pack_reference(a2, i, v1, j);
          assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0);

          PackedArray_unpack(a1, i, v2, j);
          assert(bitsPerItem >= PackedArray_computeBitsPerItem(v2, j));
          assert(memcmp(v1, v2, j * sizeof(uint32_t)) == 0);
          PackedArray_unpack_reference(a2, i, v2, j);
          assert(memcmp(v1, v2, j * sizeof(uint32_t)) == 0);
        }
      }

      PackedArray_destroy(a1);
      PackedArray_destroy(a2);
      free(v1);
      free(v2);
    }
    printf("  %2d bits per item -- success.\n", bitsPerItem);
  }

  return 0;
}

#elif defined(PACKEDARRAY_SELF_BENCH)  // #if defined(PACKEDARRAY_SELF_TEST)

#ifndef NDEBUG
#error please define NDEBUG to inhibit asserts when compiling the benchmark
#endif

#include <stdio.h>
#include <string.h>
#include <float.h>

#ifndef MIN
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif

#ifndef MAX
#define MAX(x, y) (((x) < (y)) ? (y) : (x))
#endif

#ifdef _MSC_VER
#pragma warning(push, 3)
#include <windows.h>
#pragma warning(pop)
// Returns the current value of the high resolution performance counter,
// converted to seconds.
//
// The unit has to be seconds: the bench_* functions scale the difference of
// two readings by 1e6 to report microseconds, and main() prints the overall
// difference as "total time (s)". The gettimeofday() based variant of this
// function returns seconds as well.
static double getChronometerTime(void)
{
  LARGE_INTEGER frequency;
  LARGE_INTEGER t;

  QueryPerformanceFrequency(&frequency);
  QueryPerformanceCounter(&t);

  // ticks / (ticks per second) == seconds
  return (double)t.QuadPart / (double)frequency.QuadPart;
}

#else
#include <sys/time.h>

// Reads the wall clock through gettimeofday() and returns it as a number of
// seconds, the microseconds part being folded into the fractional part.
static double getChronometerTime()
{
  struct timeval tv = { 0 };
  double seconds;
  double microseconds;

  gettimeofday(&tv, NULL);

  seconds = (double)tv.tv_sec;
  microseconds = (double)tv.tv_usec;

  return seconds + microseconds * 1e-6;
}
#endif

#define LOOP_COUNT 1000
// Times LOOP_COUNT memcpy() calls copying count 32 bits items from in to out.
// Returns the average duration of a single copy, in microseconds.
static double bench_memcpy(uint32_t* in, uint32_t* out, uint32_t count)
{
  double t0, t1;
  uint32_t pass = 0;

  t0 = getChronometerTime();

  while (pass < LOOP_COUNT)
  {
    memcpy(out, in, count * sizeof(uint32_t));
    ++pass;
  }

  t1 = getChronometerTime();

  // seconds -> microseconds, averaged over all passes
  return 1e6 * (t1 - t0) / LOOP_COUNT;
}

// Times LOOP_COUNT hand written copies of count 32 bits items from in to out.
// Returns the average duration of a single copy, in microseconds.
static double bench_loopcpy(uint32_t* in, uint32_t* out, uint32_t count)
{
  double t0, t1;
  uint32_t pass;

  t0 = getChronometerTime();

  for (pass = 0; pass != LOOP_COUNT; ++pass)
  {
    const uint32_t* src = in;
    const uint32_t* const srcEnd = in + count;
    uint32_t* dst = out;

    // element by element copy, front to back
    while (src != srcEnd)
      *dst++ = *src++;
  }

  t1 = getChronometerTime();

  // seconds -> microseconds, averaged over all passes
  return 1e6 * (t1 - t0) / LOOP_COUNT;
}

// Times LOOP_COUNT calls to PackedArray_pack() storing count items from in at
// the beginning of out. Returns the average duration of a single call, in
// microseconds.
static double bench_pack(uint32_t* in, PackedArray* out, uint32_t count)
{
  double t0, t1;
  int remaining = LOOP_COUNT;

  t0 = getChronometerTime();

  while (remaining-- > 0)
    PackedArray_pack(out, 0, in, count);

  t1 = getChronometerTime();

  // seconds -> microseconds, averaged over all passes
  return 1e6 * (t1 - t0) / LOOP_COUNT;
}

// Times LOOP_COUNT calls to PackedArray_unpack() reading the first count items
// of in into out. Returns the average duration of a single call, in
// microseconds.
static double bench_unpack(PackedArray* in, uint32_t* out, uint32_t count)
{
  double t0, t1;
  int remaining = LOOP_COUNT;

  t0 = getChronometerTime();

  while (remaining-- > 0)
    PackedArray_unpack(in, 0, out, count);

  t1 = getChronometerTime();

  // seconds -> microseconds, averaged over all passes
  return 1e6 * (t1 - t0) / LOOP_COUNT;
}

#define MAX_ELEMENT_COUNT (1 << 18)
#define LOG2_MAX_ELEMENT_COUNT 18
// Allocates size bytes for the benchmark, or prints a message and exits.
// Allocation failures can't be left to assert(): the benchmark refuses to
// compile unless NDEBUG is defined, which turns every assert() into a no-op.
static void* bench_malloc(size_t size)
{
  void* p = malloc(size);

  if (p == NULL)
  {
    fprintf(stderr, "PackedArray self bench: out of memory\n");
    exit(EXIT_FAILURE);
  }

  return p;
}

// Benchmark entry point.
//
// Measures memcpy(), a plain copy loop, PackedArray_pack() and
// PackedArray_unpack() on power of two item counts ranging from 1 to
// MAX_ELEMENT_COUNT, and for every bitsPerItem from 1 to 32 for the packed
// variants. Prints a table per measurement series, then pack / unpack
// statistics grouped by bits per item and by size. Speeds are expressed in
// bytes of unpacked data per microsecond.
//
// Returns 0 on success; exits with a failure status when memory runs out.
int main(void)
{
  double start, end;
  uint32_t* b1;
  uint32_t* b2;
  uint32_t count, bitsPerItem;
  PackedArray** packed;
  uint32_t i;
  double* speed_memcpy;
  double avg_memcpy, min_memcpy, max_memcpy;
  double* speed_loopcpy;
  double avg_loopcpy, min_loopcpy, max_loopcpy;
  double* speed_pack[32];
  double avg_pack, min_pack, max_pack;
  double* speed_unpack[32];
  double avg_unpack, min_unpack, max_unpack;

  printf("-- PackedArray self bench ------------------------------------------------------\n");

  start = getChronometerTime();

  // b1 holds the random source data, b2 is the scratch / destination buffer
  b1 = (uint32_t*)bench_malloc(sizeof(uint32_t) * MAX_ELEMENT_COUNT);
  b2 = (uint32_t*)bench_malloc(sizeof(uint32_t) * MAX_ELEMENT_COUNT);

  // one PackedArray per bitsPerItem value, packed[n - 1] stores n bits items
  packed = (PackedArray**)bench_malloc(sizeof(PackedArray*) * 32);
  for (i = 0; i < 32; ++i)
  {
    packed[i] = PackedArray_create(i + 1, MAX_ELEMENT_COUNT);
    if (packed[i] == NULL)
    {
      fprintf(stderr, "PackedArray self bench: out of memory\n");
      return EXIT_FAILURE;
    }
  }

  for (i = 0; i < MAX_ELEMENT_COUNT; ++i)
    b1[i] = rand();

  // one slot per power of two size: 2^0 .. 2^LOG2_MAX_ELEMENT_COUNT
  speed_memcpy = (double*)bench_malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1));
  avg_memcpy = 0;
  min_memcpy = DBL_MAX;
  max_memcpy = 0;

  printf("memcpy:\n");
  printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)\n");

  for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i)
  {
    double elapsed = bench_memcpy(b1, b2, count);
    double speed = count * sizeof(uint32_t) / elapsed;
    printf("%4d\t%8u\t%9.3f\t%12.3f\n", 32, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed);

    avg_memcpy += speed;
    min_memcpy = MIN(min_memcpy, speed);
    max_memcpy = MAX(max_memcpy, speed);

    speed_memcpy[i] = speed;
  }

  // i now holds the number of sizes measured
  avg_memcpy /= i;

  printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)\n");
  printf("%10.3f\t%10.3f\t%10.3f\n", avg_memcpy, min_memcpy, max_memcpy);
  printf("\n");

  speed_loopcpy = (double*)bench_malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1));
  avg_loopcpy = 0;
  min_loopcpy = DBL_MAX;
  max_loopcpy = 0;

  printf("loopcpy:\n");
  printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)\n");

  for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i)
  {
    double elapsed = bench_loopcpy(b1, b2, count);
    double speed = count * sizeof(uint32_t) / elapsed;
    printf("%4d\t%8u\t%9.3f\t%12.3f\n", 32, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed);

    avg_loopcpy += speed;
    min_loopcpy = MIN(min_loopcpy, speed);
    max_loopcpy = MAX(max_loopcpy, speed);

    speed_loopcpy[i] = speed;
  }

  avg_loopcpy /= i;

  printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)\n");
  printf("%10.3f\t%10.3f\t%10.3f\n", avg_loopcpy, min_loopcpy, max_loopcpy);
  printf("\n");

  for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem)
  {
    // keeps the low bitsPerItem bits; the 64 bits shift makes bitsPerItem == 32 well defined
    const uint32_t mask = (uint32_t)(1ULL << bitsPerItem) - 1;

    avg_pack = 0;
    min_pack = DBL_MAX;
    max_pack = 0;
    avg_unpack = 0;
    min_unpack = DBL_MAX;
    max_unpack = 0;

    printf("pack:\t        \t         \t            \t");
    printf("unpack:\t        \t         \t            \t");
    printf("\n");
    printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)");
    printf("\t");
    printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)");
    printf("\n");

    // speeds are kept per (bitsPerItem, size) for the summary tables below
    speed_pack[bitsPerItem - 1] = (double*)bench_malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1));
    speed_unpack[bitsPerItem - 1] = (double*)bench_malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1));
    for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i)
    {
      uint32_t j;
      double elapsed, speed;

      // packing requires items that fit in bitsPerItem bits
      for (j = 0; j < count; ++j)
        b2[j] = b1[j] & mask;

      elapsed = bench_pack(b2, packed[bitsPerItem - 1], count);
      speed = count * sizeof(uint32_t) / elapsed;
      printf("%4u\t%8u\t%9.3f\t%12.3f", bitsPerItem, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed);

      avg_pack += speed;
      min_pack = MIN(min_pack, speed);
      max_pack = MAX(max_pack, speed);

      speed_pack[bitsPerItem - 1][i] = speed;

      printf("\t");

      elapsed = bench_unpack(packed[bitsPerItem - 1], b2, count);
      speed = count * sizeof(uint32_t) / elapsed;
      printf("%4u\t%8u\t%9.3f\t%12.3f", bitsPerItem, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed);

      avg_unpack += speed;
      min_unpack = MIN(min_unpack, speed);
      max_unpack = MAX(max_unpack, speed);

      speed_unpack[bitsPerItem - 1][i] = speed;

      printf("\n");
    }
    assert(i == LOG2_MAX_ELEMENT_COUNT + 1);

    printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)");
    printf("\t\t");
    printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)");
    printf("\n");

    avg_pack /= i;
    printf("%10.3f\t%10.3f\t%10.3f", avg_pack, min_pack, max_pack);

    printf("\t\t");

    avg_unpack /= i;
    printf("%10.3f\t%10.3f\t%10.3f", avg_unpack, min_unpack, max_unpack);
    printf("\n");
    printf("\n");
  }

  printf("\n");

  printf("stats by bits per item\n");
  printf("pack:\t          \t          \t          \t");
  printf("unpack:\t          \t          \t          \t");
  printf("\n");
  printf("bits\tavg (B/µs)\tmin (B/µs)\tmax (B/µs)");
  printf("\t");
  printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)");
  printf("\n");
  for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem)
  {
    avg_pack = 0;
    min_pack = DBL_MAX;
    max_pack = 0;
    avg_unpack = 0;
    min_unpack = DBL_MAX;
    max_unpack = 0;

    // aggregate over every size for this bitsPerItem
    for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i)
    {
      double speed;

      speed = speed_pack[bitsPerItem - 1][i];

      avg_pack += speed;
      min_pack = MIN(min_pack, speed);
      max_pack = MAX(max_pack, speed);

      speed = speed_unpack[bitsPerItem - 1][i];

      avg_unpack += speed;
      min_unpack = MIN(min_unpack, speed);
      max_unpack = MAX(max_unpack, speed);
    }
    assert(i == LOG2_MAX_ELEMENT_COUNT + 1);

    printf("%4u\t", bitsPerItem);

    avg_pack /= i;
    printf("%10.3f\t%10.3f\t%10.3f", avg_pack, min_pack, max_pack);
    printf("\t");

    avg_unpack /= i;
    printf("%10.3f\t%10.3f\t%10.3f", avg_unpack, min_unpack, max_unpack);
    printf("\n");
  }

  printf("\n");

  printf("stats by size\n");
  printf("pack:\t          \t          \t          \t");
  printf("unpack:\t          \t          \t          \t");
  printf("\n");
  printf("size(B)\tavg (B/µs)\tmin (B/µs)\tmax (B/µs)");
  printf("\t");
  printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)");
  printf("\n");
  for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i)
  {
    avg_pack = 0;
    min_pack = DBL_MAX;
    max_pack = 0;
    avg_unpack = 0;
    min_unpack = DBL_MAX;
    max_unpack = 0;

    // aggregate over every bitsPerItem for this size
    for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem)
    {
      double speed;

      speed = speed_pack[bitsPerItem - 1][i];

      avg_pack += speed;
      min_pack = MIN(min_pack, speed);
      max_pack = MAX(max_pack, speed);

      speed = speed_unpack[bitsPerItem - 1][i];

      avg_unpack += speed;
      min_unpack = MIN(min_unpack, speed);
      max_unpack = MAX(max_unpack, speed);
    }

    printf("%7u\t", (uint32_t)sizeof(uint32_t) * count);

    avg_pack /= 32;
    printf("%10.3f\t%10.3f\t%10.3f", avg_pack, min_pack, max_pack);
    printf("\t");

    avg_unpack /= 32;
    printf("%10.3f\t%10.3f\t%10.3f", avg_unpack, min_unpack, max_unpack);
    printf("\n");
  }

  printf("\n");

  free(b1);
  free(b2);
  free(speed_memcpy);
  free(speed_loopcpy);

  for (i = 0; i < 32; ++i)
  {
    PackedArray_destroy(packed[i]);
    free(speed_pack[i]);
    free(speed_unpack[i]);
  }

  free(packed);

  end = getChronometerTime();
  printf("total time (s): %f\n", (end - start));
  printf("\n");

  return 0;
}

#endif // #elif defined(PACKEDARRAY_SELF_BENCH)

#endif // #ifdef PACKEDARRAY_IMPL


================================================
FILE: PackedArray.h
================================================
#ifndef PACKEDARRAY_H
#define PACKEDARRAY_H

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>

/*

PackedArray principle:
  . compact storage of <= 32 bits items
  . items are tightly packed into a buffer of uint32_t integers

PackedArray requirements:
  . you must know in advance how many bits are needed to hold a single item
  . you must know in advance how many items you want to store
  . when packing, behavior is undefined if items have more than bitsPerItem bits

PackedArray general in memory representation:
  |-------------------------------------------------- - - -
  |       b0       |       b1       |       b2       |
  |-------------------------------------------------- - - -
  | i0 | i1 | i2 | i3 | i4 | i5 | i6 | i7 | i8 | i9 |
  |-------------------------------------------------- - - -

  . items are tightly packed together
  . several items end up inside the same buffer cell, e.g. i0, i1, i2
  . some items span two buffer cells, e.g. i3, i6

*/

// Header of a packed array, immediately followed in memory by its storage.
struct _PackedArray
{
  // number of bits used to store each item
  uint32_t bitsPerItem;
  // number of items the array was created for
  uint32_t count;

  // brings the fields located before buffer to 4 uint32_t, i.e. 16 bytes; the
  // SIMD implementation allocates the structure on a 16 bytes boundary and
  // asserts that buffer is 16 bytes aligned
  uint32_t padding[2];
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4200)
#endif // #ifdef _MSC_VER
  // storage cells holding the packed items; flexible array member sized at
  // allocation time (MSVC warning 4200 is silenced around its declaration)
  uint32_t buffer[];
#ifdef _MSC_VER
#pragma warning(pop)
#endif // #ifdef _MSC_VER
};
typedef struct _PackedArray PackedArray;

// creation / destruction
// bitsPerItem is the number of bits needed to hold a single item, count is the
// number of items to store (see the requirements listed at the top of this file)
// NOTE(review): the SIMD implementation asserts 1 <= bitsPerItem <= 32 and
// hands back whatever the allocator returned, possibly NULL; the same is
// presumably true of the reference implementation -- confirm
PackedArray* PackedArray_create(uint32_t bitsPerItem, uint32_t count);
void PackedArray_destroy(PackedArray* a);

// packing / unpacking
// offset is expressed in number of elements
// count is the number of items read from in (pack) or written to out (unpack)
void PackedArray_pack(PackedArray* a, const uint32_t offset, const uint32_t* in, uint32_t count);
void PackedArray_unpack(const PackedArray* a, const uint32_t offset, uint32_t* out, uint32_t count);

// single item access
// offset is the index of the item to write / read
void PackedArray_set(PackedArray* a, const uint32_t offset, const uint32_t in);
uint32_t PackedArray_get(const PackedArray* a, const uint32_t offset);

// helpers
// PackedArray_bufferSize: the self test uses the result as the number of
// uint32_t cells of a->buffer
// PackedArray_computeBitsPerItem: presumably the smallest bitsPerItem able to
// hold every one of the count values of in -- TODO confirm against the implementation
uint32_t PackedArray_bufferSize(const PackedArray* a);
uint32_t PackedArray_computeBitsPerItem(const uint32_t* in, uint32_t count);

#ifdef __cplusplus
}
#endif

#endif // #ifndef PACKEDARRAY_H


================================================
FILE: PackedArraySIMD.c
================================================
// see README.md for usage instructions.
// (‑●‑●)> released under the WTFPL v2 license, by Gregory Pakosz (@gpakosz)

#ifndef PACKEDARRAY_SELF
#define PACKEDARRAY_SELF "PackedArraySIMD.c"
#endif

#ifdef PACKEDARRAY_IMPL

#ifndef PACKEDARRAY_JOIN
#define PACKEDARRAY_JOIN(lhs, rhs)    PACKEDARRAY_JOIN_(lhs, rhs)
#define PACKEDARRAY_JOIN_(lhs, rhs)   PACKEDARRAY_JOIN__(lhs, rhs)
#define PACKEDARRAY_JOIN__(lhs, rhs)  lhs##rhs
#endif // #ifndef PACKEDARRAY_JOIN

#ifndef PACKEDARRAY_IMPL_BITS_PER_ITEM
#error PACKEDARRAY_IMPL_BITS_PER_ITEM undefined
#endif // #ifndef PACKEDARRAY_IMPL_BITS_PER_ITEM

#if defined(PACKEDARRAY_IMPL_PACK_CASES) || defined(PACKEDARRAY_IMPL_UNPACK_CASES)

#ifndef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 0
#elif PACKEDARRAY_IMPL_CASE_I == 0
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 1
#elif PACKEDARRAY_IMPL_CASE_I == 1
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 2
#elif PACKEDARRAY_IMPL_CASE_I == 2
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 3
#elif PACKEDARRAY_IMPL_CASE_I == 3
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 4
#elif PACKEDARRAY_IMPL_CASE_I == 4
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 5
#elif PACKEDARRAY_IMPL_CASE_I == 5
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 6
#elif PACKEDARRAY_IMPL_CASE_I == 6
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 7
#elif PACKEDARRAY_IMPL_CASE_I == 7
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 8
#elif PACKEDARRAY_IMPL_CASE_I == 8
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 9
#elif PACKEDARRAY_IMPL_CASE_I == 9
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 10
#elif PACKEDARRAY_IMPL_CASE_I == 10
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 11
#elif PACKEDARRAY_IMPL_CASE_I == 11
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 12
#elif PACKEDARRAY_IMPL_CASE_I == 12
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 13
#elif PACKEDARRAY_IMPL_CASE_I == 13
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 14
#elif PACKEDARRAY_IMPL_CASE_I == 14
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 15
#elif PACKEDARRAY_IMPL_CASE_I == 15
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 16
#elif PACKEDARRAY_IMPL_CASE_I == 16
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 17
#elif PACKEDARRAY_IMPL_CASE_I == 17
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 18
#elif PACKEDARRAY_IMPL_CASE_I == 18
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 19
#elif PACKEDARRAY_IMPL_CASE_I == 19
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 20
#elif PACKEDARRAY_IMPL_CASE_I == 20
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 21
#elif PACKEDARRAY_IMPL_CASE_I == 21
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 22
#elif PACKEDARRAY_IMPL_CASE_I == 22
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 23
#elif PACKEDARRAY_IMPL_CASE_I == 23
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 24
#elif PACKEDARRAY_IMPL_CASE_I == 24
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 25
#elif PACKEDARRAY_IMPL_CASE_I == 25
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 26
#elif PACKEDARRAY_IMPL_CASE_I == 26
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 27
#elif PACKEDARRAY_IMPL_CASE_I == 27
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 28
#elif PACKEDARRAY_IMPL_CASE_I == 28
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 29
#elif PACKEDARRAY_IMPL_CASE_I == 29
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 30
#elif PACKEDARRAY_IMPL_CASE_I == 30
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 31
#elif PACKEDARRAY_IMPL_CASE_I == 31
#undef PACKEDARRAY_IMPL_CASE_I
#define PACKEDARRAY_IMPL_CASE_I 32
#endif // #ifndef PACKEDARRAY_IMPL_CASE_I

// Both macros are expanded where they are used, hence always reflect the
// current values of PACKEDARRAY_IMPL_CASE_I and PACKEDARRAY_IMPL_BITS_PER_ITEM.

// number of bits left in the current 32 bits cell, counted from the bit where
// item number PACKEDARRAY_IMPL_CASE_I starts; always in [1, 32]
#ifndef PACKEDARRAY_IMPL_BITS_AVAILABLE
#define PACKEDARRAY_IMPL_BITS_AVAILABLE (32 - ((PACKEDARRAY_IMPL_CASE_I * PACKEDARRAY_IMPL_BITS_PER_ITEM) % 32))
#endif
// bit position, inside the current 32 bits cell, where item number
// PACKEDARRAY_IMPL_CASE_I starts; always in [0, 31]
#ifndef PACKEDARRAY_IMPL_START_BIT
#define PACKEDARRAY_IMPL_START_BIT ((PACKEDARRAY_IMPL_CASE_I * PACKEDARRAY_IMPL_BITS_PER_ITEM) % 32)
#endif

#if defined(PACKEDARRAY_IMPL_PACK_CASES)

#ifndef PACKEDARRAY_IMPL_PACK_CASE_BREAK
#define PACKEDARRAY_IMPL_PACK_CASE_BREAK
#endif

      // one step of the unrolled pack switch: consumes 4 input items, one per
      // SIMD lane, and merges them into the cells accumulated in packed
      case PACKEDARRAY_IMPL_CASE_I:
#if (PACKEDARRAY_IMPL_BITS_PER_ITEM <= PACKEDARRAY_IMPL_BITS_AVAILABLE)
        // the items fit in what is left of the current cells: insert them at the start bit
        in_4 = PackedArray_loadu_uint32x4(in);
        packed = PackedArray_vsli0_uint32x4(packed, in_4, PACKEDARRAY_IMPL_START_BIT);
        in += 4;
#if (PACKEDARRAY_IMPL_BITS_PER_ITEM == PACKEDARRAY_IMPL_BITS_AVAILABLE)
        // the cells are now exactly full: write them out and start over with empty ones
        PackedArray_store_uint32x4(out, packed);
        out += 4;
        packed = PackedArray_uint32x4_zero;
#endif
#else
        // the items span 2 cells: the low bits complete the current cells,
        // which are written out, the high bits seed the next ones
        in_4 = PackedArray_loadu_uint32x4(in);
        packed = PackedArray_vsli0_uint32x4(packed, in_4, PACKEDARRAY_IMPL_START_BIT);
        PackedArray_store_uint32x4(out, packed);
        out += 4;
        packed = PackedArray_shr_uint32x4(in_4, PACKEDARRAY_IMPL_BITS_AVAILABLE);
        in += 4;
#endif
        // hook provided by the including code; empty unless it defined one
        PACKEDARRAY_IMPL_PACK_CASE_BREAK

#if PACKEDARRAY_IMPL_CASE_I < 31
#include PACKEDARRAY_SELF
#else
#undef PACKEDARRAY_IMPL_CASE_I
#undef PACKEDARRAY_IMPL_PACK_CASE_BREAK
#undef PACKEDARRAY_IMPL_PACK_CASES
#endif

#elif defined(PACKEDARRAY_IMPL_UNPACK_CASES) // #if defined(PACKEDARRAY_IMPL_PACK_CASES)

#ifndef PACKEDARRAY_IMPL_UNPACK_CASE_BREAK
#define PACKEDARRAY_IMPL_UNPACK_CASE_BREAK
#endif

      // one step of the unrolled unpack switch: extracts 4 items, one per SIMD
      // lane, from the cells held in packed and writes them to out
      case PACKEDARRAY_IMPL_CASE_I:
#if (PACKEDARRAY_IMPL_BITS_PER_ITEM <= PACKEDARRAY_IMPL_BITS_AVAILABLE)
        // the items are entirely contained in the current cells: shift them down and mask
        out_4 = PackedArray_and_uint32x4(PackedArray_shr_uint32x4(packed, PACKEDARRAY_IMPL_START_BIT), PackedArray_set_uint32x4(PACKEDARRAY_IMPL_MASK));
        PackedArray_storeu_uint32x4(out, out_4);
        out += 4;
        // hook provided by the including code; it expands before the reload
        // below, so a hook that breaks out skips loading the next cells
        PACKEDARRAY_IMPL_UNPACK_CASE_BREAK
#if (PACKEDARRAY_IMPL_CASE_I < 31) && (PACKEDARRAY_IMPL_BITS_PER_ITEM == PACKEDARRAY_IMPL_BITS_AVAILABLE)
        // the current cells are used up: move on to the next 4 cells
        // (not done for the last case: the including code advances by itself)
        in += 4;
        packed = PackedArray_load_uint32x4(in);
#endif
#else
        // the items span 2 cells: low bits come from the current cells, high
        // bits from the next ones, inserted right above the low bits
        out_4 = PackedArray_shr_uint32x4(packed, PACKEDARRAY_IMPL_START_BIT);
        in += 4;
        packed = PackedArray_load_uint32x4(in);
        out_4 = PackedArray_vsli0_uint32x4(out_4, packed, PACKEDARRAY_IMPL_BITS_AVAILABLE);
        out_4 = PackedArray_and_uint32x4(out_4, PackedArray_set_uint32x4(PACKEDARRAY_IMPL_MASK));
        PackedArray_storeu_uint32x4(out, out_4);
        out += 4;
        PACKEDARRAY_IMPL_UNPACK_CASE_BREAK
#endif

#if PACKEDARRAY_IMPL_CASE_I < 31
#include PACKEDARRAY_SELF
#else
#undef PACKEDARRAY_IMPL_CASE_I
#undef PACKEDARRAY_IMPL_UNPACK_CASE_BREAK
#undef PACKEDARRAY_IMPL_UNPACK_CASES
#endif

#endif // #elif defined(PACKEDARRAY_IMPL_UNPACK_CASES)

#else // #if defined(PACKEDARRAY_IMPL_PACK_CASES) || defined(PACKEDARRAY_IMPL_UNPACK_CASES)

#ifndef PACKEDARRAY_IMPL_MASK
#define PACKEDARRAY_IMPL_MASK (uint32_t)((1ULL << PACKEDARRAY_IMPL_BITS_PER_ITEM) - 1)
#endif

// Template of the __PackedArray_pack_<N> functions, N being
// PACKEDARRAY_IMPL_BITS_PER_ITEM: packs count items read from in into buffer,
// starting at item index offset.
//
// Items are stored 4 by 4: items 4k .. 4k+3 use the same bit range of 4
// consecutive cells, one cell per SIMD lane. The work is split in 3 parts:
//   . pre:  leading items, up to the next multiple of 4 item index (scalar)
//   . SIMD: whole groups of 4 items
//   . post: trailing count % 4 items (scalar)
//
// buffer is accessed with aligned SIMD loads / stores, in is read unaligned.
void PACKEDARRAY_JOIN(__PackedArray_pack_, PACKEDARRAY_IMPL_BITS_PER_ITEM)(uint32_t* __restrict buffer, uint32_t offset, const uint32_t* __restrict in, uint32_t count)
{
  uint32_t pre, post;
  uint32_t* __restrict out;
  const uint32_t* __restrict end;
  uint32_t startBit;
  PackedArray_uint32x4_t packed, in_4, mask;
  uint32_t offset_4;

  // number of items to pack before offset becomes a multiple of 4
  pre = (offset + 3) / 4 * 4 - offset;
  pre = pre > count ? count : pre;

  if (pre > 0)
  {
    __PackedArray_pack_scalar(buffer, PACKEDARRAY_IMPL_BITS_PER_ITEM, PACKEDARRAY_IMPL_MASK, offset, in, pre);
    offset += pre;
    in += pre;
    count -= pre;
  }

  post = count % 4;
  count -= post;

  if (count > 0)
  {
    // the bit index is computed on 64 bits, like the scalar helpers do:
    // offset / 4 * bitsPerItem doesn't fit in 32 bits for large offsets
    out = &buffer[((uint64_t)offset / 4 * PACKEDARRAY_IMPL_BITS_PER_ITEM) / 32 * 4];
    startBit = (uint32_t)(((uint64_t)offset / 4 * PACKEDARRAY_IMPL_BITS_PER_ITEM) % 32);
    // start from the current content of the cells, keeping only the bits
    // located below startBit: they belong to previously packed items
    packed = PackedArray_load_uint32x4(out);
    mask = PackedArray_sub_uint32x4(PackedArray_shl_uint32x4(PackedArray_set_uint32x4(1), startBit), PackedArray_set_uint32x4(1));
    packed = PackedArray_and_uint32x4(packed, mask);

    // position inside the 128 items period covered by the 32 cases of the switch
    offset_4 = offset % 128;
    // offset now designates the first item of the post part
    offset += count;

    if (count >= 128 - offset_4)
    {
      int32_t n;

      // n passes are needed to reach the last 128 items boundary; count
      // becomes the number of items left after that boundary
      n = (count + offset_4) / 128;
      count -= 128 * n - offset_4;
      // the switch jumps in the middle of the unrolled loop body for the
      // first pass, the following passes run all 32 cases
      switch (offset_4 / 4)
      {
        do
        {
#define PACKEDARRAY_IMPL_PACK_CASES
#include PACKEDARRAY_SELF
        } while (--n > 0);
      }

      if (count == 0)
        goto PACKEDARRAY_JOIN(PACKEDARRAY_JOIN(__PackedArray_pack_, PACKEDARRAY_IMPL_BITS_PER_ITEM), _post);

      // the remaining items start on a period boundary
      offset_4 = 0;
      startBit = 0;
    }

    // less than a full period left: same cases, leaving as soon as the input is exhausted
    end = in + count;
    switch (offset_4 / 4)
    {
#define PACKEDARRAY_IMPL_PACK_CASES
#define PACKEDARRAY_IMPL_PACK_CASE_BREAK \
      if (in == end)\
        break;
#include PACKEDARRAY_SELF
    }
    PACKEDARRAY_ASSERT(in == end);
    if ((count / 4 * PACKEDARRAY_IMPL_BITS_PER_ITEM + startBit) % 32)
    {
      // the last cells are only partially filled and weren't written yet:
      // merge the bits already stored above the fill level before storing
      in_4 = PackedArray_loadu_uint32x4(out);
      mask = PackedArray_sub_uint32x4(PackedArray_shl_uint32x4(PackedArray_set_uint32x4(1), ((count / 4 * PACKEDARRAY_IMPL_BITS_PER_ITEM + startBit - 1) % 32) + 1), PackedArray_set_uint32x4(1));
      in_4 = PackedArray_andnot_uint32x4(in_4, mask);
      packed = PackedArray_or_uint32x4(packed, in_4);
      PackedArray_store_uint32x4(out, packed);
    }
  }

  PACKEDARRAY_JOIN(PACKEDARRAY_JOIN(__PackedArray_pack_, PACKEDARRAY_IMPL_BITS_PER_ITEM), _post):
  if (post > 0)
    __PackedArray_pack_scalar(buffer, PACKEDARRAY_IMPL_BITS_PER_ITEM, PACKEDARRAY_IMPL_MASK, offset, in, post);
}

// Template of the __PackedArray_unpack_<N> functions, N being
// PACKEDARRAY_IMPL_BITS_PER_ITEM: unpacks count items from buffer, starting at
// item index offset, and writes them to out.
//
// Items are stored 4 by 4: items 4k .. 4k+3 use the same bit range of 4
// consecutive cells, one cell per SIMD lane. The work is split in 3 parts:
//   . pre:  leading items, up to the next multiple of 4 item index (scalar)
//   . SIMD: whole groups of 4 items
//   . post: trailing count % 4 items (scalar)
//
// buffer is read with aligned SIMD loads, out is written unaligned.
void PACKEDARRAY_JOIN(__PackedArray_unpack_, PACKEDARRAY_IMPL_BITS_PER_ITEM)(const uint32_t* __restrict buffer, uint32_t offset, uint32_t* __restrict out, uint32_t count)
{
  uint32_t pre, post;
  const uint32_t* __restrict in;
  const uint32_t* __restrict end;
  PackedArray_uint32x4_t packed, out_4;
  uint32_t offset_4;

  // number of items to unpack before offset becomes a multiple of 4
  pre = (offset + 3) / 4 * 4 - offset;
  pre = pre > count ? count : pre;

  if (pre > 0)
  {
    __PackedArray_unpack_scalar(buffer, PACKEDARRAY_IMPL_BITS_PER_ITEM, PACKEDARRAY_IMPL_MASK, offset, out, pre);
    offset += pre;
    out += pre;
    count -= pre;
  }

  post = count % 4;
  count -= post;

  if (count > 0)
  {
    // the bit index is computed on 64 bits, like the scalar helpers do:
    // offset / 4 * bitsPerItem doesn't fit in 32 bits for large offsets
    in = &buffer[((uint64_t)offset / 4 * PACKEDARRAY_IMPL_BITS_PER_ITEM) / 32 * 4];
    packed = PackedArray_load_uint32x4(in);

    // position inside the 128 items period covered by the 32 cases of the switch
    offset_4 = offset % 128;
    // offset now designates the first item of the post part
    offset += count;

    if (count >= 128 - offset_4)
    {
      int32_t n;

      // n passes are needed to reach the last 128 items boundary; count
      // becomes the number of items left after that boundary
      n = (count + offset_4) / 128;
      count -= 128 * n - offset_4;
      // the switch jumps in the middle of the unrolled loop body for the
      // first pass, skipping the reload below since packed is already loaded
      switch (offset_4 / 4)
      {
        do
        {
          // the last case of the previous pass used up the current cells
          in += 4;
          packed = PackedArray_load_uint32x4(in);
#define PACKEDARRAY_IMPL_UNPACK_CASES
#include PACKEDARRAY_SELF
        } while (--n > 0);
      }

      if (count == 0)
        goto PACKEDARRAY_JOIN(PACKEDARRAY_JOIN(__PackedArray_unpack_, PACKEDARRAY_IMPL_BITS_PER_ITEM), _post);

      // the remaining items start on a period boundary, in the next cells
      in += 4;
      packed = PackedArray_loadu_uint32x4(in);
      offset_4 = 0;
    }

    // less than a full period left: same cases, leaving as soon as the output is complete
    end = out + count;
    switch (offset_4 / 4)
    {
#define PACKEDARRAY_IMPL_UNPACK_CASES
#define PACKEDARRAY_IMPL_UNPACK_CASE_BREAK \
      if (out == end)\
        break;
#include PACKEDARRAY_SELF
    }
    PACKEDARRAY_ASSERT(out == end);
  }

  PACKEDARRAY_JOIN(PACKEDARRAY_JOIN(__PackedArray_unpack_, PACKEDARRAY_IMPL_BITS_PER_ITEM), _post):
  if (post > 0)
    __PackedArray_unpack_scalar(buffer, PACKEDARRAY_IMPL_BITS_PER_ITEM, PACKEDARRAY_IMPL_MASK, offset, out, post);
}

// end of one template instantiation: drop the per instantiation macros so the
// next inclusion of this file defines them afresh
#undef PACKEDARRAY_IMPL_BITS_PER_ITEM
#undef PACKEDARRAY_IMPL_BITS_AVAILABLE
#undef PACKEDARRAY_IMPL_START_BIT
#undef PACKEDARRAY_IMPL_MASK

#endif // #if defined(PACKEDARRAY_IMPL_PACK_CASES) || defined(PACKEDARRAY_IMPL_UNPACK_CASES)

#else

#include "PackedArray.h"

#if !defined(PACKEDARRAY_ASSERT)
#include <assert.h>
#define PACKEDARRAY_ASSERT(expression) assert(expression)
#endif

#include <stddef.h>

// Packs count items read from in into buffer, one item at a time, starting at
// item index offset.
//
// Items are laid out 4 by 4: items 4k .. 4k+3 use the same bit range of 4
// consecutive cells, so an item that doesn't fit in its cell continues 4
// cells further. mask must have the low bitsPerItem bits set; items are
// expected not to have any bit set outside of mask.
static void __PackedArray_pack_scalar(uint32_t* buffer, const uint32_t bitsPerItem, const uint32_t mask, uint32_t offset, const uint32_t* in, uint32_t count)
{
  uint32_t* __restrict cell;
  const uint32_t* last;

  PACKEDARRAY_ASSERT(buffer != NULL);
  PACKEDARRAY_ASSERT(in != NULL);
  PACKEDARRAY_ASSERT(count != 0);

  last = in + count;

  for (; in != last; ++in, ++offset)
  {
    const uint32_t item = *in;
    // bit index of the item inside its lane
    const uint64_t laneBit = (uint64_t)offset / 4 * (uint64_t)bitsPerItem;
    const uint32_t shift = (uint32_t)(laneBit % 32);
    const uint32_t room = 32 - shift;

    PACKEDARRAY_ASSERT(0 == (~mask & item));

    cell = &buffer[laneBit / 32 * 4 + offset % 4];

    // low part of the item (all of it when it fits in the cell)
    cell[0] = (cell[0] & ~(mask << shift)) | (item << shift);

    // high part, when the item spans 2 buffer cells
    if (bitsPerItem > room)
      cell[4] = (cell[4] & ~(mask >> room)) | (item >> room);
  }
}

// Unpacks count items from buffer, one item at a time, starting at item index
// offset, and writes them to out.
//
// Items are laid out 4 by 4: items 4k .. 4k+3 use the same bit range of 4
// consecutive cells, so an item that doesn't fit in its cell continues 4
// cells further. mask must have the low bitsPerItem bits set.
static void __PackedArray_unpack_scalar(const uint32_t* buffer, const uint32_t bitsPerItem, const uint32_t mask, uint32_t offset, uint32_t* out, uint32_t count)
{
  const uint32_t* __restrict cell;
  uint32_t* last;

  PACKEDARRAY_ASSERT(buffer != NULL);
  PACKEDARRAY_ASSERT(out != NULL);
  PACKEDARRAY_ASSERT(count != 0);

  last = out + count;

  for (; out != last; ++out, ++offset)
  {
    // bit index of the item inside its lane
    const uint64_t laneBit = (uint64_t)offset / 4 * (uint64_t)bitsPerItem;
    const uint32_t shift = (uint32_t)(laneBit % 32);
    const uint32_t room = 32 - shift;
    uint32_t bits;

    cell = &buffer[laneBit / 32 * 4 + offset % 4];

    // low part of the item (all of it when it fits in the cell)
    bits = cell[0] >> shift;

    // high part, when the item spans 2 buffer cells
    if (bitsPerItem > room)
      bits |= cell[4] << room;

    *out = bits & mask;
  }
}

#if defined(__SSE2__) || defined(_M_IX86) || defined(_M_X64)

#include <emmintrin.h>

// SSE2 flavor of the 4 x uint32_t vector abstraction used by the pack / unpack
// templates. load / store require 16 bytes aligned pointers, loadu / storeu
// don't. Pointer arguments are parenthesized under the casts so that passing
// an expression such as p + 4 addresses the expected location.
#define PackedArray_uint32x4_t                      __m128i
#define PackedArray_uint32x4_zero                   _mm_setzero_si128()
#define PackedArray_set_uint32x4(i)                 _mm_set1_epi32(i)
#define PackedArray_sub_uint32x4(lhs, rhs)          _mm_sub_epi32(lhs, rhs)
#define PackedArray_loadu_uint32x4(ptr)             _mm_loadu_si128((const __m128i*)(ptr))
#define PackedArray_storeu_uint32x4(ptr, v)         _mm_storeu_si128((__m128i*)(ptr), v)
#define PackedArray_load_uint32x4(ptr)              _mm_load_si128((const __m128i*)(ptr))
#define PackedArray_store_uint32x4(ptr, v)          _mm_store_si128((__m128i*)(ptr), v)
#define PackedArray_shl_uint32x4(v, shift)          _mm_slli_epi32(v, shift)
#define PackedArray_shr_uint32x4(v, shift)          _mm_srli_epi32(v, shift)
#define PackedArray_or_uint32x4(lhs, rhs)           _mm_or_si128(lhs, rhs)
#define PackedArray_and_uint32x4(lhs, rhs)          _mm_and_si128(lhs, rhs)
// computes lhs & ~rhs: _mm_andnot_si128 negates its first operand, hence the swap
#define PackedArray_andnot_uint32x4(lhs, rhs)       _mm_andnot_si128(rhs, lhs)
// assumes dst bits are cleared at inserted bit positions
#define PackedArray_vsli0_uint32x4(dst, src, shift) PackedArray_or_uint32x4(dst, PackedArray_shl_uint32x4(src, shift))

#elif defined(__ARM_NEON__) || defined(_M_ARM)

#include <arm_neon.h>

// NEON flavor of the 4 x uint32_t vector abstraction used by the pack / unpack
// templates. Macro arguments are parenthesized where they sit under a cast or
// a unary minus so that passing an expression behaves as expected.
#define PackedArray_uint32x4_t                      uint32x4_t
#define PackedArray_uint32x4_zero                   vdupq_n_u32(0)
#define PackedArray_set_uint32x4(i)                 vdupq_n_u32(i)
#define PackedArray_sub_uint32x4(lhs, rhs)          vsubq_u32(lhs, rhs)
#define PackedArray_loadu_uint32x4(ptr)             vld1q_u32((const uint32_t*)(ptr))
#define PackedArray_storeu_uint32x4(ptr, v)         vst1q_u32(ptr, v)
#if defined(__GNUC__)
// because  __builtin_assume_aligned isn't always available...

typedef uint32_t __attribute__((aligned(16)))       PackedArray_aligned_uint32_t;
#define PackedArray_load_uint32x4(ptr)              vld1q_u32((const PackedArray_aligned_uint32_t*)(ptr))
#define PackedArray_store_uint32x4(ptr, v)          vst1q_u32((PackedArray_aligned_uint32_t*)(ptr), v)
#else
#define PackedArray_load_uint32x4(ptr)              vld1q_u32((const uint32_t*)(ptr))
#define PackedArray_store_uint32x4(ptr, v)          vst1q_u32(ptr, v)
#endif
// vshlq_u32 shifts left by a per lane signed amount; a negative amount shifts right
#define PackedArray_shl_uint32x4(v, shift)          vshlq_u32(v, vdupq_n_s32(shift))
#define PackedArray_shr_uint32x4(v, shift)          vshlq_u32(v, vdupq_n_s32(-(shift)))
#define PackedArray_or_uint32x4(lhs, rhs)           vorrq_u32(lhs, rhs)
#define PackedArray_and_uint32x4(lhs, rhs)          vandq_u32(lhs, rhs)
// computes lhs & ~rhs
#define PackedArray_andnot_uint32x4(lhs, rhs)       vbicq_u32(lhs, rhs)
// assumes dst bits are cleared at inserted bit positions
#define PackedArray_vsli0_uint32x4(dst, src, shift) vsliq_n_u32(dst, src, shift)

#else

#error unsupported SIMD platform

#endif

#define PACKEDARRAY_IMPL
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 1
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 2
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 3
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 4
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 5
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 6
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 7
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 8
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 9
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 10
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 11
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 12
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 13
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 14
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 15
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 16
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 17
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 18
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 19
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 20
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 21
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 22
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 23
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 24
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 25
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 26
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 27
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 28
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 29
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 30
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 31
#include PACKEDARRAY_SELF
#define PACKEDARRAY_IMPL_BITS_PER_ITEM 32
#include PACKEDARRAY_SELF
#undef PACKEDARRAY_IMPL


#if !defined(PACKEDARRAY_ALIGNED_MALLOC) || !defined(PACKEDARRAY_FREE)
#include <stdlib.h>
#endif

#if !defined(PACKEDARRAY_ALIGNED_MALLOC)
#if defined (_MSC_VER)
#define PACKEDARRAY_ALIGNED_MALLOC(alignment, size) _aligned_malloc(size, alignment)
#elif defined (ANDROID) || defined (__ANDROID__)
#define PACKEDARRAY_ALIGNED_MALLOC(alignment, size) memalign(alignment, size)
#else
// Allocates size bytes aligned on an alignment bytes boundary by means of
// posix_memalign(). Returns NULL when the allocation fails or when alignment
// is rejected by posix_memalign(). The memory is released with free().
static void* __PackedArray_aligned_malloc(size_t alignment, size_t size)
{
  void* p = NULL;

  // failure is reported through the return value, don't rely on what the call
  // leaves in p in that case
  if (posix_memalign(&p, alignment, size) != 0)
    return NULL;

  return p;
}
#define PACKEDARRAY_ALIGNED_MALLOC(alignment, size) __PackedArray_aligned_malloc(alignment, size)
#endif
#endif

// Default deallocator, used unless PACKEDARRAY_FREE was provided by the user.
// MSVC needs _aligned_free() to release what _aligned_malloc() returned;
// everywhere else plain free() is used.
#ifndef PACKEDARRAY_FREE
#ifdef _MSC_VER
#define PACKEDARRAY_FREE(p) _aligned_free(p)
#else
#define PACKEDARRAY_FREE(p) free(p)
#endif
#endif

// Allocates a PackedArray holding `count` items of `bitsPerItem` bits each.
//
// bitsPerItem must be within [1, 32] (asserted). Returns NULL when the
// allocation fails or when the required size cannot be represented by size_t.
// The buffer content is not cleared, except for its last cell which is zeroed.
// Release the result with PackedArray_destroy().
PackedArray* PackedArray_create(uint32_t bitsPerItem, uint32_t count)
{
  PackedArray* a;
  uint64_t cellCount;
  size_t bufferSize;

  PACKEDARRAY_ASSERT(bitsPerItem > 0);
  PACKEDARRAY_ASSERT(bitsPerItem <= 32);

  // count / 4 * bitsPerItem bits, rounded up to whole blocks of 4 cells...
  cellCount = ((uint64_t)count / 4 * (uint64_t)bitsPerItem + 31) / 32 * 4;
  // ...plus min(count, 4) extra cells
  cellCount += count < 4 ? count : 4;

  // refuse sizes that would wrap around once converted to size_t (can only
  // happen where size_t is narrower than 64 bits)
  if (cellCount > ((size_t)-1 - sizeof(PackedArray)) / sizeof(uint32_t))
    return NULL;

  bufferSize = (size_t)cellCount * sizeof(uint32_t);
  a = (PackedArray*)PACKEDARRAY_ALIGNED_MALLOC(16, sizeof(PackedArray) + bufferSize);

  if (a != NULL)
  {
    // only look at the buffer address once the allocation is known to have succeeded
    PACKEDARRAY_ASSERT((uintptr_t)a->buffer % 16 == 0);

    // an empty array has no last cell to clear
    if (bufferSize > 0)
      a->buffer[bufferSize / sizeof(uint32_t) - 1] = 0;

    a->bitsPerItem = bitsPerItem;
    a->count = count;
  }

  return a;
}

// Releases a PackedArray through PACKEDARRAY_FREE.
// `a` must not be NULL (asserted).
void PackedArray_destroy(PackedArray* a)
{
  PACKEDARRAY_ASSERT(a);
  PACKEDARRAY_FREE(a);
}

// Packs `count` values read from `in` into `a`, starting at item index `offset`.
//
// `a` and `in` must not be NULL (asserted). The work is forwarded to the
// implementation specialized for a->bitsPerItem; a value outside [1, 32]
// trips an assert instead of being silently ignored.
void PackedArray_pack(PackedArray* a, const uint32_t offset, const uint32_t* in, uint32_t count)
{
  PACKEDARRAY_ASSERT(a != NULL);
  PACKEDARRAY_ASSERT(in != NULL);

  switch (a->bitsPerItem)
  {
    case 1:   __PackedArray_pack_1(a->buffer, offset, in, count); break;
    case 2:   __PackedArray_pack_2(a->buffer, offset, in, count); break;
    case 3:   __PackedArray_pack_3(a->buffer, offset, in, count); break;
    case 4:   __PackedArray_pack_4(a->buffer, offset, in, count); break;
    case 5:   __PackedArray_pack_5(a->buffer, offset, in, count); break;
    case 6:   __PackedArray_pack_6(a->buffer, offset, in, count); break;
    case 7:   __PackedArray_pack_7(a->buffer, offset, in, count); break;
    case 8:   __PackedArray_pack_8(a->buffer, offset, in, count); break;
    case 9:   __PackedArray_pack_9(a->buffer, offset, in, count); break;
    case 10:  __PackedArray_pack_10(a->buffer, offset, in, count); break;
    case 11:  __PackedArray_pack_11(a->buffer, offset, in, count); break;
    case 12:  __PackedArray_pack_12(a->buffer, offset, in, count); break;
    case 13:  __PackedArray_pack_13(a->buffer, offset, in, count); break;
    case 14:  __PackedArray_pack_14(a->buffer, offset, in, count); break;
    case 15:  __PackedArray_pack_15(a->buffer, offset, in, count); break;
    case 16:  __PackedArray_pack_16(a->buffer, offset, in, count); break;
    case 17:  __PackedArray_pack_17(a->buffer, offset, in, count); break;
    case 18:  __PackedArray_pack_18(a->buffer, offset, in, count); break;
    case 19:  __PackedArray_pack_19(a->buffer, offset, in, count); break;
    case 20:  __PackedArray_pack_20(a->buffer, offset, in, count); break;
    case 21:  __PackedArray_pack_21(a->buffer, offset, in, count); break;
    case 22:  __PackedArray_pack_22(a->buffer, offset, in, count); break;
    case 23:  __PackedArray_pack_23(a->buffer, offset, in, count); break;
    case 24:  __PackedArray_pack_24(a->buffer, offset, in, count); break;
    case 25:  __PackedArray_pack_25(a->buffer, offset, in, count); break;
    case 26:  __PackedArray_pack_26(a->buffer, offset, in, count); break;
    case 27:  __PackedArray_pack_27(a->buffer, offset, in, count); break;
    case 28:  __PackedArray_pack_28(a->buffer, offset, in, count); break;
    case 29:  __PackedArray_pack_29(a->buffer, offset, in, count); break;
    case 30:  __PackedArray_pack_30(a->buffer, offset, in, count); break;
    case 31:  __PackedArray_pack_31(a->buffer, offset, in, count); break;
    case 32:  __PackedArray_pack_32(a->buffer, offset, in, count); break;
    // a->bitsPerItem outside [1, 32]: nothing can be packed, flag it
    default:  PACKEDARRAY_ASSERT(0 && "PackedArray_pack: unsupported bitsPerItem"); break;
  }
}

// Unpacks `count` items from `a`, starting at item index `offset`, into `out`.
//
// `a` and `out` must not be NULL (asserted). The work is forwarded to the
// implementation specialized for a->bitsPerItem; a value outside [1, 32]
// trips an assert instead of silently leaving `out` unwritten.
void PackedArray_unpack(const PackedArray* a, const uint32_t offset, uint32_t* out, uint32_t count)
{
  PACKEDARRAY_ASSERT(a != NULL);
  PACKEDARRAY_ASSERT(out != NULL);

  switch (a->bitsPerItem)
  {
    case 1:   __PackedArray_unpack_1(a->buffer, offset, out, count); break;
    case 2:   __PackedArray_unpack_2(a->buffer, offset, out, count); break;
    case 3:   __PackedArray_unpack_3(a->buffer, offset, out, count); break;
    case 4:   __PackedArray_unpack_4(a->buffer, offset, out, count); break;
    case 5:   __PackedArray_unpack_5(a->buffer, offset, out, count); break;
    case 6:   __PackedArray_unpack_6(a->buffer, offset, out, count); break;
    case 7:   __PackedArray_unpack_7(a->buffer, offset, out, count); break;
    case 8:   __PackedArray_unpack_8(a->buffer, offset, out, count); break;
    case 9:   __PackedArray_unpack_9(a->buffer, offset, out, count); break;
    case 10:  __PackedArray_unpack_10(a->buffer, offset, out, count); break;
    case 11:  __PackedArray_unpack_11(a->buffer, offset, out, count); break;
    case 12:  __PackedArray_unpack_12(a->buffer, offset, out, count); break;
    case 13:  __PackedArray_unpack_13(a->buffer, offset, out, count); break;
    case 14:  __PackedArray_unpack_14(a->buffer, offset, out, count); break;
    case 15:  __PackedArray_unpack_15(a->buffer, offset, out, count); break;
    case 16:  __PackedArray_unpack_16(a->buffer, offset, out, count); break;
    case 17:  __PackedArray_unpack_17(a->buffer, offset, out, count); break;
    case 18:  __PackedArray_unpack_18(a->buffer, offset, out, count); break;
    case 19:  __PackedArray_unpack_19(a->buffer, offset, out, count); break;
    case 20:  __PackedArray_unpack_20(a->buffer, offset, out, count); break;
    case 21:  __PackedArray_unpack_21(a->buffer, offset, out, count); break;
    case 22:  __PackedArray_unpack_22(a->buffer, offset, out, count); break;
    case 23:  __PackedArray_unpack_23(a->buffer, offset, out, count); break;
    case 24:  __PackedArray_unpack_24(a->buffer, offset, out, count); break;
    case 25:  __PackedArray_unpack_25(a->buffer, offset, out, count); break;
    case 26:  __PackedArray_unpack_26(a->buffer, offset, out, count); break;
    case 27:  __PackedArray_unpack_27(a->buffer, offset, out, count); break;
    case 28:  __PackedArray_unpack_28(a->buffer, offset, out, count); break;
    case 29:  __PackedArray_unpack_29(a->buffer, offset, out, count); break;
    case 30:  __PackedArray_unpack_30(a->buffer, offset, out, count); break;
    case 31:  __PackedArray_unpack_31(a->buffer, offset, out, count); break;
    case 32:  __PackedArray_unpack_32(a->buffer, offset, out, count); break;
    // a->bitsPerItem outside [1, 32]: `out` would be left unwritten, flag it
    default:  PACKEDARRAY_ASSERT(0 && "PackedArray_unpack: unsupported bitsPerItem"); break;
  }
}

// Stores `in` as the item at index `offset`.
//
// `a` must not be NULL and `in` must fit in a->bitsPerItem bits (both
// asserted). Item i is stored in the buffer cells whose index is congruent to
// i % 4 modulo 4, at bit position i / 4 * bitsPerItem within that sequence of
// cells.
void PackedArray_set(PackedArray* a, const uint32_t offset, const uint32_t in)
{
  uint32_t* __restrict cell;
  uint64_t bitPosition;
  uint32_t width;
  uint32_t shift;
  uint32_t room;
  uint32_t mask;

  PACKEDARRAY_ASSERT(a != NULL);

  width = a->bitsPerItem;

  bitPosition = (uint64_t)offset / 4 * (uint64_t)width;
  cell = &a->buffer[4 * (bitPosition / 32) + (offset % 4)];
  shift = (uint32_t)(bitPosition % 32);

  // bits left in the cell, starting from the item's first bit
  room = 32 - shift;

  mask = (uint32_t)(1ULL << width) - 1;
  PACKEDARRAY_ASSERT(0 == (~mask & in));

  // the whole value when it fits, its low part otherwise
  cell[0] = (cell[0] & ~(mask << shift)) | (in << shift);

  if (width > room)
  {
    // value spans 2 buffer cells: the high part goes 4 cells further
    cell[4] = (cell[4] & ~(mask >> room)) | (in >> room);
  }
}

// Returns the item stored at index `offset`.
//
// `a` must not be NULL (asserted). Uses the same addressing as
// PackedArray_set(): cell index 4 * (bit position / 32) + offset % 4, with the
// bit position being offset / 4 * bitsPerItem.
uint32_t PackedArray_get(const PackedArray* a, const uint32_t offset)
{
  const uint32_t* __restrict cell;
  uint64_t bitPosition;
  uint32_t width;
  uint32_t shift;
  uint32_t room;
  uint32_t mask;
  uint32_t low, high;

  PACKEDARRAY_ASSERT(a != NULL);

  width = a->bitsPerItem;

  bitPosition = (uint64_t)offset / 4 * (uint64_t)width;
  cell = &a->buffer[4 * (bitPosition / 32) + (offset % 4)];
  shift = (uint32_t)(bitPosition % 32);

  // bits left in the cell, starting from the item's first bit
  room = 32 - shift;

  mask = (uint32_t)(1ULL << width) - 1;

  // common case: the value sits entirely within one cell
  if (width <= room)
    return (cell[0] >> shift) & mask;

  // value spans 2 buffer cells: the high part comes from 4 cells further
  low = cell[0] >> shift;
  high = cell[4] << room;

  // keep low's bits below `room`, take the remaining ones from high
  return low ^ ((low ^ high) & (mask >> room << room));
}

// Returns the number of uint32_t cells (not bytes) making up a's buffer,
// computed from a->count and a->bitsPerItem.
// `a` must not be NULL (asserted).
uint32_t PackedArray_bufferSize(const PackedArray* a)
{
  uint32_t itemCount;
  uint32_t extraCells;
  size_t cellCount;

  PACKEDARRAY_ASSERT(a != NULL);

  itemCount = a->count;
  extraCells = itemCount < 4 ? itemCount : 4;

  // count / 4 * bitsPerItem bits, rounded up to whole blocks of 4 cells
  cellCount = ((uint64_t)itemCount / 4 * (uint64_t)a->bitsPerItem + 31) / 32 * 4;
  cellCount += extraCells;

  return (uint32_t)cellCount;
}

#if !(defined(_MSC_VER) && _MSC_VER >= 1400) && !defined(__GNUC__)
// integer log base 2, aka the index of the highest bit set in v
// (yields 0 for v == 0)
static uint32_t __PackedArray_log2(uint32_t v)
{
  // De Bruijn sequence lookup, see
  // http://aggregate.org/MAGIC
  // http://graphics.stanford.edu/~seander/bithacks.html
  static const uint32_t multiplyDeBruijnBitPosition[32] =
  {
    0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
    8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
  };
  uint32_t shift;

  // propagate the highest bit set to every lower position
  for (shift = 1; shift <= 16; shift <<= 1)
    v |= v >> shift;

  // the top 5 bits of the product select the table entry
  return multiplyDeBruijnBitPosition[(uint32_t)(v * 0x7C4ACDDU) >> 27];
}
#endif

// index of the highest bit set in v (0 = least significant bit),
// or -1 when v has no bit set
static int __PackedArray_highestBitSet(uint32_t v)
{
#if defined(_MSC_VER) && _MSC_VER >= 1400
  unsigned long index;

  if (!_BitScanReverse(&index, v))
    return -1;

  return (int)index;
#elif defined(__GNUC__)
  // zero is handled before reaching __builtin_clz()
  if (v == 0)
    return -1;

  return 31 - __builtin_clz(v);
#else
  if (v == 0)
    return -1;

  return (int)__PackedArray_log2(v);
#endif
}

// Returns the number of bits needed to represent the largest of the `count`
// values found in `in`. The result is never less than 1, even when the input
// is empty or only contains zeros.
uint32_t PackedArray_computeBitsPerItem(const uint32_t* in, uint32_t count)
{
  uint32_t index;
  uint32_t largest;
  int highestBit;

  largest = 0;
  for (index = 0; index < count; ++index)
  {
    if (in[index] > largest)
      largest = in[index];
  }

  highestBit = __PackedArray_highestBitSet(largest);

  // no bit set at all: still reserve 1 bit per item
  if (highestBit < 0)
    return 1;

  return (uint32_t)highestBit + 1;
}


// - 8< ------------------------------------------------------------------------

#if defined(PACKEDARRAY_SELF_TEST) && defined(PACKEDARRAY_SELF_BENCH)
  #error choose either PACKEDARRAY_SELF_TEST or PACKEDARRAY_SELF_BENCH
#endif

#if defined(PACKEDARRAY_SELF_TEST)

#undef NDEBUG // we want asserts
#include <assert.h>

#include <stdio.h>
#include <string.h> // memcmp

// Straightforward packer the self test compares PackedArray_pack() against:
// writes `count` values from `in` one item at a time, starting at item index
// `offset`. Every value must fit in a->bitsPerItem bits (asserted).
static void PackedArray_pack_reference(PackedArray* a, uint32_t offset, const uint32_t* in, uint32_t count)
{
  uint32_t* __restrict cell;
  uint64_t bitPosition;
  uint32_t width;
  uint32_t shift;
  uint32_t room;
  uint32_t mask;
  uint32_t n;

  assert(a != NULL);
  assert(in != NULL);

  width = a->bitsPerItem;

  mask = (uint32_t)((1ULL << width) - 1);

  for (n = 0; n < count; ++n, ++offset)
  {
    const uint32_t value = in[n];

    assert(0 == (~mask & value));

    bitPosition = (uint64_t)offset / 4 * (uint64_t)width;
    cell = &a->buffer[bitPosition / 32 * 4 + offset % 4];
    shift = (uint32_t)(bitPosition % 32);

    // bits left in the cell, starting from the item's first bit
    room = 32 - shift;

    // the whole value when it fits, its low part otherwise
    cell[0] = (cell[0] & ~(mask << shift)) | (value << shift);

    if (width > room)
    {
      // value spans 2 buffer cells: the high part goes 4 cells further
      cell[4] = (cell[4] & ~(mask >> room)) | (value >> room);
    }
  }
}

// Straightforward unpacker the self test compares PackedArray_unpack()
// against: reads `count` items one at a time, starting at item index `offset`,
// and stores them into `out`.
static void PackedArray_unpack_reference(const PackedArray* a, uint32_t offset, uint32_t* out, uint32_t count)
{
  const uint32_t* __restrict cell;
  uint64_t bitPosition;
  uint32_t width;
  uint32_t shift;
  uint32_t room;
  uint32_t mask;
  uint32_t n;

  assert(a != NULL);
  assert(out != NULL);

  width = a->bitsPerItem;

  mask = (uint32_t)((1ULL << width) - 1);

  for (n = 0; n < count; ++n, ++offset)
  {
    uint32_t low, high;

    bitPosition = (uint64_t)offset / 4 * (uint64_t)width;
    cell = &a->buffer[bitPosition / 32 * 4 + offset % 4];
    shift = (uint32_t)(bitPosition % 32);

    // bits left in the cell, starting from the item's first bit
    room = 32 - shift;

    if (width <= room)
    {
      out[n] = (cell[0] >> shift) & mask;
      continue;
    }

    // value spans 2 buffer cells: the high part comes from 4 cells further
    low = cell[0] >> shift;
    high = cell[4] << room;

    out[n] = low ^ ((low ^ high) & (mask >> room << room));
  }
}

int main(void)
{
  uint32_t bitsPerItem;

  printf("-- PackedArray self test -------------------------------------------------------\n");
  printf("\n");

  printf("sizeof(PackedArray) = %d\n", (int)sizeof(PackedArray));
  printf("\n");

  printf("1 by 1 packing / unpacking:\n");
  for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem)
  {
    uint32_t mask = (uint32_t)(1ULL << bitsPerItem) - 1;
    int count;

    for (count = 1; count <= 1024; ++count)
    {
      PackedArray* a1 = PackedArray_create(bitsPerItem, count);
      PackedArray* a2 = PackedArray_create(bitsPerItem, count);
      int i;

      assert(a1->count == (uint32_t)count);
      assert(a2->count == (uint32_t)count);
      assert(a1->bitsPerItem == bitsPerItem);
      assert(a2->bitsPerItem == bitsPerItem);
      assert(PackedArray_bufferSize(a1) == PackedArray_bufferSize(a2));

      for (i = 0; i < (int)PackedArray_bufferSize(a1); ++i)
        a1->buffer[i] = a2->buffer[i] = rand();

      for (i = 0; i < count; ++i)
      {
        uint32_t v1, v2;

        v1 = rand() & mask;
        v2 = v1 + 1;
        PackedArray_pack(a1, i, &v1, 1);
        PackedArray_pack_reference(a2, i, &v1, 1);
        assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0);

        PackedArray_set(a1, i, v1);
        assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0);

        PackedArray_unpack(a1, i, &v2, 1);
        assert(v1 == v2);
        PackedArray_unpack_reference(a2, i, &v2, 1);
        assert(v1 == v2);
        v2 = PackedArray_get(a2, i);
        assert(v1 == v2);
      }

      for (i = count - 1; i >= 0; --i)
      {
        uint32_t v1, v2;

        v1 = rand() & mask;
        v2 = v1 + 1;
        PackedArray_pack(a1, i, &v1, 1);
        PackedArray_pack_reference(a2, i, &v1, 1);
        assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0);

        PackedArray_set(a1, i, v1);
        assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0);

        PackedArray_unpack(a1, i, &v2, 1);
        assert(v1 == v2);
        PackedArray_unpack_reference(a2, i, &v2, 1);
        assert(v1 == v2);
        v2 = PackedArray_get(a2, i);
        assert(v1 == v2);
      }

      PackedArray_destroy(a1);
      PackedArray_destroy(a2);
    }
    printf("  %2d bits per item -- success.\n", bitsPerItem);
  }

  printf("\n");
  printf("bulk packing / unpacking:\n");
  for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem)
  {
    uint32_t mask = (uint32_t)(1ULL << bitsPerItem) - 1;
    int count;

    for (count = 1; count <= 128; ++count)
    {
      uint32_t* v1;
      uint32_t* v2;
      PackedArray* a1;
      PackedArray* a2;
      int i, j;

      v1 = (uint32_t*)malloc(sizeof(uint32_t) * count);
      assert(v1 != NULL);
      v2 = (uint32_t*)malloc(sizeof(uint32_t) * count);
      assert(v2 != NULL);

      a1 = PackedArray_create(bitsPerItem, count);
      assert(a1 != NULL);
      a2 = PackedArray_create(bitsPerItem, count);
      assert(a2 != NULL);

      for (i = 0; i < (int)PackedArray_bufferSize(a1); ++i)
        a1->buffer[i] = a2->buffer[i] = rand();

      for (i = 0; i < count; ++i)
        v1[i] = rand() & mask;

      assert(bitsPerItem >= PackedArray_computeBitsPerItem(v1, count));

      for (i = 0; i < count; ++i)
      {
        for (j = 1; j <= count - i; ++j)
        {
          PackedArray_pack(a1, i, v1, j);
          PackedArray_pack_reference(a2, i, v1, j);
          assert(memcmp(a1->buffer, a2->buffer, sizeof(a1->buffer[0]) * PackedArray_bufferSize(a1)) == 0);

          PackedArray_unpack(a1, i, v2, j);
          assert(bitsPerItem >= PackedArray_computeBitsPerItem(v2, j));
          assert(memcmp(v1, v2, j * sizeof(uint32_t)) == 0);
          PackedArray_unpack_reference(a2, i, v2, j);
          assert(memcmp(v1, v2, j * sizeof(uint32_t)) == 0);
        }
      }

      PackedArray_destroy(a1);
      PackedArray_destroy(a2);
      free(v1);
      free(v2);
    }
    printf("  %2d bits per item -- success.\n", bitsPerItem);
  }

  return 0;
}

#elif defined(PACKEDARRAY_SELF_BENCH)  // #if defined(PACKEDARRAY_SELF_TEST)

#ifndef NDEBUG
#error please define NDEBUG to inhibit asserts when compiling the benchmark
#endif

#include <stdio.h>
#include <string.h>
#include <float.h>

// smallest of x and y; arguments may be evaluated twice, so they must be free
// of side effects
#ifndef MIN
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif

// largest of x and y; arguments may be evaluated twice, so they must be free
// of side effects
#ifndef MAX
#define MAX(x, y) (((x) < (y)) ? (y) : (x))
#endif

// getChronometerTime() returns a timestamp in seconds, as a double. Both
// platform variants use the same unit: the bench_* functions scale the
// difference between two readings by 1e6 and main() prints a total in seconds.
#ifdef _MSC_VER
#pragma warning(push, 3)
#include <windows.h>
#pragma warning(pop)
static double getChronometerTime(void)
{
  LARGE_INTEGER frequency;
  LARGE_INTEGER t;

  QueryPerformanceFrequency(&frequency);
  QueryPerformanceCounter(&t);

  // ticks / (ticks per second) = seconds
  return (double)t.QuadPart / (double)frequency.QuadPart;
}

#else
#include <sys/time.h>

static double getChronometerTime(void)
{
  struct timeval now = { 0 };
  gettimeofday(&now, NULL);

  // whole seconds + microseconds, expressed in seconds
  return (double)now.tv_sec + (double)now.tv_usec * 1e-6;
}
#endif

// number of times each benchmarked operation is repeated per measurement
#define LOOP_COUNT 1000

// Times LOOP_COUNT memcpy() calls, each copying `count` uint32_t values from
// `in` to `out`. Returns 1e6 * elapsed / LOOP_COUNT, i.e. microseconds per
// copy when getChronometerTime() reads seconds.
static double bench_memcpy(uint32_t* in, uint32_t* out, uint32_t count)
{
  double began, finished;
  uint32_t remaining;

  began = getChronometerTime();

  for (remaining = LOOP_COUNT; remaining != 0; --remaining)
    memcpy(out, in, count * sizeof(uint32_t));

  finished = getChronometerTime();

  return 1e6 * (finished - began) / LOOP_COUNT;
}

// Times LOOP_COUNT element by element copies of `count` uint32_t values from
// `in` to `out`. Returns 1e6 * elapsed / LOOP_COUNT, i.e. microseconds per
// copy when getChronometerTime() reads seconds.
static double bench_loopcpy(uint32_t* in, uint32_t* out, uint32_t count)
{
  double start, end;
  uint32_t i;

  start = getChronometerTime();

  for (i = 0; i < LOOP_COUNT; ++i)
  {
    uint32_t j;

    // plain indexed copy loop: this is the code being measured
    for (j = 0; j < count; ++j)
      out[j] = in[j];
  }

  end = getChronometerTime();

  return 1e6 * (end - start) / LOOP_COUNT;
}

// Times LOOP_COUNT PackedArray_pack() calls, each packing `count` values from
// `in` into `out` starting at item 0. Returns 1e6 * elapsed / LOOP_COUNT, i.e.
// microseconds per call when getChronometerTime() reads seconds.
static double bench_pack(uint32_t* in, PackedArray* out, uint32_t count)
{
  double began, finished;
  int remaining;

  began = getChronometerTime();

  for (remaining = LOOP_COUNT; remaining > 0; --remaining)
    PackedArray_pack(out, 0, in, count);

  finished = getChronometerTime();

  return 1e6 * (finished - began) / LOOP_COUNT;
}

// Times LOOP_COUNT PackedArray_unpack() calls, each unpacking `count` items
// from `in` (starting at item 0) into `out`. Returns 1e6 * elapsed /
// LOOP_COUNT, i.e. microseconds per call when getChronometerTime() reads
// seconds.
static double bench_unpack(PackedArray* in, uint32_t* out, uint32_t count)
{
  double began, finished;
  int remaining;

  began = getChronometerTime();

  for (remaining = LOOP_COUNT; remaining > 0; --remaining)
    PackedArray_unpack(in, 0, out, count);

  finished = getChronometerTime();

  return 1e6 * (finished - began) / LOOP_COUNT;
}

// largest number of items benchmarked, and its log base 2; sizes double from
// 1 up to MAX_ELEMENT_COUNT, which gives LOG2_MAX_ELEMENT_COUNT + 1 samples
#define MAX_ELEMENT_COUNT (1 << 18)
#define LOG2_MAX_ELEMENT_COUNT 18

// Self benchmark entry point. Measures, for item counts doubling from 1 to
// MAX_ELEMENT_COUNT:
// - memcpy() and a plain copy loop, as baselines,
// - PackedArray_pack() / PackedArray_unpack() for every bitsPerItem in [1, 32],
// then prints per size, per bitsPerItem and overall avg / min / max speeds.
// Speeds are computed as count * sizeof(uint32_t) / elapsed, i.e. relative to
// the size of the unpacked data.
//
// NOTE(review): this section refuses to compile without NDEBUG, so every
// assert() below expands to nothing and allocation failures go unchecked.
int main(void)
{
  double start, end;
  uint32_t* b1;
  uint32_t* b2;
  uint32_t count, bitsPerItem;
  PackedArray** packed;
  uint32_t i;
  double* speed_memcpy;
  double avg_memcpy, min_memcpy, max_memcpy;
  double* speed_loopcpy;
  double avg_loopcpy, min_loopcpy, max_loopcpy;
  double* speed_pack[32];
  double avg_pack, min_pack, max_pack;
  double* speed_unpack[32];
  double avg_unpack, min_unpack, max_unpack;

  printf("-- PackedArray self bench ------------------------------------------------------\n");

  // the total time reported at the end also covers the setup below
  start = getChronometerTime();

  // b1 holds the random source values, b2 is the scratch / destination buffer
  b1 = (uint32_t*)malloc(sizeof(uint32_t) * MAX_ELEMENT_COUNT);
  assert(b1 != NULL);
  b2 = (uint32_t*)malloc(sizeof(uint32_t) * MAX_ELEMENT_COUNT);
  assert(b2 != NULL);

  // one PackedArray per bitsPerItem: packed[i] stores items of i + 1 bits
  packed = (PackedArray**)malloc(sizeof(PackedArray*) * 32);
  assert(packed != NULL);
  for (i = 0; i < 32; ++i)
    packed[i] = PackedArray_create(i + 1, MAX_ELEMENT_COUNT);

  for (i = 0; i < MAX_ELEMENT_COUNT; ++i)
    b1[i] = rand();

  // -- memcpy baseline --------------------------------------------------------
  speed_memcpy = (double*)malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1));
  assert(speed_memcpy != NULL);
  avg_memcpy = 0;
  min_memcpy = DBL_MAX;
  max_memcpy = 0;

  printf("memcpy:\n");
  printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)\n");

  // i counts the samples while count doubles
  for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i)
  {
    double elapsed = bench_memcpy(b1, b2, count);
    double speed = count * sizeof(uint32_t) / elapsed;
    printf("%4d\t%8d\t%9.3f\t%12.3f\n", 32, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed);

    avg_memcpy += speed;
    min_memcpy = MIN(min_memcpy, speed);
    max_memcpy = MAX(max_memcpy, speed);

    speed_memcpy[i] = speed;
  }

  // i is now the number of samples
  avg_memcpy /= i;

  printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)\n");
  printf("%10.3f\t%10.3f\t%10.3f\n", avg_memcpy, min_memcpy, max_memcpy);
  printf("\n");

  // -- copy loop baseline -----------------------------------------------------
  speed_loopcpy = (double*)malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1));
  assert(speed_loopcpy != NULL);
  avg_loopcpy = 0;
  min_loopcpy = DBL_MAX;
  max_loopcpy = 0;

  printf("loopcpy:\n");
  printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)\n");

  for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i)
  {
    double elapsed = bench_loopcpy(b1, b2, count);
    double speed = count * sizeof(uint32_t) / elapsed;
    printf("%4d\t%8d\t%9.3f\t%12.3f\n", 32, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed);

    avg_loopcpy += speed;
    min_loopcpy = MIN(min_loopcpy, speed);
    max_loopcpy = MAX(max_loopcpy, speed);

    speed_loopcpy[i] = speed;
  }

  avg_loopcpy /= i;

  printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)\n");
  printf("%10.3f\t%10.3f\t%10.3f\n", avg_loopcpy, min_loopcpy, max_loopcpy);
  printf("\n");

  // -- pack / unpack, one table per bitsPerItem -------------------------------
  for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem)
  {
    avg_pack = 0;
    min_pack = DBL_MAX;
    max_pack = 0;
    avg_unpack = 0;
    min_unpack = DBL_MAX;
    max_unpack = 0;

    printf("pack:\t        \t         \t            \t");
    printf("unpack:\t        \t         \t            \t");
    printf("\n");
    printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)");
    printf("\t");
    printf("bits\tsize (B)\ttime (µs)\tspeed (B/µs)");
    printf("\n");

    // per size speeds are kept for the summary tables printed at the end
    speed_pack[bitsPerItem - 1] = (double*)malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1));
    assert(speed_pack[bitsPerItem - 1] != NULL);
    speed_unpack[bitsPerItem - 1] = (double*)malloc(sizeof(double) * (LOG2_MAX_ELEMENT_COUNT + 1));
    assert(speed_unpack[bitsPerItem - 1] != NULL);
    for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i)
    {
      uint32_t mask = (uint32_t)(1ULL << bitsPerItem) - 1;
      uint32_t j;
      double elapsed, speed;

      // restrict the random source values to bitsPerItem bits
      for (j = 0; j < count; ++j)
        b2[j] = b1[j] & mask;

      elapsed = bench_pack(b2, packed[bitsPerItem - 1], count);
      speed = count * sizeof(uint32_t) / elapsed;
      printf("%4d\t%8d\t%9.3f\t%12.3f", bitsPerItem, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed);

      avg_pack += speed;
      min_pack = MIN(min_pack, speed);
      max_pack = MAX(max_pack, speed);

      speed_pack[bitsPerItem - 1][i] = speed;

      printf("\t");

      // unpacks what was just packed, back into b2
      elapsed = bench_unpack(packed[bitsPerItem - 1], b2, count);
      speed = count * sizeof(uint32_t) / elapsed;
      printf("%4d\t%8d\t%9.3f\t%12.3f", bitsPerItem, (uint32_t)(count * sizeof(uint32_t)), elapsed, speed);

      avg_unpack += speed;
      min_unpack = MIN(min_unpack, speed);
      max_unpack = MAX(max_unpack, speed);

      speed_unpack[bitsPerItem - 1][i] = speed;

      printf("\n");
    }
    // NOTE(review): prints the sample count on a line of its own; looks like
    // leftover debug output -- confirm before removing, it is part of the
    // program's output
    printf("%d\n",i);
    assert(i == LOG2_MAX_ELEMENT_COUNT + 1);

    printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)");
    printf("\t\t");
    printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)");
    printf("\n");

    avg_pack /= i;
    printf("%10.3f\t%10.3f\t%10.3f", avg_pack, min_pack, max_pack);

    printf("\t\t");

    avg_unpack /= i;
    printf("%10.3f\t%10.3f\t%10.3f", avg_unpack, min_unpack, max_unpack);
    printf("\n");
    printf("\n");
  }

  printf("\n");

  // -- summary: one row per bitsPerItem, aggregated over all sizes ------------
  printf("stats by bits per item\n");
  printf("pack:\t          \t          \t          \t");
  printf("unpack:\t          \t          \t          \t");
  printf("\n");
  printf("bits\tavg (B/µs)\tmin (B/µs)\tmax (B/µs)");
  printf("\t");
  printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)");
  printf("\n");
  for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem)
  {
    avg_pack = 0;
    min_pack = DBL_MAX;
    max_pack = 0;
    avg_unpack = 0;
    min_unpack = DBL_MAX;
    max_unpack = 0;

    for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i)
    {
      double speed;

      speed = speed_pack[bitsPerItem - 1][i];

      avg_pack += speed;
      min_pack = MIN(min_pack, speed);
      max_pack = MAX(max_pack, speed);

      speed = speed_unpack[bitsPerItem - 1][i];

      avg_unpack += speed;
      min_unpack = MIN(min_unpack, speed);
      max_unpack = MAX(max_unpack, speed);
    }
    assert(i == LOG2_MAX_ELEMENT_COUNT + 1);

    printf("%4d\t", bitsPerItem);

    avg_pack /= i;
    printf("%10.3f\t%10.3f\t%10.3f", avg_pack, min_pack, max_pack);
    printf("\t");

    avg_unpack /= i;
    printf("%10.3f\t%10.3f\t%10.3f", avg_unpack, min_unpack, max_unpack);
    printf("\n");
  }

  printf("\n");

  // -- summary: one row per size, aggregated over all 32 bitsPerItem values ---
  printf("stats by size\n");
  printf("pack:\t          \t          \t          \t");
  printf("unpack:\t          \t          \t          \t");
  printf("\n");
  printf("size(B)\tavg (B/µs)\tmin (B/µs)\tmax (B/µs)");
  printf("\t");
  printf("avg (B/µs)\tmin (B/µs)\tmax (B/µs)");
  printf("\n");
  for (count = 1, i = 0; count <= MAX_ELEMENT_COUNT; count *= 2, ++i)
  {
    avg_pack = 0;
    min_pack = DBL_MAX;
    max_pack = 0;
    avg_unpack = 0;
    min_unpack = DBL_MAX;
    max_unpack = 0;

    for (bitsPerItem = 1; bitsPerItem <= 32; ++bitsPerItem)
    {
      double speed;

      speed = speed_pack[bitsPerItem - 1][i];

      avg_pack += speed;
      min_pack = MIN(min_pack, speed);
      max_pack = MAX(max_pack, speed);

      speed = speed_unpack[bitsPerItem - 1][i];

      avg_unpack += speed;
      min_unpack = MIN(min_unpack, speed);
      max_unpack = MAX(max_unpack, speed);
    }

    printf("%7d\t", (uint32_t)sizeof(uint32_t) * count);

    // 32 = number of bitsPerItem values aggregated above
    avg_pack /= 32;
    printf("%10.3f\t%10.3f\t%10.3f", avg_pack, min_pack, max_pack);
    printf("\t");

    avg_unpack /= 32;
    printf("%10.3f\t%10.3f\t%10.3f", avg_unpack, min_unpack, max_unpack);
    printf("\n");
  }

  printf("\n");

  // -- cleanup ----------------------------------------------------------------
  free(b1);
  free(b2);
  free(speed_memcpy);
  free(speed_loopcpy);

  for (i = 0; i < 32; ++i)
  {
    PackedArray_destroy(packed[i]);
    free(speed_pack[i]);
    free(speed_unpack[i]);
  }

  free(packed);

  end = getChronometerTime();
  printf("total time (s): %f\n", (end - start));
  printf("\n");

  return 0;
}

#endif // #elif defined(PACKEDARRAY_SELF_BENCH)

#endif // #ifdef PACKEDARRAY_IMPL


================================================
FILE: README.md
================================================
# PackedArray: random access array of tightly packed unsigned integers
[![Build Status](https://travis-ci.org/gpakosz/PackedArray.png?branch=master)](https://travis-ci.org/gpakosz/PackedArray)
## TLDR

*PackedArray comes to the rescue when you're in desperate need of a uint9_t
or uint17_t array.*

## What?

When you want to hold an unordered sequence of unsigned integers in memory,
the C programming language lets you choose among 4 data types:

- `uint8_t`
- `uint16_t`
- `uint32_t`
- `uint64_t`

If your numbers are within the [0, 100000] range, only 17 bits per integer are
needed since 2<sup>17</sup> = 131072. However, you can't use an array of
`uint16_t` because 16 bits are not enough to store numbers between 65536 and
100000. When you use the next available type, `uint32_t`, you're wasting 15 bits
per integer which represents a 47% overhead in terms of storage requirements.
                                                         
`PackedArray` saves memory by packing integers/items together at the bit-level:

<table class="monospace">
  <tr>
    <td colspan="32">b0</td>
    <td colspan="32">b1</td>
    <td colspan="32">b2</td>
    <td style="border-style: dashed; border-right: none;">...</td>
  </tr>
  <tr>
    <td colspan="9">i0</td>
    <td colspan="9">i1</td>
    <td colspan="9">i2</td>
    <td colspan="9">i3</td>
    <td colspan="9">i4</td>
    <td colspan="9">i5</td>
    <td colspan="9">i6</td>
    <td colspan="9">i7</td>
    <td colspan="9">i8</td>
    <td colspan="9">i9</td>
    <td style="border-style: dashed; border-right: none;">...</td>
  </tr>
</table>

A `PackedArray` is backed by an `uint32_t` buffer. Several items end up being
stored inside the same buffer cell, e.g. i0, i1, and i2. Some items span two
buffer cells, e.g. i3, and i7. `PackedArray` is responsible for
encoding/decoding items into/from the storage buffer.

`PackedArraySIMD` is a `PackedArray` variant that makes use of SSE2 or NEON
instructions.

Going SIMD processes integers 4 by 4 but imposes an interleaved layout in the
storage buffer.
 
`PackedArraySIMD` interleaved layout, 13 bits per item:

<table class="monospace">
  <tr>
    <td colspan="32">b0</td>
    <td colspan="32">b1</td>
    <td colspan="32">b2</td>
    <td colspan="32">b3</td>
    <td style="border-style: dashed; border-right: none;">...</td>
  </tr>
  <tr>
    <td colspan="13">i0</td>
    <td colspan="13">i4</td>
    <td colspan="6">i8a</td>
    <td colspan="13">i1</td>
    <td colspan="13">i5</td>
    <td colspan="6">i9a</td>
    <td colspan="13">i2</td>
    <td colspan="13">i6</td>
    <td colspan="6">i10a</td>
    <td colspan="13">i3</td>
    <td colspan="13">i7</td>
    <td colspan="6">i11a</td>
    <td style="border-style: dashed;">i8b</td>
    <td style="border-style: dashed; border-right: none;">...</td>
  </tr>
</table>

As a consequence, the data layout of `PackedArraySIMD` isn't compatible with its
non SIMD counterpart. In other words, you cannot use `PackedArray` to unpack
data packed with `PackedArraySIMD` or the other way around.

It is also worth noting the implementations of `PackedArraySIMD_pack` and
`PackedArraySIMD_unpack` require more plumbing than their non-SIMD counterparts.
Additional computations are needed to find out and adjust a data window that can
be processed 4 by 4 with SIMD instructions.

`PackedArray` and `PackedArraySIMD` are released under the WTFPL v2 license.

For more information, see the [PackedArray announcement on my personal website].

[PackedArray announcement on my personal website]: http://pempek.net/articles/2013/08/03/packedarray-random-access-array-tightly-packed-unsigned-integers/

## Why?

`PackedArray` is designed as a drop-in replacement for an unsigned integer
array. I couldn't find such a data structure in the wild, so I implemented one.

Instead of writing:

    uint32_t* a = (uint32_t*)malloc(sizeof(uint32_t) * count);
    ...
    value = a[i];
    ...
    a[j] = value;

You write:

    PackedArray* a = PackedArray_create(bitsPerItem, count);
    ...
    value = PackedArray_get(a, i);
    ...
    PackedArray_set(a, j, value);

The `PackedArray_computeBitsPerItem` helper scans a `uint32_t` array and returns
the number of bits needed to create a `PackedArray` capable of holding its
content.

There are also `PackedArray_pack` and `PackedArray_unpack` that operate on
several items in a row. Those two could really have been named
`PackedArray_write` and `PackedArray_read` but I decided "pack" / "unpack"
better conveys that something is happening under the hood.

    // bulk packing / unpacking
    PackedArray_pack(a, j, in, count);
    PackedArray_unpack(a, j, out, count);

    // the following are semantically equivalent
    PackedArray_set(a, j, value);
    PackedArray_pack(a, j, &value, 1);

    value = PackedArray_get(a, i);
    PackedArray_unpack(a, i, &value, 1);

--------------------------------------------------------------------------------

## Compiling

In order to use `PackedArray` or `PackedArraySIMD` in your own project, you just
have to bring in the two `PackedArray.h` and `PackedArray.c` (or
`PackedArraySIMD.c`) files. It's that simple.

You can customize `PackedArray.c`'s behavior by defining the following macros:

- `PACKEDARRAY_ASSERT`
- `PACKEDARRAY_MALLOC`
- `PACKEDARRAY_FREE`

You can customize `PackedArraySIMD.c`'s behavior by defining the following
macros:

- `PACKEDARRAY_ASSERT`
- `PACKEDARRAY_ALIGNED_MALLOC`
- `PACKEDARRAY_FREE`

`PackedArray.c` and `PackedArraySIMD.c` can compile themselves into either a
test program or a micro-benchmark. For that, you have to use one of the
following preprocessor directives:

- `PACKEDARRAY_SELF_TEST`
- `PACKEDARRAY_SELF_BENCH`

For example, from command line:

    $ cc -o PackedArraySelfTest -DPACKEDARRAY_SELF_TEST -O2 -g PackedArray.c
    $ cc -o PackedArraySelfBench -DPACKEDARRAY_SELF_BENCH -DNDEBUG -O2 -g PackedArray.c

    $ cc -o PackedArraySIMDSelfTest -DPACKEDARRAY_SELF_TEST -O2 -g PackedArraySIMD.c
    $ cc -o PackedArraySIMDSelfBench -DPACKEDARRAY_SELF_BENCH -DNDEBUG -O2 -g PackedArraySIMD.c

### Compiling for Windows

There is a Visual Studio 2012 solution in the `_win-vs11/` folder.

### Compiling for Linux or Mac

There is a GNU Make 3.81 `Makefile` in the `_gnu-make/` folder:

    $ make -C _gnu-make/

### Compiling for Mac

See above if you want to compile from command line. Otherwise there is an Xcode
project located in the `_mac-xcode/` folder.

### Compiling for iOS

There is an Xcode project located in the `_ios-xcode/` folder.

If you prefer compiling from command line and deploying to a jailbroken device
through SSH, use:

    $ make -C _gnu-make/ binsubdir=ios CC="$(xcrun --sdk iphoneos --find clang) -isysroot $(xcrun --sdk iphoneos --show-sdk-path) -arch armv7 -arch armv7s -arch arm64" postbuild="codesign -s 'iPhone Developer'"

### Compiling for Android

You will have to install the Android NDK, and point the `$NDK_ROOT` environment
variable to the NDK path: e.g. `export NDK_ROOT=/opt/android-ndk` (without a
trailing `/` character).

Next, the easy way is to make a standalone Android toolchain with the following
command:

    $ $NDK_ROOT/build/tools/make-standalone-toolchain.sh --system=$(uname -s | tr [A-Z] [a-z])-$(uname -m) --platform=android-3 --toolchain=arm-linux-androideabi-clang3.3 --install-dir=/tmp/android-clang

Now you can compile the self test and self benchmark programs by running:

    $ make -C _gnu-make/ binsubdir=android CC=/tmp/android-clang/bin/clang CFLAGS='-march=armv7-a -mfloat-abi=softfp -mfpu=neon -O2'

--------------------------------------------------------------------------------

## Implementation details, what the hell is going on?

First, in `PackedArray.c` or `PackedArraySIMD.c`, everything that comes below
the `- 8< ----` marker is the code for the self test and self micro-benchmark
programs and can be discarded if you really want to.

If you want to cut down your anxiety, you can use the provided GNU Makefile and
invoke:

    $ make -C _gnu-make/ cut

This produces the `PackedArray.cut.c` and `PackedArraySIMD.cut.c` files.

You may also be troubled by `PackedArray.c` and `PackedArraySIMD.c` including
themselves with `#include PACKEDARRAY_SELF`. By combining preprocessing tricks
and including themselves, `PackedArray.c` and `PackedArraySIMD.c`
"generate the code" for the unrolled pack and unpack implementations.

By default `PACKEDARRAY_SELF` is defined to `"PackedArray.c"` which assumes the
compiler is going to look for the file in the same directory as the file from
which the `#include` statement is being evaluated. This helps compiling when the
build system refers to the source files with relative paths. Depending on your
compiler/build system combination you may want to override `PACKEDARRAY_SELF` to
`__FILE__`.

If you want to see the generated code, you can use the provided GNU Makefile and
invoke:

    $ make -C _gnu-make/ preprocess

This produces the `PackedArray.pp.c` and `PackedArraySIMD.pp.c` files.


--------------------------------------------------------------------------------

If you find `PackedArray` or `PackedArraySIMD` useful and decide to use it in
your own projects please drop me a line [@gpakosz].

If you use it in a commercial project, consider using [Gittip].

[@gpakosz]: https://twitter.com/gpakosz
[Gittip]: https://www.gittip.com/gpakosz/


================================================
FILE: _gnu-make/Makefile
================================================
# Top-level targets; none of them names a real file. build, test, preprocess,
# cut and assembly fan out to per-source targets (build-PackedArray,
# build-PackedArraySIMD, ...) generated by the rule template in this Makefile.
.PHONY: build test preprocess cut assembly clean

# directories
# The directory variables of this Makefile are computed with the realpath
# function. A Make that does not implement it expands the call to an empty
# string, which is what this guard detects before anything else is evaluated.
ifeq ($(realpath .),)
  $(error your version of Make doesn't support $$(realpath names...) - please use GNU Make 3.81 or later)
endif

# Platform / architecture detection.
#
# Both variables may be supplied by the caller on the command line. When
# platform is not supplied it is derived from uname -s for Linux (linux) and
# macOS (mac), and architecture is taken from uname -m; on any other system
# platform stays empty. architecture falls back to unknown-architecture
# whenever it ends up empty.
ifeq ($(platform),)
  # The fallback echo is grouped with uname so that it actually fires when
  # uname is missing or fails: the exit status of a pipeline is the one of its
  # last command, tr, so a trailing fallback after the pipe would never run.
  # The tr character classes are quoted so that the shell cannot glob-expand
  # them against single-letter file names in the current directory.
  __uname_s := $(shell sh -c '{ uname -s 2>/dev/null || echo unknown-platform; } | tr "[:upper:]" "[:lower:]"')
  __uname_m := $(shell sh -c '{ uname -m 2>/dev/null || echo unknown-architecture; } | tr "[:upper:]" "[:lower:]"')

  ifeq ($(__uname_s),linux)
    override platform := linux
    override architecture := $(__uname_m)
  endif
  ifeq ($(__uname_s),darwin)
    override platform := mac
    override architecture := $(__uname_m)
  endif
endif
ifeq ($(architecture),)
  override architecture := unknown-architecture
endif

# Directory layout. Paths are resolved against the directory Make runs in; the
# README invokes this file with make -C _gnu-make/, which makes both prefix
# and srcdir point to the repository root.
prefix := $(realpath ..)
srcdir := $(realpath ..)
# NOTE(review): buildir is only referenced by the clean target in this file.
buildir := $(realpath .)/build
# Output sub-directory below <prefix>/bin. The README overrides it from the
# command line for cross builds (binsubdir=ios, binsubdir=android).
binsubdir := $(platform)-$(architecture)
bindir := $(prefix)/bin/$(binsubdir)

# Default compiler flags, passed to every compile and preprocess command. The
# README replaces them from the command line for the Android build.
CFLAGS := -O2 -g

# Rule template, instantiated once per source file through call + eval.
#
#   $(1)  base name of the C source to build (PackedArray or PackedArraySIMD)
#
# For a given name X the template hooks the following phony targets onto the
# top-level targets of the same family:
#
#   build-X       links <bindir>/XSelfTest (-DPACKEDARRAY_SELF_TEST) and
#                 <bindir>/XSelfBench (-DPACKEDARRAY_SELF_BENCH -DNDEBUG), both
#                 with -std=c99 -pedantic; when postbuild is set, it is run
#                 with the freshly linked binary as its argument
#   assembly-X    emits verbose assembly listings (.s) for both programs
#   test-X        builds, then runs XSelfTest
#   preprocess-X  writes <srcdir>/X.pp.c. Include lines written with <...> or
#                 "..." are wrapped in pp(...) by sed before the preprocessor
#                 runs and unwrapped afterwards, so they show up unchanged in
#                 the output; an include that goes through a macro is not
#                 matched by that pattern and is left for the preprocessor.
#                 The preprocessor runs with -DPACKEDARRAY_SELF=__FILE__ on a
#                 temporary copy; lines starting with "# " and trailing blanks
#                 are removed, and runs of blank lines are squeezed by cat -s
#   cut-X         writes <srcdir>/X.cut.c: a copy of X.c that stops right
#                 before the first line containing the "- 8<" marker
#
# Escaping: a single dollar is expanded when the template is called, a doubled
# one survives the call and is expanded when the evaluated rule is used (this
# is how automatic variables such as the target name reach the recipes).
define _generate_rules
.PHONY: build-$(1)
build: build-$(1)
build-$(1): $(bindir)/$(1)SelfTest $(bindir)/$(1)SelfBench

$(bindir)/$(1)SelfTest: $(srcdir)/$(1).c $(srcdir)/PackedArray.h
	mkdir -p $$(@D)
	$(CC) -o $$@ -DPACKEDARRAY_SELF_TEST -std=c99 -pedantic $(CFLAGS) $$<
	$$(if $(postbuild),$(postbuild) $$@)

$(bindir)/$(1)SelfBench: $(srcdir)/$(1).c $(srcdir)/PackedArray.h
	mkdir -p $$(@D)
	$(CC) -o $$@ -DPACKEDARRAY_SELF_BENCH -DNDEBUG -std=c99 -pedantic $(CFLAGS) $$<
	$$(if $(postbuild),$(postbuild) $$@)

.PHONY: assembly-$(1)
assembly: assembly-$(1)
assembly-$(1): $(bindir)/$(1)SelfTest.s $(bindir)/$(1)SelfBench.s

$(bindir)/$(1)SelfTest.s: $(srcdir)/$(1).c $(srcdir)/PackedArray.h
	@echo Generating $$(@F) assembly listing
	@mkdir -p $$(@D)
	@$(CC) -o $$@ -S -fverbose-asm -g -DPACKEDARRAY_SELF_TEST -std=c99 -pedantic $(CFLAGS) $$<

$(bindir)/$(1)SelfBench.s: $(srcdir)/$(1).c $(srcdir)/PackedArray.h
	@echo Generating $$(@F) assembly listing
	@mkdir -p $$(@D)
	@$(CC) -o $$@ -S -fverbose-asm -g -DPACKEDARRAY_SELF_BENCH -DNDEBUG -std=c99 -pedantic $(CFLAGS) $$<

.PHONY: test-$(1)
test : test-$(1)
test-$(1): build-$(1)
	$(bindir)/$(1)SelfTest

.PHONY: preprocess-$(1)
preprocess: preprocess-$(1)
preprocess-$(1): $(srcdir)/$(1).c
	@echo Preprocessing $(1).c to $(1).pp.c
	$$(eval tmp := $$(shell mktemp -t $(1)XXX))
	@sed -E\
		-e 's/#include (<|").+(>|")/pp(&)/'\
		$(srcdir)/$(1).c > $$(tmp)
	@$(CPP) $(CPPFLAGS) -DPACKEDARRAY_SELF=__FILE__ $(CFLAGS) -xc $$(tmp) | sed -E\
		-e '/^# /d'\
		-e 's/[ \t]*$$$$//'\
		-e 's/pp\((.*)\)/\1/'\
		| cat -s > $(srcdir)/$(1).pp.c
	@rm -f $$(tmp)

.PHONY: cut-$(1)
cut: cut-$(1)
cut-$(1): $(srcdir)/$(1).c
	@echo Cutting down your anxiety to $(1).cut.c
	@sed -n '/- 8<.*/q;p' $(srcdir)/$(1).c > $(srcdir)/$(1).cut.c
endef

# Instantiate the rule template once for each of the two source files.
$(foreach p,PackedArray PackedArraySIMD, $(eval $(call _generate_rules,$(p))))

# Remove the build directory, the binaries of the current binsubdir, and the
# .pp.c / .cut.c files written next to the sources by preprocess and cut.
clean:
	rm -rf $(buildir)
	rm -rf $(bindir)
	rm -rf $(srcdir)/*.pp.c
	rm -rf $(srcdir)/*.cut.c


================================================
FILE: _ios-xcode/.gitignore
================================================
xcuserdata/
xcshareddata/


================================================
FILE: _ios-xcode/PackedArray-Info.plist
================================================
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>CFBundleDevelopmentRegion</key>
	<string>en</string>
	<key>CFBundleDisplayName</key>
	<string>${PRODUCT_NAME}</string>
	<key>CFBundleExecutable</key>
	<string>${EXECUTABLE_NAME}</string>
	<key>CFBundleIdentifier</key>
	<string>net.pempek.${PRODUCT_NAME:rfc1034identifier}</string>
	<key>CFBundleInfoDictionaryVersion</key>
	<string>6.0</string>
	<key>CFBundleName</key>
	<string>${PRODUCT_NAME}</string>
	<key>CFBundlePackageType</key>
	<string>APPL</string>
	<key>CFBundleShortVersionString</key>
	<string>1.0</string>
	<key>CFBundleSignature</key>
	<string>????</string>
	<key>CFBundleVersion</key>
	<string>1.0</string>
	<key>LSRequiresIPhoneOS</key>
	<true/>
	<key>UIRequiredDeviceCapabilities</key>
	<array>
		<string>armv7</string>
	</array>
	<key>UISupportedInterfaceOrientations</key>
	<array>
		<string>UIInterfaceOrientationPortrait</string>
		<string>UIInterfaceOrientationLandscapeLeft</string>
		<string>UIInterfaceOrientationLandscapeRight</string>
	</array>
	<key>UISupportedInterfaceOrientations~ipad</key>
	<array>
		<string>UIInterfaceOrientationPortrait</string>
		<string>UIInterfaceOrientationPortraitUpsideDown</string>
		<string>UIInterfaceOrientationLandscapeLeft</string>
		<string>UIInterfaceOrientationLandscapeRight</string>
	</array>
</dict>
</plist>


================================================
FILE: _ios-xcode/PackedArray.xcodeproj/project.pbxproj
================================================
// !$*UTF8*$!
{
	archiveVersion = 1;
	classes = {
	};
	objectVersion = 46;
	objects = {

/* Begin PBXBuildFile section */
		1A98F9EC17A406A700BF09FF /* PackedArray.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A98F9EB17A406A700BF09FF /* PackedArray.c */; };
		1A98F9F017A408F000BF09FF /* PackedArray.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A98F9EB17A406A700BF09FF /* PackedArray.c */; };
		1AC5A25517AD052200249A68 /* PackedArraySIMD.c in Sources */ = {isa = PBXBuildFile; fileRef = 1AC5A24017AD049E00249A68 /* PackedArraySIMD.c */; };
		1AC5A25617AD052400249A68 /* PackedArraySIMD.c in Sources */ = {isa = PBXBuildFile; fileRef = 1AC5A24017AD049E00249A68 /* PackedArraySIMD.c */; };
/* End PBXBuildFile section */

/* Begin PBXFileReference section */
		1A98F9C917A4018400BF09FF /* PackedArraySelfTest.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = PackedArraySelfTest.app; sourceTree = BUILT_PRODUCTS_DIR; };
		1A98F9EB17A406A700BF09FF /* PackedArray.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = PackedArray.c; sourceTree = "<group>"; };
		1A98F9F617A408F000BF09FF /* PackedArraySelfBench.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = PackedArraySelfBench.app; sourceTree = BUILT_PRODUCTS_DIR; };
		1A98FA1A17A4267A00BF09FF /* PackedArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PackedArray.h; sourceTree = "<group>"; };
		1AC5A24017AD049E00249A68 /* PackedArraySIMD.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = PackedArraySIMD.c; sourceTree = "<group>"; };
		1AC5A24917AD04A600249A68 /* PackedArraySIMDSelfTest.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = PackedArraySIMDSelfTest.app; sourceTree = BUILT_PRODUCTS_DIR; };
		1AC5A25317AD04A800249A68 /* PackedArraySIMDSelfBench.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = PackedArraySIMDSelfBench.app; sourceTree = BUILT_PRODUCTS_DIR; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
		1A98F9C617A4018400BF09FF /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		1A98F9F117A408F000BF09FF /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		1AC5A24417AD04A600249A68 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		1AC5A24E17AD04A800249A68 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
		1A98F9C017A4018400BF09FF = {
			isa = PBXGroup;
			children = (
				1A98F9ED17A406AE00BF09FF /* PackedArray */,
				1A98F9CA17A4018400BF09FF /* Products */,
			);
			sourceTree = "<group>";
		};
		1A98F9CA17A4018400BF09FF /* Products */ = {
			isa = PBXGroup;
			children = (
				1A98F9C917A4018400BF09FF /* PackedArraySelfTest.app */,
				1A98F9F617A408F000BF09FF /* PackedArraySelfBench.app */,
				1AC5A24917AD04A600249A68 /* PackedArraySIMDSelfTest.app */,
				1AC5A25317AD04A800249A68 /* PackedArraySIMDSelfBench.app */,
			);
			name = Products;
			sourceTree = "<group>";
		};
		1A98F9ED17A406AE00BF09FF /* PackedArray */ = {
			isa = PBXGroup;
			children = (
				1A98F9EB17A406A700BF09FF /* PackedArray.c */,
				1A98FA1A17A4267A00BF09FF /* PackedArray.h */,
				1AC5A24017AD049E00249A68 /* PackedArraySIMD.c */,
			);
			name = PackedArray;
			path = ..;
			sourceTree = "<group>";
		};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
		1A98F9C817A4018400BF09FF /* PackedArraySelfTest */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 1A98F9E617A4018400BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfTest" */;
			buildPhases = (
				1A98F9C517A4018400BF09FF /* Sources */,
				1A98F9C617A4018400BF09FF /* Frameworks */,
				1A98F9C717A4018400BF09FF /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = PackedArraySelfTest;
			productName = PackedArray;
			productReference = 1A98F9C917A4018400BF09FF /* PackedArraySelfTest.app */;
			productType = "com.apple.product-type.application";
		};
		1A98F9EE17A408F000BF09FF /* PackedArraySelfBench */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 1A98F9F317A408F000BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfBench" */;
			buildPhases = (
				1A98F9EF17A408F000BF09FF /* Sources */,
				1A98F9F117A408F000BF09FF /* Frameworks */,
				1A98F9F217A408F000BF09FF /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = PackedArraySelfBench;
			productName = PackedArray;
			productReference = 1A98F9F617A408F000BF09FF /* PackedArraySelfBench.app */;
			productType = "com.apple.product-type.application";
		};
		1AC5A24117AD04A600249A68 /* PackedArraySIMDSelfTest */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 1AC5A24617AD04A600249A68 /* Build configuration list for PBXNativeTarget "PackedArraySIMDSelfTest" */;
			buildPhases = (
				1AC5A24217AD04A600249A68 /* Sources */,
				1AC5A24417AD04A600249A68 /* Frameworks */,
				1AC5A24517AD04A600249A68 /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = PackedArraySIMDSelfTest;
			productName = PackedArray;
			productReference = 1AC5A24917AD04A600249A68 /* PackedArraySIMDSelfTest.app */;
			productType = "com.apple.product-type.application";
		};
		1AC5A24B17AD04A800249A68 /* PackedArraySIMDSelfBench */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 1AC5A25017AD04A800249A68 /* Build configuration list for PBXNativeTarget "PackedArraySIMDSelfBench" */;
			buildPhases = (
				1AC5A24C17AD04A800249A68 /* Sources */,
				1AC5A24E17AD04A800249A68 /* Frameworks */,
				1AC5A24F17AD04A800249A68 /* Resources */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = PackedArraySIMDSelfBench;
			productName = PackedArray;
			productReference = 1AC5A25317AD04A800249A68 /* PackedArraySIMDSelfBench.app */;
			productType = "com.apple.product-type.application";
		};
/* End PBXNativeTarget section */

/* Begin PBXProject section */
		1A98F9C117A4018400BF09FF /* Project object */ = {
			isa = PBXProject;
			attributes = {
				LastUpgradeCheck = 0460;
				ORGANIZATIONNAME = "Gregory Pakosz";
			};
			buildConfigurationList = 1A98F9C417A4018400BF09FF /* Build configuration list for PBXProject "PackedArray" */;
			compatibilityVersion = "Xcode 3.2";
			developmentRegion = English;
			hasScannedForEncodings = 0;
			knownRegions = (
				en,
			);
			mainGroup = 1A98F9C017A4018400BF09FF;
			productRefGroup = 1A98F9CA17A4018400BF09FF /* Products */;
			projectDirPath = "";
			projectRoot = "";
			targets = (
				1A98F9C817A4018400BF09FF /* PackedArraySelfTest */,
				1A98F9EE17A408F000BF09FF /* PackedArraySelfBench */,
				1AC5A24117AD04A600249A68 /* PackedArraySIMDSelfTest */,
				1AC5A24B17AD04A800249A68 /* PackedArraySIMDSelfBench */,
			);
		};
/* End PBXProject section */

/* Begin PBXResourcesBuildPhase section */
		1A98F9C717A4018400BF09FF /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		1A98F9F217A408F000BF09FF /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		1AC5A24517AD04A600249A68 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		1AC5A24F17AD04A800249A68 /* Resources */ = {
			isa = PBXResourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXResourcesBuildPhase section */

/* Begin PBXSourcesBuildPhase section */
		1A98F9C517A4018400BF09FF /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				1A98F9EC17A406A700BF09FF /* PackedArray.c in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		1A98F9EF17A408F000BF09FF /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				1A98F9F017A408F000BF09FF /* PackedArray.c in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		1AC5A24217AD04A600249A68 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				1AC5A25517AD052200249A68 /* PackedArraySIMD.c in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		1AC5A24C17AD04A800249A68 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				1AC5A25617AD052400249A68 /* PackedArraySIMD.c in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXSourcesBuildPhase section */

/* Begin XCBuildConfiguration section */
		1A98F9E417A4018400BF09FF /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT)";
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
				COPY_PHASE_STRIP = NO;
				GCC_C_LANGUAGE_STANDARD = gnu99;
				GCC_DYNAMIC_NO_PIC = NO;
				GCC_OPTIMIZATION_LEVEL = 0;
				GCC_SYMBOLS_PRIVATE_EXTERN = NO;
				GCC_WARN_ABOUT_RETURN_TYPE = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				INFOPLIST_FILE = "PackedArray-Info.plist";
				IPHONEOS_DEPLOYMENT_TARGET = 7.0;
				ONLY_ACTIVE_ARCH = YES;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SDKROOT = iphoneos;
				TARGETED_DEVICE_FAMILY = "1,2";
			};
			name = Debug;
		};
		1A98F9E517A4018400BF09FF /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT)";
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				"CODE_SIGN_IDENTITY[sdk=iphoneos*]" = "iPhone Developer";
				COPY_PHASE_STRIP = YES;
				GCC_C_LANGUAGE_STANDARD = gnu99;
				GCC_OPTIMIZATION_LEVEL = 2;
				GCC_PREPROCESSOR_DEFINITIONS = NDEBUG;
				GCC_WARN_ABOUT_RETURN_TYPE = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				INFOPLIST_FILE = "PackedArray-Info.plist";
				IPHONEOS_DEPLOYMENT_TARGET = 7.0;
				OTHER_CFLAGS = "-DNS_BLOCK_ASSERTIONS=1";
				PRODUCT_NAME = "$(TARGET_NAME)";
				SDKROOT = iphoneos;
				TARGETED_DEVICE_FAMILY = "1,2";
				VALIDATE_PRODUCT = YES;
			};
			name = Release;
		};
		1A98F9E717A4018400BF09FF /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				GCC_PREPROCESSOR_DEFINITIONS = PACKEDARRAY_SELF_TEST;
				WRAPPER_EXTENSION = app;
			};
			name = Debug;
		};
		1A98F9E817A4018400BF09FF /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				GCC_PREPROCESSOR_DEFINITIONS = PACKEDARRAY_SELF_TEST;
				WRAPPER_EXTENSION = app;
			};
			name = Release;
		};
		1A98F9F417A408F000BF09FF /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				GCC_PREPROCESSOR_DEFINITIONS = (
					PACKEDARRAY_SELF_BENCH,
					NDEBUG,
					"$(inherited)",
				);
				WRAPPER_EXTENSION = app;
			};
			name = Debug;
		};
		1A98F9F517A408F000BF09FF /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				GCC_PREPROCESSOR_DEFINITIONS = (
					PACKEDARRAY_SELF_BENCH,
					"$(inherited)",
				);
				WRAPPER_EXTENSION = app;
			};
			name = Release;
		};
		1AC5A24717AD04A600249A68 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT)";
				GCC_PREPROCESSOR_DEFINITIONS = PACKEDARRAY_SELF_TEST;
				WRAPPER_EXTENSION = app;
			};
			name = Debug;
		};
		1AC5A24817AD04A600249A68 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ARCHS = "$(ARCHS_STANDARD_INCLUDING_64_BIT)";
				GCC_PREPROCESSOR_DEFINITIONS = PACKEDARRAY_SELF_TEST;
				WRAPPER_EXTENSION = app;
			};
			name = Release;
		};
		1AC5A25117AD04A800249A68 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				GCC_PREPROCESSOR_DEFINITIONS = (
					PACKEDARRAY_SELF_BENCH,
					NDEBUG,
					"$(inherited)",
				);
				WRAPPER_EXTENSION = app;
			};
			name = Debug;
		};
		1AC5A25217AD04A800249A68 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				GCC_PREPROCESSOR_DEFINITIONS = (
					PACKEDARRAY_SELF_BENCH,
					"$(inherited)",
				);
				WRAPPER_EXTENSION = app;
			};
			name = Release;
		};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
		1A98F9C417A4018400BF09FF /* Build configuration list for PBXProject "PackedArray" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				1A98F9E417A4018400BF09FF /* Debug */,
				1A98F9E517A4018400BF09FF /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		1A98F9E617A4018400BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfTest" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				1A98F9E717A4018400BF09FF /* Debug */,
				1A98F9E817A4018400BF09FF /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		1A98F9F317A408F000BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfBench" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				1A98F9F417A408F000BF09FF /* Debug */,
				1A98F9F517A408F000BF09FF /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		1AC5A24617AD04A600249A68 /* Build configuration list for PBXNativeTarget "PackedArraySIMDSelfTest" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				1AC5A24717AD04A600249A68 /* Debug */,
				1AC5A24817AD04A600249A68 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		1AC5A25017AD04A800249A68 /* Build configuration list for PBXNativeTarget "PackedArraySIMDSelfBench" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				1AC5A25117AD04A800249A68 /* Debug */,
				1AC5A25217AD04A800249A68 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
/* End XCConfigurationList section */
	};
	rootObject = 1A98F9C117A4018400BF09FF /* Project object */;
}


================================================
FILE: _ios-xcode/PackedArray.xcodeproj/project.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "self:PackedArray.xcodeproj">
   </FileRef>
</Workspace>


================================================
FILE: _mac-xcode/.gitignore
================================================
xcuserdata/
xcshareddata/


================================================
FILE: _mac-xcode/PackedArray.xcodeproj/project.pbxproj
================================================
// !$*UTF8*$!
{
	archiveVersion = 1;
	classes = {
	};
	objectVersion = 46;
	objects = {

/* Begin PBXBuildFile section */
		1A98FA1017A424D700BF09FF /* PackedArray.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A98FA0E17A424D700BF09FF /* PackedArray.c */; };
		1A98FA1317A4262700BF09FF /* PackedArray.c in Sources */ = {isa = PBXBuildFile; fileRef = 1A98FA0E17A424D700BF09FF /* PackedArray.c */; };
		1AEF60E217AD035900CA6B64 /* PackedArraySIMD.c in Sources */ = {isa = PBXBuildFile; fileRef = 1AEF60CF17AD02B000CA6B64 /* PackedArraySIMD.c */; };
		1AEF60E317AD035A00CA6B64 /* PackedArraySIMD.c in Sources */ = {isa = PBXBuildFile; fileRef = 1AEF60CF17AD02B000CA6B64 /* PackedArraySIMD.c */; };
/* End PBXBuildFile section */

/* Begin PBXCopyFilesBuildPhase section */
		1A98F9FF17A4249200BF09FF /* CopyFiles */ = {
			isa = PBXCopyFilesBuildPhase;
			buildActionMask = 2147483647;
			dstPath = /usr/share/man/man1/;
			dstSubfolderSpec = 0;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 1;
		};
		1A98FA1517A4262700BF09FF /* CopyFiles */ = {
			isa = PBXCopyFilesBuildPhase;
			buildActionMask = 2147483647;
			dstPath = /usr/share/man/man1/;
			dstSubfolderSpec = 0;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 1;
		};
		1AEF60D417AD02BA00CA6B64 /* CopyFiles */ = {
			isa = PBXCopyFilesBuildPhase;
			buildActionMask = 2147483647;
			dstPath = /usr/share/man/man1/;
			dstSubfolderSpec = 0;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 1;
		};
		1AEF60DD17AD02BD00CA6B64 /* CopyFiles */ = {
			isa = PBXCopyFilesBuildPhase;
			buildActionMask = 2147483647;
			dstPath = /usr/share/man/man1/;
			dstSubfolderSpec = 0;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 1;
		};
/* End PBXCopyFilesBuildPhase section */

/* Begin PBXFileReference section */
		1A98FA0117A4249200BF09FF /* PackedArraySelfTest */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = PackedArraySelfTest; sourceTree = BUILT_PRODUCTS_DIR; };
		1A98FA0E17A424D700BF09FF /* PackedArray.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = PackedArray.c; sourceTree = "<group>"; };
		1A98FA0F17A424D700BF09FF /* PackedArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PackedArray.h; sourceTree = "<group>"; };
		1A98FA1917A4262700BF09FF /* PackedArraySelfBench */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = PackedArraySelfBench; sourceTree = BUILT_PRODUCTS_DIR; };
		1AEF60CF17AD02B000CA6B64 /* PackedArraySIMD.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = PackedArraySIMD.c; sourceTree = "<group>"; };
		1AEF60D817AD02BA00CA6B64 /* PackedArraySIMDSelfTest */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = PackedArraySIMDSelfTest; sourceTree = BUILT_PRODUCTS_DIR; };
		1AEF60E117AD02BD00CA6B64 /* PackedArraySIMDSelfBench */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = PackedArraySIMDSelfBench; sourceTree = BUILT_PRODUCTS_DIR; };
/* End PBXFileReference section */

/* Begin PBXFrameworksBuildPhase section */
		1A98F9FE17A4249200BF09FF /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		1A98FA1417A4262700BF09FF /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		1AEF60D317AD02BA00CA6B64 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		1AEF60DC17AD02BD00CA6B64 /* Frameworks */ = {
			isa = PBXFrameworksBuildPhase;
			buildActionMask = 2147483647;
			files = (
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXFrameworksBuildPhase section */

/* Begin PBXGroup section */
		1A98F9F817A4249200BF09FF = {
			isa = PBXGroup;
			children = (
				1A98FA0D17A424BE00BF09FF /* PackedArray */,
				1A98FA0217A4249200BF09FF /* Products */,
			);
			sourceTree = "<group>";
		};
		1A98FA0217A4249200BF09FF /* Products */ = {
			isa = PBXGroup;
			children = (
				1A98FA0117A4249200BF09FF /* PackedArraySelfTest */,
				1A98FA1917A4262700BF09FF /* PackedArraySelfBench */,
				1AEF60D817AD02BA00CA6B64 /* PackedArraySIMDSelfTest */,
				1AEF60E117AD02BD00CA6B64 /* PackedArraySIMDSelfBench */,
			);
			name = Products;
			sourceTree = "<group>";
		};
		1A98FA0D17A424BE00BF09FF /* PackedArray */ = {
			isa = PBXGroup;
			children = (
				1A98FA0E17A424D700BF09FF /* PackedArray.c */,
				1A98FA0F17A424D700BF09FF /* PackedArray.h */,
				1AEF60CF17AD02B000CA6B64 /* PackedArraySIMD.c */,
			);
			name = PackedArray;
			path = ..;
			sourceTree = "<group>";
		};
/* End PBXGroup section */

/* Begin PBXNativeTarget section */
		1A98FA0017A4249200BF09FF /* PackedArraySelfTest */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 1A98FA0A17A4249200BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfTest" */;
			buildPhases = (
				1A98F9FD17A4249200BF09FF /* Sources */,
				1A98F9FE17A4249200BF09FF /* Frameworks */,
				1A98F9FF17A4249200BF09FF /* CopyFiles */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = PackedArraySelfTest;
			productName = PackedArray;
			productReference = 1A98FA0117A4249200BF09FF /* PackedArraySelfTest */;
			productType = "com.apple.product-type.tool";
		};
		1A98FA1117A4262700BF09FF /* PackedArraySelfBench */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 1A98FA1617A4262700BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfBench" */;
			buildPhases = (
				1A98FA1217A4262700BF09FF /* Sources */,
				1A98FA1417A4262700BF09FF /* Frameworks */,
				1A98FA1517A4262700BF09FF /* CopyFiles */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = PackedArraySelfBench;
			productName = PackedArray;
			productReference = 1A98FA1917A4262700BF09FF /* PackedArraySelfBench */;
			productType = "com.apple.product-type.tool";
		};
		1AEF60D017AD02BA00CA6B64 /* PackedArraySIMDSelfTest */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 1AEF60D517AD02BA00CA6B64 /* Build configuration list for PBXNativeTarget "PackedArraySIMDSelfTest" */;
			buildPhases = (
				1AEF60D117AD02BA00CA6B64 /* Sources */,
				1AEF60D317AD02BA00CA6B64 /* Frameworks */,
				1AEF60D417AD02BA00CA6B64 /* CopyFiles */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = PackedArraySIMDSelfTest;
			productName = PackedArray;
			productReference = 1AEF60D817AD02BA00CA6B64 /* PackedArraySIMDSelfTest */;
			productType = "com.apple.product-type.tool";
		};
		1AEF60D917AD02BD00CA6B64 /* PackedArraySIMDSelfBench */ = {
			isa = PBXNativeTarget;
			buildConfigurationList = 1AEF60DE17AD02BD00CA6B64 /* Build configuration list for PBXNativeTarget "PackedArraySIMDSelfBench" */;
			buildPhases = (
				1AEF60DA17AD02BD00CA6B64 /* Sources */,
				1AEF60DC17AD02BD00CA6B64 /* Frameworks */,
				1AEF60DD17AD02BD00CA6B64 /* CopyFiles */,
			);
			buildRules = (
			);
			dependencies = (
			);
			name = PackedArraySIMDSelfBench;
			productName = PackedArray;
			productReference = 1AEF60E117AD02BD00CA6B64 /* PackedArraySIMDSelfBench */;
			productType = "com.apple.product-type.tool";
		};
/* End PBXNativeTarget section */

/* Begin PBXProject section */
		1A98F9F917A4249200BF09FF /* Project object */ = {
			isa = PBXProject;
			attributes = {
				LastUpgradeCheck = 0460;
				ORGANIZATIONNAME = "Gregory Pakosz";
			};
			buildConfigurationList = 1A98F9FC17A4249200BF09FF /* Build configuration list for PBXProject "PackedArray" */;
			compatibilityVersion = "Xcode 3.2";
			developmentRegion = English;
			hasScannedForEncodings = 0;
			knownRegions = (
				en,
			);
			mainGroup = 1A98F9F817A4249200BF09FF;
			productRefGroup = 1A98FA0217A4249200BF09FF /* Products */;
			projectDirPath = "";
			projectRoot = "";
			targets = (
				1A98FA0017A4249200BF09FF /* PackedArraySelfTest */,
				1A98FA1117A4262700BF09FF /* PackedArraySelfBench */,
				1AEF60D017AD02BA00CA6B64 /* PackedArraySIMDSelfTest */,
				1AEF60D917AD02BD00CA6B64 /* PackedArraySIMDSelfBench */,
			);
		};
/* End PBXProject section */

/* Begin PBXSourcesBuildPhase section */
		1A98F9FD17A4249200BF09FF /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				1A98FA1017A424D700BF09FF /* PackedArray.c in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		1A98FA1217A4262700BF09FF /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				1A98FA1317A4262700BF09FF /* PackedArray.c in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		1AEF60D117AD02BA00CA6B64 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				1AEF60E217AD035900CA6B64 /* PackedArraySIMD.c in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
		1AEF60DA17AD02BD00CA6B64 /* Sources */ = {
			isa = PBXSourcesBuildPhase;
			buildActionMask = 2147483647;
			files = (
				1AEF60E317AD035A00CA6B64 /* PackedArraySIMD.c in Sources */,
			);
			runOnlyForDeploymentPostprocessing = 0;
		};
/* End PBXSourcesBuildPhase section */

/* Begin XCBuildConfiguration section */
		1A98FA0817A4249200BF09FF /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ARCHS = "$(ARCHS_STANDARD_64_BIT)";
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = NO;
				GCC_C_LANGUAGE_STANDARD = gnu99;
				GCC_DYNAMIC_NO_PIC = NO;
				GCC_ENABLE_OBJC_EXCEPTIONS = YES;
				GCC_OPTIMIZATION_LEVEL = 0;
				GCC_SYMBOLS_PRIVATE_EXTERN = NO;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.8;
				ONLY_ACTIVE_ARCH = YES;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SDKROOT = macosx;
			};
			name = Debug;
		};
		1A98FA0917A4249200BF09FF /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				ALWAYS_SEARCH_USER_PATHS = NO;
				ARCHS = "$(ARCHS_STANDARD_64_BIT)";
				CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x";
				CLANG_CXX_LIBRARY = "libc++";
				CLANG_WARN_CONSTANT_CONVERSION = YES;
				CLANG_WARN_EMPTY_BODY = YES;
				CLANG_WARN_ENUM_CONVERSION = YES;
				CLANG_WARN_INT_CONVERSION = YES;
				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
				COPY_PHASE_STRIP = YES;
				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
				GCC_C_LANGUAGE_STANDARD = gnu99;
				GCC_ENABLE_OBJC_EXCEPTIONS = YES;
				GCC_OPTIMIZATION_LEVEL = 2;
				GCC_PREPROCESSOR_DEFINITIONS = NDEBUG;
				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
				GCC_WARN_ABOUT_RETURN_TYPE = YES;
				GCC_WARN_UNINITIALIZED_AUTOS = YES;
				GCC_WARN_UNUSED_VARIABLE = YES;
				MACOSX_DEPLOYMENT_TARGET = 10.8;
				PRODUCT_NAME = "$(TARGET_NAME)";
				SDKROOT = macosx;
			};
			name = Release;
		};
		1A98FA0B17A4249200BF09FF /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				GCC_PREPROCESSOR_DEFINITIONS = (
					PACKEDARRAY_SELF_TEST,
					"$(inherited)",
				);
			};
			name = Debug;
		};
		1A98FA0C17A4249200BF09FF /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				GCC_PREPROCESSOR_DEFINITIONS = (
					PACKEDARRAY_SELF_TEST,
					"$(inherited)",
				);
			};
			name = Release;
		};
		1A98FA1717A4262700BF09FF /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				GCC_PREPROCESSOR_DEFINITIONS = (
					PACKEDARRAY_SELF_BENCH,
					NDEBUG,
					"$(inherited)",
				);
			};
			name = Debug;
		};
		1A98FA1817A4262700BF09FF /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				GCC_PREPROCESSOR_DEFINITIONS = (
					PACKEDARRAY_SELF_BENCH,
					NDEBUG,
					"$(inherited)",
				);
			};
			name = Release;
		};
		1AEF60D617AD02BA00CA6B64 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				GCC_PREPROCESSOR_DEFINITIONS = (
					PACKEDARRAY_SELF_TEST,
					"$(inherited)",
				);
			};
			name = Debug;
		};
		1AEF60D717AD02BA00CA6B64 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				GCC_PREPROCESSOR_DEFINITIONS = (
					PACKEDARRAY_SELF_TEST,
					"$(inherited)",
				);
			};
			name = Release;
		};
		1AEF60DF17AD02BD00CA6B64 /* Debug */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				GCC_PREPROCESSOR_DEFINITIONS = (
					PACKEDARRAY_SELF_BENCH,
					NDEBUG,
					"$(inherited)",
				);
			};
			name = Debug;
		};
		1AEF60E017AD02BD00CA6B64 /* Release */ = {
			isa = XCBuildConfiguration;
			buildSettings = {
				GCC_PREPROCESSOR_DEFINITIONS = (
					PACKEDARRAY_SELF_BENCH,
					NDEBUG,
					"$(inherited)",
				);
			};
			name = Release;
		};
/* End XCBuildConfiguration section */

/* Begin XCConfigurationList section */
		1A98F9FC17A4249200BF09FF /* Build configuration list for PBXProject "PackedArray" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				1A98FA0817A4249200BF09FF /* Debug */,
				1A98FA0917A4249200BF09FF /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		1A98FA0A17A4249200BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfTest" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				1A98FA0B17A4249200BF09FF /* Debug */,
				1A98FA0C17A4249200BF09FF /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		1A98FA1617A4262700BF09FF /* Build configuration list for PBXNativeTarget "PackedArraySelfBench" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				1A98FA1717A4262700BF09FF /* Debug */,
				1A98FA1817A4262700BF09FF /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		1AEF60D517AD02BA00CA6B64 /* Build configuration list for PBXNativeTarget "PackedArraySIMDSelfTest" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				1AEF60D617AD02BA00CA6B64 /* Debug */,
				1AEF60D717AD02BA00CA6B64 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
		1AEF60DE17AD02BD00CA6B64 /* Build configuration list for PBXNativeTarget "PackedArraySIMDSelfBench" */ = {
			isa = XCConfigurationList;
			buildConfigurations = (
				1AEF60DF17AD02BD00CA6B64 /* Debug */,
				1AEF60E017AD02BD00CA6B64 /* Release */,
			);
			defaultConfigurationIsVisible = 0;
			defaultConfigurationName = Release;
		};
/* End XCConfigurationList section */
	};
	rootObject = 1A98F9F917A4249200BF09FF /* Project object */;
}


================================================
FILE: _mac-xcode/PackedArray.xcodeproj/project.xcworkspace/contents.xcworkspacedata
================================================
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
   version = "1.0">
   <FileRef
      location = "self:PackedArray.xcodeproj">
   </FileRef>
</Workspace>


================================================
FILE: _win-vs11/.gitignore
================================================
/build
/ipch

*.suo
*.sdf
*.opensdf
*.user
*.sln.docstates


================================================
FILE: _win-vs11/Common.props
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <!--
    Shared property sheet: output locations plus compiler and linker defaults
    common to every configuration.
    The path macros below expand $(PLATFORM_SUFFIX), $(ARCH) and $(CONF_SUFFIX),
    none of which is defined in this sheet, so they must already be set by
    sheets imported before this one (CONF_SUFFIX comes from the
    Debug/Release sheets; ARCH and PLATFORM_SUFFIX presumably from
    x86.props / x64.props; TODO confirm).
  -->
  <PropertyGroup Label="UserMacros">
    <BIN_DIR>..\bin\Windows$(PLATFORM_SUFFIX)-$(ARCH)$(CONF_SUFFIX)</BIN_DIR>
    <LIB_DIR>..\lib\Windows$(PLATFORM_SUFFIX)-$(ARCH)-vs11$(CONF_SUFFIX)</LIB_DIR>
    <BUILD_DIR>build\$(ProjectName)-win$(PLATFORM_SUFFIX)-$(ARCH)-vs11$(CONF_SUFFIX)</BUILD_DIR>
  </PropertyGroup>
  <!-- Intermediate files go to BUILD_DIR, final binaries to BIN_DIR. -->
  <PropertyGroup>
    <IntDir>$(BUILD_DIR)\</IntDir>
    <OutDir>$(BIN_DIR)\</OutDir>
    <LinkIncremental>false</LinkIncremental>
  </PropertyGroup>
  <ItemDefinitionGroup>
    <ClCompile>
      <!-- Prepends the CRT/SCL deprecation silencers to whatever was defined earlier. -->
      <PreprocessorDefinitions>_CRT_SECURE_NO_DEPRECATE;_CRT_NONSTDC_NO_DEPRECATE;_SCL_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <MultiProcessorCompilation>true</MultiProcessorCompilation>
      <!--
        /weNNNN promotes warning NNNN to an error, /wdNNNN disables it.
        NOTE(review): unlike the Link options below, this value does not append
        %(AdditionalOptions), so compiler options set by earlier sheets are replaced.
      -->
      <AdditionalOptions>/we4013 /wd4820 /we4289 /wd4342 /wd4347 /wd4514 /we4545 /we4546 /we4547 /we4548 /we4549 /we4619 /we4623 /we4625 /we4626 /wd4710 /we4836 /we4905 /we4906 /we4928 /we4946 /wd4986 /wd4711 /wd4350</AdditionalOptions>
      <PrecompiledHeader>NotUsing</PrecompiledHeader>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <WarningLevel>EnableAllWarnings</WarningLevel>
    </ClCompile>
    <Link>
      <GenerateDebugInformation>true</GenerateDebugInformation>
      <SubSystem>Console</SubSystem>
      <AdditionalOptions>/time %(AdditionalOptions)</AdditionalOptions>
    </Link>
  </ItemDefinitionGroup>
  <!-- Publishes the three directory macros as BuildMacro items. -->
  <ItemGroup>
    <BuildMacro Include="BIN_DIR">
      <Value>$(BIN_DIR)</Value>
    </BuildMacro>
    <BuildMacro Include="LIB_DIR">
      <Value>$(LIB_DIR)</Value>
    </BuildMacro>
    <BuildMacro Include="BUILD_DIR">
      <Value>$(BUILD_DIR)</Value>
    </BuildMacro>
  </ItemGroup>
</Project>

================================================
FILE: _win-vs11/Debug.props
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <!-- Debug-configuration property sheet: path suffix and unoptimized compiler settings. -->
  <PropertyGroup Label="UserMacros">
    <!-- Expanded by the BIN_DIR / LIB_DIR / BUILD_DIR macros in Common.props. -->
    <CONF_SUFFIX>-Debug</CONF_SUFFIX>
  </PropertyGroup>
  <ItemDefinitionGroup>
    <ClCompile>
      <Optimization>Disabled</Optimization>
      <!--
        NOTE(review): the project files import Common.props after this sheet, and
        Common.props sets WarningLevel to EnableAllWarnings, so this Level3 value
        looks like it is overridden in practice. Confirm which is intended.
      -->
      <WarningLevel>Level3</WarningLevel>
      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemGroup>
    <BuildMacro Include="CONF_SUFFIX">
      <Value>$(CONF_SUFFIX)</Value>
    </BuildMacro>
  </ItemGroup>
</Project>


================================================
FILE: _win-vs11/PackedArray.sln
================================================

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Express 2012 for Windows Desktop
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PackedArraySelfTest", "PackedArraySelfTest.vcxproj", "{7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PackedArraySelfBench", "PackedArraySelfBench.vcxproj", "{686B991E-01AD-4433-897E-DFD5E751DAF5}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PackedArraySIMDSelfBench", "PackedArraySIMDSelfBench.vcxproj", "{6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PackedArraySIMDSelfTest", "PackedArraySIMDSelfTest.vcxproj", "{2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}"
EndProject
Global
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
		Debug|Win32 = Debug|Win32
		Debug|x64 = Debug|x64
		Release|Win32 = Release|Win32
		Release|x64 = Release|x64
	EndGlobalSection
	GlobalSection(ProjectConfigurationPlatforms) = postSolution
		{7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Debug|Win32.ActiveCfg = Debug|Win32
		{7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Debug|Win32.Build.0 = Debug|Win32
		{7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Debug|x64.ActiveCfg = Debug|x64
		{7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Debug|x64.Build.0 = Debug|x64
		{7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Release|Win32.ActiveCfg = Release|Win32
		{7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Release|Win32.Build.0 = Release|Win32
		{7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Release|x64.ActiveCfg = Release|x64
		{7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}.Release|x64.Build.0 = Release|x64
		{686B991E-01AD-4433-897E-DFD5E751DAF5}.Debug|Win32.ActiveCfg = Debug|Win32
		{686B991E-01AD-4433-897E-DFD5E751DAF5}.Debug|Win32.Build.0 = Debug|Win32
		{686B991E-01AD-4433-897E-DFD5E751DAF5}.Debug|x64.ActiveCfg = Debug|x64
		{686B991E-01AD-4433-897E-DFD5E751DAF5}.Debug|x64.Build.0 = Debug|x64
		{686B991E-01AD-4433-897E-DFD5E751DAF5}.Release|Win32.ActiveCfg = Release|Win32
		{686B991E-01AD-4433-897E-DFD5E751DAF5}.Release|Win32.Build.0 = Release|Win32
		{686B991E-01AD-4433-897E-DFD5E751DAF5}.Release|x64.ActiveCfg = Release|x64
		{686B991E-01AD-4433-897E-DFD5E751DAF5}.Release|x64.Build.0 = Release|x64
		{6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Debug|Win32.ActiveCfg = Debug|Win32
		{6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Debug|Win32.Build.0 = Debug|Win32
		{6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Debug|x64.ActiveCfg = Debug|x64
		{6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Debug|x64.Build.0 = Debug|x64
		{6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Release|Win32.ActiveCfg = Release|Win32
		{6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Release|Win32.Build.0 = Release|Win32
		{6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Release|x64.ActiveCfg = Release|x64
		{6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}.Release|x64.Build.0 = Release|x64
		{2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Debug|Win32.ActiveCfg = Debug|Win32
		{2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Debug|Win32.Build.0 = Debug|Win32
		{2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Debug|x64.ActiveCfg = Debug|x64
		{2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Debug|x64.Build.0 = Debug|x64
		{2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Release|Win32.ActiveCfg = Release|Win32
		{2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Release|Win32.Build.0 = Release|Win32
		{2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Release|x64.ActiveCfg = Release|x64
		{2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}.Release|x64.Build.0 = Release|x64
	EndGlobalSection
	GlobalSection(SolutionProperties) = preSolution
		HideSolutionNode = FALSE
	EndGlobalSection
EndGlobal


================================================
FILE: _win-vs11/PackedArraySIMDSelfBench.vcxproj
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|Win32">
      <Configuration>Debug</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|Win32">
      <Configuration>Release</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <ProjectGuid>{6082A77F-4ADC-4E49-89BA-BFE1C9E69C9D}</ProjectGuid>
    <Keyword>Win32Proj</Keyword>
    <RootNamespace>PackedArraySIMD</RootNamespace>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x86.props" />
    <Import Project="Debug.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x64.props" />
    <Import Project="Debug.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x86.props" />
    <Import Project="Release.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x64.props" />
    <Import Project="Release.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <ClCompile>
      <PreprocessorDefinitions>PACKEDARRAY_SELF_BENCH;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <PreprocessorDefinitions>PACKEDARRAY_SELF_BENCH;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <ClCompile>
      <!-- Entries are ';'-separated: the separator before %(PreprocessorDefinitions) keeps NDEBUG from fusing with the first inherited define. -->
      <PreprocessorDefinitions>PACKEDARRAY_SELF_BENCH;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <!-- Entries are ';'-separated: the separator before %(PreprocessorDefinitions) keeps NDEBUG from fusing with the first inherited define. -->
      <PreprocessorDefinitions>PACKEDARRAY_SELF_BENCH;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemGroup>
    <ClCompile Include="..\PackedArraySIMD.c" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\PackedArray.h" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
</Project>

================================================
FILE: _win-vs11/PackedArraySIMDSelfTest.vcxproj
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|Win32">
      <Configuration>Debug</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|Win32">
      <Configuration>Release</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <ProjectGuid>{2F15B46B-D5F3-46B4-A5A2-ECD62B9FD848}</ProjectGuid>
    <Keyword>Win32Proj</Keyword>
    <RootNamespace>PackedArraySIMD</RootNamespace>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x86.props" />
    <Import Project="Debug.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x64.props" />
    <Import Project="Debug.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x86.props" />
    <Import Project="Release.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x64.props" />
    <Import Project="Release.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <ClCompile>
      <PreprocessorDefinitions>PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <PreprocessorDefinitions>PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <ClCompile>
      <PreprocessorDefinitions>PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <PreprocessorDefinitions>PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemGroup>
    <ClCompile Include="..\PackedArraySIMD.c" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\PackedArray.h" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
</Project>

================================================
FILE: _win-vs11/PackedArraySelfBench.vcxproj
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|Win32">
      <Configuration>Debug</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|Win32">
      <Configuration>Release</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <ProjectGuid>{686B991E-01AD-4433-897E-DFD5E751DAF5}</ProjectGuid>
    <Keyword>Win32Proj</Keyword>
    <RootNamespace>PackedArray</RootNamespace>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x86.props" />
    <Import Project="Debug.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x64.props" />
    <Import Project="Debug.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x86.props" />
    <Import Project="Release.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x64.props" />
    <Import Project="Release.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <ClCompile>
      <PreprocessorDefinitions>PACKEDARRAY_SELF_BENCH;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <PreprocessorDefinitions>PACKEDARRAY_SELF_BENCH;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <ClCompile>
      <!-- Entries are ';'-separated: the separator before %(PreprocessorDefinitions) keeps NDEBUG from fusing with the first inherited define. -->
      <PreprocessorDefinitions>PACKEDARRAY_SELF_BENCH;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <!-- Entries are ';'-separated: the separator before %(PreprocessorDefinitions) keeps NDEBUG from fusing with the first inherited define. -->
      <PreprocessorDefinitions>PACKEDARRAY_SELF_BENCH;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemGroup>
    <ClCompile Include="..\PackedArray.c" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\PackedArray.h" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
</Project>

================================================
FILE: _win-vs11/PackedArraySelfTest.vcxproj
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <!-- Declares the four configuration/platform pairs of this project:
       Debug and Release, each for Win32 and x64. -->
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|Win32">
      <Configuration>Debug</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|Win32">
      <Configuration>Release</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
  </ItemGroup>
  <!-- Project-wide identity: the project GUID, the Win32Proj keyword and the root namespace. -->
  <PropertyGroup Label="Globals">
    <ProjectGuid>{7BC8C873-A2C7-43A1-BD8A-2F1731F3CB81}</ProjectGuid>
    <Keyword>Win32Proj</Keyword>
    <RootNamespace>PackedArray</RootNamespace>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <!-- Per-configuration basics. Every configuration builds an Application with the v110
       toolset. Debug configurations use the debug libraries; Release configurations do not
       and additionally enable whole-program optimization. -->
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v110</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
  <!-- Property sheets per configuration. Each group imports, in this order:
       1. the per-user platform sheet, only if that file exists;
       2. the architecture sheet (x86.props for Win32, x64.props for x64);
       3. the configuration sheet (Debug.props or Release.props);
       4. Common.props.
       Later sheets are evaluated after earlier ones, so keep this order when editing. -->
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x86.props" />
    <Import Project="Debug.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x64.props" />
    <Import Project="Debug.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x86.props" />
    <Import Project="Release.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
    <Import Project="x64.props" />
    <Import Project="Release.props" />
    <Import Project="Common.props" />
  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" />
  <!-- All four configurations put PACKEDARRAY_SELF_TEST in front of the preprocessor
       definitions inherited from the imported property sheets. The ';' before
       %(PreprocessorDefinitions) keeps the new symbol separate from the inherited list. -->
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <ClCompile>
      <PreprocessorDefinitions>PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <PreprocessorDefinitions>PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <ClCompile>
      <PreprocessorDefinitions>PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <PreprocessorDefinitions>PACKEDARRAY_SELF_TEST;%(PreprocessorDefinitions)</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>
  <!-- The only translation unit compiled by this project is ..\PackedArray.c. -->
  <ItemGroup>
    <ClCompile Include="..\PackedArray.c" />
  </ItemGroup>
  <!-- The header is listed as a project item alongside it. -->
  <ItemGroup>
    <ClInclude Include="..\PackedArray.h" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
</Project>

================================================
FILE: _win-vs11/Release.props
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <!-- Property sheet holding the compiler, linker and librarian settings for Release builds. -->
  <PropertyGroup Label="UserMacros">
    <!-- CONF_SUFFIX is defined empty here and published as a build macro at the end of this sheet.
         NOTE(review): presumably consumed by another sheet to name outputs; not visible here. -->
    <CONF_SUFFIX />
  </PropertyGroup>
  <PropertyGroup>
    <LinkIncremental>false</LinkIncremental>
  </PropertyGroup>
  <ItemDefinitionGroup>
    <ClCompile>
      <!-- Optimize for speed, with inlining, intrinsics and whole-program optimization. -->
      <Optimization>MaxSpeed</Optimization>
      <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
      <OmitFramePointers>true</OmitFramePointers>
      <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
      <WholeProgramOptimization>true</WholeProgramOptimization>
      <!-- NDEBUG is put in front of whatever definitions are inherited. -->
      <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <StringPooling>true</StringPooling>
      <WarningLevel>Level3</WarningLevel>
      <!-- Debug information is still produced (program database) for the optimized build. -->
      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
      <BufferSecurityCheck>false</BufferSecurityCheck>
      <!-- NOTE(review): /d2Zi+ is not a documented compiler switch; presumably it improves
           debug information in optimized code. Confirm it is still wanted. -->
      <AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
    </ClCompile>
    <Link>
      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
    </Link>
    <Lib>
      <LinkTimeCodeGeneration>true</LinkTimeCodeGeneration>
    </Lib>
  </ItemDefinitionGroup>
  <ItemGroup>
    <!-- Publishes CONF_SUFFIX as a build macro carrying the value defined above. -->
    <BuildMacro Include="CONF_SUFFIX">
      <Value>$(CONF_SUFFIX)</Value>
    </BuildMacro>
  </ItemGroup>
</Project>


================================================
FILE: _win-vs11/x64.props
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <!-- Architecture property sheet for x64 builds. -->
  <PropertyGroup Label="UserMacros">
    <!-- ARCH is set to x64 and published as a build macro at the end of this sheet.
         NOTE(review): presumably consumed by another sheet; not visible here. -->
    <ARCH>x64</ARCH>
  </PropertyGroup>
  <ItemDefinitionGroup>
    <Link>
      <!-- The linker targets the x64 machine type. -->
      <TargetMachine>MachineX64</TargetMachine>
    </Link>
  </ItemDefinitionGroup>
  <ItemGroup>
    <BuildMacro Include="ARCH">
      <Value>$(ARCH)</Value>
    </BuildMacro>
  </ItemGroup>
</Project>


================================================
FILE: _win-vs11/x86.props
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <!-- Architecture property sheet for x86 (Win32) builds. -->
  <PropertyGroup Label="UserMacros">
    <!-- ARCH is set to x86 and published as a build macro at the end of this sheet.
         NOTE(review): presumably consumed by another sheet; not visible here. -->
    <ARCH>x86</ARCH>
  </PropertyGroup>
  <ItemDefinitionGroup>
    <Link>
      <!-- The linker targets the x86 machine type. -->
      <TargetMachine>MachineX86</TargetMachine>
    </Link>
    <ClCompile>
      <!-- SSE2 code generation is enabled explicitly for x86; the x64 sheet sets no such option. -->
      <EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
    </ClCompile>
  </ItemDefinitionGroup>
  <ItemGroup>
    <BuildMacro Include="ARCH">
      <Value>$(ARCH)</Value>
    </BuildMacro>
  </ItemGroup>
</Project>

================================================
FILE: benchmark/PackedArraySIMDSelfBench-unrolled-galaxy-note-cortex-a9-1.4GHz.txt
================================================
-- PackedArray self bench ------------------------------------------------------
memcpy:
bits	size (B)	time (µs)	speed (B/µs)
  32	       4	    0.260	      15.392
  32	       8	    0.225	      35.583
  32	      16	    0.223	      71.774
  32	      32	    0.363	      88.127
  32	      64	    0.424	     150.976
  32	     128	    0.509	     251.462
  32	     256	    0.767	     333.771
  32	     512	    1.049	     488.175
  32	    1024	    1.824	     561.434
  32	    2048	    3.328	     615.413
  32	    4096	    6.297	     650.482
  32	    8192	   12.142	     674.686
  32	   16384	   23.797	     688.489
  32	   32768	   10.177	    3219.842
  32	   65536	   27.135	    2415.171
  32	  131072	   55.138	    2377.168
  32	  262144	  132.314	    1981.227
  32	  524288	  351.372	    1492.117
  32	 1048576	 1400.464	     748.735
avg (B/µs)	min (B/µs)	max (B/µs)
   887.370	    15.392	  3219.842

loopcpy:
bits	size (B)	time (µs)	speed (B/µs)
  32	       4	    0.037	     108.240
  32	       8	    0.038	     209.715
  32	      16	    0.038	     422.068
  32	      32	    0.063	     506.482
  32	      64	    0.071	     900.790
  32	     128	    0.086	    1491.308
  32	     256	    0.116	    2209.345
  32	     512	    0.176	    2909.869
  32	    1024	    0.306	    3344.990
  32	    2048	    0.546	    3751.063
  32	    4096	    1.026	    3991.605
  32	    8192	    2.497	    3280.792
  32	   16384	    4.508	    3634.413
  32	   32768	    8.462	    3872.392
  32	   65536	   27.297	    2400.848
  32	  131072	   55.625	    2356.342
  32	  262144	  185.746	    1411.304
  32	  524288	  630.057	     832.128
  32	 1048576	 1377.527	     761.202
avg (B/µs)	min (B/µs)	max (B/µs)
  2020.784	   108.240	  3991.605

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
   1	       4	    0.038	     105.517	   1	       4	    0.062	      64.528
   1	       8	    0.044	     182.361	   1	       8	    0.039	     204.600
   1	      16	    0.052	     307.839	   1	      16	    0.039	     409.200
   1	      32	    0.054	     591.268	   1	      32	    0.051	     627.186
   1	      64	    0.060	    1065.220	   1	      64	    0.065	     986.895
   1	     128	    0.074	    1731.842	   1	     128	    0.095	    1345.541
   1	     256	    0.106	    2412.903	   1	     256	    0.155	    1651.910
   1	     512	    0.166	    3081.038	   1	     512	    0.277	    1848.093
   1	    1024	    0.303	    3379.203	   1	    1024	    0.517	    1980.160
   1	    2048	    0.579	    3537.864	   1	    2048	    1.001	    2046.197
   1	    4096	    1.187	    3450.466	   1	    4096	    1.969	    2080.139
   1	    8192	    2.261	    3623.298	   1	    8192	    3.902	    2099.459
   1	   16384	    4.463	    3670.912	   1	   16384	    7.797	    2101.320
   1	   32768	   12.762	    2567.609	   1	   32768	   15.681	    2089.659
   1	   65536	   31.981	    2049.217	   1	   65536	   31.053	    2110.468
   1	  131072	   71.963	    1821.379	   1	  131072	   62.428	    2099.571
   1	  262144	  151.636	    1728.770	   1	  262144	  124.880	    2099.166
   1	  524288	  309.744	    1692.649	   1	  524288	  252.277	    2078.224
   1	 1048576	  773.824	    1355.058	   1	 1048576	  533.997	    1963.636
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  2018.653	   105.517	  3670.912		  1572.945	    64.528	  2110.468

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
   2	       4	    0.039	     102.928	   2	       4	    0.067	      59.705
   2	       8	    0.049	     162.886	   2	       8	    0.043	     185.384
   2	      16	    0.054	     296.942	   2	      16	    0.043	     372.827
   2	      32	    0.057	     561.580	   2	      32	    0.050	     642.190
   2	      64	    0.063	    1016.801	   2	      64	    0.065	     983.280
   2	     128	    0.076	    1682.981	   2	     128	    0.095	    1345.541
   2	     256	    0.103	    2485.513	   2	     256	    0.155	    1651.910
   2	     512	    0.176	    2909.869	   2	     512	    0.279	    1833.889
   2	    1024	    0.316	    3239.040	   2	    1024	    0.521	    1965.660
   2	    2048	    0.637	    3215.999	   2	    2048	    1.007	    2033.602
   2	    4096	    1.179	    3474.190	   2	    4096	    1.987	    2061.419
   2	    8192	    2.334	    3510.036	   2	    8192	    3.953	    2072.361
   2	   16384	    4.602	    3560.041	   2	   16384	    7.857	    2085.315
   2	   32768	   13.053	    2510.392	   2	   32768	   15.936	    2056.238
   2	   65536	   34.460	    1901.795	   2	   65536	   31.283	    2094.946
   2	  131072	   73.934	    1772.823	   2	  131072	   62.702	    2090.398
   2	  262144	  149.351	    1755.222	   2	  262144	  133.610	    1962.009
   2	  524288	  432.895	    1211.121	   2	  524288	  275.559	    1902.635
   2	 1048576	  888.796	    1179.771	   2	 1048576	  556.340	    1884.776
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  1923.681	   102.928	  3560.041		  1541.268	    59.705	  2094.946

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
   3	       4	    0.042	      95.325	   3	       4	    0.070	      57.260
   3	       8	    0.054	     147.817	   3	       8	    0.047	     170.327
   3	      16	    0.056	     285.570	   3	      16	    0.039	     409.200
   3	      32	    0.058	     550.073	   3	      32	    0.051	     627.186
   3	      64	    0.064	    1001.625	   3	      64	    0.066	     972.592
   3	     128	    0.078	    1636.802	   3	     128	    0.096	    1335.500
   3	     256	    0.113	    2265.278	   3	     256	    0.165	    1551.650
   3	     512	    0.177	    2890.288	   3	     512	    0.296	    1730.446
   3	    1024	    0.322	    3181.457	   3	    1024	    0.556	    1841.753
   3	    2048	    0.614	    3335.897	   3	    2048	    1.076	    1903.376
   3	    4096	    1.248	    3281.732	   3	    4096	    2.116	    1935.760
   3	    8192	    2.378	    3444.931	   3	    8192	    4.213	    1944.414
   3	   16384	    4.734	    3460.892	   3	   16384	    8.385	    1953.979
   3	   32768	   13.400	    2445.402	   3	   32768	   16.724	    1959.327
   3	   65536	   34.637	    1892.082	   3	   65536	   33.432	    1960.277
   3	  131072	   77.480	    1691.686	   3	  131072	   67.093	    1953.590
   3	  262144	  173.980	    1506.748	   3	  262144	  136.542	    1919.877
   3	  524288	  337.184	    1554.902	   3	  524288	  287.618	    1822.861
   3	 1048576	  817.251	    1283.052	   3	 1048576	  647.711	    1618.895
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  1892.187	    95.325	  3460.892		  1456.225	    57.260	  1960.277

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
   4	       4	    0.083	      48.210	   4	       4	    0.034	     117.323
   4	       8	    0.051	     156.796	   4	       8	    0.044	     182.361
   4	      16	    0.057	     279.620	   4	      16	    0.040	     399.458
   4	      32	    0.058	     552.336	   4	      32	    0.049	     651.542
   4	      64	    0.065	     983.280	   4	      64	    0.063	    1016.801
   4	     128	    0.067	    1910.573	   4	     128	    0.094	    1362.617
   4	     256	    0.105	    2440.322	   4	     256	    0.155	    1651.910
   4	     512	    0.180	    2844.349	   4	     512	    0.281	    1821.445
   4	    1024	    0.330	    3103.300	   4	    1024	    0.526	    1946.066
   4	    2048	    0.630	    3250.070	   4	    2048	    1.071	    1912.274
   4	    4096	    1.245	    3289.902	   4	    4096	    2.081	    1968.137
   4	    8192	    2.497	    3280.792	   4	    8192	    4.149	    1974.471
   4	   16384	    4.888	    3352.006	   4	   16384	    8.034	    2039.335
   4	   32768	   14.355	    2282.698	   4	   32768	   16.054	    2041.122
   4	   65536	   37.628	    1741.685	   4	   65536	   45.911	    1427.463
   4	  131072	   91.421	    1433.720	   4	  131072	   70.325	    1863.800
   4	  262144	  176.782	    1482.865	   4	  262144	  151.455	    1730.838
   4	  524288	  377.989	    1387.046	   4	  524288	  287.518	    1823.496
   4	 1048576	  864.127	    1213.451	   4	 1048576	  601.741	    1742.570
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  1843.843	    48.210	  3352.006		  1456.475	   117.323	  2041.122

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
   5	       4	    0.082	      48.771	   5	       4	    0.040	      99.864
   5	       8	    0.057	     140.395	   5	       8	    0.053	     150.468
   5	      16	    0.055	     291.778	   5	      16	    0.038	     422.068
   5	      32	    0.058	     552.336	   5	      32	    0.051	     630.130
   5	      64	    0.085	     754.032	   5	      64	    0.066	     969.081
   5	     128	    0.083	    1542.733	   5	     128	    0.103	    1242.757
   5	     256	    0.116	    2204.809	   5	     256	    0.174	    1470.879
   5	     512	    0.185	    2767.376	   5	     512	    0.315	    1625.650
   5	    1024	    0.339	    3020.371	   5	    1024	    0.593	    1726.967
   5	    2048	    0.648	    3160.388	   5	    2048	    1.191	    1719.707
   5	    4096	    1.278	    3204.602	   5	    4096	    2.262	    1810.694
   5	    8192	    2.593	    3159.516	   5	    8192	    4.507	    1817.591
   5	   16384	    5.010	    3270.176	   5	   16384	    8.975	    1825.558
   5	   32768	   14.040	    2333.904	   5	   32768	   17.914	    1829.202
   5	   65536	   33.340	    1965.688	   5	   65536	   36.246	    1808.086
   5	  131072	   73.165	    1791.459	   5	  131072	   71.988	    1820.751
   5	  262144	  165.369	    1585.208	   5	  262144	  154.613	    1695.485
   5	  524288	  323.975	    1618.297	   5	  524288	  333.541	    1571.885
   5	 1048576	  975.722	    1074.667	   5	 1048576	  914.320	    1146.837
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  1815.079	    48.771	  3270.176		  1335.982	    99.864	  1829.202

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
   6	       4	    0.108	      37.036	   6	       4	    0.040	     100.462
   6	       8	    0.136	      58.867	   6	       8	    0.055	     145.257
   6	      16	    0.057	     280.790	   6	      16	    0.040	     399.458
   6	      32	    0.059	     543.392	   6	      32	    0.051	     627.186
   6	      64	    0.066	     969.081	   6	      64	    0.066	     969.081
   6	     128	    0.080	    1597.830	   6	     128	    0.106	    1206.451
   6	     256	    0.112	    2284.557	   6	     256	    0.174	    1470.879
   6	     512	    0.191	    2681.003	   6	     512	    0.316	    1619.520
   6	    1024	    0.349	    2933.721	   6	    1024	    0.596	    1717.987
   6	    2048	    0.665	    3079.934	   6	    2048	    1.155	    1772.948
   6	    4096	    3.067	    1335.500	   6	    4096	    2.279	    1797.245
   6	    8192	    2.619	    3127.878	   6	    8192	    4.562	    1795.743
   6	   16384	    5.137	    3189.431	   6	   16384	   10.831	    1512.712
   6	   32768	   16.548	    1980.189	   6	   32768	   21.795	    1503.462
   6	   65536	   42.263	    1550.670	   6	   65536	   38.935	    1683.218
   6	  131072	   87.775	    1493.269	   6	  131072	   73.146	    1791.926
   6	  262144	  165.623	    1582.774	   6	  262144	  163.291	    1605.380
   6	  524288	  328.345	    1596.759	   6	  524288	  372.375	    1407.957
   6	 1048576	  966.395	    1085.039	   6	 1048576	  723.185	    1449.942
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  1653.038	    37.036	  3189.431		  1293.517	   100.462	  1797.245

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
   7	       4	    0.079	      50.534	   7	       4	    0.050	      80.274
   7	       8	    0.057	     140.395	   7	       8	    0.055	     145.257
   7	      16	    0.056	     285.570	   7	      16	    0.038	     419.430
   7	      32	    0.059	     541.201	   7	      32	    0.051	     627.186
   7	      64	    0.065	     983.280	   7	      64	    0.066	     969.081
   7	     128	    0.080	    1597.830	   7	     128	    0.104	    1231.355
   7	     256	    0.121	    2113.665	   7	     256	    0.183	    1398.101
   7	     512	    0.194	    2638.186	   7	     512	    0.332	    1541.625
   7	    1024	    0.356	    2876.736	   7	    1024	    0.676	    1514.980
   7	    2048	    0.681	    3006.627	   7	    2048	    1.222	    1676.085
   7	    4096	    1.343	    3049.861	   7	    4096	    2.425	    1689.103
   7	    8192	    2.644	    3098.263	   7	    8192	    4.798	    1707.316
   7	   16384	    5.262	    3113.564	   7	   16384	    9.565	    1712.934
   7	   32768	   14.529	    2255.353	   7	   32768	   19.097	    1715.885
   7	   65536	   35.618	    1839.965	   7	   65536	   38.983	    1681.138
   7	  131072	   82.084	    1596.800	   7	  131072	   80.429	    1629.659
   7	  262144	  163.769	    1600.694	   7	  262144	  173.056	    1514.792
   7	  524288	  369.750	    1417.953	   7	  524288	  422.401	    1241.209
   7	 1048576	 1355.063	     773.821	   7	 1048576	  827.127	    1267.733
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  1735.805	    50.534	  3113.564		  1250.692	    80.274	  1715.885

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
   8	       4	    0.042	      95.325	   8	       4	    0.075	      53.261
   8	       8	    0.052	     153.919	   8	       8	    0.045	     177.537
   8	      16	    0.054	     296.942	   8	      16	    0.039	     411.711
   8	      32	    0.056	     571.139	   8	      32	    0.049	     651.542
   8	      64	    0.057	    1123.161	   8	      64	    0.063	    1012.964
   8	     128	    0.073	    1754.480	   8	     128	    0.095	    1345.541
   8	     256	    0.114	    2246.322	   8	     256	    0.159	    1609.808
   8	     512	    0.198	    2587.330	   8	     512	    0.286	    1789.570
   8	    1024	    0.364	    2812.683	   8	    1024	    0.539	    1899.587
   8	    2048	    0.697	    2938.739	   8	    2048	    1.106	    1851.678
   8	    4096	    1.376	    2976.931	   8	    4096	    2.062	    1986.342
   8	    8192	    2.726	    3005.050	   8	    8192	    4.112	    1992.215
   8	   16384	    5.475	    2992.487	   8	   16384	    8.238	    1988.814
   8	   32768	   15.495	    2114.738	   8	   32768	   16.305	    2009.694
   8	   65536	   38.939	    1683.043	   8	   65536	   33.180	    1975.166
   8	  131072	   85.077	    1540.627	   8	  131072	   73.070	    1793.786
   8	  262144	  178.622	    1467.591	   8	  262144	  203.704	    1286.888
   8	  524288	  380.350	    1378.435	   8	  524288	  325.536	    1610.538
   8	 1048576	 1532.529	     684.213	   8	 1048576	 1173.707	     893.388
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  1706.482	    95.325	  3005.050		  1386.317	    53.261	  2009.694

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
   9	       4	    0.065	      61.455	   9	       4	    0.110	      36.393
   9	       8	    0.086	      92.949	   9	       8	    0.084	      95.325
   9	      16	    0.082	     195.653	   9	      16	    0.060	     266.305
   9	      32	    0.086	     372.827	   9	      32	    0.074	     432.960
   9	      64	    0.106	     603.226	   9	      64	    0.101	     633.102
   9	     128	    0.137	     935.315	   9	     128	    0.161	     795.364
   9	     256	    0.194	    1319.093	   9	     256	    0.286	     895.531
   9	     512	    0.361	    1418.417	   9	     512	    0.526	     973.474
   9	    1024	    0.564	    1815.286	   9	    1024	    0.997	    1027.258
   9	    2048	    1.077	    1901.690	   9	    2048	    1.962	    1043.861
   9	    4096	    2.116	    1935.760	   9	    4096	    3.860	    1061.141
   9	    8192	    4.684	    1748.943	   9	    8192	    7.636	    1072.803
   9	   16384	    8.370	    1957.429	   9	   16384	   15.304	    1070.564
   9	   32768	   23.168	    1414.359	   9	   32768	   30.751	    1065.592
   9	   65536	   55.319	    1184.691	   9	   65536	   63.291	    1035.470
   9	  131072	  121.022	    1083.043	   9	  131072	  146.014	     897.667
   9	  262144	  263.057	     996.529	   9	  262144	  291.697	     898.686
   9	  524288	  544.491	     962.896	   9	  524288	  587.299	     892.711
   9	 1048576	 1402.228	     747.793	   9	 1048576	 1429.417	     733.569
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  1091.966	    61.455	  1957.429		   785.672	    36.393	  1072.803

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
  10	       4	    0.105	      38.130	  10	       4	    0.059	      67.924
  10	       8	    0.086	      92.949	  10	       8	    0.083	      96.145
  10	      16	    0.083	     192.842	  10	      16	    0.059	     270.600
  10	      32	    0.087	     367.720	  10	      32	    0.076	     420.745
  10	      64	    0.108	     592.573	  10	      64	    0.102	     628.654
  10	     128	    0.127	    1007.262	  10	     128	    0.170	     752.975
  10	     256	    0.181	    1414.680	  10	     256	    0.290	     883.011
  10	     512	    0.311	    1645.581	  10	     512	    0.529	     967.771
  10	    1024	    0.572	    1790.316	  10	    1024	    1.066	     960.628
  10	    2048	    1.650	    1241.141	  10	    2048	    1.955	    1047.553
  10	    4096	    2.206	    1856.882	  10	    4096	    3.874	    1057.288
  10	    8192	    4.287	    1910.891	  10	    8192	    7.675	    1067.371
  10	   16384	    8.569	    1912.008	  10	   16384	   15.414	    1062.930
  10	   32768	   23.449	    1397.405	  10	   32768	   30.574	    1071.757
  10	   65536	   58.764	    1115.236	  10	   65536	   62.770	    1044.067
  10	  131072	  125.489	    1044.490	  10	  131072	  136.313	     961.552
  10	  262144	  181.845	    1441.580	  10	  262144	  200.107	    1310.020
  10	  524288	  452.774	    1157.947	  10	  524288	  420.063	    1248.117
  10	 1048576	 1170.650	     895.721	  10	 1048576	  919.921	    1139.854
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  1111.334	    38.130	  1912.008		   845.209	    67.924	  1310.020

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
  11	       4	    0.079	      50.686	  11	       4	    0.049	      81.443
  11	       8	    0.057	     140.395	  11	       8	    0.054	     148.471
  11	      16	    0.057	     279.620	  11	      16	    0.039	     411.711
  11	      32	    0.059	     543.392	  11	      32	    0.052	     615.678
  11	      64	    0.068	     941.879	  11	      64	    0.075	     852.176
  11	     128	    0.084	    1520.881	  11	     128	    0.117	    1093.423
  11	     256	    0.133	    1924.269	  11	     256	    0.202	    1267.700
  11	     512	    0.216	    2370.291	  11	     512	    0.371	    1380.131
  11	    1024	    0.397	    2579.560	  11	    1024	    0.703	    1456.415
  11	    2048	    0.755	    2712.325	  11	    2048	    1.424	    1438.368
  11	    4096	    1.558	    2628.901	  11	    4096	    2.733	    1498.724
  11	    8192	    2.911	    2814.065	  11	    8192	    5.391	    1519.536
  11	   16384	    5.844	    2803.618	  11	   16384	   11.170	    1466.798
  11	   32768	   18.177	    1802.714	  11	   32768	   24.650	    1329.325
  11	   65536	   41.731	    1570.444	  11	   65536	   54.228	    1208.525
  11	  131072	   92.313	    1419.864	  11	  131072	  112.020	    1170.077
  11	  262144	  205.082	    1278.240	  11	  262144	  231.958	    1130.135
  11	  524288	  372.994	    1405.621	  11	  524288	  490.544	    1068.789
  11	 1048576	 1569.861	     667.942	  11	 1048576	 1417.162	     739.913
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  1550.248	    50.686	  2814.065		  1046.176	    81.443	  1519.536

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
  12	       4	    0.065	      61.681	  12	       4	    0.079	      50.686
  12	       8	    0.086	      92.949	  12	       8	    0.084	      95.325
  12	      16	    0.083	     192.842	  12	      16	    0.058	     276.168
  12	      32	    0.087	     367.720	  12	      32	    0.077	     415.535
  12	      64	    0.100	     639.132	  12	      64	    0.111	     576.042
  12	     128	    0.127	    1007.262	  12	     128	    0.174	     735.440
  12	     256	    0.195	    1312.643	  12	     256	    0.295	     868.021
  12	     512	    0.335	    1528.458	  12	     512	    0.786	     651.345
  12	    1024	    0.662	    1547.178	  12	    1024	    1.016	    1007.972
  12	    2048	    1.160	    1765.297	  12	    2048	    2.951	     694.024
  12	    4096	    2.268	    1806.126	  12	    4096	    4.365	     938.380
  12	    8192	    4.829	    1696.442	  12	    8192	    7.971	    1027.719
  12	   16384	    9.323	    1757.397	  12	   16384	   16.783	     976.226
  12	   32768	   26.191	    1251.117	  12	   32768	   33.979	     964.362
  12	   65536	   65.279	    1003.937	  12	   65536	   80.837	     810.718
  12	  131072	  140.986	     929.681	  12	  131072	  162.005	     809.061
  12	  262144	  278.053	     942.784	  12	  262144	  317.797	     824.879
  12	  524288	  599.059	     875.186	  12	  524288	  643.090	     815.264
  12	 1048576	 1606.330	     652.777	  12	 1048576	  995.421	    1053.400
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  1022.664	    61.681	  1806.126		   715.293	    50.686	  1053.400

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
  13	       4	    0.043	      93.207	  13	       4	    0.077	      51.942
  13	       8	    0.057	     139.810	  13	       8	    0.056	     142.785
  13	      16	    0.056	     285.570	  13	      16	    0.040	     399.458
  13	      32	    0.060	     532.610	  13	      32	    0.052	     615.678
  13	      64	    0.068	     941.879	  13	      64	    0.075	     852.176
  13	     128	    0.097	    1319.093	  13	     128	    0.119	    1075.894
  13	     256	    0.136	    1883.758	  13	     256	    0.211	    1213.268
  13	     512	    0.225	    2274.877	  13	     512	    0.390	    1312.643
  13	    1024	    0.413	    2479.773	  13	    1024	    0.800	    1280.169
  13	    2048	    0.789	    2595.146	  13	    2048	    1.444	    1418.183
  13	    4096	    1.546	    2649.579	  13	    4096	    2.866	    1429.155
  13	    8192	    3.095	    2646.721	  13	    8192	    5.691	    1439.453
  13	   16384	    6.112	    2680.690	  13	   16384	   12.151	    1348.366
  13	   32768	   17.636	    1858.011	  13	   32768	   28.618	    1145.019
  13	   65536	   39.021	    1679.505	  13	   65536	   61.931	    1058.212
  13	  131072	  122.128	    1073.235	  13	  131072	  116.900	    1121.230
  13	  262144	  204.158	    1284.025	  13	  262144	  252.843	    1036.785
  13	  524288	  411.603	    1273.770	  13	  524288	  494.814	    1059.566
  13	 1048576	 1636.328	     640.810	  13	 1048576	 1534.200	     683.468
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  1491.162	    93.207	  2680.690		   983.339	    51.942	  1439.453

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
  14	       4	    0.099	      40.427	  14	       4	    0.059	      67.650
  14	       8	    0.085	      93.990	  14	       8	    0.083	      96.421
  14	      16	    0.087	     183.860	  14	      16	    0.062	     258.111
  14	      32	    0.091	     351.355	  14	      32	    0.077	     415.535
  14	      64	    0.105	     610.081	  14	      64	    0.111	     576.042
  14	     128	    0.136	     940.229	  14	     128	    0.183	     699.962
  14	     256	    0.200	    1279.788	  14	     256	    0.321	     797.136
  14	     512	    0.347	    1474.920	  14	     512	    0.586	     874.027
  14	    1024	    0.634	    1615.257	  14	    1024	    1.166	     878.316
  14	    2048	    1.210	    1692.598	  14	    2048	    2.175	     941.672
  14	    4096	    2.395	    1710.291	  14	    4096	    5.866	     698.255
  14	    8192	    4.798	    1707.401	  14	    8192	    8.873	     923.252
  14	   16384	    9.408	    1741.497	  14	   16384	   17.200	     952.559
  14	   32768	   25.395	    1290.325	  14	   32768	   36.607	     895.129
  14	   65536	   62.286	    1052.180	  14	   65536	   81.975	     799.463
  14	  131072	  131.350	     997.883	  14	  131072	  196.949	     665.512
  14	  262144	  338.609	     774.179	  14	  262144	  375.820	     697.525
  14	  524288	  567.484	     923.882	  14	  524288	  705.064	     743.603
  14	 1048576	 1573.973	     666.197	  14	 1048576	 1619.872	     647.320
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  1007.702	    40.427	  1741.497		   664.605	    67.650	   952.559

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
  15	       4	    0.080	      49.932	  15	       4	    0.048	      83.469
  15	       8	    0.057	     140.395	  15	       8	    0.055	     145.257
  15	      16	    0.121	     132.365	  15	      16	    0.040	     399.458
  15	      32	    0.059	     541.201	  15	      32	    0.051	     627.186
  15	      64	    0.068	     941.879	  15	      64	    0.091	     702.711
  15	     128	    0.093	    1376.592	  15	     128	    0.123	    1040.448
  15	     256	    0.143	    1789.570	  15	     256	    0.220	    1163.317
  15	     512	    0.233	    2198.039	  15	     512	    0.408	    1254.371
  15	    1024	    0.433	    2365.070	  15	    1024	    0.777	    1318.283
  15	    2048	    0.825	    2481.923	  15	    2048	    1.545	    1325.607
  15	    4096	    1.616	    2534.652	  15	    4096	    2.997	    1366.736
  15	    8192	    3.202	    2558.241	  15	    8192	    5.980	    1369.896
  15	   16384	    6.434	    2546.486	  15	   16384	   11.934	    1372.907
  15	   32768	   17.127	    1913.259	  15	   32768	   25.486	    1285.726
  15	   65536	   40.597	    1614.299	  15	   65536	   57.525	    1139.263
  15	  131072	   98.897	    1325.339	  15	  131072	  145.304	     902.054
  15	  262144	  244.431	    1072.466	  15	  262144	  255.128	    1027.500
  15	  524288	  461.736	    1135.471	  15	  524288	  489.405	    1071.276
  15	 1048576	 1404.463	     746.603	  15	 1048576	 2050.766	     511.309
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  1445.462	    49.932	  2558.241		   952.988	    83.469	  1372.907

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
  16	       4	    0.094	      42.582	  16	       4	    0.092	      43.464
  16	       8	    0.109	      73.423	  16	       8	    0.070	     114.520
  16	      16	    0.116	     137.801	  16	      16	    0.056	     285.570
  16	      32	    0.172	     185.897	  16	      32	    0.073	     438.620
  16	      64	    0.126	     508.400	  16	      64	    0.101	     633.102
  16	     128	    0.163	     784.899	  16	     128	    0.153	     836.248
  16	     256	    0.239	    1070.530	  16	     256	    0.253	    1012.009
  16	     512	    0.392	    1306.255	  16	     512	    0.489	    1047.042
  16	    1024	    0.692	    1479.493	  16	    1024	    0.888	    1153.011
  16	    2048	    1.312	    1560.955	  16	    2048	    1.688	    1213.268
  16	    4096	    2.503	    1636.490	  16	    4096	    3.310	    1237.475
  16	    8192	    4.995	    1640.083	  16	    8192	    6.515	    1257.447
  16	   16384	   10.376	    1579.032	  16	   16384	   12.994	    1260.885
  16	   32768	   25.901	    1265.121	  16	   32768	   30.259	    1082.921
  16	   65536	   65.375	    1002.461	  16	   65536	   71.519	     916.342
  16	  131072	  141.552	     925.964	  16	  131072	  146.769	     893.049
  16	  262144	  292.107	     897.424	  16	  262144	  302.898	     865.453
  16	  524288	  610.461	     858.839	  16	  524288	  618.427	     847.777
  16	 1048576	 1353.084	     774.953	  16	 1048576	 1520.120	     689.798
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
   933.190	    42.582	  1640.083		   833.053	    43.464	  1260.885

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
  17	       4	    0.079	      50.686	  17	       4	    0.040	      99.864
  17	       8	    0.054	     147.817	  17	       8	    0.166	      48.210
  17	      16	    0.063	     254.200	  17	      16	    0.044	     362.751
  17	      32	    0.071	     450.395	  17	      32	    0.460	      69.579
  17	      64	    0.073	     877.240	  17	      64	    0.075	     854.890
  17	     128	    0.098	    1306.255	  17	     128	    0.124	    1032.444
  17	     256	    0.148	    1729.053	  17	     256	    0.220	    1163.317
  17	     512	    0.247	    2072.861	  17	     512	    0.459	    1115.576
  17	    1024	    0.451	    2270.067	  17	    1024	    0.807	    1268.823
  17	    2048	    0.859	    2384.106	  17	    2048	    1.583	    1293.665
  17	    4096	    1.698	    2412.225	  17	    4096	    3.136	    1306.156
  17	    8192	    3.370	    2430.827	  17	    8192	    6.253	    1310.090
  17	   16384	    6.630	    2471.212	  17	   16384	   12.585	    1301.875
  17	   32768	   17.129	    1913.019	  17	   32768	   27.963	    1171.827
  17	   65536	   39.869	    1643.781	  17	   65536	   60.414	    1084.780
  17	  131072	   90.382	    1450.202	  17	  131072	  122.092	    1073.551
  17	  262144	  215.820	    1214.641	  17	  262144	  260.138	    1007.711
  17	  524288	  393.345	    1332.896	  17	  524288	  509.908	    1028.201
  17	 1048576	 1345.493	     779.325	  17	 1048576	 1979.598	     529.691
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
  1431.095	    50.686	  2471.212		   901.211	    48.210	  1310.090

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
  18	       4	    0.098	      40.820	  18	       4	    0.069	      57.852
  18	       8	    0.081	      98.690	  18	       8	    0.084	      95.325
  18	      16	    0.084	     190.650	  18	      16	    0.059	     271.696
  18	      32	    0.095	     336.385	  18	      32	    0.076	     420.745
  18	      64	    0.111	     576.042	  18	      64	    0.113	     566.320
  18	     128	    0.147	     870.131	  18	     128	    0.186	     688.296
  18	     256	    0.220	    1163.317	  18	     256	    0.340	     752.447
  18	     512	    0.378	    1354.879	  18	     512	    0.633	     808.845
  18	    1024	    0.690	    1484.094	  18	    1024	    1.271	     805.659
  18	    2048	    1.316	    1556.148	  18	    2048	    2.380	     860.456
  18	    4096	    2.601	    1574.834	  18	    4096	    4.747	     862.833
  18	    8192	    5.135	    1595.308	  18	    8192	    9.417	     869.911
  18	   16384	   10.156	    1613.209	  18	   16384	   19.189	     853.828
  18	   32768	   27.494	    1191.826	  18	   32768	   43.777	     748.521
  18	   65536	   64.277	    1019.584	  18	   65536	   90.913	     720.866
  18	  131072	  145.292	     902.128	  18	  131072	  184.628	     709.925
  18	  262144	  289.841	     904.441	  18	  262144	  375.030	     698.995
  18	  524288	  631.107	     830.743	  18	  524288	  787.607	     665.672
  18	 1048576	 1943.711	     539.471	  18	 1048576	 1868.307	     561.244
avg (B/µs)	min (B/µs)	max (B/µs)		avg (B/µs)	min (B/µs)	max (B/µs)
   939.090	    40.820	  1613.209		   632.602	    57.852	   869.911

pack:	        	         	            	unpack:	        	         	            	
bits	size (B)	time (µs)	speed (B/µs)	bits	size (B)	time (µs)	speed (B/µs)
  19	       4	    0.063	      63.550	  19	       4	    0.097	      41.222
  19	       8	    0.082	      97.542	  19	       8	    0.084	      95.325
  19	      16	    0.084	     190.650	  19	      16	    0.059	     271.696
  19	      32	    0.095	     336.385	  19	      32	    0.079	     405.492
  19	      64	    0.110	     581.029	  19	      64	    0.115	     555.767
  19	     128	    0.145	     881.561	  19	     128	    0.196	     652.334
  19	     256	    0.307	
Download .txt
gitextract_wmqt4bm2/

├── .gitattributes
├── .gitignore
├── .travis.yml
├── LICENSE
├── PackedArray.c
├── PackedArray.h
├── PackedArraySIMD.c
├── README.md
├── _gnu-make/
│   └── Makefile
├── _ios-xcode/
│   ├── .gitignore
│   ├── PackedArray-Info.plist
│   └── PackedArray.xcodeproj/
│       ├── project.pbxproj
│       └── project.xcworkspace/
│           └── contents.xcworkspacedata
├── _mac-xcode/
│   ├── .gitignore
│   └── PackedArray.xcodeproj/
│       ├── project.pbxproj
│       └── project.xcworkspace/
│           └── contents.xcworkspacedata
├── _win-vs11/
│   ├── .gitignore
│   ├── Common.props
│   ├── Debug.props
│   ├── PackedArray.sln
│   ├── PackedArraySIMDSelfBench.vcxproj
│   ├── PackedArraySIMDSelfTest.vcxproj
│   ├── PackedArraySelfBench.vcxproj
│   ├── PackedArraySelfTest.vcxproj
│   ├── Release.props
│   ├── x64.props
│   └── x86.props
└── benchmark/
    ├── PackedArraySIMDSelfBench-unrolled-galaxy-note-cortex-a9-1.4GHz.txt
    ├── PackedArraySIMDSelfBench-unrolled-ipad2-cortex-a9-1GHz.txt
    ├── PackedArraySIMDSelfBench-unrolled-iphone5-a6-1.3GHz.txt
    ├── PackedArraySIMDSelfBench-unrolled-mbp-corei7-M620-2.67GHz.txt
    ├── PackedArraySelfBench-reference-galaxy-note-cortex-a9-1.4GHz.txt
    ├── PackedArraySelfBench-reference-ipad2-cortex-a9-1GHz.txt
    ├── PackedArraySelfBench-reference-iphone5-a6-1.3GHz.txt
    ├── PackedArraySelfBench-reference-mbp-corei7-M620-2.67GHz.txt
    ├── PackedArraySelfBench-unrolled-galaxy-note-cortex-a9-1.4GHz.txt
    ├── PackedArraySelfBench-unrolled-ipad2-cortex-a9-1GHz.txt
    ├── PackedArraySelfBench-unrolled-iphone5-a6-1.3GHz.txt
    └── PackedArraySelfBench-unrolled-mbp-corei7-M620-2.67GHz.txt
Download .txt
SYMBOL INDEX (44 symbols across 3 files)

FILE: PackedArray.c
  function PackedArray (line 389) | PackedArray* PackedArray_create(uint32_t bitsPerItem, uint32_t count)
  function PackedArray_destroy (line 410) | void PackedArray_destroy(PackedArray* a)
  function PackedArray_pack (line 416) | void PackedArray_pack(PackedArray* a, const uint32_t offset, const uint3...
  function PackedArray_unpack (line 458) | void PackedArray_unpack(const PackedArray* a, const uint32_t offset, uin...
  function PackedArray_set (line 500) | void PackedArray_set(PackedArray* a, const uint32_t offset, const uint32...
  function PackedArray_get (line 538) | uint32_t PackedArray_get(const PackedArray* a, const uint32_t offset)
  function PackedArray_bufferSize (line 576) | uint32_t PackedArray_bufferSize(const PackedArray* a)
  function __PackedArray_log2 (line 584) | static uint32_t __PackedArray_log2(uint32_t v)
  function __PackedArray_highestBitSet (line 607) | static int __PackedArray_highestBitSet(uint32_t v)
  function PackedArray_computeBitsPerItem (line 619) | uint32_t PackedArray_computeBitsPerItem(const uint32_t* in, uint32_t count)
  function PackedArray_pack_reference (line 646) | static void PackedArray_pack_reference(PackedArray* a, const uint32_t of...
  function PackedArray_unpack_reference (line 717) | static void PackedArray_unpack_reference(const PackedArray* a, const uin...
  function main (line 780) | int main(void)
  function getChronometerTime (line 941) | static double getChronometerTime(void)
  function getChronometerTime (line 955) | static double getChronometerTime()
  function bench_memcpy (line 965) | static double bench_memcpy(uint32_t* in, uint32_t* out, uint32_t count)
  function bench_loopcpy (line 980) | static double bench_loopcpy(uint32_t* in, uint32_t* out, uint32_t count)
  function bench_pack (line 1000) | static double bench_pack(uint32_t* in, PackedArray* out, uint32_t count)
  function bench_unpack (line 1015) | static double bench_unpack(PackedArray* in, uint32_t* out, uint32_t count)
  function main (line 1032) | int main(void)

FILE: PackedArray.h
  type _PackedArray (line 34) | struct _PackedArray
  type PackedArray (line 49) | typedef struct _PackedArray PackedArray;

FILE: PackedArraySIMD.c
  function __PackedArray_pack_scalar (line 377) | static void __PackedArray_pack_scalar(uint32_t* buffer, const uint32_t b...
  function __PackedArray_unpack_scalar (line 418) | static void __PackedArray_unpack_scalar(const uint32_t* buffer, const ui...
  function PackedArray (line 606) | PackedArray* PackedArray_create(uint32_t bitsPerItem, uint32_t count)
  function PackedArray_destroy (line 629) | void PackedArray_destroy(PackedArray* a)
  function PackedArray_pack (line 635) | void PackedArray_pack(PackedArray* a, const uint32_t offset, const uint3...
  function PackedArray_unpack (line 677) | void PackedArray_unpack(const PackedArray* a, const uint32_t offset, uin...
  function PackedArray_set (line 719) | void PackedArray_set(PackedArray* a, const uint32_t offset, const uint32...
  function PackedArray_get (line 757) | uint32_t PackedArray_get(const PackedArray* a, const uint32_t offset)
  function PackedArray_bufferSize (line 795) | uint32_t PackedArray_bufferSize(const PackedArray* a)
  function __PackedArray_log2 (line 813) | static uint32_t __PackedArray_log2(uint32_t v)
  function __PackedArray_highestBitSet (line 836) | static int __PackedArray_highestBitSet(uint32_t v)
  function PackedArray_computeBitsPerItem (line 848) | uint32_t PackedArray_computeBitsPerItem(const uint32_t* in, uint32_t count)
  function PackedArray_pack_reference (line 875) | static void PackedArray_pack_reference(PackedArray* a, uint32_t offset, ...
  function PackedArray_unpack_reference (line 921) | static void PackedArray_unpack_reference(const PackedArray* a, uint32_t ...
  function main (line 964) | int main(void)
  function getChronometerTime (line 1125) | static double getChronometerTime(void)
  function getChronometerTime (line 1139) | static double getChronometerTime()
  function bench_memcpy (line 1149) | static double bench_memcpy(uint32_t* in, uint32_t* out, uint32_t count)
  function bench_loopcpy (line 1164) | static double bench_loopcpy(uint32_t* in, uint32_t* out, uint32_t count)
  function bench_pack (line 1184) | static double bench_pack(uint32_t* in, PackedArray* out, uint32_t count)
  function bench_unpack (line 1199) | static double bench_unpack(PackedArray* in, uint32_t* out, uint32_t count)
  function main (line 1216) | int main(void)
Condensed preview — 39 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (978K chars).
[
  {
    "path": ".gitattributes",
    "chars": 470,
    "preview": "* text=auto\n\n# sources\n*.h text diff=cpp\n*.c text diff=cpp\n*.cpp text diff=cpp\n*.rb text diff=ruby\n*.html text diff=html"
  },
  {
    "path": ".gitignore",
    "chars": 103,
    "preview": ".DS_Store\n*~\n*.swp\n\n/bin\nPackedArray.pp.c\nPackedArray.cut.c\nPackedArraySIMD.pp.c\nPackedArraySIMD.cut.c\n"
  },
  {
    "path": ".travis.yml",
    "chars": 176,
    "preview": "language: cpp\ncompiler:\n  - clang\n  - gcc\nenv:\n  - TARGET=build\n  - TARGET=test\n  - TARGET=preprocess\n  - TARGET=cut\n  -"
  },
  {
    "path": "LICENSE",
    "chars": 561,
    "preview": "        DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE \n                    Version 2, December 2004 \n\n Copyright (C) 2004 "
  },
  {
    "path": "PackedArray.c",
    "chars": 37878,
    "preview": "// see README.md for usage instructions.\n// (‑●‑●)> released under the WTFPL v2 license, by Gregory Pakosz (@gpakosz)\n\n#"
  },
  {
    "path": "PackedArray.h",
    "chars": 2082,
    "preview": "#ifndef PACKEDARRAY_H\n#define PACKEDARRAY_H\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n\n#include <stdint.h>\n\n/*\n\nPackedArra"
  },
  {
    "path": "PackedArraySIMD.c",
    "chars": 45622,
    "preview": "// see README.md for usage instructions.\n// (‑●‑●)> released under the WTFPL v2 license, by Gregory Pakosz (@gpakosz)\n\n#"
  },
  {
    "path": "README.md",
    "chars": 9358,
    "preview": "# PackedArray: random access array of tightly packed unsigned integers\n[![Build Status](https://travis-ci.org/gpakosz/Pa"
  },
  {
    "path": "_gnu-make/Makefile",
    "chars": 2849,
    "preview": ".PHONY: build test preprocess cut assembly clean\n\n# directories\nifeq ($(realpath .),)\n  $(error your version of Make doe"
  },
  {
    "path": "_ios-xcode/.gitignore",
    "chars": 26,
    "preview": "xcuserdata/\nxcshareddata/\n"
  },
  {
    "path": "_ios-xcode/PackedArray-Info.plist",
    "chars": 1457,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/P"
  },
  {
    "path": "_ios-xcode/PackedArray.xcodeproj/project.pbxproj",
    "chars": 15240,
    "preview": "// !$*UTF8*$!\n{\n\tarchiveVersion = 1;\n\tclasses = {\n\t};\n\tobjectVersion = 46;\n\tobjects = {\n\n/* Begin PBXBuildFile section *"
  },
  {
    "path": "_ios-xcode/PackedArray.xcodeproj/project.xcworkspace/contents.xcworkspacedata",
    "chars": 156,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<Workspace\n   version = \"1.0\">\n   <FileRef\n      location = \"self:PackedArray.xco"
  },
  {
    "path": "_mac-xcode/.gitignore",
    "chars": 26,
    "preview": "xcuserdata/\nxcshareddata/\n"
  },
  {
    "path": "_mac-xcode/PackedArray.xcodeproj/project.pbxproj",
    "chars": 15082,
    "preview": "// !$*UTF8*$!\n{\n\tarchiveVersion = 1;\n\tclasses = {\n\t};\n\tobjectVersion = 46;\n\tobjects = {\n\n/* Begin PBXBuildFile section *"
  },
  {
    "path": "_mac-xcode/PackedArray.xcodeproj/project.xcworkspace/contents.xcworkspacedata",
    "chars": 156,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<Workspace\n   version = \"1.0\">\n   <FileRef\n      location = \"self:PackedArray.xco"
  },
  {
    "path": "_win-vs11/.gitignore",
    "chars": 59,
    "preview": "/build\n/ipch\n\n*.suo\n*.sdf\n*.opensdf\n*.user\n*.sln.docstates\n"
  },
  {
    "path": "_win-vs11/Common.props",
    "chars": 1855,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n<Project DefaultTargets=\"Build\" ToolsVersion=\"4.0\" xmlns=\"http://schemas.micros"
  },
  {
    "path": "_win-vs11/Debug.props",
    "chars": 825,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n<Project DefaultTargets=\"Build\" ToolsVersion=\"4.0\" xmlns=\"http://schemas.micros"
  },
  {
    "path": "_win-vs11/PackedArray.sln",
    "chars": 3632,
    "preview": "\r\nMicrosoft Visual Studio Solution File, Format Version 12.00\r\n# Visual Studio Express 2012 for Windows Desktop\r\nProjec"
  },
  {
    "path": "_win-vs11/PackedArraySIMDSelfBench.vcxproj",
    "chars": 5775,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n<Project DefaultTargets=\"Build\" ToolsVersion=\"4.0\" xmlns=\"http://schemas.micros"
  },
  {
    "path": "_win-vs11/PackedArraySIMDSelfTest.vcxproj",
    "chars": 5745,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n<Project DefaultTargets=\"Build\" ToolsVersion=\"4.0\" xmlns=\"http://schemas.micros"
  },
  {
    "path": "_win-vs11/PackedArraySelfBench.vcxproj",
    "chars": 5767,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n<Project DefaultTargets=\"Build\" ToolsVersion=\"4.0\" xmlns=\"http://schemas.micros"
  },
  {
    "path": "_win-vs11/PackedArraySelfTest.vcxproj",
    "chars": 5737,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n<Project DefaultTargets=\"Build\" ToolsVersion=\"4.0\" xmlns=\"http://schemas.micros"
  },
  {
    "path": "_win-vs11/Release.props",
    "chars": 1630,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n<Project DefaultTargets=\"Build\" ToolsVersion=\"4.0\" xmlns=\"http://schemas.micros"
  },
  {
    "path": "_win-vs11/x64.props",
    "chars": 484,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n<Project DefaultTargets=\"Build\" ToolsVersion=\"4.0\" xmlns=\"http://schemas.micros"
  },
  {
    "path": "_win-vs11/x86.props",
    "chars": 610,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\r\n<Project DefaultTargets=\"Build\" ToolsVersion=\"4.0\" xmlns=\"http://schemas.micros"
  },
  {
    "path": "benchmark/PackedArraySIMDSelfBench-unrolled-galaxy-note-cortex-a9-1.4GHz.txt",
    "chars": 59988,
    "preview": "-- PackedArray self bench ------------------------------------------------------\nmemcpy:\nbits\tsize (B)\ttime (µs)\tspeed ("
  },
  {
    "path": "benchmark/PackedArraySIMDSelfBench-unrolled-ipad2-cortex-a9-1GHz.txt",
    "chars": 60085,
    "preview": "-- PackedArray self bench ------------------------------------------------------\nmemcpy:\nbits\tsize (B)\ttime (µs)\tspeed ("
  },
  {
    "path": "benchmark/PackedArraySIMDSelfBench-unrolled-iphone5-a6-1.3GHz.txt",
    "chars": 59989,
    "preview": "-- PackedArray self bench ------------------------------------------------------\nmemcpy:\nbits\tsize (B)\ttime (µs)\tspeed ("
  },
  {
    "path": "benchmark/PackedArraySIMDSelfBench-unrolled-mbp-corei7-M620-2.67GHz.txt",
    "chars": 59987,
    "preview": "-- PackedArray self bench ------------------------------------------------------\nmemcpy:\nbits\tsize (B)\ttime (µs)\tspeed ("
  },
  {
    "path": "benchmark/PackedArraySelfBench-reference-galaxy-note-cortex-a9-1.4GHz.txt",
    "chars": 59988,
    "preview": "-- PackedArray self bench ------------------------------------------------------\nmemcpy:\nbits\tsize (B)\ttime (µs)\tspeed ("
  },
  {
    "path": "benchmark/PackedArraySelfBench-reference-ipad2-cortex-a9-1GHz.txt",
    "chars": 59989,
    "preview": "-- PackedArray self bench ------------------------------------------------------\nmemcpy:\nbits\tsize (B)\ttime (µs)\tspeed ("
  },
  {
    "path": "benchmark/PackedArraySelfBench-reference-iphone5-a6-1.3GHz.txt",
    "chars": 59989,
    "preview": "-- PackedArray self bench ------------------------------------------------------\nmemcpy:\nbits\tsize (B)\ttime (µs)\tspeed ("
  },
  {
    "path": "benchmark/PackedArraySelfBench-reference-mbp-corei7-M620-2.67GHz.txt",
    "chars": 59989,
    "preview": "-- PackedArray self bench ------------------------------------------------------\nmemcpy:\nbits\tsize (B)\ttime (µs)\tspeed ("
  },
  {
    "path": "benchmark/PackedArraySelfBench-unrolled-galaxy-note-cortex-a9-1.4GHz.txt",
    "chars": 59988,
    "preview": "-- PackedArray self bench ------------------------------------------------------\nmemcpy:\nbits\tsize (B)\ttime (µs)\tspeed ("
  },
  {
    "path": "benchmark/PackedArraySelfBench-unrolled-ipad2-cortex-a9-1GHz.txt",
    "chars": 59989,
    "preview": "-- PackedArray self bench ------------------------------------------------------\nmemcpy:\nbits\tsize (B)\ttime (µs)\tspeed ("
  },
  {
    "path": "benchmark/PackedArraySelfBench-unrolled-iphone5-a6-1.3GHz.txt",
    "chars": 59987,
    "preview": "-- PackedArray self bench ------------------------------------------------------\nmemcpy:\nbits\tsize (B)\ttime (µs)\tspeed ("
  },
  {
    "path": "benchmark/PackedArraySelfBench-unrolled-mbp-corei7-M620-2.67GHz.txt",
    "chars": 59989,
    "preview": "-- PackedArray self bench ------------------------------------------------------\nmemcpy:\nbits\tsize (B)\ttime (µs)\tspeed ("
  }
]

About this extraction

This page contains the full source code of the gpakosz/PackedArray GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 39 files (862.6 KB), approximately 390.9k tokens, and a symbol index with 44 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!