[
  {
    "path": ".gitignore",
    "content": "# git-ls-files --others --exclude-from=.git/info/exclude\n# Lines that start with '#' are comments.\n# For a project mostly in C, the following would be a good set of\n# exclude patterns (uncomment them if you want to use them):\n# *.[oa]\n*~\n.DS_Store\n\n*.beam\n# Emacs Tag files\nTAGS\n\n# c_src\n/c_src/*.o\n/c_src/*.exp\n/c_src/*.lib\n/c_src/*.pdb\n# Derivates\n/_build/*\n/priv/*\nrebar.lock"
  },
  {
    "path": "COPYRIGHT",
    "content": "Copyright (C) 2007 - 2012, Rogvall Invest AB, <tony@rogvall.se>\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of \nthis software and associated documentation files (the \"Software\"), to deal in \nthe Software without restriction, including without limitation the rights to \nuse, copy, modify, merge, publish, distribute, sublicense, and/or sell copies \nof the Software, and to permit persons to whom the Software is furnished to do \nso, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all \ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR \nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, \nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE \nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER \nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, \nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN \nTHE SOFTWARE.\n\nExcept as contained in this notice, the name(s) of the above copyright holders \nshall not be used in advertising or otherwise to promote the sale, use or other \ndealings in this Software without prior written authorization.\n\n"
  },
  {
    "path": "Makefile",
    "content": "#@BEGIN-DIR-DEFAULT-RULES@\nall:\n\t@if [ -d \"src\" -a -f \"src/Makefile\" ]; then (cd src && $(MAKE) all); fi\n\t@if [ -d \"c_src\" -a -f \"c_src/Makefile\" ]; then (cd c_src && $(MAKE) all); fi\n\t@if [ -d \"test\" -a -f \"test/Makefile\" ]; then (cd test && $(MAKE) all); fi\n\nclean:\n\t@if [ -d \"src\" -a -f \"src/Makefile\" ]; then (cd src && $(MAKE) clean); fi\n\t@if [ -d \"c_src\" -a -f \"c_src/Makefile\" ]; then (cd c_src && $(MAKE) clean); fi\n\t@if [ -d \"test\" -a -f \"test/Makefile\" ]; then (cd test && $(MAKE) clean); fi\n#@END-DIR-DEFAULT-RULES@\n"
  },
  {
    "path": "README",
    "content": "Welcome to the Erlang OpenCL binding\n\nTo get started you need erlang, preferably R16B or later.\nYou also need 'rebar3' and a 'C' compiler, i.e.\nGCC (or CL.EXE) and a machine with OpenCL installed.\n\nTo build and test: rebar3 do compile, edoc, ct\n\nTo build examples: Go to the examples directory and run make.\n\nWindows Users:\n--------------\nThe build looks for the OpenCL files in /opt/local/ by default.\nYou can also set the environment variable OPENCL_DIR to point\nto another location.\n\nThis is an example of how to set up the build environment:\nI'm assuming you have a MinGW environment.\n\nDownload a development kit from Nvidia, ATI or Intel:\ne.g. http://software.intel.com/en-us/vcsource/tools/opencl-sdk-2013\ncp -R /c/Intel SDK/lib /opt/local/\ncp -R /c/Intel SDK/include /opt/local/\n\nExample building 64b\n------------------------------\nSet up the Windows build environment\n\nc:\\src\\cl> \"c:\\Program Files\\Microsoft SDKs\\Windows\\v7.1\\Bin\\SetEnv.cmd\" x64\n\nbefore starting msys (with inherited env)\n\ncl> export PATH=\"/c/Program Files/erl5.10.1/bin\";%PATH%\ncl> export PATH=\"/c/tools/git/cmd\";%PATH%\ncl> export OPENCL_DIR=\"c:\\Intel~1\\\"\n\nAnd build\n\ncl> ../rebar/rebar.cmd compile\n=======\nIf you want to force build with mingw gcc use:\nNOCL=true rebar compile\n\n"
  },
  {
    "path": "c_src/Makefile",
    "content": "#\n# Copyright (C) 2016, Rogvall Invest AB, <tony@rogvall.se>\n#\n# This software is licensed as described in the file COPYRIGHT, which\n# you should have received as part of this distribution. The terms\n# are also available at http://www.rogvall.se/docs/copyright.txt.\n#\n# You may opt to use, copy, modify, merge, publish, distribute and/or sell\n# copies of the Software, and permit persons to whom the Software is\n# furnished to do so, under the terms of the COPYRIGHT file.\n#\n# This software is distributed on an \"AS IS\" basis, WITHOUT WARRANTY OF ANY\n# KIND, either express or implied.\n#\n\n## us NOCL=true make to force build with gcc on windows\n\nOSNAME := $(shell uname -s)\nMACHINE := $(shell uname -m)\nOUT_C = -o\nOUT_L = -o\nOBJ = o\n\n\nMAC_OS_X  = No\nWIN32_GCC = No\nWIN32_CL  = No\nLINUX     = No\n\n\nERL       = erl\nEXT       = so\nPRIVDIR=../priv\n\n\nifneq (, $(findstring MINGW,$(OSNAME)))\nMINGW = Yes\nERL   = erl.exe\nendif\n\nifeq ($(WSLcross), true)\n# cross building from Windows WSL linux to native windows\nERL   = erl.exe\nendif\n\nWORDSIZE = $(shell $(ERL) -noshell -eval \"io:format([126,119,126,110],[erlang:system_info(wordsize)*8])\" -s erlang halt)\n\n# uncomment to enable use of dirty scheduler\n# CFLAGS += -DUSE_DIRTY_SCHEDULER\n\nifeq ($(OSNAME)$(WSLcross), Linux)\nLINUX = Yes\nCFLAGS += -I/usr/include/nvidia-current\nCFLAGS += -I/opt/AMDAPP/include\nifeq ($(WORDSIZE), 32)\nCFLAGS += -O3 -fPIC -m32\nendif\nifeq ($(WORDSIZE), 64)\nCFLAGS += -O3 -fPIC -m64\nendif\nLD_SHARED\t:= $(CC) -shared\nLDFLAGS\t        += -lOpenCL\nendif\n\nifeq ($(OSNAME), Darwin)\nMAC_OS_X = Yes\nifeq ($(WORDSIZE), 32)\nCFLAGS += -O3 -fPIC -m32 -DDARWIN -no-cpp-precomp\nLD_SHARED\t:= $(CC) -m32 -bundle -flat_namespace -undefined suppress\nendif\nifeq ($(WORDSIZE), 64)\nCFLAGS += -O3 -fPIC -m64 -DDARWIN -no-cpp-precomp\nLD_SHARED\t:= $(CC) -m64 -bundle -flat_namespace -undefined suppress\nendif\nLDFLAGS += -framework OpenCL\nendif\n\nERLDIR := 
$(shell $(ERL) -noshell -eval \"io:format([126,115,126,110],[code:root_dir()])\" -s erlang halt)\nERL_C_INCLUDE_DIR := \"$(ERLDIR)/usr/include\"\n\nifeq ($(TYPE), debug)\nCFLAGS += -Ddebug -DDEBUG -g -Wall -Wextra -Wswitch-default -Wswitch-enum -D_THREAD_SAFE\nCFLAGS += -D_REENTRANT -fno-common -I$(ERL_C_INCLUDE_DIR)\nWIN_DEBUG = -Ddebug -DDEBUG\nendif\n\nifeq ($(TYPE), release)\nCFLAGS += -Wall -Wextra -Wswitch-default -Wswitch-enum -D_THREAD_SAFE -D_REENTRANT -fno-common\nCFLAGS += -Wno-deprecated-declarations -Wno-missing-field-initializers -I$(ERL_C_INCLUDE_DIR)\nendif\n\n############### WINDOWS\n\nifeq ($(MINGW), Yes)\n  EXT = dll\n  ifeq ($(NOCL), true)\n    WIN32_GCC=Yes\n  else\n    ifneq ($(findstring Microsoft,$(shell cl.exe 2>&1)), )\n      WIN32_CL=Yes\n      MS2C = MSYS2_ARG_CONV_EXCL=*\n    else\n      WIN32_GCC=Yes\n    endif\n  endif\nendif\n\nifeq ($(WSLcross), true)\n  # Only support MCL for WSLcross for now\n  WIN32_CL=Yes\n  EXT = dll\nendif\n\nifeq ($(WIN32_CL), Yes)\n## Use Microsoft CL\n  CC=cl.exe\n  OUT_C = /Fo\n  ifeq ($(OPENCL_DIR), )\n    OPENCL_DIR = c:/msys64/opt/local/\n  endif\n  CFLAGS = /FS /Zi /nologo /W1 -DWIN32 -D__WIN32__ $(WIN_DEBUG)\n  CFLAGS += /I$(OPENCL_DIR)/include /I$(ERL_C_INCLUDE_DIR)\n  LD_SHARED=link.exe /DLL\n  OUT_L=/OUT:\n  ifeq ($(WORDSIZE), 32)\n    LDFLAGS += /NOLOGO $(OPENCL_DIR)/lib/x86/OpenCL.lib\n  else\n    CFLAGS  += -DWIN_X64\n    LDFLAGS += /NOLOGO $(OPENCL_DIR)/lib/x64/OpenCL.lib\n  endif\nendif\n\nifeq ($(WIN32_GCC), Yes)\n  #CC=gcc\n  CFLAGS += -D__WIN32__ $(WIN_DEBUG)\n  ifeq ($(OPENCL_DIR), )\n    OPENCL_DIR = /opt/local/\n  endif\n\n  ifeq ($(WORDSIZE), 32)\n    CFLAGS += -shared -I$(OPENCL_DIR)/include -m32 -DWIN32\n    LDFLAGS += -L$(OPENCL_DIR)/lib/x86 -lOpenCL\n  endif\n  ifeq ($(WORDSIZE), 64)\n    CFLAGS += -shared -I$(OPENCL_DIR)/include -m64 -DWIN32\n    CFLAGS += -DWIN_X64\n    LDFLAGS += -L$(OPENCL_DIR)/lib/x64 -lOpenCL\n  endif\n\n  LD_SHARED\t:= $(CC) -shared\n## Optimizations is 
broken on mingw 4.4.0 (it crashes with it on)\n  GCC_VERSION = $(shell gcc -dumpversion)\n  ifneq ($(GCC_VERSION), 4.4.0)\n    CFLAGS += -O3\n  endif\nendif\n############### WINDOWS end\n\nCL_NIF = $(PRIVDIR)/cl_nif.$(EXT)\n\nCL_NIF_OBJS = \\\n\tcl_nif.$(OBJ) \\\n\tcl_hash.$(OBJ)\n\nCL_NIF_SRC = \\\n\tcl_nif.c \\\n\tcl_hash.c\n\nall:\n\t$(MAKE) nif TYPE=release\n\ndebug:\n\t$(MAKE) nif TYPE=debug\n\nclean:\n\trm -f $(CL_NIF_OBJS)\n\trm -f $(CL_NIF)\n\nrelease:\n\t$(MAKE) nif TYPE=release\n\nnif: $(CL_NIF)\n\ncl_nif.$(OBJ): cl_hash.h\n\nclean_internal:\n\t-rm -f *.$(OBJ)\n\t-rm -f $(PRIVDIR)/*.$(EXT)\n\n%.$(OBJ): %.c\n\t$(MS2C) $(CC) -c $(OUT_C)$@ $(CFLAGS) $<\n\n$(CL_NIF): $(OCL_LIB) $(CL_NIF_OBJS)\n\t@mkdir -p $(PRIVDIR)\n\t$(MS2C) $(LD_SHARED) $(OUT_L)$@ $(CL_NIF_OBJS) $(LDFLAGS)\n\n"
  },
  {
    "path": "c_src/cl_hash.c",
    "content": "/****** BEGIN COPYRIGHT *******************************************************\n *\n * Copyright (C) 2007 - 2012, Rogvall Invest AB, <tony@rogvall.se>\n *\n * This software is licensed as described in the file COPYRIGHT, which\n * you should have received as part of this distribution. The terms\n * are also available at http://www.rogvall.se/docs/copyright.txt.\n *\n * You may opt to use, copy, modify, merge, publish, distribute and/or sell\n * copies of the Software, and permit persons to whom the Software is\n * furnished to do so, under the terms of the COPYRIGHT file.\n *\n * This software is distributed on an \"AS IS\" basis, WITHOUT WARRANTY OF ANY\n * KIND, either express or implied.\n *\n ****** END COPYRIGHT ********************************************************/\n/*\n** Linear hash \n*/\n#include <stdio.h>\n#include <stdlib.h>\n#include <memory.h>\n\n#include \"cl_hash.h\"\n\n#define LHASH_SZEXP   8\n#define LHASH_SEGSZ   (1 << LHASH_SZEXP)\n#define LHASH_SZMASK  ((1 << LHASH_SZEXP)-1)\n\n#define LHASH_SEG(i)  ((i)>>LHASH_SZEXP)\n#define LHASH_POS(i)  ((i)&LHASH_SZMASK)\n\n#define LHASH_SEG_LEN         256   /* When growing init segs */\n#define LHASH_SEG_INCREAMENT  128   /* Number of segments to grow */\n\n#define LHASH_BUCKET(lh, i) (lh)->seg[LHASH_SEG(i)][LHASH_POS(i)]\n\n#define LHASH_IX(lh, hval) \\\n    (((((hval) & (lh)->szm)) < (lh)->p) ? 
\\\n       ((hval) & (((lh)->szm << 1) | 1)) : \\\n       (((hval) & (lh)->szm)))\n\n#ifndef WIN32\n#define INLINE inline\n#else\n#define INLINE\n#endif\n\nstatic lhash_bucket_t** lhash_alloc_seg(int seg_sz)\n{\n    lhash_bucket_t** bp;\n    int sz = sizeof(lhash_bucket_t*)*seg_sz;\n\n    bp = (lhash_bucket_t**) malloc(sz);\n    memset(bp, 0, sz);\n    return bp;\n}\n\nINLINE static lhash_bucket_t** lhash_HLOOKUP(lhash_t* lh,\n\t\t\t\t\t     lhash_value_t hval,\n\t\t\t\t\t     void* key)\n{\n    int ix = LHASH_IX(lh, hval);\n    lhash_bucket_t** bpp = &LHASH_BUCKET(lh, ix);\n    lhash_bucket_t* b = *bpp;\n\n    while(b != (lhash_bucket_t*) 0) {\n\tif ((b->hvalue == hval) && (lh->func.cmp(key, (void*) b) == 0))\n\t    return bpp;\n\tbpp = &b->next;\n\tb = b->next;\n    }\n    return bpp;\n}\n\n/* scan bucket for key return bucket */\nINLINE static lhash_bucket_t** lhash_LOOKUP(lhash_t* lh, void* key)\n{\n    return lhash_HLOOKUP(lh, lh->func.hash(key), key);\n}\n\n\nlhash_t* lhash_init(lhash_t* lh, char* name, int thres, lhash_func_t* func)\n{\n    lhash_bucket_t*** bp;\n\n    if (!(bp = (lhash_bucket_t***) malloc(sizeof(lhash_bucket_t**))))\n\treturn 0;\n    lh->func    = *func;\n    lh->is_allocated = 0;\n    lh->name = name;\n    lh->thres = thres;\n    lh->szm = LHASH_SZMASK;\n    lh->nactive = LHASH_SEGSZ;\n    lh->nitems = 0;\n    lh->p = 0;\n    lh->nsegs = 1;\n    lh->seg = bp;\n    lh->seg[0] = lhash_alloc_seg(LHASH_SEGSZ);\n    lh->nslots = LHASH_SEGSZ;\n    lh->n_seg_alloc = 1;\n    lh->n_seg_free  = 0;\n    lh->n_resize    = 0;\n    return lh;\n}\n\n\nstatic void lhash_grow(lhash_t* lh)\n{\n    lhash_bucket_t** bp;\n    lhash_bucket_t** bps;\n    lhash_bucket_t* b;\n    unsigned int ix;\n    unsigned int nszm = (lh->szm << 1) | 1;\n\n    if (lh->nactive >= lh->nslots) {\n\t/* Time to get a new array */\n\tif (LHASH_POS(lh->nactive) == 0) {\n\t    unsigned int six = LHASH_SEG(lh->nactive);\n\t    if (six == lh->nsegs) {\n\t\tint i, sz;\n\n\t\tif 
(lh->nsegs == 1)\n\t\t    sz = LHASH_SEG_LEN;\n\t\telse\n\t\t    sz = lh->nsegs + LHASH_SEG_INCREAMENT;\n\t\tlh->seg = (lhash_bucket_t***) realloc(lh->seg,\n\t\t\t\t\t\t      sizeof(lhash_bucket_t**)*sz);\n\t\tlh->nsegs = sz;\n\t\tlh->n_resize++;\n\t\tfor (i = six+1; i < sz; i++)\n\t\t    lh->seg[i] = 0;\n\t    }\n\t    lh->seg[six] = lhash_alloc_seg(LHASH_SEGSZ);\n\t    lh->nslots += LHASH_SEGSZ;\n\t    lh->n_seg_alloc++;\n\t}\n    }\n\n    ix = lh->p;\n    bp = &LHASH_BUCKET(lh, ix);\n    ix += (lh->szm+1);\n    bps = &LHASH_BUCKET(lh, ix);\n    b = *bp;\n\n    while (b != 0) {\n\tix = b->hvalue & nszm;\n\n\tif (ix == lh->p)\n\t    bp = &b->next;          /* object stay */\n\telse {\n\t    *bp = b->next;  \t    /* unlink */\n\t    b->next = *bps;         /* link */\n\t    *bps = b;\n\t}\n\tb = *bp;\n    }\n\n    lh->nactive++;\n    if (lh->p == lh->szm) {\n\tlh->p = 0;\n\tlh->szm = nszm;\n    }\n    else\n\tlh->p++;\n}\n\n/*\n** Shrink the hash table\n** Remove segments if they are empty\n** but do not reallocate the segment index table !!!\n*/\nstatic void lhash_shrink(lhash_t* lh)\n{\n    lhash_bucket_t** bp;\n\n    if (lh->nactive == LHASH_SEGSZ)\n\treturn;\n\n    lh->nactive--;\n    if (lh->p == 0) {\n\tlh->szm >>= 1;\n\tlh->p = lh->szm;\n    }\n    else\n\tlh->p--;\n\n    bp = &LHASH_BUCKET(lh, lh->p);\n    while(*bp != 0) \n\tbp = &(*bp)->next;\n\n    *bp = LHASH_BUCKET(lh, lh->nactive);\n    LHASH_BUCKET(lh, lh->nactive) = 0;\n\n    if ((lh->nactive & LHASH_SZMASK) == LHASH_SZMASK) {\n\tint six = LHASH_SEG(lh->nactive)+1;\n\n\tfree(lh->seg[six]);\n\tlh->seg[six] = 0;\n\tlh->nslots -= LHASH_SEGSZ;\n\tlh->n_seg_free++;\n    }\n}\n\nlhash_t* lhash_new(char* name, int thres, lhash_func_t* func)\n{\n    lhash_t* tp;\n\n    if (!(tp = (lhash_t*) malloc(sizeof(lhash_t))))\n\treturn 0;\n    \n    if (!lhash_init(tp, name, thres, func)) {\n\tfree(tp);\n\treturn 0;\n    }\n    tp->is_allocated = 1;\n    return tp;\n}\n\n\nvoid lhash_delete(lhash_t* lh)\n{\n    
lhash_bucket_t*** sp = lh->seg;\n    int n = lh->nsegs;\n\n    while(n--) {\n\tlhash_bucket_t** bp = *sp;\n\tif (bp != 0) {\n\t    int m = LHASH_SEGSZ;\n\t    while(m--) {\n\t\tlhash_bucket_t* p = *bp++;\n\t\twhile(p != 0) {\n\t\t    lhash_bucket_t* next = p->next;\n\t\t    if (lh->func.release)\n\t\t\tlh->func.release((void*) p);\n\t\t    p = next;\n\t\t}\n\t    }\n\t    free(*sp);\n\t}\n\tsp++;\n    }\n    free(lh->seg);\n\n    if (lh->is_allocated)\n\tfree(lh);\n}\n\nvoid* lhash_insert_new(lhash_t* lh, void* key, void* data)\n{\n    lhash_value_t hval = lh->func.hash(key);\n    lhash_bucket_t** bpp = lhash_HLOOKUP(lh, hval, key);\n    lhash_bucket_t* b = *bpp;\n\n    if (b) {\n\t/* release data if copy function is not defined */\n\tif (!lh->func.copy) {\n\t    if (lh->func.release) lh->func.release(data);\n\t}\n\treturn 0;\n    }\n    b = (lhash_bucket_t*) (lh->func.copy ? lh->func.copy(data) : data);\n    b->hvalue = hval;\n    b->next = *bpp;\n    *bpp = b;\n    lh->nitems++;\n\n    if ((lh->nitems / lh->nactive) >= lh->thres)\n\tlhash_grow(lh);\n    return (void*) b;\n}\n\nvoid* lhash_insert(lhash_t* lh, void* key, void* data)\n{\n    lhash_value_t hval = lh->func.hash(key);\n    lhash_bucket_t** bpp = lhash_HLOOKUP(lh, hval, key);\n    lhash_bucket_t* b = *bpp;\n\n    if (b) {\n\tlhash_bucket_t* b_next = b->next;\n\tif (lh->func.release) lh->func.release(b);\n\tb = (lhash_bucket_t*) (lh->func.copy ? lh->func.copy(data) : data);\n\tb->hvalue = hval;\n\tb->next = b_next;\n\t*bpp = b;\n    }\n    else {\n\tb = (lhash_bucket_t*) (lh->func.copy ? 
lh->func.copy(data) : data);\n\tb->hvalue = hval;\n\tb->next   = 0;\n\t*bpp = b;\n\tlh->nitems++;\n\n\tif ((lh->nitems / lh->nactive) >= lh->thres)\n\t    lhash_grow(lh);\n    }\n    return (void*) b;\n\n}\n\n\nvoid* lhash_lookup(lhash_t* lh, void* key)\n{\n    lhash_bucket_t** bpp = lhash_LOOKUP(lh, key);\n    return *bpp;\n}\n\n/*\n** Erase an item\n*/\nvoid* lhash_erase(lhash_t* lh, void* key)\n{\n    lhash_bucket_t** bpp = lhash_LOOKUP(lh, key);\n    lhash_bucket_t* b = *bpp;\n\n    if (b) {\n\t*bpp = b->next;  /* unlink */\n\tif (lh->func.release) lh->func.release((void*) b);\n\tlh->nitems--;\n\tif ((lh->nitems / lh->nactive) < lh->thres)\n\t    lhash_shrink(lh);\n    }\n    return (void*)b;\n}\n\nvoid lhash_each(lhash_t* lh, void (elem)(lhash_t* lh, void* elem, void* arg),\n\t       void* arg)\n{\n    int i;\n    int nslots = lh->nslots;\n\n    for (i = 0; i < nslots; i++) {\n\tlhash_bucket_t* list = LHASH_BUCKET(lh, i);\n\twhile(list) {\n\t    lhash_bucket_t* next = list->next;\n\t    elem(lh, (void*) list, arg);\n\t    list = next;\n\t}\n    }\n}\n\n\nvoid lhash_info(lhash_t* lh)\n{\n    unsigned int i;\n    int depth = 0;\n\n    for (i = 0; i < lh->nslots; i++) {\n\tlhash_bucket_t* list = LHASH_BUCKET(lh, i);\n\tint d = 0;\n\n\twhile(list) {\n \t    list = list->next;\n\t    d++;\n\t}\n\tif (d > depth)\n\t    depth = d;\n    }\n    printf(\"  Name: %s\\r\\n\", lh->name);\n    printf(\"  Size: %d\\r\\n\", lh->szm+1);\n    printf(\"Active: %d\\r\\n\", lh->nactive);    \n    printf(\" Split: %d\\r\\n\", lh->p);\n    printf(\" Items: %d\\r\\n\", lh->nitems);\n    printf(\" Slots: %d\\r\\n\", lh->nslots);\n    printf(\"  Segs: %d\\r\\n\", lh->nsegs);\n    printf(\" Thres: %d\\r\\n\", lh->thres);\n    printf(\" Ratio: %e\\r\\n\", (float) lh->nitems / (float) lh->nactive);\n    printf(\"   Max: %d\\r\\n\", depth);\n    printf(\"Resize: %d\\r\\n\", lh->n_resize);\n    printf(\" Alloc: %d\\r\\n\", lh->n_seg_alloc);\n    printf(\"  Free: %d\\r\\n\", 
lh->n_seg_free);\n}\n"
  },
  {
    "path": "c_src/cl_hash.h",
    "content": "/****** BEGIN COPYRIGHT *******************************************************\n *\n * Copyright (C) 2007 - 2012, Rogvall Invest AB, <tony@rogvall.se>\n *\n * This software is licensed as described in the file COPYRIGHT, which\n * you should have received as part of this distribution. The terms\n * are also available at http://www.rogvall.se/docs/copyright.txt.\n *\n * You may opt to use, copy, modify, merge, publish, distribute and/or sell\n * copies of the Software, and permit persons to whom the Software is\n * furnished to do so, under the terms of the COPYRIGHT file.\n *\n * This software is distributed on an \"AS IS\" basis, WITHOUT WARRANTY OF ANY\n * KIND, either express or implied.\n *\n ****** END COPYRIGHT ********************************************************/\n#ifndef __ECL_HASH_H__\n#define __ECL_HASH_H__\n\n#include <stdint.h>\n\ntypedef uintptr_t lhash_value_t;\n\ntypedef struct _lhash_bucket_t {\n    struct _lhash_bucket_t* next;\n    lhash_value_t hvalue;\n} lhash_bucket_t;\n\ntypedef struct {\n    lhash_value_t (*hash)(void*);  // calculate hash\n    int (*cmp)(void*, void*);      // compare data items\n    void (*release)(void*);        // data release (free)\n    void* (*copy)(void*);          // copy (may be used with insert)\n} lhash_func_t;    \n\ntypedef struct {\n    lhash_func_t func;         // functions\n\n    int is_allocated;\n    char* name;\n\n    unsigned int thres;        // Medium bucket chain len, for grow\n    unsigned int szm;          // current size mask\n    unsigned int nactive;      // Number of \"active\" slots\n    unsigned int nslots;       // Total number of slots\n    unsigned int nitems;       // Total number of items\n    unsigned int p;            // Split position\n    unsigned int nsegs;        // Number of segments\n    unsigned int n_resize;     // Number of index realloc calls\n    unsigned int n_seg_alloc;  // Number of segment allocations\n    unsigned int n_seg_free;   // Number of 
segment destroy\n    lhash_bucket_t*** seg;\n} lhash_t;\n\nextern lhash_t* lhash_new(char* name, int thres, lhash_func_t* func);\nextern lhash_t* lhash_init(lhash_t* lh, char* name, int thres, \n\t\t\t   lhash_func_t* func);\nextern void  lhash_delete(lhash_t* lh);\nextern void* lhash_lookup(lhash_t* lh, void* key);\nextern void* lhash_insert(lhash_t* lh, void* key, void* data);\nextern void* lhash_insert_new(lhash_t* lh, void* key, void* data);\nextern void* lhash_erase(lhash_t* lh, void* key);\nextern void  lhash_each(lhash_t* lh, \n\t\t\tvoid (elem)(lhash_t* lh, void* elem, void* arg),\n\t\t\tvoid* arg);\nextern void lhash_info(lhash_t* lh);\n\n#endif\n"
  },
  {
    "path": "c_src/cl_nif.c",
    "content": "/****** BEGIN COPYRIGHT *******************************************************\n *\n * Copyright (C) 2007 - 2012, Rogvall Invest AB, <tony@rogvall.se>\n *\n * This software is licensed as described in the file COPYRIGHT, which\n * you should have received as part of this distribution. The terms\n * are also available at http://www.rogvall.se/docs/copyright.txt.\n *\n * You may opt to use, copy, modify, merge, publish, distribute and/or sell\n * copies of the Software, and permit persons to whom the Software is\n * furnished to do so, under the terms of the COPYRIGHT file.\n *\n * This software is distributed on an \"AS IS\" basis, WITHOUT WARRANTY OF ANY\n * KIND, either express or implied.\n *\n ****** END COPYRIGHT ********************************************************/\n//\n// NIF interface for OpenCL binding\n//\n\n#include <stdio.h>\n\n#ifndef WIN32\n#include <stdbool.h>\n#include <stdlib.h>\n#include <stdarg.h>\n#include <stdint.h>\n#include <string.h>\n#include <errno.h>\n#include <dlfcn.h>\n#else\n#include <windows.h>\n#endif\n\n#define CL_USE_DEPRECATED_OPENCL_1_1_APIS 1\n#define CL_TARGET_OPENCL_VERSION 210\n\n#ifdef DARWIN\n#include <OpenCL/opencl.h>\n#else\n#include <CL/cl.h>\n#include <CL/cl_ext.h>\n#endif\n\n// Old cl_platform doesn't have the CL_CALLBACK\n#ifndef CL_CALLBACK\n#define CL_CALLBACK\n#endif\n\n#ifdef WIN32\ntypedef cl_bool bool;\n#define true 1\n#define false 0\n#endif \n\n\n#ifdef WIN_X64\n#define ecl_get_sizet(a1,a2,a3) enif_get_uint64(a1,a2,a3)\n#define ecl_make_sizet(a1,a2) enif_make_uint64(a1,a2)\n#else\n#define ecl_get_sizet(a1,a2,a3) enif_get_ulong(a1,a2,(unsigned long*)a3)\n#define ecl_make_sizet(a1,a2) enif_make_ulong(a1,a2)\n#endif\n\n#define UNUSED(a) ((void) a)\n\n#include \"erl_nif.h\"\n#include \"cl_hash.h\"\n\n#define sizeof_array(a) (sizeof(a) / sizeof(a[0]))\n\n// #define DEBUG\n\n#ifdef DEBUG\n#include <stdarg.h>\nstatic void ecl_emit_error(char* file, int line, ...);\n#define DBG(...) 
ecl_emit_error(__FILE__,__LINE__,__VA_ARGS__)\n#else\n#define DBG(...)\n#endif\n\n#define CL_ERROR(...) ecl_emit_error(__FILE__,__LINE__,__VA_ARGS__)\n\n// soft limits\n#define MAX_INFO_SIZE   1024\n#define MAX_DEVICES     128\n#define MAX_PLATFORMS   128\n#define MAX_OPTION_LIST 1024\n#define MAX_KERNEL_NAME 1024\n#define MAX_KERNELS     1024\n#define MAX_SOURCES     128\n#define MAX_WAIT_LIST   128\n#define MAX_WORK_SIZE   3\n#define MAX_IMAGE_FORMATS 128\n#define MAX_MEM_OBJECTS 128\n\n// Atom macros\n#define ATOM(name) atm_##name\n\n#define DECL_ATOM(name) \\\n    ERL_NIF_TERM atm_##name = 0\n\n// require env in context (ugly)\n#define LOAD_ATOM(name)\t\t\t\\\n    atm_##name = enif_make_atom(env,#name)\n\n#define LOAD_ATOM_STRING(name,string)\t\t\t\\\n    atm_##name = enif_make_atom(env,string)\n\n// Wrapper to handle reource atom name etc.\ntypedef struct {\n    char* name;\n    ERL_NIF_TERM type;         // resource atom name\n    ErlNifResourceType* res;   // the resource type\n    size_t              size;  // \"real\" object size\n} ecl_resource_t;\n\nstruct _ecl_object_t;\n\ntypedef struct _ecl_platform_t {\n    struct _ecl_object_t* o_platform;\n    cl_uint ndevices;\n    struct _ecl_object_t** o_device;\n} ecl_platform_t;\n\nstruct _ecl_env_t;\n\ntypedef struct _ecl_object_t {\n    lhash_bucket_t        hbucket;   // inheritance: map: cl->ecl\n    struct _ecl_env_t*    env;\n    cl_int                version;\n    struct _ecl_object_t* parent;     // parent resource object\n    union {\n\tcl_platform_id   platform;\n\tcl_device_id     device;\n\tcl_context       context;\n\tcl_command_queue queue;\n\tcl_mem           mem;\n\tcl_sampler       sampler;\n\tcl_program       program;\n\tcl_kernel        kernel;\n\tcl_event         event;\n\tvoid*            opaque;\n    };\n} ecl_object_t;\n\n// \"inherits\" ecl_object_t and add special binary objects (read/write)\ntypedef struct _ecl_event_t {\n    ecl_object_t obj;       // FIXED place for inhertiance\n    
bool          rd;       // Read binary operation\n    bool          rl;       // Do not release if true\n    ErlNifEnv*    bin_env;  // environment to hold binary term data\n    ErlNifBinary* bin;      // read/write data\n} ecl_event_t;\n\n#define KERNEL_ARG_OTHER   0\n#define KERNEL_ARG_MEM     1\n#define KERNEL_ARG_SAMPLER 2\n\n// This is a special construct inorder to kee\ntypedef struct {\n    int type;    // 0=other, 1=mem, 2=samper\n    union {\n\tcl_mem      mem;\n\tcl_sampler  sampler;\n\tvoid*       other;\n\tvoid*       value;\n    };\n} ecl_kernel_arg_t;\n\n// \"inherits\" ecl_object_t and reference count kernel args\ntypedef struct _ecl_kernel_t {\n    ecl_object_t      obj;       // FIXED place for inhertiance\n    cl_uint           num_args;  // number of arguments used by the kernel\n    ecl_kernel_arg_t* arg;       // array of current args \n} ecl_kernel_t;\n\n\ntypedef enum {\n    OCL_CHAR,          // cl_char\n    OCL_UCHAR,         // cl_uchar\n    OCL_SHORT,         // cl_short\n    OCL_USHORT,        // cl_ushort\n    OCL_INT,           // cl_int\n    OCL_UINT,          // cl_uint\n    OCL_LONG,          // cl_long\n    OCL_ULONG,         // cl_ulong\n    OCL_HALF,          // cl_half\n    OCL_FLOAT,         // cl_float\n    OCL_DOUBLE,        // cl_double\n    OCL_BOOL,          // cl_bool \n    OCL_STRING,        // cl_char*\n    OCL_BITFIELD,      // cl_ulong\n    OCL_ENUM,          // cl_int\n    OCL_POINTER,       // void*\n    OCL_SIZE,          // size_t\n    OCL_PLATFORM,      // void*\n    OCL_DEVICE,        // void*\n    OCL_CONTEXT,       // void*\n    OCL_PROGRAM,       // void*\n    OCL_COMMAND_QUEUE, // void*\n    OCL_IMAGE_FORMAT,   // cl_image_format\n#if CL_VERSION_1_2 == 1\n    OCL_DEVICE_PARTITION, // cl_device_partition_property\n#endif\n    OCL_NUM_TYPES\n} ocl_type_t;\n\n#define OCL_DEVICE_TYPE                  OCL_BITFIELD\n#define OCL_DEVICE_FP_CONFIG             OCL_BITFIELD\n#define OCL_DEVICE_GLOBAL_MEM_CACHE_TYPE 
OCL_ENUM\n#define OCL_PLATFORM_INFO                OCL_UINT\n#define OCL_DEVICE_INFO                  OCL_UINT\n#define OCL_DEVICE_EXEC_CAPABILITIES     OCL_BITFIELD\n#define OCL_QUEUE_PROPERTIES             OCL_BITFIELD\n#define OCL_DEVICE_LOCAL_MEM_TYPE        OCL_ENUM\n#define OCL_MEM_OBJECT_TYPE              OCL_ENUM\n#define OCL_MEM_FLAGS                    OCL_BITFIELD\n#define OCL_SAMPLER_ADDRESSING_MODE      OCL_ENUM\n#define OCL_SAMPLER_FILTER_MODE          OCL_ENUM\n#define OCL_BUILD_STATUS                 OCL_ENUM\n#define OCL_DEVICE_DOUBLE_FP_CONFIG      OCL_BITFIELD\n#define OCL_PROGRAM_BINARY_TYPE          OCL_ENUM\n\ntypedef struct {\n    ERL_NIF_TERM*  key;\n    ErlNifUInt64   value;\n} ecl_kv_t;\n\ntypedef struct {\n    ERL_NIF_TERM*  info_key;    // Atom\n    cl_uint        info_id;     // Information\n    bool           is_array;    // return type is a vector of data\n    ocl_type_t     info_type;   // info data type\n    void*          extern_info; // Encode/Decode enum/bitfields\n    size_t         def_size;    // Def size in bytes (if == 0 query driver)\n} ecl_info_t;\n\ntypedef enum {\n    ECL_MESSAGE_STOP,           // time to die\n    ECL_MESSAGE_UPGRADE,        // time to upgrade\n    ECL_MESSAGE_SYNC,           // synk\n    ECL_MESSAGE_SYNC_ACK,       // synk return message\n    ECL_MESSAGE_FLUSH,          // call clFlush\n    ECL_MESSAGE_FINISH,         // call clFinish\n    ECL_MESSAGE_WAIT_FOR_EVENT  // call clWaitForEvents (only one event!)\n} ecl_message_type_t;\n\nstruct _ecl_thread_t;\n\ntypedef struct ecl_message_t\n{\n    ecl_message_type_t type;\n    ErlNifPid        sender;  // sender pid\n    ErlNifEnv*          env;  // message environment (ref, bin's etc)\n    ERL_NIF_TERM        ref;  // ref (in env!)\n    union {\n\tecl_object_t* queue;  // ECL_MESSAGE_FLUSH/ECL_MESSAGE_FINISH\n\tecl_event_t* event;   // ECL_MESSAGE_WAIT_FOR_EVENT\n\tvoid* (*upgrade)(void*); // ECL_MESSAGE_UPGRADE\n    };\n} ecl_message_t;\n\ntypedef 
struct _ecl_qlink_t {\n    struct _ecl_qlink_t* next;\n    ecl_message_t mesg;\n} ecl_qlink_t;\n\n#define MAX_QLINK  8  // pre-allocated qlinks\n\ntypedef struct {\n    ErlNifMutex*   mtx;\n    ErlNifCond*    cv;\n    int len;\n    ecl_qlink_t*   front;   // pick from front\n    ecl_qlink_t*   rear;    // insert at rear\n    ecl_qlink_t*   free;    // free list in ql\n    ecl_qlink_t  ql[MAX_QLINK];  // \"pre\" allocated qlinks\n} ecl_queue_t;\n\ntypedef struct _ecl_thread_t {\n    ErlNifTid   tid;     // thread id\n    ecl_queue_t q;       // message queue\n    void*       arg;     // thread init argument\n} ecl_thread_t;\n\n// \"inherits\" ecl_object_t and keeps track of the context thread\ntypedef struct _ecl_context_t {\n    ecl_object_t obj;             // FIXED place for inheritance\n    struct _ecl_context_t* next;  // next context in list\n    ecl_thread_t* thr;            // The context thread\n    int upgrade_count;            // upgrade tick\n} ecl_context_t;\n\ntypedef struct _ecl_env_t {\n    int         ref_count;  // ref count the load/upgrade/unload\n    lhash_t     ref;        // cl -> ecl\n    ErlNifRWLock* ref_lock; // lhash operation lock\n    ecl_queue_t q;          // sync queue\n    cl_uint nplatforms;\n    ecl_platform_t* platform;\n    ErlNifRWLock* context_list_lock;\n    ecl_context_t*  context_list;\n    cl_int icd_version;\n    int dirty_scheduler_support;\n} ecl_env_t;\n\ntypedef struct _ecl_func_t {\n    char* name;\n    void* func;\n    int   version;  // 10,11,12,20...\n} ecl_func_t;\n\n//\n// The ECL_FUNC_LIST function list is used multiple times,\n// each time after changing the definition of ECL_FUNC\n//\n#define ECL_FUNC_LIST\t\t     \\\n    
ECL_FUNC(clGetPlatformIDs,10),\t\t\\\n\tECL_FUNC(clGetPlatformInfo,10),\t\\\n\tECL_FUNC(clGetDeviceIDs,10),\t\t\\\n\tECL_FUNC(clGetDeviceInfo,10),\t\t\\\n\tECL_FUNC(clCreateSubDevices,12),\t\\\n\tECL_FUNC(clRetainDevice,12),\t\t\\\n\tECL_FUNC(clReleaseDevice,12),\t\t\\\n\tECL_FUNC(clCreateContext,10),\t\t\\\n\tECL_FUNC(clCreateContextFromType,10),\t\\\n\tECL_FUNC(clRetainContext,10),\t\t\\\n\tECL_FUNC(clReleaseContext,10),\t\t\\\n\tECL_FUNC(clGetContextInfo,10),\t\t\\\n\tECL_FUNC(clCreateCommandQueue,10),\t\\\n\tECL_FUNC(clRetainCommandQueue,10),\t\\\n\tECL_FUNC(clReleaseCommandQueue,10),\t\\\n\tECL_FUNC(clGetCommandQueueInfo,10),\t\\\n\tECL_FUNC(clCreateBuffer,10),\t\t\\\n\tECL_FUNC(clCreateSubBuffer,11),\t\\\n\tECL_FUNC(clCreateImage,12),\t\t\\\n\tECL_FUNC(clCreatePipe,20),\t\t\\\n\tECL_FUNC(clRetainMemObject,10),\t\\\n\tECL_FUNC(clReleaseMemObject,10),     \\\n\tECL_FUNC(clGetSupportedImageFormats,10),\t\\\n\tECL_FUNC(clGetMemObjectInfo,10),\t\t\\\n\tECL_FUNC(clGetImageInfo,10),\t\t\t\\\n\tECL_FUNC(clGetPipeInfo,20),\t\t\t\\\n\tECL_FUNC(clSetMemObjectDestructorCallback,11),\t\\\n\tECL_FUNC(clSVMAlloc,20),\t\t   \\\n\tECL_FUNC(clSVMFree,20),\t\t\t   \\\n\tECL_FUNC(clCreateSampler,10),\t\t   \\\n\tECL_FUNC(clCreateSamplerWithProperties,20),\t\\\n\tECL_FUNC(clRetainSampler,10),\t\t\t\\\n\tECL_FUNC(clReleaseSampler,10),\t\t\t\\\n\tECL_FUNC(clGetSamplerInfo,10),\t\t\t\\\n\tECL_FUNC(clCreateProgramWithSource,10),\t\t\\\n\tECL_FUNC(clCreateProgramWithBinary,10),\t\t\\\n\tECL_FUNC(clCreateProgramWithBuiltInKernels,12), 
\\\n\tECL_FUNC(clRetainProgram,10),\t\t\t\\\n\tECL_FUNC(clReleaseProgram,10),\t\t\t\\\n\tECL_FUNC(clBuildProgram,10),\t\t\t\\\n\tECL_FUNC(clCompileProgram,12),\t\t\t\\\n\tECL_FUNC(clLinkProgram,12),\t\t\t\\\n\tECL_FUNC(clUnloadPlatformCompiler,12),\t\t\\\n\tECL_FUNC(clGetProgramInfo,10),\t\t\t\\\n\tECL_FUNC(clGetProgramBuildInfo,10),\t\t\\\n\tECL_FUNC(clCreateKernel,10),\t\t\t\\\n\tECL_FUNC(clCreateKernelsInProgram,10),\t\t\\\n\tECL_FUNC(clSetKernelArg,10),\t\t\t\\\n\tECL_FUNC(clSetKernelArgSVMPointer,20),\t\t\\\n\tECL_FUNC(clSetKernelExecInfo,20),\t\t\\\n\tECL_FUNC(clRetainKernel,10),\t\t\t\\\n\tECL_FUNC(clReleaseKernel,10),\t\t\t\\\n\tECL_FUNC(clGetKernelInfo,10),\t\t\t\\\n\tECL_FUNC(clGetKernelArgInfo,12),\t\t\\\n\tECL_FUNC(clGetKernelWorkGroupInfo,10),\t\t\\\n\tECL_FUNC(clWaitForEvents,10),\t\t\t\\\n\tECL_FUNC(clGetEventInfo,10),\t\t\t\\\n\tECL_FUNC(clCreateUserEvent,11),\t\t\t\\\n\tECL_FUNC(clRetainEvent,10),\t\t\t\\\n\tECL_FUNC(clReleaseEvent,10),\t\t\t\\\n\tECL_FUNC(clSetUserEventStatus,11),\t\t\\\n\tECL_FUNC(clSetEventCallback,11),\t\t\\\n\tECL_FUNC(clGetEventProfilingInfo,10),\t\t\\\n\tECL_FUNC(clFlush,10),\t\t\t\t\\\n\tECL_FUNC(clFinish,10),\t\t\t\t\\\n\tECL_FUNC(clEnqueueReadBuffer,10),\t\t\\\n\tECL_FUNC(clEnqueueReadBufferRect,11),\t\t\\\n\tECL_FUNC(clEnqueueWriteBuffer,10),\t\t\\\n\tECL_FUNC(clEnqueueWriteBufferRect,11),\t\t\\\n\tECL_FUNC(clEnqueueFillBuffer,12),\t\t\\\n\tECL_FUNC(clEnqueueCopyBuffer,10),\t\t\\\n\tECL_FUNC(clEnqueueCopyBufferRect,11),\t\t\\\n\tECL_FUNC(clEnqueueReadImage,10),\t\t\\\n\tECL_FUNC(clEnqueueWriteImage,10),\t\t\\\n\tECL_FUNC(clEnqueueFillImage,12),\t\t\\\n\tECL_FUNC(clEnqueueCopyImage,10),\t\t\\\n\tECL_FUNC(clEnqueueCopyImageToBuffer,10),\t\\\n\tECL_FUNC(clEnqueueCopyBufferToImage,10),\t\\\n\tECL_FUNC(clEnqueueMapBuffer,10),\t\t\\\n\tECL_FUNC(clEnqueueMapImage,10),\t\t\t\\\n\tECL_FUNC(clEnqueueUnmapMemObject,10),\t\t\\\n\tECL_FUNC(clEnqueueMigrateMemObjects,12),\t\\\n\tECL_FUNC(clEnqueueNDRangeKernel,10),\t\t\\\n\tECL_FUNC(cl
EnqueueTask,10),\t\t\t\\\n\tECL_FUNC(clEnqueueNativeKernel,10),\t\t\\\n\tECL_FUNC(clEnqueueMarkerWithWaitList,12),\t\\\n\tECL_FUNC(clEnqueueBarrierWithWaitList,12),\t\\\n\tECL_FUNC(clEnqueueSVMFree,20),\t\t\t\\\n\tECL_FUNC(clEnqueueSVMMemcpy,20),\t\t\\\n\tECL_FUNC(clEnqueueSVMMemFill,20),\t\t\\\n\tECL_FUNC(clEnqueueSVMMap,20),\t\t\t\t\\\n\tECL_FUNC(clEnqueueSVMUnmap,20),\t\t\t\t\\\n\tECL_FUNC(clGetExtensionFunctionAddressForPlatform,12),\t\\\n\tECL_FUNC(clCreateImage2D,10),\t\t\t\t\\\n\tECL_FUNC(clCreateImage3D,10),\t\t\t\t\\\n\tECL_FUNC(clEnqueueMarker,10),\t\t\t\t\\\n\tECL_FUNC(clEnqueueWaitForEvents,10),\t\t\t\\\n\tECL_FUNC(clEnqueueBarrier,10),\t\t\t\t\\\n\tECL_FUNC(clUnloadCompiler,10),\t\t\t\t\\\n\tECL_FUNC(clGetExtensionFunctionAddress,10),\t\t\\\n\tECL_FUNC(clCreateProgramWithIL,21)\n\n#include \"ecl_types.h\"\n\n#undef  ECL_FUNC\n#define ECL_FUNC(nm,vsn) i_##nm\ntypedef enum {\n    ECL_FUNC_LIST\n} ecl_func_index_t;\n\n#undef  ECL_FUNC\n#define ECL_FUNC(nm,vsn)\t\t\t\\\n    [i_##nm] = { .name = #nm, .func = NULL, .version = (vsn) }\n\necl_func_t ecl_function[] = {\n    ECL_FUNC_LIST,\n    { NULL, NULL, 0 }\n};\n\n#define ECL_FUNC_PTR(nm) ecl_function[i_##nm].func\n#define ECL_FUNC_VERSION(nm) ecl_function[i_##nm].version\n#define ECL_CALL(nm) ((t_##nm)(ecl_function[i_##nm].func))\n\nstatic void* ecl_context_main(void* arg);\n\nstatic int ecl_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info);\n\nstatic int ecl_upgrade(ErlNifEnv* env, void** priv_data, void** old_priv_data, \n\t\t\t ERL_NIF_TERM load_info);\n\nstatic void ecl_unload(ErlNifEnv* env, void* priv_data);\n\nstatic int ecl_load_dynfunctions(ecl_env_t* ecl);\n\nstatic ERL_NIF_TERM ecl_versions(ErlNifEnv* env, int argc, \n\t\t\t\t const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_noop(ErlNifEnv* env, int argc, \n\t\t\t    const ERL_NIF_TERM argv[]);\nstatic ERL_NIF_TERM ecl_noop_(ErlNifEnv* env, int argc, \n\t\t\t      const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM 
ecl_get_platform_ids(ErlNifEnv* env, int argc, \n\t\t\t\t\t const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_get_platform_info(ErlNifEnv* env, int argc, \n\t\t\t\t\t  const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_get_device_ids(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[]);\n\n#if CL_VERSION_1_2 == 1\nstatic ERL_NIF_TERM ecl_create_sub_devices(ErlNifEnv* env, int argc,\n\t\t\t\t\t   const ERL_NIF_TERM argv[]);\n#endif\n\nstatic ERL_NIF_TERM ecl_get_device_info(ErlNifEnv* env, int argc, \n\t\t\t\t\tconst ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_create_context(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_get_context_info(ErlNifEnv* env, int argc, \n\t\t\t\t\t const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_create_queue(ErlNifEnv* env, int argc, \n\t\t\t\t     const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_get_queue_info(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_create_buffer(ErlNifEnv* env, int argc, \n\t\t\t\t      const ERL_NIF_TERM argv[]);\n\n#if CL_VERSION_1_1 == 1\nstatic ERL_NIF_TERM ecl_create_sub_buffer(ErlNifEnv* env, int argc,\n\t\t\t\t\t  const ERL_NIF_TERM argv[]);\n#endif\n\nstatic ERL_NIF_TERM ecl_create_image2d(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_create_image3d(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[]);\n\n#if CL_VERSION_1_2 == 1\nstatic ERL_NIF_TERM ecl_create_image(ErlNifEnv* env, int argc,\n\t\t\t\t     const ERL_NIF_TERM argv[]);\n#endif\n\nstatic ERL_NIF_TERM ecl_get_supported_image_formats(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t    const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_get_mem_object_info(ErlNifEnv* env, int argc, \n\t\t\t\t\t    const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_get_image_info(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM 
argv[]);\n\nstatic ERL_NIF_TERM ecl_create_sampler(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_get_sampler_info(ErlNifEnv* env, int argc, \n\t\t\t\t\t const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_create_program_with_source(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t   const ERL_NIF_TERM argv[]);\nstatic ERL_NIF_TERM ecl_create_program_with_binary(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t   const ERL_NIF_TERM argv[]);\n#if CL_VERSION_2_1 == 1\nstatic ERL_NIF_TERM ecl_create_program_with_il(ErlNifEnv* env, int argc, \n\t\t\t\t\t       const ERL_NIF_TERM argv[]);\n#endif\n\n#if CL_VERSION_1_2 == 1\nstatic ERL_NIF_TERM ecl_create_program_with_builtin_kernels(\n    ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]);\n#endif\nstatic ERL_NIF_TERM ecl_async_build_program(ErlNifEnv* env, int argc, \n\t\t\t\t\t    const ERL_NIF_TERM argv[]);\n\n#if CL_VERSION_1_2 == 1\nstatic ERL_NIF_TERM ecl_unload_platform_compiler(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t const ERL_NIF_TERM argv[]);\nstatic ERL_NIF_TERM ecl_async_compile_program(ErlNifEnv* env, int argc,\n\t\t\t\t\t      const ERL_NIF_TERM argv[]);\nstatic ERL_NIF_TERM ecl_async_link_program(ErlNifEnv* env, int argc,\n\t\t\t\t\t   const ERL_NIF_TERM argv[]);\n#endif\n\nstatic ERL_NIF_TERM ecl_unload_compiler(ErlNifEnv* env, int argc, \n\t\t\t\t\tconst ERL_NIF_TERM argv[]);\nstatic ERL_NIF_TERM ecl_get_program_info(ErlNifEnv* env, int argc, \n\t\t\t\t\t const ERL_NIF_TERM argv[]);\nstatic ERL_NIF_TERM ecl_get_program_build_info(ErlNifEnv* env, int argc, \n\t\t\t\t\t       const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_create_kernel(ErlNifEnv* env, int argc, \n\t\t\t\t      const ERL_NIF_TERM argv[]);\nstatic ERL_NIF_TERM ecl_create_kernels_in_program(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t  const ERL_NIF_TERM argv[]);\nstatic ERL_NIF_TERM ecl_set_kernel_arg(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[]);\nstatic ERL_NIF_TERM 
ecl_set_kernel_arg_size(ErlNifEnv* env, int argc, \n\t\t\t\t\t    const ERL_NIF_TERM argv[]);\nstatic ERL_NIF_TERM ecl_get_kernel_info(ErlNifEnv* env, int argc, \n\t\t\t\t\tconst ERL_NIF_TERM argv[]);\nstatic ERL_NIF_TERM ecl_get_kernel_workgroup_info(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t  const ERL_NIF_TERM argv[]);\n#if CL_VERSION_1_2 == 1\nstatic ERL_NIF_TERM ecl_get_kernel_arg_info(ErlNifEnv* env, int argc,\n\t\t\t\t\t    const ERL_NIF_TERM argv[]);\n#endif\n\nstatic ERL_NIF_TERM ecl_enqueue_task(ErlNifEnv* env, int argc, \n\t\t\t\t     const ERL_NIF_TERM argv[]);\nstatic ERL_NIF_TERM ecl_enqueue_nd_range_kernel(ErlNifEnv* env, int argc, \n\t\t\t\t\t\tconst ERL_NIF_TERM argv[]);\nstatic ERL_NIF_TERM ecl_enqueue_marker(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_enqueue_barrier(ErlNifEnv* env, int argc, \n\t\t\t\t\tconst ERL_NIF_TERM argv[]);\n\n#if CL_VERSION_1_2 == 1\n\nstatic ERL_NIF_TERM ecl_enqueue_marker_with_wait_list(ErlNifEnv* env,\n\t\t\t\t\t\t      int argc,\n\t\t\t\t\t\t      const ERL_NIF_TERM argv[]);\nstatic ERL_NIF_TERM ecl_enqueue_barrier_with_wait_list(ErlNifEnv* env,\n\t\t\t\t\t\t       int argc,\n\t\t\t\t\t\t       const ERL_NIF_TERM argv[]);\n#endif\n\nstatic ERL_NIF_TERM ecl_enqueue_wait_for_events(ErlNifEnv* env, int argc, \n\t\t\t\t\t\tconst ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_enqueue_read_buffer(ErlNifEnv* env, int argc, \n\t\t\t\t\t    const ERL_NIF_TERM argv[]);\n#if CL_VERSION_1_1 == 1\nstatic ERL_NIF_TERM ecl_enqueue_read_buffer_rect(ErlNifEnv* env, int argc,\n\t\t\t\t\t\t const ERL_NIF_TERM argv[]);\n#endif\n\nstatic ERL_NIF_TERM ecl_enqueue_write_buffer(ErlNifEnv* env, int argc, \n\t\t\t\t\t     const ERL_NIF_TERM argv[]);\n\n#if CL_VERSION_1_1 == 1\nstatic ERL_NIF_TERM ecl_enqueue_write_buffer_rect(ErlNifEnv* env, int argc,\n\t\t\t\t\t\t  const ERL_NIF_TERM argv[]);\n#endif\n\n#if CL_VERSION_1_2 == 1\nstatic ERL_NIF_TERM ecl_enqueue_fill_buffer(ErlNifEnv* env, 
int argc,\n\t\t\t\t\t    const ERL_NIF_TERM argv[]);\n#endif\n\nstatic ERL_NIF_TERM ecl_enqueue_read_image(ErlNifEnv* env, int argc, \n\t\t\t\t\t   const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_enqueue_write_image(ErlNifEnv* env, int argc, \n\t\t\t\t\t    const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_enqueue_copy_buffer(ErlNifEnv* env, int argc,\n\t\t\t\t\t    const ERL_NIF_TERM argv[]);\n\n#if CL_VERSION_1_1 == 1\nstatic ERL_NIF_TERM ecl_enqueue_copy_buffer_rect(ErlNifEnv* env, int argc,\n\t\t\t\t\t\t const ERL_NIF_TERM argv[]);\n#endif\n\nstatic ERL_NIF_TERM ecl_enqueue_copy_image(ErlNifEnv* env, int argc, \n\t\t\t\t\t   const ERL_NIF_TERM argv[]);\n#if CL_VERSION_1_2 == 1\nstatic ERL_NIF_TERM ecl_enqueue_fill_image(ErlNifEnv* env, int argc,\n\t\t\t\t\t   const ERL_NIF_TERM argv[]);\n#endif\n\n\nstatic ERL_NIF_TERM ecl_enqueue_copy_image_to_buffer(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t     const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_enqueue_copy_buffer_to_image(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t     const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_enqueue_map_buffer(ErlNifEnv* env, int argc, \n\t\t\t\t\t   const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_enqueue_map_image(ErlNifEnv* env, int argc, \n\t\t\t\t\t  const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_enqueue_unmap_mem_object(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t const ERL_NIF_TERM argv[]);\n\n#if CL_VERSION_1_2 == 1\nstatic ERL_NIF_TERM ecl_enqueue_migrate_mem_objects(ErlNifEnv* env, int argc,\n\t\t\t\t\t\t    const ERL_NIF_TERM argv[]);\n#endif\n\nstatic ERL_NIF_TERM ecl_async_flush(ErlNifEnv* env, int argc, \n\t\t\t\t    const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_async_finish(ErlNifEnv* env, int argc, \n\t\t\t\t     const ERL_NIF_TERM argv[]);\n\n// special version of clWaitForEvents \nstatic ERL_NIF_TERM ecl_async_wait_for_event(ErlNifEnv* env, int argc, \n\t\t\t\t\t     const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM 
ecl_get_event_info(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[]);\n\nstatic ERL_NIF_TERM ecl_get_event_profiling_info(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t const ERL_NIF_TERM argv[]);\n\n#if CL_VERSION_2_0 == 1\n\nstatic ERL_NIF_TERM ecl_create_pipe(ErlNifEnv* env, int argc,\n\t\t\t\t    const ERL_NIF_TERM argv[]);\n#endif\n\n\n// Dirty optional since 2.7 and mandatory since 2.12\n#if (ERL_NIF_MAJOR_VERSION > 2) || ((ERL_NIF_MAJOR_VERSION == 2) && (ERL_NIF_MINOR_VERSION >= 7))\n#ifdef USE_DIRTY_SCHEDULER\n#define NIF_FUNC(name,arity,fptr) {(name),(arity),(fptr),(ERL_NIF_DIRTY_JOB_CPU_BOUND)}\n#define NIF_DIRTY_FUNC(name,arity,fptr) {(name),(arity),(fptr),(ERL_NIF_DIRTY_JOB_CPU_BOUND)}\n#else\n#define NIF_FUNC(name,arity,fptr) {(name),(arity),(fptr),(0)}\n#define NIF_DIRTY_FUNC(name,arity,fptr) {(name),(arity),(fptr),(ERL_NIF_DIRTY_JOB_CPU_BOUND)}\n#endif\n#else\n#define NIF_FUNC(name,arity,fptr) {(name),(arity),(fptr)}\n#define NIF_DIRTY_FUNC(name,arity,fptr) {(name),(arity),(fptr)}\n#endif\n\nErlNifFunc ecl_funcs[] =\n{\n    NIF_FUNC( \"noop\",                        0, ecl_noop ),\n    NIF_FUNC( \"noop_\",                       0, ecl_noop_ ),\n    NIF_DIRTY_FUNC( \"dirty_noop\",            0, ecl_noop ),\n    NIF_FUNC( \"versions\",                    0, ecl_versions ),\n    \n    // Platform\n    NIF_FUNC( \"get_platform_ids\",           0, ecl_get_platform_ids ),\n    NIF_FUNC( \"get_platform_info\",          2, ecl_get_platform_info ),\n\n    // Devices\n    NIF_FUNC( \"get_device_ids\",             2, ecl_get_device_ids ),\n#if CL_VERSION_1_2 == 1\n    NIF_FUNC( \"create_sub_devices\",         2, ecl_create_sub_devices ),\n#endif\n    NIF_FUNC( \"get_device_info\",            2, ecl_get_device_info ),\n\n    // Context\n    NIF_FUNC( \"create_context\",             1, ecl_create_context ),\n    NIF_FUNC( \"get_context_info\",           2, ecl_get_context_info ),\n\n    // Command queue\n    NIF_FUNC( \"create_queue\",               
3, ecl_create_queue ),\n    NIF_FUNC( \"get_queue_info\",             2, ecl_get_queue_info ),\n\n    // Memory object\n    NIF_FUNC( \"create_buffer\",              4, ecl_create_buffer ),\n#if CL_VERSION_1_1 == 1\n    NIF_FUNC( \"create_sub_buffer\",          4, ecl_create_sub_buffer ),\n#endif\n\n    NIF_FUNC( \"get_mem_object_info\",        2, ecl_get_mem_object_info ),\n    NIF_FUNC( \"get_image_info\",             2, ecl_get_image_info ),\n\n    NIF_FUNC( \"create_image2d\",            7, ecl_create_image2d ),\n    NIF_FUNC( \"create_image3d\",            9, ecl_create_image3d ),\n#if CL_VERSION_1_2 == 1\n    NIF_FUNC( \"create_image\",              5, ecl_create_image ),\n#endif\n    NIF_FUNC( \"get_supported_image_formats\",3, ecl_get_supported_image_formats ),\n\n    // Sampler \n    NIF_FUNC( \"create_sampler\",             4, ecl_create_sampler ),\n    NIF_FUNC( \"get_sampler_info\",           2, ecl_get_sampler_info ),\n\n    // Program\n    NIF_FUNC( \"create_program_with_source\", 2, ecl_create_program_with_source ),\n    NIF_FUNC( \"create_program_with_binary\", 3, ecl_create_program_with_binary ),\n#if CL_VERSION_1_2 == 1\n    NIF_FUNC( \"create_program_with_builtin_kernels\", 3, \n\t      ecl_create_program_with_builtin_kernels ),\n#endif    \n    NIF_FUNC( \"async_build_program\",        3, ecl_async_build_program ),\n#if CL_VERSION_1_2 == 1\n    NIF_FUNC( \"unload_platform_compiler\",   1, ecl_unload_platform_compiler ),\n#endif\n#if CL_VERSION_1_2 == 1\n    NIF_FUNC( \"async_compile_program\",      5,   ecl_async_compile_program ),\n#endif\n#if CL_VERSION_1_2 == 1\n    NIF_FUNC( \"async_link_program\",         4,   ecl_async_link_program ),\n#endif\n    NIF_FUNC( \"unload_compiler\",            0, ecl_unload_compiler ),\n    NIF_FUNC( \"get_program_info\",           2, ecl_get_program_info ),\n    NIF_FUNC( \"get_program_build_info\",     3, ecl_get_program_build_info ),\n\n    // Kernel\n    NIF_FUNC( \"create_kernel\",              2, 
ecl_create_kernel ),\n    NIF_FUNC( \"create_kernels_in_program\",  1, ecl_create_kernels_in_program ),\n    NIF_FUNC( \"set_kernel_arg\",             3, ecl_set_kernel_arg ),\n    NIF_FUNC( \"set_kernel_arg_size\",        3, ecl_set_kernel_arg_size ),\n    NIF_FUNC( \"get_kernel_info\",            2, ecl_get_kernel_info ),\n    NIF_FUNC( \"get_kernel_workgroup_info\",  3, ecl_get_kernel_workgroup_info ),\n#if CL_VERSION_1_2 == 1\n    NIF_FUNC( \"get_kernel_arg_info\",        3, ecl_get_kernel_arg_info ),\n#endif\n    // Events\n    NIF_FUNC( \"enqueue_task\",               4, ecl_enqueue_task ),\n    NIF_FUNC( \"enqueue_nd_range_kernel\",    6, ecl_enqueue_nd_range_kernel ),\n    NIF_FUNC( \"enqueue_marker\",             1, ecl_enqueue_marker ),\n    NIF_FUNC( \"enqueue_barrier\",            1, ecl_enqueue_barrier ),\n#if CL_VERSION_1_2 == 1\n    NIF_FUNC( \"enqueue_barrier_with_wait_list\", 2, ecl_enqueue_barrier_with_wait_list ),\n    NIF_FUNC( \"enqueue_marker_with_wait_list\",  2, ecl_enqueue_marker_with_wait_list ),\n#endif\n    NIF_FUNC( \"enqueue_wait_for_events\",    2, ecl_enqueue_wait_for_events ),\n    NIF_FUNC( \"enqueue_read_buffer\",        5, ecl_enqueue_read_buffer ),\n#if CL_VERSION_1_1 == 1\n    NIF_FUNC( \"enqueue_read_buffer_rect\",   10, ecl_enqueue_read_buffer_rect ),\n#endif\n    NIF_FUNC( \"enqueue_write_buffer\",       7, ecl_enqueue_write_buffer ),\n#if CL_VERSION_1_1 == 1\n    NIF_FUNC( \"enqueue_write_buffer_rect\",  11, ecl_enqueue_write_buffer_rect ),\n#endif\n#if CL_VERSION_1_2 == 1\n    NIF_FUNC( \"enqueue_fill_buffer\",         6, ecl_enqueue_fill_buffer ),\n#endif\n    NIF_FUNC( \"enqueue_read_image\",         7, ecl_enqueue_read_image ),\n    NIF_FUNC( \"enqueue_write_image\",        9, ecl_enqueue_write_image ),\n    NIF_FUNC( \"enqueue_copy_buffer\",        7, ecl_enqueue_copy_buffer ),\n#if CL_VERSION_1_1 == 1\n    NIF_FUNC( \"enqueue_copy_buffer_rect\",  11, ecl_enqueue_copy_buffer_rect ),\n#endif\n    NIF_FUNC( 
\"enqueue_copy_image\",         7, ecl_enqueue_copy_image ),\n#if CL_VERSION_1_2 == 1\n    NIF_FUNC( \"enqueue_fill_image\",         6, ecl_enqueue_fill_image ),\n#endif\n    NIF_FUNC( \"enqueue_copy_image_to_buffer\", 7, ecl_enqueue_copy_image_to_buffer ),\n    NIF_FUNC( \"enqueue_copy_buffer_to_image\", 7, ecl_enqueue_copy_buffer_to_image ),\n    NIF_FUNC( \"enqueue_map_buffer\",           6, ecl_enqueue_map_buffer ),\n    NIF_FUNC( \"enqueue_map_image\",            6, ecl_enqueue_map_image ),\n    NIF_FUNC( \"enqueue_unmap_mem_object\",     3, ecl_enqueue_unmap_mem_object ),\n#if CL_VERSION_1_2 == 1\n    NIF_FUNC( \"enqueue_migrate_mem_objects\",  4, ecl_enqueue_migrate_mem_objects ),\n#endif\n    NIF_FUNC( \"async_flush\",                  1, ecl_async_flush ),\n    NIF_FUNC( \"async_finish\",                 1, ecl_async_finish ),\n    NIF_FUNC( \"async_wait_for_event\",         1, ecl_async_wait_for_event ),\n    NIF_FUNC( \"get_event_info\",               2, ecl_get_event_info ),\n    NIF_FUNC( \"get_event_profiling_info\",     2, ecl_get_event_profiling_info ),\n\n#if CL_VERSION_2_0 == 1\n    NIF_FUNC( \"create_pipe\",                  4,  ecl_create_pipe ),\n#endif\n\n#if CL_VERSION_2_1 == 1\n    NIF_FUNC( \"create_program_with_il\",       2, ecl_create_program_with_il ),\n#endif\n\n};\n\nstatic ecl_resource_t platform_r;\nstatic ecl_resource_t device_r;\nstatic ecl_resource_t context_r;\nstatic ecl_resource_t command_queue_r;\nstatic ecl_resource_t mem_r;\nstatic ecl_resource_t sampler_r;\nstatic ecl_resource_t program_r;\nstatic ecl_resource_t kernel_r;\nstatic ecl_resource_t event_r;\n\n// General atoms\nDECL_ATOM(ok);\nDECL_ATOM(error);\nDECL_ATOM(unknown);\nDECL_ATOM(undefined);\nDECL_ATOM(true);\nDECL_ATOM(false);\n\n// async messages\nDECL_ATOM(cl_async);\nDECL_ATOM(cl_event);\n\n// Type 
names\nDECL_ATOM(platform_t);\nDECL_ATOM(device_t);\nDECL_ATOM(context_t);\nDECL_ATOM(command_queue_t);\nDECL_ATOM(mem_t);\nDECL_ATOM(sampler_t);\nDECL_ATOM(program_t);\nDECL_ATOM(kernel_t);\nDECL_ATOM(event_t);\n\n// 'cl' type names\nDECL_ATOM(char);\nDECL_ATOM(char2);\nDECL_ATOM(char4);\nDECL_ATOM(char8);\nDECL_ATOM(char16);\n\nDECL_ATOM(uchar);\nDECL_ATOM(uchar2);\nDECL_ATOM(uchar4);\nDECL_ATOM(uchar8);\nDECL_ATOM(uchar16);\n\nDECL_ATOM(short);\nDECL_ATOM(short2);\nDECL_ATOM(short4);\nDECL_ATOM(short8);\nDECL_ATOM(short16);\n\nDECL_ATOM(ushort);\nDECL_ATOM(ushort2);\nDECL_ATOM(ushort4);\nDECL_ATOM(ushort8);\nDECL_ATOM(ushort16);\n\nDECL_ATOM(int);\nDECL_ATOM(int2);\nDECL_ATOM(int4);\nDECL_ATOM(int8);\nDECL_ATOM(int16);\n\nDECL_ATOM(uint);\nDECL_ATOM(uint2);\nDECL_ATOM(uint4);\nDECL_ATOM(uint8);\nDECL_ATOM(uint16);\n\nDECL_ATOM(long);\nDECL_ATOM(long2);\nDECL_ATOM(long4);\nDECL_ATOM(long8);\nDECL_ATOM(long16);\n\nDECL_ATOM(ulong);\nDECL_ATOM(ulong2);\nDECL_ATOM(ulong4);\nDECL_ATOM(ulong8);\nDECL_ATOM(ulong16);\n\nDECL_ATOM(half);\n\nDECL_ATOM(float);\nDECL_ATOM(float2);\nDECL_ATOM(float4);\nDECL_ATOM(float8);\nDECL_ATOM(float16);\n\nDECL_ATOM(double);\nDECL_ATOM(double2);\nDECL_ATOM(double4);\nDECL_ATOM(double8);\nDECL_ATOM(double16);\n\n// records for image creation\nDECL_ATOM(cl_image_desc);\nDECL_ATOM(cl_image_format);\n\n// Platform info\n// DECL_ATOM(profile);\n// DECL_ATOM(version);\n// DECL_ATOM(name);\n// DECL_ATOM(vendor);\n// DECL_ATOM(extensions);\n\n// Context info\nDECL_ATOM(reference_count);\nDECL_ATOM(devices);\nDECL_ATOM(properties);\n\n// Queue info\nDECL_ATOM(context);\nDECL_ATOM(num_devices);\nDECL_ATOM(device);\n// DECL_ATOM(reference_count);\n// DECL_ATOM(properties);\n\n// Mem info\nDECL_ATOM(object_type);\nDECL_ATOM(flags);\nDECL_ATOM(size);\nDECL_ATOM(host_ptr);\nDECL_ATOM(map_count);\n// DECL_ATOM(reference_count); \n// DECL_ATOM(context);\n\n// Image 
info\nDECL_ATOM(format);\nDECL_ATOM(element_size);\nDECL_ATOM(row_pitch);\nDECL_ATOM(slice_pitch);\nDECL_ATOM(width);\nDECL_ATOM(height);\nDECL_ATOM(depth);\n\n// Sampler info\n// DECL_ATOM(reference_count);\n// DECL_ATOM(context);\nDECL_ATOM(normalized_coords);\nDECL_ATOM(addressing_mode);\nDECL_ATOM(filter_mode);\n\n// Program info\n// DECL_ATOM(reference_count);\n// DECL_ATOM(context);\nDECL_ATOM(num_decices);\n// DECL_ATOM(devices);\nDECL_ATOM(source); \nDECL_ATOM(binary_sizes);\nDECL_ATOM(binaries);\n\n// Build Info\nDECL_ATOM(status);\nDECL_ATOM(options);\nDECL_ATOM(log);\nDECL_ATOM(binary_type);\n\n// Kernel Info\nDECL_ATOM(function_name);\nDECL_ATOM(num_args);\n// DECL_ATOM(reference_count);\n// DECL_ATOM(context);\nDECL_ATOM(program);\n\n// Event Info\nDECL_ATOM(command_queue);\nDECL_ATOM(command_type);\n// DECL_ATOM(reference_count);\nDECL_ATOM(execution_status);\n\n// Event Profile Info\nDECL_ATOM(command_queued);\nDECL_ATOM(command_submit);\nDECL_ATOM(command_start);\nDECL_ATOM(command_end);\nDECL_ATOM(command_complete);\n\n// Workgroup info\nDECL_ATOM(work_group_size);\nDECL_ATOM(compile_work_group_size);\n// DECL_ATOM(local_mem_size);\nDECL_ATOM(preferred_work_group_size_multiple);\nDECL_ATOM(private_mem_size);\nDECL_ATOM(global_work_size);\n\n// Error 
codes\nDECL_ATOM(device_not_found);\nDECL_ATOM(device_not_available);\nDECL_ATOM(compiler_not_available);\nDECL_ATOM(mem_object_allocation_failure);\nDECL_ATOM(out_of_resources);\nDECL_ATOM(out_of_host_memory);\nDECL_ATOM(profiling_info_not_available);\nDECL_ATOM(mem_copy_overlap);\nDECL_ATOM(image_format_mismatch);\nDECL_ATOM(image_format_not_supported);\nDECL_ATOM(build_program_failure);\nDECL_ATOM(map_failure);\nDECL_ATOM(invalid_value);\nDECL_ATOM(invalid_device_type);\nDECL_ATOM(invalid_platform);\nDECL_ATOM(invalid_device);\nDECL_ATOM(invalid_context);\nDECL_ATOM(invalid_queue_properties);\nDECL_ATOM(invalid_command_queue);\nDECL_ATOM(invalid_host_ptr);\nDECL_ATOM(invalid_mem_object);\nDECL_ATOM(invalid_image_format_descriptor);\nDECL_ATOM(invalid_image_size);\nDECL_ATOM(invalid_sampler);\nDECL_ATOM(invalid_binary);\nDECL_ATOM(invalid_build_options);\nDECL_ATOM(invalid_program);\nDECL_ATOM(invalid_program_executable);\nDECL_ATOM(invalid_kernel_name);\nDECL_ATOM(invalid_kernel_definition);\nDECL_ATOM(invalid_kernel);\nDECL_ATOM(invalid_arg_index);\nDECL_ATOM(invalid_arg_value);\nDECL_ATOM(invalid_arg_size);\nDECL_ATOM(invalid_kernel_args);\nDECL_ATOM(invalid_work_dimension);\nDECL_ATOM(invalid_work_group_size);\nDECL_ATOM(invalid_work_item_size);\nDECL_ATOM(invalid_global_offset);\nDECL_ATOM(invalid_event_wait_list);\nDECL_ATOM(invalid_event);\nDECL_ATOM(invalid_operation);\nDECL_ATOM(invalid_gl_object);\nDECL_ATOM(invalid_buffer_size);\nDECL_ATOM(invalid_mip_level);\nDECL_ATOM(invalid_global_work_size);\nDECL_ATOM(device_partition_failed);\nDECL_ATOM(invalid_device_partition_count);\n\n// cl_device_type\nDECL_ATOM(all);\nDECL_ATOM(default);\nDECL_ATOM(cpu);\nDECL_ATOM(gpu);\nDECL_ATOM(accelerator);\nDECL_ATOM(custom);\n\n// fp_config\nDECL_ATOM(denorm);\nDECL_ATOM(inf_nan);\nDECL_ATOM(round_to_nearest);\nDECL_ATOM(round_to_zero);\nDECL_ATOM(round_to_inf);\nDECL_ATOM(fma);\nDECL_ATOM(soft_float);\nDECL_ATOM(correctly_rounded_divide_sqrt);\n\n// 
mem_cache_type\nDECL_ATOM(none);\nDECL_ATOM(read_only);\nDECL_ATOM(read_write);\n\n// local_mem_type\nDECL_ATOM(local);\nDECL_ATOM(global);\n\n// exec capability\nDECL_ATOM(kernel);\nDECL_ATOM(native_kernel);\n\n// command_queue_properties\nDECL_ATOM(out_of_order_exec_mode_enable);\nDECL_ATOM(profiling_enable);\n\n// mem_flags\n// DECL_ATOM(read_write);\nDECL_ATOM(write_only);\n// DECL_ATOM(read_only);\nDECL_ATOM(use_host_ptr);\nDECL_ATOM(alloc_host_ptr);\nDECL_ATOM(copy_host_ptr);\n\n// migration flags\nDECL_ATOM(host);\nDECL_ATOM(content_undefined);\n\n// mem_object_type\nDECL_ATOM(buffer);\nDECL_ATOM(image2d);\nDECL_ATOM(image3d);\n// version1.2\nDECL_ATOM(image2d_array);\nDECL_ATOM(image1d);\nDECL_ATOM(image1d_array);\nDECL_ATOM(image1d_buffer);\n// version2.0\nDECL_ATOM(pipe);\n\n// addressing_mode\n// DECL_ATOM(none);\nDECL_ATOM(clamp_to_edge);\nDECL_ATOM(clamp);\nDECL_ATOM(repeat);\n\n// filter_mode\nDECL_ATOM(nearest);\nDECL_ATOM(linear);\n\n// map_flags\nDECL_ATOM(read);\nDECL_ATOM(write);\n\n// build_status\nDECL_ATOM(success);\n// DECL_ATOM(none);\n// DECL_ATOM(error);\nDECL_ATOM(in_progress);\n\n// program_binary_type\n// DECL_ATOM(none);\nDECL_ATOM(compiled_object);\nDECL_ATOM(library);\nDECL_ATOM(executable);\n\n// command_type\nDECL_ATOM(ndrange_kernel);\nDECL_ATOM(task);\n// DECL_ATOM(native_kernel);\nDECL_ATOM(read_buffer);\nDECL_ATOM(write_buffer);\nDECL_ATOM(copy_buffer);\nDECL_ATOM(read_image);\nDECL_ATOM(write_image);\nDECL_ATOM(copy_image);\nDECL_ATOM(copy_image_to_buffer);\nDECL_ATOM(copy_buffer_to_image);\nDECL_ATOM(map_buffer);\nDECL_ATOM(map_image);\nDECL_ATOM(unmap_mem_object);\nDECL_ATOM(marker);\nDECL_ATOM(aquire_gl_objects);\nDECL_ATOM(release_gl_objects);\nDECL_ATOM(migreate_mem_objects);\nDECL_ATOM(fill_buffer);\nDECL_ATOM(fill_image);\n\n// execution_status\nDECL_ATOM(complete);\nDECL_ATOM(running);\nDECL_ATOM(submitted);\nDECL_ATOM(queued);\n\n// arguments\nDECL_ATOM(region);\n\n// DECL_ATOM(global);\n// 
DECL_ATOM(local);\nDECL_ATOM(constant);\nDECL_ATOM(private);\n\n// DECL_ATOM(read_only);\n// DECL_ATOM(write_only);\n// DECL_ATOM(read_write);\n// DECL_ATOM(none);\n\n// DECL_ATOM(none);\nDECL_ATOM(const);\nDECL_ATOM(restrict);\nDECL_ATOM(volatile);\n\nDECL_ATOM(address_qualifier);\nDECL_ATOM(access_qualifier);\nDECL_ATOM(type_name);\nDECL_ATOM(type_qualifier);\n// DECL_ATOM(name);\n\n#define SIZE_1   0x010000\n#define SIZE_2   0x020000\n#define SIZE_4   0x040000\n#define SIZE_8   0x080000\n#define SIZE_16  0x100000\n\necl_kv_t kv_cl_type[] = {\n    { &ATOM(char),     SIZE_1 + OCL_CHAR },\n    { &ATOM(char2),    SIZE_2 + OCL_CHAR },\n    { &ATOM(char4),    SIZE_4 + OCL_CHAR },\n    { &ATOM(char8),    SIZE_8 + OCL_CHAR },\n    { &ATOM(char16),   SIZE_16 + OCL_CHAR },\n    { &ATOM(uchar),    SIZE_1 + OCL_UCHAR },\n    { &ATOM(uchar2),   SIZE_2 + OCL_UCHAR },\n    { &ATOM(uchar4),   SIZE_4 + OCL_UCHAR },\n    { &ATOM(uchar8),   SIZE_8 + OCL_UCHAR },\n    { &ATOM(uchar16),  SIZE_16 + OCL_UCHAR },\n    { &ATOM(short),    SIZE_1 + OCL_SHORT },\n    { &ATOM(short2),   SIZE_2 + OCL_SHORT },\n    { &ATOM(short4),   SIZE_4 + OCL_SHORT },\n    { &ATOM(short8),   SIZE_8 + OCL_SHORT },\n    { &ATOM(short16),  SIZE_16 + OCL_SHORT },\n    { &ATOM(ushort),   SIZE_1 + OCL_USHORT },\n    { &ATOM(ushort2),  SIZE_2 + OCL_USHORT },\n    { &ATOM(ushort4),  SIZE_4 + OCL_USHORT },\n    { &ATOM(ushort8),  SIZE_8 + OCL_USHORT },\n    { &ATOM(ushort16), SIZE_16 + OCL_USHORT },\n    { &ATOM(int),      SIZE_1 + OCL_INT },\n    { &ATOM(int2),     SIZE_2 + OCL_INT },\n    { &ATOM(int4),     SIZE_4 + OCL_INT },\n    { &ATOM(int8),     SIZE_8 + OCL_INT },\n    { &ATOM(int16),    SIZE_16 + OCL_INT },\n    { &ATOM(uint),     SIZE_1 + OCL_UINT },\n    { &ATOM(uint2),    SIZE_2 + OCL_UINT },\n    { &ATOM(uint4),    SIZE_4 + OCL_UINT },\n    { &ATOM(uint8),    SIZE_8 + OCL_UINT },\n    { &ATOM(uint16),   SIZE_16 + OCL_UINT },\n    { &ATOM(long),     SIZE_1 + OCL_LONG },\n    { &ATOM(long2),    SIZE_2 + 
OCL_LONG },\n    { &ATOM(long4),    SIZE_4 + OCL_LONG },\n    { &ATOM(long8),    SIZE_8 + OCL_LONG },\n    { &ATOM(long16),   SIZE_16 + OCL_LONG },\n    { &ATOM(ulong),    SIZE_1 + OCL_ULONG },\n    { &ATOM(ulong2),   SIZE_2 + OCL_ULONG },\n    { &ATOM(ulong4),   SIZE_4 + OCL_ULONG },\n    { &ATOM(ulong8),   SIZE_8 + OCL_ULONG },\n    { &ATOM(ulong16),  SIZE_16 + OCL_ULONG },\n    { &ATOM(half),     SIZE_1 + OCL_HALF },\n    { &ATOM(float),    SIZE_1 + OCL_FLOAT },\n    { &ATOM(float2),   SIZE_2 + OCL_FLOAT },\n    { &ATOM(float4),   SIZE_4 + OCL_FLOAT },\n    { &ATOM(float8),   SIZE_8 + OCL_FLOAT },\n    { &ATOM(float16),  SIZE_16 + OCL_FLOAT },\n    { &ATOM(double),   SIZE_1 + OCL_DOUBLE },\n    { &ATOM(double2),  SIZE_2 + OCL_DOUBLE },\n    { &ATOM(double4),  SIZE_4 + OCL_DOUBLE },\n    { &ATOM(double8),  SIZE_8 + OCL_DOUBLE },\n    { &ATOM(double16), SIZE_16 + OCL_DOUBLE },\n    { 0, 0 }\n};\n\necl_kv_t kv_device_type[] = {  // bitfield\n    { &ATOM(cpu),         CL_DEVICE_TYPE_CPU },\n    { &ATOM(gpu),         CL_DEVICE_TYPE_GPU },\n    { &ATOM(accelerator), CL_DEVICE_TYPE_ACCELERATOR },\n    { &ATOM(default),     CL_DEVICE_TYPE_DEFAULT },\n    { &ATOM(all),         CL_DEVICE_TYPE_ALL },\n#if CL_VERSION_1_2 == 1\n    { &ATOM(custom),      CL_DEVICE_TYPE_CUSTOM },\n#endif\n    { 0, 0}\n};\n\necl_kv_t kv_fp_config[] = {  // bitfield\n    { &ATOM(denorm),      CL_FP_DENORM },\n    { &ATOM(inf_nan),     CL_FP_INF_NAN },\n    { &ATOM(round_to_nearest), CL_FP_ROUND_TO_NEAREST },\n    { &ATOM(round_to_zero), CL_FP_ROUND_TO_ZERO },\n    { &ATOM(round_to_inf), CL_FP_ROUND_TO_INF },\n    { &ATOM(fma), CL_FP_FMA },\n#if CL_VERSION_1_2 == 1\n    { &ATOM(soft_float), CL_FP_SOFT_FLOAT },\n    { &ATOM(correctly_rounded_divide_sqrt),CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT},\n#endif\n    { 0, 0 }\n};\n\necl_kv_t kv_mem_cache_type[] = {  // enum\n    { &ATOM(none), CL_NONE },\n    { &ATOM(read_only), CL_READ_ONLY_CACHE },\n    { &ATOM(read_write), CL_READ_WRITE_CACHE },\n    { 0, 0 
}\n};\n\necl_kv_t kv_local_mem_type[] = {  // enum\n    { &ATOM(local), CL_LOCAL },\n    { &ATOM(global), CL_GLOBAL },\n    { 0, 0 }\n};\n\necl_kv_t kv_exec_capabilities[] = {  // bit field\n    { &ATOM(kernel), CL_EXEC_KERNEL },\n    { &ATOM(native_kernel), CL_EXEC_NATIVE_KERNEL },\n    { 0, 0 }\n};\n\n\necl_kv_t kv_command_queue_properties[] = { // bit field\n    { &ATOM(out_of_order_exec_mode_enable), CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE },\n    { &ATOM(profiling_enable), CL_QUEUE_PROFILING_ENABLE },\n    { 0, 0}\n};\n\necl_kv_t kv_mem_flags[] = { // bit field\n    { &ATOM(read_write), CL_MEM_READ_WRITE },\n    { &ATOM(write_only), CL_MEM_WRITE_ONLY },\n    { &ATOM(read_only),  CL_MEM_READ_ONLY },\n    { &ATOM(use_host_ptr), CL_MEM_USE_HOST_PTR },\n    { &ATOM(alloc_host_ptr), CL_MEM_ALLOC_HOST_PTR },\n    { &ATOM(copy_host_ptr), CL_MEM_COPY_HOST_PTR },\n    { 0, 0 }\n};\n\n#if CL_VERSION_1_2 == 1\necl_kv_t kv_migration_flags[] = { // bit field\n    { &ATOM(host), CL_MIGRATE_MEM_OBJECT_HOST },\n    { &ATOM(content_undefined), CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED},\n    { 0, 0 }\n};\n#endif\n\necl_kv_t kv_mem_object_type[] = { // enum\n    { &ATOM(buffer), CL_MEM_OBJECT_BUFFER },\n    { &ATOM(image2d), CL_MEM_OBJECT_IMAGE2D },\n    { &ATOM(image3d), CL_MEM_OBJECT_IMAGE3D },\n#if CL_VERSION_1_2 == 1\n    { &ATOM(image2d_array), CL_MEM_OBJECT_IMAGE2D_ARRAY },\n    { &ATOM(image1d), CL_MEM_OBJECT_IMAGE1D },\n    { &ATOM(image1d_array), CL_MEM_OBJECT_IMAGE1D_ARRAY },\n    { &ATOM(image1d_buffer), CL_MEM_OBJECT_IMAGE1D_BUFFER },\n#endif\n#if CL_VERSION_2_0 == 1\n    { &ATOM(pipe), CL_MEM_OBJECT_PIPE },\n#endif\n    { 0, 0 }\n};\n\necl_kv_t kv_addressing_mode[] = { // enum\n    { &ATOM(none), CL_ADDRESS_NONE },\n    { &ATOM(clamp_to_edge), CL_ADDRESS_CLAMP_TO_EDGE },\n    { &ATOM(clamp), CL_ADDRESS_CLAMP },\n    { &ATOM(repeat), CL_ADDRESS_REPEAT },\n    { 0, 0 }\n};\n\necl_kv_t kv_filter_mode[] = { // enum\n    { &ATOM(nearest), CL_FILTER_NEAREST },\n    { 
&ATOM(linear),  CL_FILTER_LINEAR },\n    { 0, 0 }\n};\n\necl_kv_t kv_map_flags[] = { // bitfield\n    { &ATOM(read), CL_MAP_READ },\n    { &ATOM(write), CL_MAP_WRITE },\n    { 0, 0 }\n};\n\necl_kv_t kv_build_status[] = { // enum\n    { &ATOM(success), CL_BUILD_SUCCESS },\n    { &ATOM(none), CL_BUILD_NONE },\n    { &ATOM(error), CL_BUILD_ERROR },\n    { &ATOM(in_progress), CL_BUILD_IN_PROGRESS },\n    { 0, 0 }\n};\n\n#if CL_VERSION_1_2 == 1\necl_kv_t kv_program_binary_type[] = { // enum\n    { &ATOM(none), CL_PROGRAM_BINARY_TYPE_NONE },\n    { &ATOM(compiled_object),  CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT },\n    { &ATOM(library), CL_PROGRAM_BINARY_TYPE_LIBRARY },\n    { &ATOM(executable), CL_PROGRAM_BINARY_TYPE_EXECUTABLE },\n    { 0, 0 }\n};\n#endif\n\necl_kv_t kv_command_type[] = { // enum\n    { &ATOM(ndrange_kernel), CL_COMMAND_NDRANGE_KERNEL },\n    { &ATOM(task),           CL_COMMAND_TASK },\n    { &ATOM(native_kernel),  CL_COMMAND_NATIVE_KERNEL },\n    { &ATOM(read_buffer),    CL_COMMAND_READ_BUFFER },\n    { &ATOM(write_buffer),   CL_COMMAND_WRITE_BUFFER },\n    { &ATOM(copy_buffer),    CL_COMMAND_COPY_BUFFER },\n    { &ATOM(read_image),     CL_COMMAND_READ_IMAGE },\n    { &ATOM(write_image),    CL_COMMAND_WRITE_IMAGE },\n    { &ATOM(copy_image),     CL_COMMAND_COPY_IMAGE },\n    { &ATOM(copy_image_to_buffer), CL_COMMAND_COPY_IMAGE_TO_BUFFER },\n    { &ATOM(copy_buffer_to_image), CL_COMMAND_COPY_BUFFER_TO_IMAGE },\n    { &ATOM(map_buffer), CL_COMMAND_MAP_BUFFER },\n    { &ATOM(map_image), CL_COMMAND_MAP_IMAGE },\n    { &ATOM(unmap_mem_object), CL_COMMAND_UNMAP_MEM_OBJECT },\n    { &ATOM(marker), CL_COMMAND_MARKER  },\n    { &ATOM(aquire_gl_objects), CL_COMMAND_ACQUIRE_GL_OBJECTS },\n    { &ATOM(release_gl_objects), CL_COMMAND_RELEASE_GL_OBJECTS },\n#if CL_VERSION_12 == 1\n    { &ATOM(migreate_mem_objects), CL_COMMAND_MIGRATE_MEM_OBJECTS },\n    { &ATOM(fill_buffer), CL_COMMAND_FILL_BUFFER },\n    { &ATOM(fill_image), CL_COMMAND_FILL_IMAGE },\n#endif\n    { 
0, 0}\n};\n\necl_kv_t kv_execution_status[] = { // enum\n    { &ATOM(complete),   CL_COMPLETE   },   // same as CL_SUCCESS\n    { &ATOM(running),    CL_RUNNING    },\n    { &ATOM(submitted),  CL_SUBMITTED  },\n    { &ATOM(queued),     CL_QUEUED     },\n    // the error codes (negative values)\n    { &ATOM(device_not_found), CL_DEVICE_NOT_FOUND },\n    { &ATOM(device_not_available), CL_DEVICE_NOT_AVAILABLE },\n    { &ATOM(compiler_not_available), CL_COMPILER_NOT_AVAILABLE },\n    { &ATOM(mem_object_allocation_failure), CL_MEM_OBJECT_ALLOCATION_FAILURE },\n    { &ATOM(out_of_resources), CL_OUT_OF_RESOURCES },\n    { &ATOM(out_of_host_memory), CL_OUT_OF_HOST_MEMORY },\n    { &ATOM(profiling_info_not_available), CL_PROFILING_INFO_NOT_AVAILABLE },\n    { &ATOM(mem_copy_overlap), CL_MEM_COPY_OVERLAP },\n    { &ATOM(image_format_mismatch), CL_IMAGE_FORMAT_MISMATCH },\n    { &ATOM(image_format_not_supported), CL_IMAGE_FORMAT_NOT_SUPPORTED },\n    { &ATOM(build_program_failure), CL_BUILD_PROGRAM_FAILURE },\n    { &ATOM(map_failure), CL_MAP_FAILURE },\n    { &ATOM(invalid_value), CL_INVALID_VALUE },\n    { &ATOM(invalid_device_type), CL_INVALID_DEVICE_TYPE },\n    { &ATOM(invalid_platform), CL_INVALID_PLATFORM },\n    { &ATOM(invalid_device), CL_INVALID_DEVICE },\n    { &ATOM(invalid_context), CL_INVALID_CONTEXT },\n    { &ATOM(invalid_queue_properties), CL_INVALID_QUEUE_PROPERTIES },\n    { &ATOM(invalid_command_queue), CL_INVALID_COMMAND_QUEUE },\n    { &ATOM(invalid_host_ptr), CL_INVALID_HOST_PTR },\n    { &ATOM(invalid_mem_object), CL_INVALID_MEM_OBJECT },\n    { &ATOM(invalid_image_format_descriptor), CL_INVALID_IMAGE_FORMAT_DESCRIPTOR },\n    { &ATOM(invalid_image_size), CL_INVALID_IMAGE_SIZE },\n    { &ATOM(invalid_sampler), CL_INVALID_SAMPLER },\n    { &ATOM(invalid_binary), CL_INVALID_BINARY },\n    { &ATOM(invalid_build_options), CL_INVALID_BUILD_OPTIONS },\n    { &ATOM(invalid_program), CL_INVALID_PROGRAM },\n    { &ATOM(invalid_program_executable), 
CL_INVALID_PROGRAM_EXECUTABLE },\n    { &ATOM(invalid_kernel_name), CL_INVALID_KERNEL_NAME },\n    { &ATOM(invalid_kernel_definition), CL_INVALID_KERNEL_DEFINITION },\n    { &ATOM(invalid_kernel), CL_INVALID_KERNEL },\n    { &ATOM(invalid_arg_index), CL_INVALID_ARG_INDEX },\n    { &ATOM(invalid_arg_value), CL_INVALID_ARG_VALUE },\n    { &ATOM(invalid_arg_size), CL_INVALID_ARG_SIZE },\n    { &ATOM(invalid_kernel_args), CL_INVALID_KERNEL_ARGS },\n    { &ATOM(invalid_work_dimension), CL_INVALID_WORK_DIMENSION },\n    { &ATOM(invalid_work_group_size), CL_INVALID_WORK_GROUP_SIZE },\n    { &ATOM(invalid_work_item_size), CL_INVALID_WORK_ITEM_SIZE },\n    { &ATOM(invalid_global_offset), CL_INVALID_GLOBAL_OFFSET },\n    { &ATOM(invalid_event_wait_list), CL_INVALID_EVENT_WAIT_LIST },\n    { &ATOM(invalid_event), CL_INVALID_EVENT },\n    { &ATOM(invalid_operation), CL_INVALID_OPERATION },\n    { &ATOM(invalid_gl_object), CL_INVALID_GL_OBJECT },\n    { &ATOM(invalid_buffer_size), CL_INVALID_BUFFER_SIZE },\n    { &ATOM(invalid_mip_level), CL_INVALID_MIP_LEVEL },\n    { &ATOM(invalid_global_work_size), CL_INVALID_GLOBAL_WORK_SIZE },\n#ifdef CL_DEVICE_PARTITION_FAILED\n    { &ATOM(device_partition_failed), CL_DEVICE_PARTITION_FAILED },\n#endif\n#ifdef CL_INVALID_DEVICE_PARTITION_COUNT\n    { &ATOM(invalid_device_partition_count), CL_INVALID_DEVICE_PARTITION_COUNT },\n#endif\n    { 0, 0 }\n};\n\nDECL_ATOM(snorm_int8);\nDECL_ATOM(snorm_int16);\nDECL_ATOM(unorm_int8);\nDECL_ATOM(unorm_int16);\nDECL_ATOM(unorm_int24);\nDECL_ATOM(unorm_short_565);\nDECL_ATOM(unorm_short_555);\nDECL_ATOM(unorm_int_101010);\nDECL_ATOM(signed_int8);\nDECL_ATOM(signed_int16);\nDECL_ATOM(signed_int32);\nDECL_ATOM(unsigned_int8);\nDECL_ATOM(unsigned_int16);\nDECL_ATOM(unsigned_int32);\nDECL_ATOM(half_float);\n// DECL_ATOM(float);\n\necl_kv_t kv_channel_type[] = { // enum\n    { &ATOM(snorm_int8), CL_SNORM_INT8 },\n    { &ATOM(snorm_int16), CL_SNORM_INT16 },\n    { &ATOM(unorm_int8), CL_UNORM_INT8 },\n    { 
&ATOM(unorm_int16), CL_UNORM_INT16 },\n    { &ATOM(unorm_short_565), CL_UNORM_SHORT_565 },\n    { &ATOM(unorm_short_555), CL_UNORM_SHORT_555 },\n    { &ATOM(unorm_int_101010), CL_UNORM_INT_101010 },\n    { &ATOM(signed_int8), CL_SIGNED_INT8 },\n    { &ATOM(signed_int16), CL_SIGNED_INT16 },\n    { &ATOM(signed_int32), CL_SIGNED_INT32 },\n    { &ATOM(unsigned_int8), CL_UNSIGNED_INT8 },\n    { &ATOM(unsigned_int16), CL_UNSIGNED_INT16 },\n    { &ATOM(unsigned_int32), CL_UNSIGNED_INT32 },\n    { &ATOM(half_float), CL_HALF_FLOAT },\n    { &ATOM(float), CL_FLOAT },\n#if (CL_VERSION_1_2 == 1) && defined(CL_UNORM_INT24)\n    { &ATOM(unorm_int24), CL_UNORM_INT24 },\n#endif\n    { 0, 0 }\n};\n\n// channel order\nDECL_ATOM(r);\nDECL_ATOM(a);\nDECL_ATOM(rg);\nDECL_ATOM(ra);\nDECL_ATOM(rgb);\nDECL_ATOM(rgba);\nDECL_ATOM(bgra);\nDECL_ATOM(argb);\nDECL_ATOM(intensity);\nDECL_ATOM(luminance);\nDECL_ATOM(rx);\nDECL_ATOM(rgx);\nDECL_ATOM(rgbx);\n// DECL_ATOM(depth);\nDECL_ATOM(depth_stencil);\n\n// 1.1 features! 
in apple 1.0?\n#ifndef CL_Rx\n#define CL_Rx                                       0x10BA\n#endif \n\n#ifndef CL_RGx\n#define CL_RGx                                      0x10BB\n#endif\n\n#ifndef CL_RGBx\n#define CL_RGBx                                     0x10BC\n#endif\n\necl_kv_t kv_channel_order[] = {\n    { &ATOM(r), CL_R },\n    { &ATOM(a), CL_A },\n    { &ATOM(rg), CL_RG },\n    { &ATOM(ra), CL_RA },\n    { &ATOM(rgb), CL_RGB },\n    { &ATOM(rgba), CL_RGBA },\n    { &ATOM(bgra), CL_BGRA },\n    { &ATOM(argb), CL_ARGB },\n    { &ATOM(intensity), CL_INTENSITY },\n    { &ATOM(luminance), CL_LUMINANCE },\n    { &ATOM(rx), CL_Rx },\n    { &ATOM(rgx), CL_RGx },\n    { &ATOM(rgbx), CL_RGBx },\n#if CL_VERSION_1_2 == 1\n#if defined(CL_DEPTH)\n    { &ATOM(depth), CL_DEPTH },\n#endif\n#if defined(CL_DEPTH_STENCIL)\n    { &ATOM(depth_stencil), CL_DEPTH_STENCIL },\n#endif\n#endif\n    { 0, 0 }\n};\n\n// partition_property\nDECL_ATOM(equally);\nDECL_ATOM(by_counts);\nDECL_ATOM(by_counts_list_end);\nDECL_ATOM(by_affinity_domain);\n\n#if CL_VERSION_1_2 == 1\necl_kv_t kv_device_partition_property[] = {\n    { &ATOM(equally), CL_DEVICE_PARTITION_EQUALLY },\n    { &ATOM(by_counts), CL_DEVICE_PARTITION_BY_COUNTS },\n    { &ATOM(by_affinity_domain), CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN },\n    { &ATOM(undefined), 0 },\n    { 0, 0}\n};\n#endif\n\nDECL_ATOM(numa);\nDECL_ATOM(l4_cache);\nDECL_ATOM(l3_cache);\nDECL_ATOM(l2_cache);\nDECL_ATOM(l1_cache);\nDECL_ATOM(next_partitionable);\n\n#if CL_VERSION_1_2 == 1    \necl_kv_t kv_device_affinity_domain[] = { \n    { &ATOM(numa), CL_DEVICE_AFFINITY_DOMAIN_NUMA },\n    { &ATOM(l4_cache), CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE },\n    { &ATOM(l3_cache), CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE },\n    { &ATOM(l2_cache), CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE },\n    { &ATOM(l1_cache), CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE },\n    { &ATOM(next_partitionable), CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE },\n    { &ATOM(undefined), 0 },\n    { 0, 
0}\n};\n#endif\n\n// Device info\nDECL_ATOM(type);\nDECL_ATOM(vendor_id);\nDECL_ATOM(max_compute_units);\nDECL_ATOM(max_work_item_dimensions);\nDECL_ATOM(max_work_group_size);\nDECL_ATOM(max_work_item_sizes);\nDECL_ATOM(preferred_vector_width_char);\nDECL_ATOM(preferred_vector_width_short);\nDECL_ATOM(preferred_vector_width_int);\nDECL_ATOM(preferred_vector_width_long);\nDECL_ATOM(preferred_vector_width_float);\nDECL_ATOM(preferred_vector_width_double);\nDECL_ATOM(max_clock_frequency);\nDECL_ATOM(address_bits);\nDECL_ATOM(max_read_image_args);\nDECL_ATOM(max_write_image_args);\nDECL_ATOM(max_read_write_image_args);\nDECL_ATOM(il_version);    \nDECL_ATOM(max_mem_alloc_size);\nDECL_ATOM(image2d_max_width);\nDECL_ATOM(image2d_max_height);\nDECL_ATOM(image3d_max_width);\nDECL_ATOM(image3d_max_height);\nDECL_ATOM(image3d_max_depth);\nDECL_ATOM(image_support);\nDECL_ATOM(max_parameter_size);\nDECL_ATOM(max_samplers);\nDECL_ATOM(mem_base_addr_align);\nDECL_ATOM(min_data_type_align_size);\nDECL_ATOM(single_fp_config);\nDECL_ATOM(global_mem_cache_type);\nDECL_ATOM(global_mem_cacheline_size);\nDECL_ATOM(global_mem_cache_size);\nDECL_ATOM(global_mem_size);\nDECL_ATOM(max_constant_buffer_size);\nDECL_ATOM(max_constant_args);\nDECL_ATOM(local_mem_type);\nDECL_ATOM(local_mem_size);\nDECL_ATOM(error_correction_support);\nDECL_ATOM(profiling_timer_resolution);\nDECL_ATOM(endian_little);\nDECL_ATOM(available);\nDECL_ATOM(compiler_available);\nDECL_ATOM(execution_capabilities);\nDECL_ATOM(queue_properties);\nDECL_ATOM(name);\nDECL_ATOM(vendor);\nDECL_ATOM(driver_version);\nDECL_ATOM(profile);\nDECL_ATOM(version);\nDECL_ATOM(extensions);\nDECL_ATOM(platform);\n\n// cl_khr_fp64 extension || CL_VERSION_1_2 == 1\nDECL_ATOM(double_fp_config);\n// cl_khr_fp16 extension || CL_VERSION_1_2 == 1\nDECL_ATOM(half_fp_config);\n// 
1.2\nDECL_ATOM(preferred_vector_width_half);\nDECL_ATOM(host_unified_memory);\nDECL_ATOM(native_vector_width_char);\nDECL_ATOM(native_vector_width_short);\nDECL_ATOM(native_vector_width_int);\nDECL_ATOM(native_vector_width_long);\nDECL_ATOM(native_vector_width_float);\nDECL_ATOM(native_vector_width_double);\nDECL_ATOM(native_vector_width_half);\nDECL_ATOM(opencl_c_version);\nDECL_ATOM(linker_available);\nDECL_ATOM(built_in_kernels);\nDECL_ATOM(image_max_buffer_size);\nDECL_ATOM(image_max_array_size);\nDECL_ATOM(parent_device);\nDECL_ATOM(partition_max_sub_devices);\nDECL_ATOM(partition_properties);\nDECL_ATOM(partition_affinity_domain);\nDECL_ATOM(partition_type);\n// DECL_ATOM(reference_count);\nDECL_ATOM(preferred_interop_user_sync);\nDECL_ATOM(printf_buffer_size);\nDECL_ATOM(image_pitch_alignment);\nDECL_ATOM(image_base_address_alignment);\n// cl_nv_device_attribute_query extension\nDECL_ATOM(compute_capability_major_nv);\nDECL_ATOM(compute_capability_minor_nv);\nDECL_ATOM(registers_per_block_nv);\nDECL_ATOM(warp_size_nv);\nDECL_ATOM(gpu_overlap_nv);\nDECL_ATOM(kernel_exec_timeout_nv);\nDECL_ATOM(device_integrated_memory_nv);\n\n// Map device info index 0...N => cl_device_info x Data type\necl_info_t device_info[] = \n{\n    { &ATOM(type), CL_DEVICE_TYPE, false, OCL_DEVICE_TYPE, kv_device_type, 0 },\n    { &ATOM(vendor_id), CL_DEVICE_VENDOR_ID, false, OCL_UINT, 0, 0 },\n    { &ATOM(max_compute_units), CL_DEVICE_MAX_COMPUTE_UNITS, false, OCL_UINT, 0, 0 },\n    { &ATOM(max_work_item_dimensions), CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, false, OCL_UINT, 0, 0 },\n    { &ATOM(max_work_group_size), CL_DEVICE_MAX_WORK_GROUP_SIZE, false, OCL_SIZE, 0, 0 },\n    { &ATOM(max_work_item_sizes), CL_DEVICE_MAX_WORK_ITEM_SIZES, true, OCL_SIZE, 0, 0 },\n    { &ATOM(preferred_vector_width_char), CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, false, OCL_UINT, 0, 0 },\n    { &ATOM(preferred_vector_width_short), CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, false, OCL_UINT,  0, 0 },\n    { 
&ATOM(preferred_vector_width_int), CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, false, OCL_UINT, 0, 0 },\n    { &ATOM(preferred_vector_width_long), CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, false,OCL_UINT, 0, 0 },\n    { &ATOM(preferred_vector_width_float), CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, false, OCL_UINT, 0, 0 },\n    { &ATOM(preferred_vector_width_double), CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, false, OCL_UINT, 0, 0 },\n    { &ATOM(max_clock_frequency), CL_DEVICE_MAX_CLOCK_FREQUENCY, false, OCL_UINT, 0, 0 },\n    { &ATOM(address_bits), CL_DEVICE_ADDRESS_BITS, false, OCL_UINT, 0, 0 },\n    { &ATOM(max_read_image_args), CL_DEVICE_MAX_READ_IMAGE_ARGS, false, OCL_UINT, 0, 0 },\n    { &ATOM(max_write_image_args), CL_DEVICE_MAX_WRITE_IMAGE_ARGS, false, OCL_UINT, 0, 0 },\n    { &ATOM(max_mem_alloc_size), CL_DEVICE_MAX_MEM_ALLOC_SIZE, false, OCL_ULONG, 0, 0 },\n    { &ATOM(image2d_max_width), CL_DEVICE_IMAGE2D_MAX_WIDTH, false, OCL_SIZE, 0, 0 },\n    { &ATOM(image2d_max_height), CL_DEVICE_IMAGE2D_MAX_HEIGHT, false, OCL_SIZE, 0, 0 },\n    { &ATOM(image3d_max_width), CL_DEVICE_IMAGE3D_MAX_WIDTH, false, OCL_SIZE, 0, 0 },\n    { &ATOM(image3d_max_height), CL_DEVICE_IMAGE3D_MAX_HEIGHT, false, OCL_SIZE, 0, 0 },\n    { &ATOM(image3d_max_depth), CL_DEVICE_IMAGE3D_MAX_DEPTH, false, OCL_SIZE, 0, 0 },\n    { &ATOM(image_support), CL_DEVICE_IMAGE_SUPPORT, false, OCL_BOOL, 0, 0 },\n    { &ATOM(max_parameter_size), CL_DEVICE_MAX_PARAMETER_SIZE, false, OCL_SIZE, 0, 0 },\n    { &ATOM(max_samplers), CL_DEVICE_MAX_SAMPLERS, false, OCL_UINT, 0, 0 },\n    { &ATOM(mem_base_addr_align), CL_DEVICE_MEM_BASE_ADDR_ALIGN, false, OCL_UINT, 0, 0 },\n    { &ATOM(min_data_type_align_size), CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, false, OCL_UINT, 0, 0 },\n    { &ATOM(single_fp_config), CL_DEVICE_SINGLE_FP_CONFIG, false, OCL_DEVICE_FP_CONFIG, kv_fp_config, 0 },\n    { &ATOM(global_mem_cache_type), CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, false, OCL_DEVICE_GLOBAL_MEM_CACHE_TYPE, kv_mem_cache_type, 0 },\n    { 
&ATOM(global_mem_cacheline_size), CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, false, OCL_UINT, 0, 0 },\n    { &ATOM(global_mem_cache_size), CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, false, OCL_ULONG, 0, 0 },\n    { &ATOM(global_mem_size), CL_DEVICE_GLOBAL_MEM_SIZE, false, OCL_ULONG, 0, 0 },\n    { &ATOM(max_constant_buffer_size), CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,  false, OCL_ULONG, 0, 0 },\n    { &ATOM(max_constant_args), CL_DEVICE_MAX_CONSTANT_ARGS, false, OCL_UINT, 0, 0 },\n    { &ATOM(local_mem_type), CL_DEVICE_LOCAL_MEM_TYPE, false, OCL_DEVICE_LOCAL_MEM_TYPE, kv_local_mem_type, 0 },\n    { &ATOM(local_mem_size), CL_DEVICE_LOCAL_MEM_SIZE,  false, OCL_ULONG, 0, 0 },\n    { &ATOM(error_correction_support), CL_DEVICE_ERROR_CORRECTION_SUPPORT, false,  OCL_BOOL, 0, 0 },\n    { &ATOM(profiling_timer_resolution), CL_DEVICE_PROFILING_TIMER_RESOLUTION, false,  OCL_SIZE, 0, 0 },\n    { &ATOM(endian_little), CL_DEVICE_ENDIAN_LITTLE, false, OCL_BOOL, 0, 0 },\n    { &ATOM(available), CL_DEVICE_AVAILABLE,  false, OCL_BOOL, 0, 0 },\n    { &ATOM(compiler_available), CL_DEVICE_COMPILER_AVAILABLE, false, OCL_BOOL, 0, 0 },\n    { &ATOM(execution_capabilities), CL_DEVICE_EXECUTION_CAPABILITIES, false, OCL_DEVICE_EXEC_CAPABILITIES, kv_exec_capabilities, 0 },\n    { &ATOM(queue_properties), CL_DEVICE_QUEUE_PROPERTIES, false, OCL_QUEUE_PROPERTIES, kv_command_queue_properties, 0 },\n    { &ATOM(name), CL_DEVICE_NAME, false, OCL_STRING, 0, 0 },\n    { &ATOM(vendor), CL_DEVICE_VENDOR, false, OCL_STRING, 0, 0 },\n    { &ATOM(driver_version), CL_DRIVER_VERSION, false, OCL_STRING, 0, 0 },\n    { &ATOM(profile), CL_DEVICE_PROFILE, false, OCL_STRING, 0, 0 },\n    { &ATOM(version), CL_DEVICE_VERSION, false, OCL_STRING, 0, 0 },\n    { &ATOM(extensions), CL_DEVICE_EXTENSIONS, false, OCL_STRING, 0, 0 },\n    { &ATOM(platform), CL_DEVICE_PLATFORM, false, OCL_PLATFORM, 0, 0 },\n#if CL_VERSION_1_1 == 1\n    { &ATOM(preferred_vector_width_half), CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF,false, OCL_UINT, 0, 0 },\n    { 
&ATOM(host_unified_memory), CL_DEVICE_HOST_UNIFIED_MEMORY,false,OCL_BOOL,0, 0},\n    { &ATOM(native_vector_width_char), CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR,false,OCL_UINT, 0, 0},\n    { &ATOM(native_vector_width_short), CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT,false,OCL_UINT, 0, 0},\n    { &ATOM(native_vector_width_int), CL_DEVICE_NATIVE_VECTOR_WIDTH_INT,false,OCL_UINT, 0, 0},\n    { &ATOM(native_vector_width_long), CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG,false,OCL_UINT, 0, 0},\n    { &ATOM(native_vector_width_float), CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT,false,OCL_UINT, 0, 0},\n    { &ATOM(native_vector_width_double), CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE,false,OCL_UINT, 0, 0},\n    { &ATOM(native_vector_width_half), CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF,false,OCL_UINT, 0, 0},\n    { &ATOM(opencl_c_version), CL_DEVICE_OPENCL_C_VERSION,false,OCL_STRING, 0, 0},\n#endif\n    // cl_khr_fp64 extension || CL_VERSION_1_2 == 1\n#if CL_DEVICE_DOUBLE_FP_CONFIG\n    { &ATOM(double_fp_config), CL_DEVICE_DOUBLE_FP_CONFIG, false, OCL_DEVICE_FP_CONFIG, kv_fp_config, 0 },\n#endif\n    // cl_khr_fp16 extension || CL_VERSION_1_2 == 1\n#if CL_DEVICE_HALF_FP_CONFIG\n    { &ATOM(half_fp_config), CL_DEVICE_HALF_FP_CONFIG, false, OCL_DEVICE_FP_CONFIG, kv_fp_config, 0 },\n#endif\n#if CL_VERSION_1_2 == 1\n    { &ATOM(linker_available), CL_DEVICE_LINKER_AVAILABLE,false,OCL_BOOL, 0, 0},\n    { &ATOM(built_in_kernels), CL_DEVICE_BUILT_IN_KERNELS,false, OCL_STRING, 0, 0},\n    { &ATOM(image_max_buffer_size), CL_DEVICE_IMAGE_MAX_BUFFER_SIZE,false,OCL_SIZE, 0, 0},\n    { &ATOM(image_max_array_size), CL_DEVICE_IMAGE_MAX_ARRAY_SIZE,false,OCL_SIZE, 0, 0},\n    { &ATOM(parent_device), CL_DEVICE_PARENT_DEVICE,false,OCL_DEVICE, 0, 0},\n    { &ATOM(partition_max_sub_devices), CL_DEVICE_PARTITION_MAX_SUB_DEVICES,false,OCL_SIZE, 0, 0},\n    { &ATOM(partition_properties), CL_DEVICE_PARTITION_PROPERTIES,true,\n      OCL_ENUM, kv_device_partition_property, 0},\n\n    { &ATOM(partition_affinity_domain), 
CL_DEVICE_PARTITION_AFFINITY_DOMAIN,false,OCL_ENUM, kv_device_affinity_domain, 0 },\n\n    { &ATOM(partition_type), CL_DEVICE_PARTITION_TYPE, false, OCL_DEVICE_PARTITION, 0, 0},\n    { &ATOM(reference_count), CL_DEVICE_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },\n    { &ATOM(preferred_interop_user_sync), CL_DEVICE_PREFERRED_INTEROP_USER_SYNC,false, OCL_BOOL, 0, 0},\n    { &ATOM(printf_buffer_size), CL_DEVICE_PRINTF_BUFFER_SIZE,false, OCL_SIZE, 0, 0 },\n#ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT\n    { &ATOM(image_pitch_alignment), CL_DEVICE_IMAGE_PITCH_ALIGNMENT, false, OCL_SIZE, 0, 0 },\n#endif\n#ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT\n    { &ATOM(image_base_address_alignment), CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, false, OCL_SIZE, 0, 0 },\n#endif\n#endif\n\n    // cl_nv_device_attribute_query extension\n#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV\n    { &ATOM(compute_capability_major_nv), CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, false, OCL_UINT, 0, 0},\n#endif\n#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV\n    { &ATOM(compute_capability_minor_nv), CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, false, OCL_UINT, 0, 0},\n#endif\n#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV\n    { &ATOM(registers_per_block_nv),CL_DEVICE_REGISTERS_PER_BLOCK_NV, false, OCL_UINT, 0, 0},\n#endif\n#ifdef CL_DEVICE_WARP_SIZE_NV\n    { &ATOM(warp_size_nv),CL_DEVICE_WARP_SIZE_NV, false, OCL_UINT, 0, 0},\n#endif\n#ifdef CL_DEVICE_GPU_OVERLAP_NV\n    { &ATOM(gpu_overlap_nv),CL_DEVICE_GPU_OVERLAP_NV, false, OCL_BOOL, 0, 0},\n#endif\n#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV\n    { &ATOM(kernel_exec_timeout_nv), CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, false, OCL_BOOL, 0, 0},\n#endif\n#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV\n    { &ATOM(device_integrated_memory_nv),CL_DEVICE_INTEGRATED_MEMORY_NV, false, OCL_BOOL, 0, 0},\n#endif\n\n#ifdef CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n    { &ATOM(max_read_write_image_args), CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, false, OCL_UINT, 0, 0 },\n#endif\n\n#ifdef CL_DEVICE_IL_VERSION\n    
{ &ATOM(il_version), CL_DEVICE_IL_VERSION, false, OCL_STRING, 0, 0 },\n#endif\n\n};\n\n// Map device info index 0...N => cl_device_info x Data type\necl_info_t platform_info[] = \n{\n    { &ATOM(profile), CL_PLATFORM_PROFILE, false, OCL_STRING, 0, 0 },\n    { &ATOM(version), CL_PLATFORM_VERSION, false, OCL_STRING, 0, 0 },\n    { &ATOM(name),    CL_PLATFORM_NAME,    false, OCL_STRING, 0, 0 },\n    { &ATOM(vendor),  CL_PLATFORM_VENDOR,  false, OCL_STRING, 0, 0 },\n    { &ATOM(extensions), CL_PLATFORM_EXTENSIONS, false, OCL_STRING, 0, 0 }\n};\n\necl_info_t context_info[] =\n{\n    { &ATOM(reference_count), CL_CONTEXT_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },\n    { &ATOM(devices), CL_CONTEXT_DEVICES, true, OCL_DEVICE, 0, 0 },\n    { &ATOM(properties), CL_CONTEXT_PROPERTIES, true, OCL_INT, 0, 0 }\n};\n\necl_info_t queue_info[] = \n{\n    { &ATOM(context), CL_QUEUE_CONTEXT, false, OCL_CONTEXT, 0, 0 },\n    { &ATOM(device),  CL_QUEUE_DEVICE, false, OCL_DEVICE, 0, 0 },\n    { &ATOM(reference_count), CL_QUEUE_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },\n    { &ATOM(properties), CL_QUEUE_PROPERTIES, false, OCL_QUEUE_PROPERTIES, kv_command_queue_properties, 0 }\n};\n\necl_info_t mem_info[] =\n{\n    { &ATOM(object_type), CL_MEM_TYPE, false, OCL_MEM_OBJECT_TYPE, kv_mem_object_type, 0 },\n    { &ATOM(flags), CL_MEM_FLAGS, false, OCL_MEM_FLAGS, kv_mem_flags, 0 },\n    { &ATOM(size),  CL_MEM_SIZE,  false, OCL_SIZE, 0, 0 },\n    // FIXME: pointer!! 
map it (binary resource?)\n    { &ATOM(host_ptr), CL_MEM_HOST_PTR, false, OCL_POINTER, 0, 0 }, \n    { &ATOM(map_count), CL_MEM_MAP_COUNT, false, OCL_UINT, 0, 0 },\n    { &ATOM(reference_count), CL_MEM_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },\n    { &ATOM(context), CL_MEM_CONTEXT, false, OCL_CONTEXT, 0, 0 }\n};\n\necl_info_t image_info[] =\n{\n    { &ATOM(format), CL_IMAGE_FORMAT, false, OCL_IMAGE_FORMAT, 0, 0 },\n    { &ATOM(element_size), CL_IMAGE_ELEMENT_SIZE, false, OCL_SIZE, 0, 0 },\n    { &ATOM(row_pitch),  CL_IMAGE_ROW_PITCH,  false, OCL_SIZE, 0, 0 },\n    { &ATOM(slice_pitch), CL_IMAGE_SLICE_PITCH, false, OCL_SIZE, 0, 0 },\n    { &ATOM(width), CL_IMAGE_WIDTH, false, OCL_SIZE, 0, 0 },\n    { &ATOM(height), CL_IMAGE_HEIGHT, false, OCL_SIZE, 0, 0 },\n    { &ATOM(depth), CL_IMAGE_DEPTH, false, OCL_SIZE, 0, 0 }\n};\n\necl_info_t sampler_info[] = \n{\n    { &ATOM(reference_count), CL_SAMPLER_REFERENCE_COUNT, false, OCL_UINT, 0, 0},\n    { &ATOM(context), CL_SAMPLER_CONTEXT, false,  OCL_CONTEXT, 0, 0 },\n    { &ATOM(normalized_coords), CL_SAMPLER_NORMALIZED_COORDS, false, OCL_BOOL, 0, 0 },\n    {  &ATOM(addressing_mode), CL_SAMPLER_ADDRESSING_MODE, false, OCL_SAMPLER_ADDRESSING_MODE, kv_addressing_mode, 0 },\n    { &ATOM(filter_mode), CL_SAMPLER_FILTER_MODE, false, OCL_SAMPLER_FILTER_MODE, kv_filter_mode, 0 }\n};\n\necl_info_t program_info[] = {\n    { &ATOM(reference_count), CL_PROGRAM_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },\n    { &ATOM(context), CL_PROGRAM_CONTEXT, false, OCL_CONTEXT, 0, 0 },\n    { &ATOM(num_devices), CL_PROGRAM_NUM_DEVICES, false, OCL_UINT, 0, 0 },\n    { &ATOM(devices), CL_PROGRAM_DEVICES, true, OCL_DEVICE, 0, 0 },\n    { &ATOM(source), CL_PROGRAM_SOURCE, false, OCL_STRING, 0, 0 },\n    { &ATOM(binary_sizes), CL_PROGRAM_BINARY_SIZES, true, OCL_SIZE, 0, 0 },\n    { &ATOM(binaries), CL_PROGRAM_BINARIES, true, OCL_STRING, 0, 0 }\n};\n\necl_info_t build_info[] = {\n    { &ATOM(status), CL_PROGRAM_BUILD_STATUS, false, OCL_BUILD_STATUS, 
kv_build_status, 0 },\n    { &ATOM(options), CL_PROGRAM_BUILD_OPTIONS, false, OCL_STRING, 0, 0 },\n    { &ATOM(log), CL_PROGRAM_BUILD_LOG, false, OCL_STRING, 0, 0 },\n#if CL_VERSION_1_2 == 1\n    { &ATOM(binary_type), CL_PROGRAM_BINARY_TYPE, false, OCL_PROGRAM_BINARY_TYPE, kv_program_binary_type, 0 },\n#endif\n};\n\necl_info_t kernel_info[] = {\n    { &ATOM(function_name), CL_KERNEL_FUNCTION_NAME, false, OCL_STRING, 0, 0 },\n    { &ATOM(num_args), CL_KERNEL_NUM_ARGS, false, OCL_UINT, 0, 0 },\n    { &ATOM(reference_count), CL_KERNEL_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },\n    { &ATOM(context), CL_KERNEL_CONTEXT, false, OCL_CONTEXT, 0, 0 },\n    { &ATOM(program), CL_KERNEL_PROGRAM, false, OCL_PROGRAM, 0, 0 }\n};\n\necl_info_t workgroup_info[] = {\n    { &ATOM(work_group_size), CL_KERNEL_WORK_GROUP_SIZE, false, OCL_SIZE, 0, sizeof(size_t)},\n    { &ATOM(compile_work_group_size), CL_KERNEL_COMPILE_WORK_GROUP_SIZE, true, OCL_SIZE, 0, sizeof(size_t[3])},\n    { &ATOM(local_mem_size), CL_KERNEL_LOCAL_MEM_SIZE, false, OCL_ULONG, 0, sizeof(cl_ulong)},\n#if CL_VERSION_1_1 == 1\n    { &ATOM(preferred_work_group_size_multiple), CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, false,  OCL_SIZE, 0, sizeof(size_t) },\n    { &ATOM(private_mem_size), CL_KERNEL_PRIVATE_MEM_SIZE, false, OCL_ULONG, 0, sizeof(cl_ulong)},\n#endif\n#if CL_VERSION_1_2 == 1\n    { &ATOM(global_work_size), CL_KERNEL_GLOBAL_WORK_SIZE, true, OCL_SIZE, 0, sizeof(size_t[3])},\n#endif\n};\n\necl_info_t event_info[] = {\n    { &ATOM(command_queue),  CL_EVENT_COMMAND_QUEUE, false, OCL_COMMAND_QUEUE, 0, 0 },\n    { &ATOM(command_type),   CL_EVENT_COMMAND_TYPE, false,  OCL_ENUM, kv_command_type, 0 },\n    { &ATOM(reference_count), CL_EVENT_REFERENCE_COUNT, false, OCL_UINT, 0, 0 },\n    { &ATOM(execution_status), CL_EVENT_COMMAND_EXECUTION_STATUS, false, OCL_ENUM, kv_execution_status, 0 }\n};\n\necl_info_t event_profile_info[] = {\n    { &ATOM(command_queued),  CL_PROFILING_COMMAND_QUEUED, false, OCL_ULONG, 0, 0 },\n 
   { &ATOM(command_submit),  CL_PROFILING_COMMAND_SUBMIT, false, OCL_ULONG, 0, 0 },\n    { &ATOM(command_start),   CL_PROFILING_COMMAND_START, false, OCL_ULONG, 0, 0 },\n    { &ATOM(command_end),     CL_PROFILING_COMMAND_END, false, OCL_ULONG, 0, 0 },\n#if CL_VERSION_2_0 == 1\n    { &ATOM(command_complete), CL_PROFILING_COMMAND_COMPLETE, false, OCL_ULONG, 0, 0 },\n#endif\n};\n\n// clGetKernelArgInfo 1.2\n#if CL_VERSION_1_2 == 1\n\necl_kv_t kv_address_qualifier[] = {\n    { &ATOM(global), CL_KERNEL_ARG_ADDRESS_GLOBAL },\n    { &ATOM(local),  CL_KERNEL_ARG_ADDRESS_LOCAL },\n    { &ATOM(constant), CL_KERNEL_ARG_ADDRESS_CONSTANT },\n    { &ATOM(private), CL_KERNEL_ARG_ADDRESS_PRIVATE },\n    { 0, 0 }\n};\n\necl_kv_t kv_access_qualifier[] = {\n    { &ATOM(read_only), CL_KERNEL_ARG_ACCESS_READ_ONLY },\n    { &ATOM(write_only), CL_KERNEL_ARG_ACCESS_WRITE_ONLY },\n    { &ATOM(read_write), CL_KERNEL_ARG_ACCESS_READ_WRITE },\n    { &ATOM(none), CL_KERNEL_ARG_ACCESS_NONE },\n    { 0, 0 }\n};\n\necl_kv_t kv_type_qualifier[] = {\n    { &ATOM(none), CL_KERNEL_ARG_TYPE_NONE },\n    { &ATOM(const), CL_KERNEL_ARG_TYPE_CONST },\n    { &ATOM(restrict), CL_KERNEL_ARG_TYPE_RESTRICT },\n    { &ATOM(volatile), CL_KERNEL_ARG_TYPE_VOLATILE },\n    { 0, 0 }\n};\n\necl_info_t arg_info[] = {\n    { &ATOM(address_qualifier), CL_KERNEL_ARG_ADDRESS_QUALIFIER, false, OCL_ENUM, kv_address_qualifier, 0 },\n    { &ATOM(access_qualifier), CL_KERNEL_ARG_ACCESS_QUALIFIER, false, OCL_ENUM, kv_access_qualifier, 0 },\n    { &ATOM(type_name), CL_KERNEL_ARG_TYPE_NAME, false, OCL_STRING, 0, 0 },\n    { &ATOM(type_qualifier), CL_KERNEL_ARG_TYPE_QUALIFIER, false, OCL_ENUM, kv_type_qualifier, 0 },\n    { &ATOM(name),  CL_KERNEL_ARG_NAME, false, OCL_STRING, 0, 0 },\n};\n#endif\n\n// Error reasons\nERL_NIF_TERM ecl_error(cl_int err)\n{\n    switch(err) {\n    case CL_DEVICE_NOT_FOUND: \n\treturn ATOM(device_not_found);\n    case CL_DEVICE_NOT_AVAILABLE: \n\treturn ATOM(device_not_available);\n    case 
CL_COMPILER_NOT_AVAILABLE: \n\treturn ATOM(compiler_not_available);\n    case CL_MEM_OBJECT_ALLOCATION_FAILURE: \n\treturn ATOM(mem_object_allocation_failure);\n    case CL_OUT_OF_RESOURCES: \n\treturn ATOM(out_of_resources);\n    case CL_OUT_OF_HOST_MEMORY: \n\treturn ATOM(out_of_host_memory);\n    case CL_PROFILING_INFO_NOT_AVAILABLE: \n\treturn ATOM(profiling_info_not_available);\n    case CL_MEM_COPY_OVERLAP: \n\treturn ATOM(mem_copy_overlap);\n    case CL_IMAGE_FORMAT_MISMATCH:\n\treturn ATOM(image_format_mismatch);\n    case CL_IMAGE_FORMAT_NOT_SUPPORTED:\n\treturn ATOM(image_format_not_supported);\n    case CL_BUILD_PROGRAM_FAILURE: \n\treturn ATOM(build_program_failure);\n    case CL_MAP_FAILURE: \n\treturn ATOM(map_failure);\n    case CL_INVALID_VALUE: \n\treturn ATOM(invalid_value);\n    case CL_INVALID_DEVICE_TYPE: \n\treturn ATOM(invalid_device_type);\n    case CL_INVALID_PLATFORM: \n\treturn ATOM(invalid_platform);\n    case CL_INVALID_DEVICE: \n\treturn ATOM(invalid_device);\n    case CL_INVALID_CONTEXT: \n\treturn ATOM(invalid_context);\n    case CL_INVALID_QUEUE_PROPERTIES: \n\treturn ATOM(invalid_queue_properties);\n    case CL_INVALID_COMMAND_QUEUE: \n\treturn ATOM(invalid_command_queue);\n    case CL_INVALID_HOST_PTR: \n\treturn ATOM(invalid_host_ptr);\n    case CL_INVALID_MEM_OBJECT: \n\treturn ATOM(invalid_mem_object);\n    case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: \n\treturn ATOM(invalid_image_format_descriptor);\n    case CL_INVALID_IMAGE_SIZE: \n\treturn ATOM(invalid_image_size);\n    case CL_INVALID_SAMPLER: \n\treturn ATOM(invalid_sampler);\n    case CL_INVALID_BINARY: \n\treturn ATOM(invalid_binary);\n    case CL_INVALID_BUILD_OPTIONS: \n\treturn ATOM(invalid_build_options);\n    case CL_INVALID_PROGRAM: \n\treturn ATOM(invalid_program);\n    case CL_INVALID_PROGRAM_EXECUTABLE: \n\treturn ATOM(invalid_program_executable);\n    case CL_INVALID_KERNEL_NAME: \n\treturn ATOM(invalid_kernel_name);\n    case CL_INVALID_KERNEL_DEFINITION: 
\n\treturn ATOM(invalid_kernel_definition);\n    case CL_INVALID_KERNEL: \n\treturn ATOM(invalid_kernel);\n    case CL_INVALID_ARG_INDEX: \n\treturn ATOM(invalid_arg_index);\n    case CL_INVALID_ARG_VALUE: \n\treturn ATOM(invalid_arg_value);\n    case CL_INVALID_ARG_SIZE: \n\treturn ATOM(invalid_arg_size);\n    case CL_INVALID_KERNEL_ARGS: \n\treturn ATOM(invalid_kernel_args);\n    case CL_INVALID_WORK_DIMENSION: \n\treturn ATOM(invalid_work_dimension);\n    case CL_INVALID_WORK_GROUP_SIZE: \n\treturn ATOM(invalid_work_group_size);\n    case CL_INVALID_WORK_ITEM_SIZE: \n\treturn ATOM(invalid_work_item_size);\n    case CL_INVALID_GLOBAL_OFFSET: \n\treturn ATOM(invalid_global_offset);\n    case CL_INVALID_EVENT_WAIT_LIST: \n\treturn ATOM(invalid_event_wait_list);\n    case CL_INVALID_EVENT: \n\treturn ATOM(invalid_event);\n    case CL_INVALID_OPERATION: \n\treturn ATOM(invalid_operation);\n    case CL_INVALID_GL_OBJECT: \n\treturn ATOM(invalid_gl_object);\n    case CL_INVALID_BUFFER_SIZE: \n\treturn ATOM(invalid_buffer_size);\n    case CL_INVALID_MIP_LEVEL: \n\treturn ATOM(invalid_mip_level);\n    case CL_INVALID_GLOBAL_WORK_SIZE:\n\treturn ATOM(invalid_global_work_size);\n#ifdef CL_DEVICE_PARTITION_FAILED\n    case CL_DEVICE_PARTITION_FAILED:\n\treturn ATOM(device_partition_failed);\n#endif\n#ifdef CL_INVALID_DEVICE_PARTITION_COUNT\n    case CL_INVALID_DEVICE_PARTITION_COUNT:\n\treturn ATOM(invalid_device_partition_count);\n#endif\n    default: \n\treturn ATOM(unknown);\n    }\n}\n\nERL_NIF_TERM ecl_make_error(ErlNifEnv* env, cl_int err)\n{\n    return enif_make_tuple2(env, ATOM(error), ecl_error(err));\n}\n\nstatic void ecl_emit_error(char* file, int line, ...)\n{\n    va_list ap;\n    char* fmt;\n\n    va_start(ap, line);\n    fmt = va_arg(ap, char*);\n\n    fprintf(stderr, \"%s:%d: \", file, line); \n    vfprintf(stderr, fmt, ap);\n    fprintf(stderr, \"\\r\\n\");\n    va_end(ap);\n    fflush(stderr);\n}\n\n// Parse bool\nstatic int get_bool(ErlNifEnv* env, const 
ERL_NIF_TERM key, cl_bool* val)\n{\n    UNUSED(env);\n    if (key == ATOM(true)) {\n\t*val = true;\n\treturn 1;\n    }\n    else if (key == ATOM(false)) {\n\t*val = false;\n\treturn 1;\n    }\n    return 0;\n}\n\n\n// Parse enum\nstatic int get_enum(ErlNifEnv* env, const ERL_NIF_TERM key,\n\t\t    cl_uint* num, ecl_kv_t* kv)\n{\n    UNUSED(env);\n\n    if (!enif_is_atom(env, key))\n\treturn 0;\n    while(kv->key) {\n\tif (*kv->key == key) {\n\t    *num = (cl_uint) kv->value;\n\t    return 1;\n\t}\n\tkv++;\n    }\n    return 0;\n}\n\n// Parse bitfield\nstatic int get_bitfield(ErlNifEnv* env, const ERL_NIF_TERM key,\n\t\t\tcl_bitfield* field, ecl_kv_t* kv)\n{\n    UNUSED(env);\n\n    if (!enif_is_atom(env, key))\n\treturn 0;\n    while(kv->key) {\n\tif (*kv->key == key) {\n\t    *field = kv->value;\n\t    return 1;\n\t}\n\tkv++;\n    }\n    return 0;\n}\n\n\nstatic int get_bitfields(ErlNifEnv* env, const ERL_NIF_TERM term,\n\t\t\t cl_bitfield* field, ecl_kv_t* kv)\n{\n    cl_bitfield t;\n\n    if (enif_is_atom(env, term)) {\n\tif (!get_bitfield(env, term, &t, kv))\n\t    return 0;\n\t*field = t;\n\treturn 1;\n    }\n    else if (enif_is_empty_list(env, term)) {\n\t*field = 0;\n\treturn 1;\n    }\n    else if (enif_is_list(env, term)) {\n\tcl_bitfield fs = 0;\n\tERL_NIF_TERM list = term;\n\tERL_NIF_TERM head, tail;\n\t\n\twhile(enif_get_list_cell(env, list, &head, &tail)) {\n\t    if (!get_bitfield(env, head, &t, kv))\n\t\treturn 0;\n\t    fs |= t;\n\t    list = tail;\n\t}\n\tif (!enif_is_empty_list(env, list))\n\t    return 0;\n\t*field = fs;\n\treturn 1;\n    }\n    return 0;\n}\n\nERL_NIF_TERM make_enum(ErlNifEnv* env, cl_uint num, ecl_kv_t* kv)\n{\n    while(kv->key) {\n\tif (num == (cl_uint)kv->value)\n\t    return *kv->key;\n\tkv++;\n    }\n    return enif_make_uint(env, num);\n}\n\nERL_NIF_TERM make_bitfields(ErlNifEnv* env, cl_bitfield v, ecl_kv_t* kv)\n{\n    ERL_NIF_TERM list = enif_make_list(env, 0);\n\n    if (v) {\n\tint n = 0;\n\twhile(kv->key) {\n\t    
kv++;\n\t    n++;\n\t}\n\twhile(n--) {\n\t    kv--;\n\t    if ((kv->value & v) == kv->value)\n\t\tlist = enif_make_list_cell(env, *kv->key, list);\n\t}\n    }\n    return list;\n}\n\n\n\n/******************************************************************************\n *\n *   Linear hash functions\n *\n *****************************************************************************/\n\n#define EPTR_HANDLE(ptr) ((intptr_t)(ptr))\n\nstatic lhash_value_t ref_hash(void* key)\n{\n    return (lhash_value_t) key;\n}\n\nstatic int ref_cmp(void* key, void* data)\n{\n    if (((intptr_t)key) == EPTR_HANDLE(((ecl_object_t*)data)->opaque))\n\treturn 0;\n    return 1;\n}\n\nstatic void ref_release(void *data)\n{\n    UNUSED(data);\n    // object's are free'd by garbage collection\n}\n\n// Remove object from hash \nstatic void object_erase(ecl_object_t* obj)\n{\n    ecl_env_t* ecl = obj->env;\n    enif_rwlock_rwlock(ecl->ref_lock);\n    lhash_erase(&ecl->ref, (void*)EPTR_HANDLE(obj->opaque));\n    enif_rwlock_rwunlock(ecl->ref_lock);\n}\n\n/******************************************************************************\n *\n *   Message queue\n *\n *****************************************************************************/\n\n// Peek at queue front\n#if 0\nstatic ecl_message_t* ecl_queue_peek(ecl_queue_t* q)\n{\n    ecl_qlink_t* ql;\n\n    enif_mutex_lock(q->mtx);\n    ql = q->front;\n    enif_mutex_unlock(q->mtx);\n    if (ql)\n\treturn &ql->mesg;\n    else\n\treturn 0;\n}\n#endif\n\n// Get message from queue front\nstatic int ecl_queue_get(ecl_queue_t* q, ecl_message_t* m)\n{\n    ecl_qlink_t* ql;\n\n    enif_mutex_lock(q->mtx);\n    while(!(ql = q->front)) {\n\tenif_cond_wait(q->cv, q->mtx);\n    }\n    if (!(q->front = ql->next))\n\tq->rear = 0;\n    q->len--;\n\n    *m = ql->mesg;\n\n    if ((ql >= &q->ql[0]) && (ql <= &q->ql[MAX_QLINK-1])) {\n\tql->next = q->free;\n\tq->free = ql;\n    }\n    else \n\tenif_free(ql);\n    enif_mutex_unlock(q->mtx);\n    return 0;\n}\n\n// 
Put message at queue rear\nstatic int ecl_queue_put(ecl_queue_t* q, ecl_message_t* m)\n{\n    ecl_qlink_t* ql;\n    ecl_qlink_t* qr;\n    int res = 0;\n\n    enif_mutex_lock(q->mtx);\n\n    if ((ql = q->free))\n\tq->free = ql->next;\n    else\n\tql = enif_alloc(sizeof(ecl_qlink_t));\n    if (!ql)\n\tres = -1;\n    else {\n\tql->mesg = *m;\n\tq->len++;\n\tql->next = 0;\n\tif (!(qr = q->rear)) {\n\t    q->front = ql;\n\t    enif_cond_signal(q->cv);\n\t}\n\telse\n\t    qr->next = ql;\n\tq->rear = ql;\n    }\n    enif_mutex_unlock(q->mtx);\n    return res;\n}\n\nstatic int ecl_queue_init(ecl_queue_t* q)\n{\n    int i;\n    if (!(q->cv     = enif_cond_create(\"queue_cv\")))\n\treturn -1;\n    if (!(q->mtx    = enif_mutex_create(\"queue_mtx\")))\n\treturn -1;\n    q->front  = 0;\n    q->rear   = 0;\n    q->len    = 0;\n    for (i = 0; i < MAX_QLINK-1; i++)\n\tq->ql[i].next = &q->ql[i+1];\n    q->ql[MAX_QLINK-1].next = 0;\n    q->free = &q->ql[0];\n    return 0;\n}\n\nstatic void ecl_queue_destroy(ecl_queue_t* q)\n{\n    ecl_qlink_t* ql;\n\n    enif_cond_destroy(q->cv);\n    enif_mutex_destroy(q->mtx);\n\n    ql = q->front;\n    while(ql) {\n\tecl_qlink_t* qln = ql->next;\n\tif ((ql >= &q->ql[0]) && (ql <= &q->ql[MAX_QLINK-1]))\n\t    ;\n\telse\n\t    enif_free(ql);\n\tql = qln;\n    }\n}\n\n/******************************************************************************\n *\n *   Threads\n *\n *****************************************************************************/\n\nstatic int ecl_message_send(ecl_thread_t* thr, ecl_message_t* m)\n{\n    return ecl_queue_put(&thr->q, m);\n}\n\nstatic int ecl_message_recv(ecl_thread_t* thr, ecl_message_t* m)\n{\n    int r;\n    if ((r = ecl_queue_get(&thr->q, m)) < 0)\n\treturn r;\n    return 0;\n}\n\n#if 0\nstatic ecl_message_t* ecl_message_peek(ecl_thread_t* thr, ecl_thread_t** from)\n{\n    ecl_message_t* m;\n    if ((m = ecl_queue_peek(&thr->q))) {\n\tif (from)\n\t    *from = m->sender;\n    }\n    return 
m;\n}\n#endif\n\nstatic ecl_thread_t* ecl_thread_start(void* (*func)(void* arg),\n\t\t\t\t      void* arg, int stack_size)\n{\n    ErlNifThreadOpts* opts;\n    ecl_thread_t* thr;\n\n    if (!(thr = enif_alloc(sizeof(ecl_thread_t))))\n\treturn 0;\n    if (ecl_queue_init(&thr->q) < 0)\n\tgoto error;\n    if (!(opts = enif_thread_opts_create(\"ecl_thread_opts\")))\n\tgoto error;\n    opts->suggested_stack_size = stack_size;\n    thr->arg = arg;\n\n    enif_thread_create(\"ecl_thread\", &thr->tid, func, thr, opts);\n    enif_thread_opts_destroy(opts);\n    return thr;\nerror:\n    enif_free(thr);\n    return 0;\n}\n\nstatic int ecl_thread_stop(ecl_thread_t* thr, void** exit_value)\n{\n    ecl_message_t m;\n\n    m.type   = ECL_MESSAGE_STOP;\n    m.env    = 0;\n    ecl_message_send(thr, &m);\n    enif_thread_join(thr->tid, exit_value);\n    ecl_queue_destroy(&thr->q);\n    enif_free(thr);\n    return 0;\n}\n\nstatic void ecl_thread_exit(void* value)\n{\n    enif_thread_exit(value);\n}\n\n/******************************************************************************\n *\n *   Ecl resource\n *\n *****************************************************************************/\n\nstatic int ecl_resource_init(ErlNifEnv* env,\n\t\t\t     ecl_resource_t* res,\n\t\t\t     char* name,\n\t\t\t     size_t size,  // object size\n\t\t\t     void (*dtor)(ErlNifEnv*, ecl_object_t*),\n\t\t\t     ErlNifResourceFlags flags,\n\t\t\t     ErlNifResourceFlags* tried)\n{\n    res->name = name;\n    res->type = enif_make_atom(env, name);\n    res->size = size;\n    res->res  = enif_open_resource_type(env, 0, name, \n\t\t\t\t\t(ErlNifResourceDtor*) dtor,\n\t\t\t\t\tflags, tried);\n    return 0;\n}\n\n//\n// Reference new kernel argument and Dereference old value\n//\n\nstatic void unref_kernel_arg(int type, void* val)\n{\n    switch(type) {\n    case KERNEL_ARG_MEM:\n\tif (val)\n\t    ECL_CALL(clReleaseMemObject)((cl_mem) val);\n\tbreak;\n    case KERNEL_ARG_SAMPLER:\n\tif (val)\n\t    
ECL_CALL(clReleaseSampler)((cl_sampler) val);\n\tbreak;\n    case KERNEL_ARG_OTHER:\n    default:\n\tbreak;\n    }\n}\n\nstatic void ref_kernel_arg(int type, void* val)\n{\n    switch(type) {\n    case KERNEL_ARG_MEM:\n\tif (val)\n\t    ECL_CALL(clRetainMemObject)((cl_mem) val);\n\tbreak;\n    case KERNEL_ARG_SAMPLER:\n\tif (val)\n\t    ECL_CALL(clRetainSampler)((cl_sampler) val);\n\tbreak;\n    case KERNEL_ARG_OTHER:\n    default:\n\tbreak;\n    }\n}\n\nstatic int set_kernel_arg(ecl_kernel_t* kern, cl_uint i, int type, void* value)\n{\n    if (i < kern->num_args) {\n\tint   old_type  = kern->arg[i].type;\n\tvoid* old_value = kern->arg[i].value;\n\tref_kernel_arg(type, value);\n\tkern->arg[i].type  = type;\n\tkern->arg[i].value = value;\n\tunref_kernel_arg(old_type, old_value);\n\treturn 0;\n    }\n    return -1;\n}\n\n/******************************************************************************\n *\n *   Resource destructors\n *\n *****************************************************************************/\n\nstatic void ecl_platform_dtor(ErlNifEnv* env, ecl_object_t* obj)\n{\n    UNUSED(env);\n    UNUSED(obj);\n    DBG(\"ecl_platform_dtor: %p\", obj);\n    object_erase(obj);\n    if (obj->parent) enif_release_resource(obj->parent);\n}\n\nstatic void ecl_device_dtor(ErlNifEnv* env, ecl_object_t* obj)\n{\n    UNUSED(env);\n    UNUSED(obj);\n    DBG(\"ecl_device_dtor: %p\", obj);\n    object_erase(obj);\n    if (obj->parent) enif_release_resource(obj->parent);\n}\n\nstatic void ecl_queue_dtor(ErlNifEnv* env, ecl_object_t* obj)\n{\n    UNUSED(env);\n    DBG(\"ecl_queue_dtor: %p\", obj);\n    ECL_CALL(clReleaseCommandQueue)(obj->queue);\n    object_erase(obj);\n    if (obj->parent) enif_release_resource(obj->parent);\n}\n\nstatic void ecl_mem_dtor(ErlNifEnv* env, ecl_object_t* obj)\n{\n    UNUSED(env);\n    DBG(\"ecl_mem_dtor: %p\", obj);\n    ECL_CALL(clReleaseMemObject)(obj->mem);\n    object_erase(obj);\n    if (obj->parent) 
enif_release_resource(obj->parent);\n}\n\nstatic void ecl_sampler_dtor(ErlNifEnv* env, ecl_object_t* obj)\n{\n    UNUSED(env);\n    DBG(\"ecl_sampler_dtor: %p\", obj);\n    ECL_CALL(clReleaseSampler)(obj->sampler);\n    object_erase(obj);\n    if (obj->parent) enif_release_resource(obj->parent);\n}\n\nstatic void ecl_program_dtor(ErlNifEnv* env, ecl_object_t* obj)\n{\n    UNUSED(env);\n    DBG(\"ecl_program_dtor: %p\", obj);\n    ECL_CALL(clReleaseProgram)(obj->program);\n    object_erase(obj);\n    if (obj->parent) enif_release_resource(obj->parent);\n}\n\nstatic void ecl_kernel_dtor(ErlNifEnv* env, ecl_object_t* obj)\n{\n    ecl_kernel_t* kern = (ecl_kernel_t*) obj;\n    cl_uint i;\n    UNUSED(env);\n    DBG(\"ecl_kernel_dtor: %p\", kern);\n    for (i = 0; i < kern->num_args; i++)\n\tunref_kernel_arg(kern->arg[i].type, kern->arg[i].value);\n    enif_free(kern->arg);\n    ECL_CALL(clReleaseKernel)(kern->obj.kernel);\n    object_erase(obj);\n    if (obj->parent) enif_release_resource(obj->parent);\n}\n\nstatic void ecl_event_dtor(ErlNifEnv* env, ecl_object_t* obj)\n{\n    ecl_event_t* evt = (ecl_event_t*) obj;\n    UNUSED(env);\n    DBG(\"ecl_event_dtor: %p\", evt);\n    ECL_CALL(clReleaseEvent)(evt->obj.event);\n    object_erase(obj);\n    if (evt->bin) {\n\tif (!evt->rl)\n\t    enif_release_binary(evt->bin);\n\tenif_free(evt->bin);\n    }\n    if (evt->bin_env)\n\tenif_free_env(evt->bin_env);\n    if (obj->parent) enif_release_resource(obj->parent);\n}\n\nstatic void ecl_context_dtor(ErlNifEnv* env, ecl_object_t* obj)\n{\n    void* exit_value;\n    ecl_context_t* ctx = (ecl_context_t*) obj;\n    ecl_context_t** pp;\n    ecl_env_t* ecl = enif_priv_data(env);\n    ecl_thread_t* thr = ctx->thr;\n\n    DBG(\"ecl_context_dtor: %p\", ctx);\n\n    enif_rwlock_rwlock(ecl->context_list_lock);\n    pp = &ecl->context_list;\n    while(*pp != ctx)\n\tpp = &(*pp)->next;\n    *pp = ctx->next;\n    enif_rwlock_rwunlock(ecl->context_list_lock);\n\n    
ECL_CALL(clReleaseContext)(ctx->obj.context);\n    object_erase(obj);\n    // parent is always = 0\n    // kill the event thread\n    ecl_thread_stop(thr, &exit_value);\n}\n\n\n/******************************************************************************\n *\n *   make/get\n *\n *****************************************************************************/\n\n// For now, wrap the resource object {type,pointer-val,handle}\nstatic ERL_NIF_TERM make_object(ErlNifEnv* env, const ERL_NIF_TERM type,\n\t\t\t\tvoid* robject)\n{\n    if (!robject)\n\treturn ATOM(undefined);\n    else\n\treturn enif_make_tuple3(env,\n\t\t\t\ttype,\n\t\t\t\tecl_make_sizet(env, (size_t) robject),\n\t\t\t\tenif_make_resource(env, robject));\n}\n\n// Accept {type,pointer-val,handle}\nstatic int get_ecl_object(ErlNifEnv* env, const ERL_NIF_TERM term,\n\t\t\t  ecl_resource_t* rtype, bool nullp,  \n\t\t\t  ecl_object_t** robjectp)\n{\n    const ERL_NIF_TERM* elem;\n    int arity;\n    size_t handle;  // not really a size_t but the type has a good size\n\n    if (nullp && (term == ATOM(undefined))) {\n\t*robjectp = 0;\n\treturn 1;\n    }\n    if (!enif_get_tuple(env, term, &arity, &elem))\n\treturn 0;\n    if (arity != 3)\n\treturn 0;\n    if (!enif_is_atom(env, elem[0]) || (elem[0] != rtype->type))\n\treturn 0;\n    if (!ecl_get_sizet(env, elem[1], &handle))\n\treturn 0;\n    if (!enif_get_resource(env, elem[2], rtype->res, (void**) robjectp))\n\treturn 0;\n    if ((size_t)*robjectp != handle)\n\treturn 0;\n    return 1;\n}\n\n#if 0\nstatic int get_ecl_object_list(ErlNifEnv* env, const ERL_NIF_TERM term,\n\t\t\t       ecl_resource_t* rtype, bool nullp,\n\t\t\t       ecl_object_t** robjv, size_t* rlen)\n{\n    size_t maxlen = *rlen;\n    size_t n = 0;\n    ERL_NIF_TERM list = term;\n\n    while(n < maxlen) {\n\tERL_NIF_TERM head, tail;\n\t\n\tif (enif_get_list_cell(env, list, &head, &tail)) {\n\t    if (!get_ecl_object(env, head, rtype, nullp, robjv))\n\t\treturn 0;\n\t    n++;\n\t    robjv++;\n\t 
   list = tail;\n\t}\n\telse if (enif_is_empty_list(env, list)) {\n\t    *rlen = n;\n\t    return 1;\n\t}\n\telse \n\t    return 0;\n    }\n    return 0;\n}\n#endif\n\nstatic int get_object(ErlNifEnv* env, const ERL_NIF_TERM term,\n\t\t      ecl_resource_t* rtype, bool nullp,  \n\t\t      void** rptr)\n{\n    ecl_object_t* obj;\n    if (get_ecl_object(env, term, rtype, nullp, &obj)) {\n\t*rptr = obj ? obj->opaque : 0;\n\treturn 1;\n    }\n    return 0;\n}\n\nstatic int get_object_list(ErlNifEnv* env, const ERL_NIF_TERM term,\n\t\t\t   ecl_resource_t* rtype, bool nullp,\n\t\t\t   void** robjv, cl_uint* rlen)\n{\n    cl_uint maxlen = *rlen;\n    cl_uint n = 0;\n    ERL_NIF_TERM list = term;\n\n    while(n < maxlen) {\n\tERL_NIF_TERM head, tail;\n\t\n\tif (enif_get_list_cell(env, list, &head, &tail)) {\n\t    if (!get_object(env, head, rtype, nullp, robjv))\n\t\treturn 0;\n\t    n++;\n\t    robjv++;\n\t    list = tail;\n\t}\n\telse if (enif_is_empty_list(env, list)) {\n\t    *rlen = n;\n\t    return 1;\n\t}\n\telse \n\t    return 0;\n    }\n    return 0;\n}\n\n\n\nstatic int get_sizet_list(ErlNifEnv* env, const ERL_NIF_TERM term,\n\t\t\t  size_t* rvec, size_t* rlen)\n{\n    size_t maxlen = *rlen;\n    size_t n = 0;\n    ERL_NIF_TERM list = term;\n\n    while(n < maxlen) {\n\tERL_NIF_TERM head, tail;\n\t\n\tif (enif_get_list_cell(env, list, &head, &tail)) {\n\t    if (!ecl_get_sizet(env, head, rvec))\n\t\treturn 0;\n\t    n++;\n\t    rvec++;\n\t    list = tail;\n\t}\n\telse if (enif_is_empty_list(env, list)) {\n\t    *rlen = n;\n\t    return 1;\n\t}\n\telse \n\t    return 0;\n    }\n    if (enif_is_empty_list(env, list)) {\n\t*rlen = n;\n\treturn 1;\n    }\n    return 0;\n}\n\nstatic int get_binary_list(ErlNifEnv* env, const ERL_NIF_TERM term,\n\t\t\t   ErlNifBinary* rvec, size_t* rlen)\n{\n    size_t maxlen = *rlen;\n    size_t n = 0;\n    ERL_NIF_TERM list = term;\n\n    while(n < maxlen) {\n\tERL_NIF_TERM head, tail;\n\t\n\tif (enif_get_list_cell(env, list, &head, 
&tail)) {\n\t    if (!enif_inspect_binary(env, head, rvec))\n\t\treturn 0;\n\t    n++;\n\t    rvec++;\n\t    list = tail;\n\t}\n\telse if (enif_is_empty_list(env, list)) {\n\t    *rlen = n;\n\t    return 1;\n\t}\n\telse \n\t    return 0;\n    }\n    return 0;\n}\n\n#if CL_VERSION_1_2 == 1\n// avoid warning\n// currently only used by compile_program, which is a 1.2 function\n\n// free an array of strings\nstatic void free_string_list(char** rvec, size_t n)\n{\n    int i;\n    for (i = 0; i < (int)n; i++)\n\tenif_free(rvec[i]);\n}\n\n// get a list of, max *rlen, dynamically allocated, strings.\nstatic int get_string_list(ErlNifEnv* env, const ERL_NIF_TERM term,\n\t\t\t   char** rvec, size_t* rlen)\n{\n    char** rvec0 = rvec;\n    size_t maxlen = *rlen;\n    size_t n = 0;\n    ERL_NIF_TERM list = term;\n    ERL_NIF_TERM head, tail;\n    while((n < maxlen) &&\n\t  enif_get_list_cell(env, list, &head, &tail)) {\n\tchar* str;\n\tunsigned int len;\n\tif (!enif_get_list_length(env, head, &len))\n\t    goto error;\n\tif (!(str = enif_alloc(len+1)))\n\t    goto error;\n\tif (!enif_get_string(env, head, str, len+1, ERL_NIF_LATIN1))\n\t    goto error;\n\t*rvec++ = str;\n\tn++;\n\tlist = tail;\n    }\n    if (enif_is_empty_list(env, list)) {\n\t*rlen = n;\n\treturn 1;\n    }\nerror:\n    free_string_list(rvec0, rvec-rvec0);\n    return 0;\n}\n#endif\n\n// Copy a \"local\" binary to a new process independent environment\n// fill the binary structure with the new data and return it.\n//\nstatic int ecl_make_binary(ErlNifEnv* src_env,\n\t\t\t   const ERL_NIF_TERM src,\n\t\t\t   ErlNifEnv* dst_env,\n\t\t\t   ErlNifBinary* bin)\n{\n    ERL_NIF_TERM ref_counted;\n\n    if (enif_is_binary(src_env, src)) {\n\t// Update refc (and/or fix heap binaries)\n\tref_counted = enif_make_copy(dst_env, src);\n\treturn enif_inspect_binary(dst_env, ref_counted, bin);\n    } else {\n\t//  iolist to binary\n\tif (!enif_inspect_iolist_as_binary(src_env, src, bin))\n\t    return 0;\n\t// ref count 
binary ?\n\tenif_make_binary(dst_env, bin);\n\treturn 1; \n    }\n}\n\n\n// Look up an OpenCL object (native => resource ecl_object_t*)\nstatic ecl_object_t* ecl_lookup(ErlNifEnv* env, void* ptr)\n{\n    if (!ptr)\n\treturn 0;\n    else {\n\tecl_env_t* ecl = enif_priv_data(env);\n\tecl_object_t* obj;\n\n\tenif_rwlock_rlock(ecl->ref_lock);\n\tobj = (ecl_object_t*) lhash_lookup(&ecl->ref,(void*)EPTR_HANDLE(ptr));\n\tenif_rwlock_runlock(ecl->ref_lock);\n\treturn obj;\n    }\n}\n\n// Create a new OpenCL resource object\nstatic ecl_object_t* ecl_new(ErlNifEnv* env, ecl_resource_t* rtype, \n\t\t\t     void* ptr, ecl_object_t* parent, cl_int version)\n{\n    if (!ptr) \n\treturn 0;\n    else {\n\tecl_env_t* ecl = enif_priv_data(env);\n\tecl_object_t* obj;\n\n\tobj = enif_alloc_resource(rtype->res, rtype->size);\n\tif (obj) {\n\t    if (parent)\tenif_keep_resource(parent);\n\t    obj->opaque = ptr;\n\t    obj->env    = ecl;\n\t    obj->parent = parent;\n\t    if(version == -1) {\n\t\tversion = parent ? parent->version : ecl->icd_version;\n\t    }\n\t    obj->version = (version < ecl->icd_version) ? 
version : ecl->icd_version;\n\t    enif_rwlock_rwlock(ecl->ref_lock);\n\t    lhash_insert_new(&ecl->ref, (void*)EPTR_HANDLE(ptr), obj);\n\t    enif_rwlock_rwunlock(ecl->ref_lock);\n\t}\n\treturn obj;\n    }\n}\n\nstatic ERL_NIF_TERM ecl_make_object(ErlNifEnv* env, ecl_resource_t* rtype, \n\t\t\t\t    void* ptr, ecl_object_t* parent)\n{\n    ecl_object_t* obj = ecl_new(env,rtype,ptr,parent,-1);\n    ERL_NIF_TERM  res;\n    res = make_object(env, rtype->type, obj);\n    if (obj)\n\tenif_release_resource(obj);\n    return res;\n}\n\n\n// lookup or create a new ecl_object_t resource\nstatic ecl_object_t* ecl_maybe_new(ErlNifEnv* env, ecl_resource_t* rtype, \n\t\t\t\t   void* ptr, ecl_object_t* parent, \n\t\t\t\t   bool* is_new)\n{\n    ecl_object_t* obj = ecl_lookup(env, ptr);\n    if (!obj) {\n\tobj = ecl_new(env, rtype, ptr, parent,-1);\n\t*is_new = true;\n    }\n    else\n\t*is_new = false;\n    return obj;\n}\n\n\n// lookup or create resource object, return as erlang term\nstatic ERL_NIF_TERM ecl_lookup_object(ErlNifEnv* env, ecl_resource_t* rtype, \n\t\t\t\t      void* ptr, ecl_object_t* parent)\n{\n    bool is_new;\n    ERL_NIF_TERM  res;\n    ecl_object_t* obj = ecl_maybe_new(env,rtype,ptr,parent,&is_new);\n    \n    res = make_object(env, rtype->type, obj);\n    if (obj && is_new)\n\tenif_release_resource(obj);\n    return res;\n}\n\nstatic ERL_NIF_TERM ecl_make_kernel(ErlNifEnv* env, cl_kernel kernel,\n\t\t\t\t    ecl_object_t* parent)\n{\n    ecl_kernel_t* kern = (ecl_kernel_t*) ecl_new(env,&kernel_r,\n\t\t\t\t\t\t (void*)kernel,parent,-1);\n    ERL_NIF_TERM  res;\n    cl_uint num_args;\n    size_t sz;\n\n    // Get number of arguments, FIXME: check error return\n    ECL_CALL(clGetKernelInfo)(kernel,CL_KERNEL_NUM_ARGS,\n\t\t\t      sizeof(num_args),&num_args,0);\n    sz = num_args*sizeof(ecl_kernel_arg_t);\n\n    kern->arg = (ecl_kernel_arg_t*) enif_alloc(sz);\n    memset(kern->arg, 0, sz);\n    kern->num_args = num_args;\n    \n    res = make_object(env, 
kernel_r.type, kern);\n    if (kern)\n\tenif_release_resource(kern);\n    return res;\n}\n\nstatic ERL_NIF_TERM ecl_make_event(ErlNifEnv* env, cl_event event,\n\t\t\t\t   bool rd, bool rl,\n\t\t\t\t   ErlNifEnv* bin_env,\n\t\t\t\t   ErlNifBinary* bin, \n\t\t\t\t   ecl_object_t* parent)\n{\n    ecl_event_t* evt = (ecl_event_t*) ecl_new(env,&event_r,\n\t\t\t\t\t      (void*)event,parent,-1);\n    ERL_NIF_TERM res;\n    evt->bin_env = bin_env;\n    evt->bin = bin;\n    evt->rd  = rd;\n    evt->rl  = rl;\n    res = make_object(env, event_r.type, (ecl_object_t*) evt);\n    if (evt)\n\tenif_release_resource(evt);\n    return res;    \n}\n\nstatic ERL_NIF_TERM ecl_make_context(ErlNifEnv* env, cl_context context, cl_int version)\n{\n    ERL_NIF_TERM  res;\n    ecl_env_t* ecl;\n    ecl_context_t* ctx = (ecl_context_t*) ecl_new(env,&context_r,\n\t\t\t\t\t\t  (void*)context,0,version);\n    ecl = ctx->obj.env;\n    ctx->upgrade_count = 0;  // first incarnation\n    ctx->thr = ecl_thread_start(ecl_context_main, ctx, 8); // 8K stack!\n    res = make_object(env, context_r.type, (ecl_object_t*) ctx);\n    enif_rwlock_rwlock(ecl->context_list_lock);\n    // link contexts for upgrade\n    ctx->next = ecl->context_list;\n    ecl->context_list = ctx;\n    enif_rwlock_rwunlock(ecl->context_list_lock);\n\n    if (ctx)\n\tenif_release_resource(ctx);\n    return res;\n}\n\n\ntypedef cl_int CL_API_CALL info_fn_t(void* ptr, cl_uint param_name, \n\t\t\t\t     size_t param_value_size,\n\t\t\t\t     void* param_value, size_t* param_value_size_ret);\ntypedef cl_int CL_API_CALL info2_fn_t(void* ptr1, void* ptr2, cl_uint param_name, \n\t\t\t\t      size_t param_value_size,\n\t\t\t\t      void* param_value, size_t* param_value_size_ret);\n\n// return size of type\nstatic size_t ecl_sizeof(ocl_type_t type)\n{\n    switch(type) {\n    case OCL_CHAR: return sizeof(cl_char);\n    case OCL_UCHAR: return sizeof(cl_uchar);\n    case OCL_SHORT: return sizeof(cl_short);\n    case OCL_USHORT: return 
sizeof(cl_ushort);\n    case OCL_INT: return sizeof(cl_int);\n    case OCL_UINT: return sizeof(cl_uint);\n    case OCL_LONG: return sizeof(cl_long);\n    case OCL_ULONG: return sizeof(cl_ulong);\n    case OCL_HALF: return sizeof(cl_half);\n    case OCL_FLOAT: return sizeof(cl_float);\n    case OCL_DOUBLE: return sizeof(cl_double);\n    case OCL_BOOL: return sizeof(cl_bool);\n    case OCL_STRING: return sizeof(cl_char*);\n    case OCL_ENUM: return sizeof(cl_int);\n    case OCL_BITFIELD: return sizeof(cl_bitfield);\n    case OCL_POINTER: return sizeof(void*);\n    case OCL_SIZE: return sizeof(size_t);\n    case OCL_PLATFORM: return sizeof(void*);\n    case OCL_DEVICE: return sizeof(void*);\n    case OCL_CONTEXT: return sizeof(void*);\n    case OCL_PROGRAM: return sizeof(void*);\n    case OCL_COMMAND_QUEUE: return sizeof(void*);\n    case OCL_IMAGE_FORMAT: return sizeof(cl_image_format);\n#if CL_VERSION_1_2 == 1\n    case OCL_DEVICE_PARTITION: return sizeof(cl_device_partition_property);\n#endif\n    case OCL_NUM_TYPES:\n    default:\n\tDBG(\"info_size: unknown type %d detected\", type);\n\treturn sizeof(cl_int);\n    }\n}\n\n// put basic value types\nstatic ERL_NIF_TERM make_info_element(ErlNifEnv* env, ocl_type_t type, void* ptr, ecl_kv_t* kv)\n{\n    switch(type) {\n    case OCL_CHAR:  return enif_make_int(env, *((cl_char*)ptr));\n    case OCL_SHORT: return enif_make_int(env, *((cl_short*)ptr));\n    case OCL_INT: return enif_make_int(env, *((cl_int*)ptr));\n    case OCL_LONG: return enif_make_int64(env, *((cl_long*)ptr));\n    case OCL_UCHAR:  return enif_make_uint(env, *((cl_uchar*)ptr));\n    case OCL_USHORT: return enif_make_uint(env, *((cl_ushort*)ptr));\n    case OCL_UINT: return enif_make_uint(env, *((cl_uint*)ptr));\n    case OCL_HALF: return enif_make_uint(env, *((cl_half*)ptr));\n    case OCL_ULONG: return enif_make_uint64(env, *((cl_ulong*)ptr));\n    case OCL_SIZE: return ecl_make_sizet(env, *((size_t*)ptr));\n    case OCL_FLOAT: return 
enif_make_double(env, *((cl_float*)ptr));\n    case OCL_DOUBLE: return enif_make_double(env, *((cl_double*)ptr));\n    case OCL_BOOL: return (*((cl_bool*)ptr)) ? ATOM(true) : ATOM(false);\n    // case POINTER: cbuf_put_pointer(data, *((pointer_t*)ptr)); break;\n    case OCL_STRING:\n\treturn enif_make_string_len(env, (char*) ptr, strlen((char*) ptr), ERL_NIF_LATIN1);\n\n    case OCL_BITFIELD:\n\treturn make_bitfields(env, *((cl_bitfield*)ptr), kv);\n\n    case OCL_ENUM:\n\treturn make_enum(env, *((cl_int*)ptr), kv);\n\n    case OCL_POINTER: \n\treturn ecl_make_sizet(env, *((intptr_t*)ptr));\n\n    case OCL_PLATFORM:\n\treturn ecl_lookup_object(env,&platform_r,*(void**)ptr,0);\n\n    case OCL_DEVICE:\n\treturn ecl_lookup_object(env,&device_r,*(void**)ptr,0);\n\n    case OCL_CONTEXT:\n\treturn ecl_lookup_object(env,&context_r,*(void**)ptr,0);\n\n    case OCL_PROGRAM:\n\t// FIXME: find context object, pass as parent\n\treturn ecl_lookup_object(env,&program_r,*(void**)ptr,0);\n\n    case OCL_COMMAND_QUEUE:\n\t// FIXME: find context object, pass as parent\n\treturn ecl_lookup_object(env,&command_queue_r,*(void**)ptr,0);\n\n    case OCL_IMAGE_FORMAT: {\n\tcl_image_format* fmt = (cl_image_format*) ptr;\n\tERL_NIF_TERM channel_order;\n\tERL_NIF_TERM channel_type;\n\tchannel_order = make_enum(env,fmt->image_channel_order,\n\t\t\t\t  kv_channel_order);\n\tchannel_type = make_enum(env,fmt->image_channel_data_type,\n\t\t\t\t kv_channel_type);\n\treturn enif_make_tuple2(env, channel_order, channel_type);\n    }\n#if CL_VERSION_1_2 == 1\n    case OCL_DEVICE_PARTITION: { // cl_device_partition_property\n\tcl_device_partition_property* prop = (cl_device_partition_property*)ptr;\n\tERL_NIF_TERM term = ATOM(undefined);\n\n\tswitch(*prop++) {\n\tcase CL_DEVICE_PARTITION_EQUALLY:\n\t    term = enif_make_uint(env, *prop);\n\t    return enif_make_tuple2(env, ATOM(equally), term);\n\tcase CL_DEVICE_PARTITION_BY_COUNTS: {\n\t    cl_device_partition_property* pp = prop;\n\t    term = 
enif_make_list(env, 0);\n\t    while(*pp != CL_DEVICE_PARTITION_BY_COUNTS_LIST_END)\n\t\tpp++;\n\t    if (pp > prop) {  // build list backwards\n\t\tpp--;\n\t\twhile(pp >= prop) {\n\t\t    ERL_NIF_TERM ui = enif_make_uint(env, *pp);\n\t\t    term = enif_make_list_cell(env, ui, term);\n\t\t    pp--;\n\t\t}\n\t    }\n\t    return enif_make_tuple2(env, ATOM(by_counts), term);\n\t}\n\tcase CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN:\n\t    switch(*prop) {\n\t    case CL_DEVICE_AFFINITY_DOMAIN_NUMA:\n\t\tterm = ATOM(numa); break;\n\t    case CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE:\n\t\tterm = ATOM(l4_cache); break;\n\t    case CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE:\n\t\tterm = ATOM(l3_cache); break;\n\t    case CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE:\n\t\tterm = ATOM(l2_cache); break;\n\t    case CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE:\n\t\tterm = ATOM(l1_cache); break;\n\t    case CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE:\n\t\tterm = ATOM(next_partitionable); break;\n\t    default: return ATOM(undefined);\n\t    }\n\t    return enif_make_tuple2(env, ATOM(by_affinity_domain), term);\n\n\tdefault:\n\t    return ATOM(undefined);\n\t}\n\tbreak;\n    }\n#endif\n    case OCL_NUM_TYPES:\n    default:\n\treturn ATOM(undefined);\n    }\n}\n\n\nstatic ERL_NIF_TERM make_info_value(ErlNifEnv* env, ecl_info_t* iptr, void* buf, size_t buflen)\n{\n    char* dptr = (char*) buf;\n    ERL_NIF_TERM value;\n\n    if (iptr->is_array) {  // arrays are returned as lists of items\n\tERL_NIF_TERM list = enif_make_list(env, 0);\n\tsize_t elem_size = ecl_sizeof(iptr->info_type);\n\tsize_t n = (buflen / elem_size);\n\tdptr += (n*elem_size);  // run backwards!!!\n\twhile (buflen >= elem_size) {\n\t    dptr -= elem_size;\n\t    value = make_info_element(env, iptr->info_type, dptr, iptr->extern_info);\n\t    list = enif_make_list_cell(env, value, list);\n\t    buflen -= elem_size;\n\t}\n\tvalue = list;\n    }\n    else {\n\tvalue = make_info_element(env, iptr->info_type, dptr, iptr->extern_info);\n    }\n    return 
value;\n}\n\n// Find object value\n// return {ok,Value} | {error,Reason} | exception badarg\n//\nERL_NIF_TERM make_object_info(ErlNifEnv* env,  ERL_NIF_TERM key, ecl_object_t* obj, info_fn_t* func, \n\t\t\t      ecl_info_t* info, size_t num_info)\n{\n    size_t returned_size = 0;\n    size_t size = MAX_INFO_SIZE;\n    unsigned char buf[MAX_INFO_SIZE];\n    void* ptr = buf;\n    ERL_NIF_TERM res;\n    cl_int err;\n    unsigned int i;\n\n    if (!enif_is_atom(env, key))\n\treturn enif_make_badarg(env);\n    i = 0;\n    while((i < num_info) && (*info[i].info_key != key))\n\ti++;\n    if (i == num_info)\n\treturn enif_make_badarg(env);  // or error ?\n\n    err = (*func)(obj->opaque,info[i].info_id,size,ptr,&returned_size);\n    if (err == CL_INVALID_VALUE) {\n        // try again allocate returned_size, returned_size does not\n\t// (yet) return the actual needed bytes (by spec) \n\t// but it looks like it... ;-)\n\tif (returned_size <= size) {\n\t    err = (*func)(obj->opaque,info[i].info_id,0,NULL,&returned_size);\n\t    if (returned_size <= size) {\n\t\treturn ecl_make_error(env, err);\n\t    }\n\t}\n\tsize = returned_size;\n\tif (!(ptr = enif_alloc(size)))\n\t    return ecl_make_error(env, CL_OUT_OF_HOST_MEMORY);\n\terr = (*func)(obj->opaque,info[i].info_id,size,ptr,&returned_size);\n    }\n\n    if (!err) {\n\tres = enif_make_tuple2(env, ATOM(ok), \n\t\t\t       make_info_value(env,&info[i],ptr,returned_size));\n    }\n    else {\n\tres = ecl_make_error(env, err);\n    }\n    if (ptr != buf)\n\tenif_free(ptr);\n    return res;\n}\n\n\nERL_NIF_TERM make_object_info2(ErlNifEnv* env,  ERL_NIF_TERM key, ecl_object_t* obj1, void* obj2,\n\t\t\t\t   info2_fn_t* func, ecl_info_t* info, size_t num_info)\n{\n    size_t returned_size = 0;\n    cl_long *buf;\n    cl_int err;\n    unsigned int i;\n    ERL_NIF_TERM result;\n\n    if (!enif_is_atom(env, key))\n\treturn enif_make_badarg(env);\n    i = 0;\n    while((i < num_info) && (*info[i].info_key != key))\n\ti++;\n    if (i 
== num_info)\n\treturn enif_make_badarg(env);  // or error ?\n    returned_size = info[i].def_size;\n    if (returned_size > 0 ||\n        !(err = (*func)(obj1->opaque, obj2, info[i].info_id,\n\t\t\t0, NULL, &returned_size))) {\n\tif (!(buf = enif_alloc(returned_size)))\n\t    return ecl_make_error(env, CL_OUT_OF_RESOURCES);\n\tif (!(err = (*func)(obj1->opaque, obj2, info[i].info_id,\n\t\t\t    returned_size, buf, &returned_size))) {\n\t    result = enif_make_tuple2(env, ATOM(ok), make_info_value(env, &info[i], buf, returned_size));\n\t    enif_free(buf);\n\t    return result;\n\t}\n    }\n    return ecl_make_error(env, err);\n}\n\n/******************************************************************************\n *\n * main ecl event loop run as a thread.\n *  The main purpose is to dispatch and send messages to owners\n *\n *****************************************************************************/\n\nstatic void* ecl_context_main(void* arg)\n{\n    ecl_thread_t* self = arg;\n    ecl_context_t* ctx = self->arg;\n\n    ctx->upgrade_count++;     // signal that we have started/upgraded\n\t    \n    DBG(\"ecl_context_main: started (addr=%p,tid=%p,count=%d)\",\n\t&self, self->tid, ctx->upgrade_count);\n\n    while(1) {\n\tecl_message_t m;\n\tecl_message_recv(self, &m);\n\n\tswitch(m.type) {\n\tcase ECL_MESSAGE_UPGRADE:\n\t    DBG(\"ecl_context_main: %p got upgrade func=%p\", \n\t\tself, m.upgrade);\n\t    // upgrade must never return and SHOULD be tail recursive!\n\t    return (m.upgrade)(arg);\n\n\tcase ECL_MESSAGE_SYNC:\n\t    DBG(\"ecl_context_main: %p got sync\", self);\n\t    m.type = ECL_MESSAGE_SYNC_ACK;\n\t    ecl_queue_put(&ctx->obj.env->q, &m);\n\t    break;\n\n\tcase ECL_MESSAGE_SYNC_ACK:\n\t    // Should not end up here \n\t    DBG(\"ecl_context_main: sync ack received\");\n\t    break;\n\n\tcase ECL_MESSAGE_STOP: {\n\t    DBG(\"ecl_context_main: stopped by command\");\n\t    if (m.env) {\n\t\tenif_send(0, &m.sender, m.env, \n\t\t\t  
enif_make_tuple3(m.env, \n\t\t\t\t\t   ATOM(cl_async), m.ref,\n\t\t\t\t\t   ATOM(ok)));\n\t\tenif_free_env(m.env);\n\t    }\n\t    ecl_thread_exit(self);\n\t    break;\n\t}\n\n\tcase ECL_MESSAGE_FLUSH: {  // flush message queue\n\t    cl_int err;\n\n\t    DBG(\"ecl_context_main: flush q=%lu\", (unsigned long) m.queue);\n\t    err = ECL_CALL(clFlush)(m.queue->queue);\n\t    // send {cl_async, Ref, ok | {error,Reason}}\n\t    if (m.env) {\n\t\tERL_NIF_TERM reply;\n\t\tint res;\n\t\tUNUSED(res);\n\t\treply = !err ? ATOM(ok) : ecl_make_error(m.env, err);\n\t\tres = enif_send(0, &m.sender, m.env, \n\t\t\t\tenif_make_tuple3(m.env, \n\t\t\t\t\t\t ATOM(cl_async),\n\t\t\t\t\t\t m.ref,\n\t\t\t\t\t\t reply));\n\t\tDBG(\"ecl_context_main: send r=%d\", res);\n\t\tenif_free_env(m.env);\n\t    }\n\t    enif_release_resource(m.queue);\n\t    break;\n\t}\n\n\tcase ECL_MESSAGE_FINISH: {  // finish message queue\n\t    cl_int err;\n\t    DBG(\"ecl_context_main: finish q=%lu\", (unsigned long) m.queue);\n\t    err = ECL_CALL(clFlush)(m.queue->queue);\n\t    // send {cl_async, Ref, ok | {error,Reason}}\n\t    if (m.env) {\n\t\tint res;\n\t\tERL_NIF_TERM reply;\n\t\tUNUSED(res);\n\t\treply = !err ? 
ATOM(ok) : ecl_make_error(m.env, err);\n\t\tres = enif_send(0, &m.sender, m.env, \n\t\t\t\tenif_make_tuple3(m.env, \n\t\t\t\t\t\t ATOM(cl_async), m.ref,\n\t\t\t\t\t\t reply));\n\t\tDBG(\"ecl_context_main: send r=%d\", res);\n\t\tenif_free_env(m.env);\n\t    }\n\t    enif_release_resource(m.queue);\n\t    break;\n\t}\n\n\tcase ECL_MESSAGE_WAIT_FOR_EVENT: { // wait for one event\n\t    cl_int err;\n\t    cl_event list[1];\n\t    DBG(\"ecl_context_main: wait_for_event e=%lu\",\n\t\t(unsigned long) m.event);\n\t    list[0] = m.event->obj.event;\n\t    err = ECL_CALL(clWaitForEvents)(1, list);\n\t    DBG(\"ecl_context_main: wait_for_event err=%d\", err);\n\t    // reply to caller pid !\n\t    if (m.env) {\n\t\tERL_NIF_TERM reply;\n\t\tint res;\n\t\t\n\t\tUNUSED(res);\n\t\tif (!err) {\n\t\t    cl_int status;\n\t\t    // read status COMPLETE | ERROR\n\t\t    // FIXME: check error\n\t\t    ECL_CALL(clGetEventInfo)(m.event->obj.event,\n\t\t\t\t\t     CL_EVENT_COMMAND_EXECUTION_STATUS,\n\t\t\t\t\t     sizeof(status), &status, 0);\n\t\t    switch(status) {\n\t\t    case CL_COMPLETE:\n\t\t\tDBG(\"ecl_context_main: wait_for_event complete\");\n\t\t\tif (m.event->bin && m.event->rd) {\n\t\t\t    m.event->rl = true;\n\t\t\t    reply = enif_make_binary(m.env, m.event->bin);\n\t\t\t}\n\t\t\telse\n\t\t\t    reply = ATOM(complete);\n\t\t\tbreak;\n\t\t    default:\n\t\t      DBG(\"ecl_context_main: wait_for_event: status=%d\",\n\t\t\t  status);\n\t\t\t// must/should be an error\n\t\t\treply = ecl_make_error(m.env, status);\n\t\t\tbreak;\n\t\t    }\n\t\t}\n\t\telse\n\t\t    reply = ecl_make_error(m.env, err);\n\t\tres = enif_send(0, &m.sender, m.env,\n\t\t\t\tenif_make_tuple3(m.env, \n\t\t\t\t\t\t ATOM(cl_event), m.ref,\n\t\t\t\t\t\t reply));\n\t\tDBG(\"ecl_context_main: send r=%d\", res);\n\t\tenif_free_env(m.env);\n\t    }\n\t    enif_release_resource(m.event);\n\t    break;\n\t}\n\tdefault:\n\t    break;\n\t}\n    }\n    return 0;\n}\n\n\n//\n// API functions\n//\n\n// noop - no 
operation for NIF interface performance benchmarking\nstatic ERL_NIF_TERM ecl_noop(ErlNifEnv* env, int argc,\n\t\t\t     const ERL_NIF_TERM argv[])\n{\n    UNUSED(env);\n    UNUSED(argc);\n    UNUSED(argv);\n    return ATOM(ok);\n}\n\nstatic ERL_NIF_TERM ecl_noop_(ErlNifEnv* env, int argc,\n\t\t\t\t   const ERL_NIF_TERM argv[])\n{\n    ecl_env_t* ecl = enif_priv_data(env);\n\n    if (ecl->dirty_scheduler_support)\n\treturn enif_schedule_nif(env, \"noop\", ERL_NIF_DIRTY_JOB_CPU_BOUND,\n\t\t\t\t ecl_noop, argc, argv);\n    else\n\treturn ecl_noop(env, argc, argv);\n}\n\n// version - return list of API versions supported\nstatic ERL_NIF_TERM ecl_versions(ErlNifEnv* env, int argc,\n\t\t\t\t const ERL_NIF_TERM argv[])\n{\n    ERL_NIF_TERM list = enif_make_list(env, 0);\n    ERL_NIF_TERM vsn;\n    UNUSED(env);\n    UNUSED(argc);\n    UNUSED(argv);\n\n#if CL_VERSION_1_0 == 1\n    vsn = enif_make_tuple2(env, enif_make_int(env, 1), enif_make_int(env, 0));\n    list = enif_make_list_cell(env, vsn, list);\n#endif\n#if CL_VERSION_1_1 == 1\n    vsn = enif_make_tuple2(env, enif_make_int(env, 1), enif_make_int(env, 1));\n    list = enif_make_list_cell(env, vsn, list);\n#endif\n#if CL_VERSION_1_2 == 1\n    vsn = enif_make_tuple2(env, enif_make_int(env, 1), enif_make_int(env, 2));\n    list = enif_make_list_cell(env, vsn, list);\n#endif\n\n#if CL_VERSION_2_0 == 1\n    vsn = enif_make_tuple2(env, enif_make_int(env, 2), enif_make_int(env, 0));\n    list = enif_make_list_cell(env, vsn, list);\n#endif\n\n#if CL_VERSION_2_1 == 1\n    vsn = enif_make_tuple2(env, enif_make_int(env, 2), enif_make_int(env, 1));\n    list = enif_make_list_cell(env, vsn, list);\n#endif    \n\n    return list;\n}\n\nstatic ERL_NIF_TERM ecl_get_platform_ids(ErlNifEnv* env, int argc,\n\t\t\t\t\t const ERL_NIF_TERM argv[])\n{\n    cl_uint          num_platforms;\n    cl_platform_id   platform_id[MAX_PLATFORMS];\n    ERL_NIF_TERM     idv[MAX_PLATFORMS];\n    ERL_NIF_TERM     platform_list;\n    cl_uint i;\n    
cl_int err;\n    UNUSED(argc);\n    UNUSED(argv);\n\n    if ((err = ECL_CALL(clGetPlatformIDs)(MAX_PLATFORMS, platform_id, &num_platforms)))\n\treturn ecl_make_error(env, err);\n\n    for (i = 0; i < num_platforms; i++)\n\tidv[i] = ecl_lookup_object(env,&platform_r,platform_id[i],0);\n\n    platform_list = enif_make_list_from_array(env, idv,num_platforms);\n    return enif_make_tuple2(env, ATOM(ok), platform_list);\n}\n\nstatic ERL_NIF_TERM ecl_get_platform_info(ErlNifEnv* env, int argc,\n\t\t\t\t\t  const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_platform;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &platform_r, false, &o_platform))\n\treturn enif_make_badarg(env);\n    return make_object_info(env, argv[1], o_platform,\n\t\t\t    (info_fn_t*) ECL_FUNC_PTR(clGetPlatformInfo),\n\t\t\t    platform_info, \n\t\t\t    sizeof_array(platform_info));\n}\n\n\nstatic ERL_NIF_TERM ecl_get_device_ids(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[])\n{\n    cl_device_type   device_type = 0;\n    cl_device_id     device_id[MAX_DEVICES];\n    ERL_NIF_TERM     idv[MAX_DEVICES];\n    ERL_NIF_TERM     device_list;\n    cl_uint          num_devices;\n    cl_uint          i;\n    cl_platform_id   platform;\n    cl_int err;\n    UNUSED(argc);\n    \n    if (!get_object(env, argv[0], &platform_r, true,(void**)&platform))\n\treturn enif_make_badarg(env);\n    if (!get_bitfields(env, argv[1], &device_type, kv_device_type))\n\treturn enif_make_badarg(env);\n    if ((err = ECL_CALL(clGetDeviceIDs)(platform, device_type, MAX_DEVICES, \n\t\t\t\t\tdevice_id, &num_devices)))\n\treturn ecl_make_error(env, err);\n    \n    for (i = 0; i < num_devices; i++)\n\tidv[i] = ecl_lookup_object(env, &device_r, device_id[i], 0);\n    device_list = enif_make_list_from_array(env, idv, num_devices);\n    return enif_make_tuple2(env, ATOM(ok), device_list);\n}\n\n#if CL_VERSION_1_2 == 1\n\n// parse:\n//    {equally,<unsigned int>} |\n//    {by_counts, 
[<unsigned_int>]}\n//    {by_affinity_domain, numa|l4_cache|l3_cache|l2_cache|l1_cache|\n//                         next_partitionable}\n// \nstatic int get_partition_properties(ErlNifEnv* env, const ERL_NIF_TERM term,\n\t\t\t\t    cl_device_partition_property* rvec,\n\t\t\t\t    size_t* rlen)\n{\n    const ERL_NIF_TERM* elem;\n    int arity;\n    size_t maxlen = *rlen;\n    size_t n = 0;\n\n    if (!enif_get_tuple(env, term, &arity, &elem))\n\treturn 0;\n    if (arity != 2)\n\treturn 0;\n    if (!enif_is_atom(env, elem[0]))\n\treturn 0;\n\n    if (elem[0] == ATOM(equally)) {\n\tunsigned long v;\n\t*rvec++ = CL_DEVICE_PARTITION_EQUALLY;\n\tif (!enif_get_ulong(env, elem[1], &v))\n\t    return 0;\n\t*rvec++ = v;\n\tn=2;\n    }\n    else if (elem[0] == ATOM(by_counts)) {\n\tERL_NIF_TERM head, tail;\n\tERL_NIF_TERM list = elem[1];\n\tunsigned long v;\n\t*rvec++ = CL_DEVICE_PARTITION_BY_COUNTS;\n\tn++;\n\twhile((n < maxlen-1) &&\n\t      enif_get_list_cell(env, list, &head, &tail)) {\n\t    if (!enif_get_ulong(env, head, &v))\n\t\treturn 0;\n\t    *rvec++=v;\n\t    n++;\n\t    list = tail;\n\t}\n\tif (!enif_is_empty_list(env, list))\n\t    return 0;\n\t*rvec++ = CL_DEVICE_PARTITION_BY_COUNTS_LIST_END;\n\tn++;\n    }\n    else if (elem[0] == ATOM(by_affinity_domain)) {\n\t*rvec++ = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;\n\tif (elem[1] == ATOM(numa))\n\t    *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_NUMA;\n\telse if (elem[1] == ATOM(l4_cache))\n\t    *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE;\n\telse if (elem[1] == ATOM(l3_cache))\n\t    *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE;\n\telse if (elem[1] == ATOM(l2_cache))\n\t    *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE;\n\telse if (elem[1] == ATOM(l1_cache))\n\t    *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE;\n\telse if (elem[1] == ATOM(next_partitionable))\n\t    *rvec++ = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE;\n\telse\n\t    return 0;\n\tn = 2;\n    }\n    else\n\treturn 0;\n    *rlen = n;\n    return 
1;\n}\n\nstatic ERL_NIF_TERM ecl_create_sub_devices(ErlNifEnv* env, int argc,\n\t\t\t\t\t   const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    d;\n    cl_device_id     out_devices[MAX_DEVICES];\n    ERL_NIF_TERM     idv[MAX_DEVICES];\n    ERL_NIF_TERM     device_list;\n    cl_uint          num_devices;\n    cl_uint          i;\n    cl_device_partition_property properties[128];\n    size_t num_property =  128-1;\n    cl_int err;\n    UNUSED(argc);\n\n    // fixme calc length of properties !\n    if (!get_ecl_object(env, argv[0], &device_r, false, &d))\n\treturn enif_make_badarg(env);\n    if (!get_partition_properties(env, argv[1], properties, &num_property))\n\treturn enif_make_badarg(env);\n    properties[num_property] = 0;\n\n    err = ECL_CALL(clCreateSubDevices)(d->device, properties, MAX_DEVICES,\n\t\t\t\t       out_devices, &num_devices);\n    if (err)\n\treturn ecl_make_error(env, err);\n    for (i = 0; i < num_devices; i++) {\n\tecl_object_t* obj;\n\tif ((obj = ecl_lookup(env, out_devices[i])) == NULL)\n\t    obj = ecl_new(env, &device_r, out_devices[i], 0, d->version);\n\tidv[i] = make_object(env, device_r.type, obj);\n    }\n    device_list = enif_make_list_from_array(env, idv, num_devices);\n    return enif_make_tuple2(env, ATOM(ok), device_list);\n}\n#endif\n\n\nstatic ERL_NIF_TERM ecl_get_device_info(ErlNifEnv* env, int argc, \n\t\t\t\t\tconst ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_device;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &device_r, false, &o_device))\n\treturn enif_make_badarg(env);\n    return make_object_info(env, argv[1], o_device, \n\t\t\t    (info_fn_t*) ECL_FUNC_PTR(clGetDeviceInfo), \n\t\t\t    device_info, \n\t\t\t    sizeof_array(device_info));\n}\n\ntypedef struct {\n    ErlNifPid        sender;  // sender pid\n    ErlNifEnv*        s_env;  // senders message environment (ref, bin's etc)\n    ErlNifEnv*        r_env;  // receiver message environment (ref, bin's etc)\n    ErlNifTid           tid;  // Calling 
thread\n} ecl_notify_data_t;\n\nvoid CL_CALLBACK ecl_context_notify(const char *errinfo, \n\t\t\t\t    const void* private_info, size_t cb,\n\t\t\t\t    void * user_data)\n{\n    /* ecl_notify_data_t* bp = user_data; */\n    /* ERL_NIF_TERM reply; */\n    /* ErlNifEnv*   s_env; */\n    /* int res; */\n    UNUSED(errinfo);\n    UNUSED(private_info);\n    UNUSED(cb);\n    UNUSED(user_data);\n\n    DBG(\"ecl_context_notify:  user_data=%p\", user_data);        \n    DBG(\"ecl_context_notify:  priv_info=%p cb=%d\", private_info, cb);\n    CL_ERROR(\"CL ERROR ASYNC: %s\", errinfo);\n}\n\n//\n// cl:create_context([cl_device_id()]) -> \n//   {ok, cl_context()} | {error, cl_error()}\n//\nstatic ERL_NIF_TERM ecl_create_context(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[])\n{\n    cl_device_id     device_list[MAX_DEVICES];\n    cl_uint          num_devices = MAX_DEVICES;\n    cl_context       context;\n    cl_int err;\n    ecl_notify_data_t* bp;\n\n    UNUSED(argc);\n\n    if (!get_object_list(env, argv[0], &device_r, false, \n\t\t\t (void**) device_list, &num_devices))\n\treturn enif_make_badarg(env);\n\n    if (!(bp = enif_alloc(sizeof(ecl_notify_data_t))))\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\n    \n    if (!(bp->r_env = enif_alloc_env())) {\n\tenif_free(bp);\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\n    }\n    (void) enif_self(env, &bp->sender);\n    bp->s_env = env;\n    bp->tid = enif_thread_self();\n    DBG(\"ecl_create_context: self %p\", bp->tid);\n\n    context = ECL_CALL(clCreateContext)(0, num_devices, device_list, \n\t\t\t\t\tecl_context_notify,\n\t\t\t\t\tbp,\n\t\t\t\t\t&err);\n    if (context) {\n\tcl_uint i;\n\tERL_NIF_TERM t;\n\tecl_object_t *dev;\n\tcl_int version = 100;\n\tfor(i = 0; i < num_devices; i++) {\n\t    dev = ecl_lookup(env, device_list[i]);\n\t    /* Should hopefully be the same for all devices ?\n\t       use the least version */\n\t    if(dev->version < 
version)\n\t\tversion = dev->version;\n\t}\n\tt = ecl_make_context(env, context, version);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n\nstatic ERL_NIF_TERM ecl_get_context_info(ErlNifEnv* env, int argc, \n\t\t\t\t\t const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_context;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))\n\treturn enif_make_badarg(env);\n    return make_object_info(env, argv[1], o_context,\n\t\t\t    (info_fn_t*) ECL_FUNC_PTR(clGetContextInfo),\n\t\t\t    context_info,\n\t\t\t    sizeof_array(context_info));\n}\n\nstatic ERL_NIF_TERM ecl_create_queue(ErlNifEnv* env, int argc, \n\t\t\t\t     const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_context;\n    cl_device_id  device;\n    cl_command_queue_properties properties;\n    cl_command_queue queue;\n    cl_int err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &device_r, false, (void**) &device))\n\treturn enif_make_badarg(env);\n    if (!get_bitfields(env, argv[2], &properties,\n\t\t       kv_command_queue_properties))\n\treturn enif_make_badarg(env);\n    queue = ECL_CALL(clCreateCommandQueue)(o_context->context, device, properties,\n\t\t\t\t &err);\n    if (queue) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_object(env, &command_queue_r,(void*) queue, o_context);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n\nstatic ERL_NIF_TERM ecl_get_queue_info(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_queue;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    return make_object_info(env, argv[1], o_queue, \n\t\t\t    (info_fn_t*) ECL_FUNC_PTR(clGetCommandQueueInfo), \n\t\t\t    queue_info,\n\t\t\t    
sizeof_array(queue_info));\n}\n\n\nstatic ERL_NIF_TERM ecl_create_buffer(ErlNifEnv* env, int argc, \n\t\t\t\t      const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_context;\n    size_t size;\n    cl_mem_flags mem_flags;\n    cl_mem mem;\n    ErlNifBinary bin;\n    void* host_ptr = 0;\n    cl_int err;\n    UNUSED(argc);\n\n\n    if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))\n\treturn enif_make_badarg(env);\n    if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[2], &size))\n\treturn enif_make_badarg(env);\n    if (!enif_inspect_iolist_as_binary(env, argv[3], &bin))\n\treturn enif_make_badarg(env);\n    // How do we keep binary data (CL_MEM_USE_HOST_PTR) \n    // We should probably make sure that the buffer is read_only in this\n    // case!\n    // we must be able to reference count the binary object!\n    // USE enif_make_copy !!!! this copy is done to the thread environment!\n    if (bin.size > 0) {\n\thost_ptr = bin.data;\n\tmem_flags |= CL_MEM_COPY_HOST_PTR;\n\tif (size < bin.size)\n\t    size = bin.size;\n    }\n    else if (size)\n\tmem_flags |= CL_MEM_ALLOC_HOST_PTR;\n\n    mem = ECL_CALL(clCreateBuffer)(o_context->context, mem_flags, size,\n\t\t\t\t   host_ptr, &err);\n\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_object(env, &mem_r,(void*) mem, o_context);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n\n#if CL_VERSION_1_1 == 1\nstatic ERL_NIF_TERM ecl_create_sub_buffer(ErlNifEnv* env, int argc,\n\t\t\t\t\t  const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_buf;\n    cl_mem_flags mem_flags;\n    cl_mem mem;\n    ERL_NIF_TERM info;\n    ERL_NIF_TERM info_arg1, info_arg2;\n    cl_buffer_region reg;\n    cl_int err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &mem_r, false, &o_buf))\n\treturn enif_make_badarg(env);\n    if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags))\n\treturn 
enif_make_badarg(env);\n    if (!enif_is_atom(env, argv[2]) || (argv[2] != ATOM(region)))\n\treturn enif_make_badarg(env);\n    info = argv[3];\n    if (!enif_is_list(env, info))\n\treturn enif_make_badarg(env);\n    enif_get_list_cell(env, info, &info_arg1, &info);\n    if (!enif_is_list(env, info))\n\treturn enif_make_badarg(env);\n    enif_get_list_cell(env, info, &info_arg2, &info);\n    if (!enif_is_empty_list(env, info))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, info_arg1, &reg.origin))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, info_arg2, &reg.size))\n\treturn enif_make_badarg(env);\n\n    mem = ECL_CALL(clCreateSubBuffer)(o_buf->mem, mem_flags,\n\t\t\t\t      CL_BUFFER_CREATE_TYPE_REGION,\n\t\t\t\t      &reg, &err);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_object(env, &mem_r,(void*) mem, o_buf);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n#endif\n//\n// format {channel_order, channel_data_type} (old) |\n// {'cl_image_format', order, data_type }\n//\nstatic int get_image_format(ErlNifEnv* env, ERL_NIF_TERM arg,\n\t\t\t\tcl_image_format* format)\n{\n    const ERL_NIF_TERM* rec;\n    int i, arity;\n\n    if (!enif_get_tuple(env, arg, &arity, &rec))\n\treturn 0;\n    if (arity == 2)\n\ti = 0;\n    else if (arity == 3) {\n\ti = 1;\n\tif (!enif_is_atom(env, rec[0]) || (rec[0] != ATOM(cl_image_format)))\n\t    return 0;\n    }\n    else\n\treturn 0;\n\n    if (!get_enum(env, rec[i], &format->image_channel_order,\n\t\t  kv_channel_order))\n\treturn 0;\n    if (!get_enum(env, rec[i+1], &format->image_channel_data_type,\n\t\t  kv_channel_type))\n\treturn 0;\n    return 1;\n}\n\n//\n// format {'cl_image_desc',image_type,image_width,image_height,image_depth,\n//             image_array_size,image_row_pitch,image_slice_pitch,\n//             num_mip_levels,num_samples,buffer}\n//\nstatic int get_image_desc(ErlNifEnv* env, ERL_NIF_TERM arg,\n\t\t\t  cl_image_desc* 
desc)\n{\n    const ERL_NIF_TERM* rec;\n    int arity;\n\n    if (!enif_get_tuple(env, arg, &arity, &rec) || (arity != 11))\n\treturn 0;\n\n    if (!enif_is_atom(env, rec[0]) || (rec[0] != ATOM(cl_image_desc)))\n\treturn 0;\n\n    if (!get_enum(env, rec[1], &desc->image_type, kv_mem_object_type))\n\treturn 0;\n    if (!ecl_get_sizet(env, rec[2], &desc->image_width))\n\treturn 0;\n    if (!ecl_get_sizet(env, rec[3], &desc->image_height))\n\treturn 0;\n    if (!ecl_get_sizet(env, rec[4], &desc->image_depth))\n\treturn 0;\n    if (!ecl_get_sizet(env, rec[5], &desc->image_array_size))\n\treturn 0;\n    if (!ecl_get_sizet(env, rec[6], &desc->image_row_pitch))\n\treturn 0;\n    if (!ecl_get_sizet(env, rec[7], &desc->image_slice_pitch))\n\treturn 0;\n    desc->num_mip_levels = 0;  // rec[8] according to spec\n    desc->num_samples = 0;     // rec[9] according to spec\n    if (!get_object(env, rec[10], &mem_r, true, (void**)&desc->buffer))\n\treturn 0;\n    return 1;\n}\n\n// 1.0, 1.1 wrapper for clCreateImage: always fails with CL_INVALID_OPERATION\ncl_mem CL_CALLBACK e_clCreateImage(cl_context context,\n\t\t       cl_mem_flags flags,\n\t\t       const cl_image_format image_format,\n\t\t       const cl_image_desc* image_desc, \n\t\t       void* host_ptr,\n\t\t       cl_int* errcode_ret)\n{\n    UNUSED(context);\n    UNUSED(flags);\n    UNUSED(image_format);\n    UNUSED(image_desc);\n    UNUSED(host_ptr);\n\n    *errcode_ret = CL_INVALID_OPERATION;\n    return NULL;\n}\n\n// 1.2 -> 1.0 wrapper: clCreateImage2D using clCreateImage\ncl_mem CL_CALLBACK eclCreateImage2D(cl_context context,\n\t\t\t cl_mem_flags mem_flags,\n\t\t\t const cl_image_format * format,\n\t\t\t size_t width,\n\t\t\t size_t height,\n\t\t\t size_t row_pitch, \n\t\t\t void * host_ptr,\n\t\t\t cl_int *err)\n{\n    cl_image_desc desc;\n\t\n    desc.image_type = CL_MEM_OBJECT_IMAGE2D;\n    desc.image_width = width;\n    desc.image_height = height;\n    desc.image_depth = 1;       // used with IMAGE3D\n    desc.image_array_size = 1;  // used with 
IMAGE2D/3D_ARRAY?\n    desc.image_row_pitch = row_pitch;\n    desc.image_slice_pitch = 0;  // maybe 0 for 2D image\n    desc.num_mip_levels = 0;  // must be 0\n    desc.num_samples= 0;      // must be 0\n    desc.buffer = NULL;       // used when CL_MEM_OBJECT_IMAGE1D_BUFFER\n\n    return ECL_CALL(clCreateImage)(context,\n\t\t\t\t   mem_flags,\n\t\t\t\t   format,\n\t\t\t\t   &desc,\n\t\t\t\t   host_ptr,\n\t\t\t\t   err);\n}\n\nstatic ERL_NIF_TERM ecl_create_image2d(ErlNifEnv* env, int argc,\n\t\t\t\t\tconst ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_context;\n    size_t width;\n    size_t height;\n    size_t row_pitch;\n    cl_image_format format;\n    cl_mem_flags mem_flags;\n    cl_mem mem;\n    ErlNifBinary bin;\n    void* host_ptr = 0;\n    cl_int err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))\n\treturn enif_make_badarg(env);\n    if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags))\n\treturn enif_make_badarg(env);\n\n    if (!get_image_format(env, argv[2], &format))\n\treturn enif_make_badarg(env);\n\n    if (!ecl_get_sizet(env, argv[3], &width))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[4], &height))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[5], &row_pitch))\n\treturn enif_make_badarg(env);\n\n    if (!enif_inspect_iolist_as_binary(env, argv[6], &bin))\n\treturn enif_make_badarg(env);\n    // How do we keep binary data (CL_MEM_USE_HOST_PTR) (read_only)\n    // we must be able to reference count the binary object!\n    if (bin.size > 0) {\n\thost_ptr = bin.data;\n\tmem_flags |= CL_MEM_COPY_HOST_PTR;\n    }\n    else if (width && height)\n\tmem_flags |= CL_MEM_ALLOC_HOST_PTR;\n    mem = ECL_CALL(clCreateImage2D)(o_context->context, mem_flags, &format,\n\t\t\t\t    width, height, row_pitch,\n\t\t\t\t    host_ptr, &err);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_object(env, &mem_r,(void*) mem, o_context);\n\treturn enif_make_tuple2(env, ATOM(ok), 
t);\n    }\n    return ecl_make_error(env, err);\n}\n\n// 1.2 -> 1.1 wrapper: clCreateImage3D using clCreateImage\ncl_mem CL_CALLBACK eclCreateImage3D(cl_context context,\n\t\t\tcl_mem_flags mem_flags,\n\t\t\tconst cl_image_format* format,\n\t\t\tsize_t width, \n\t\t\tsize_t height,\n\t\t\tsize_t depth, \n\t\t\tsize_t row_pitch, \n\t\t\tsize_t slice_pitch, \n\t\t\tvoid * host_ptr,\n\t\t\tcl_int *err)\n{\n    cl_image_desc desc;\n\n    desc.image_type = CL_MEM_OBJECT_IMAGE3D;\n    desc.image_width = width;\n    desc.image_height = height;\n    desc.image_depth = depth;       // used with IMAGE3D\n    desc.image_array_size = 1;  // used with IMAGE2D/3D_ARRAY?\n    desc.image_row_pitch = row_pitch;\n    desc.image_slice_pitch = slice_pitch;  // maybe 0 for 2D image\n    desc.num_mip_levels = 0;  // must be 0\n    desc.num_samples= 0;      // must be 0\n    desc.buffer = NULL;       // used when CL_MEM_OBJECT_IMAGE1D_BUFFER\n\n    return ECL_CALL(clCreateImage)(context,\n\t\t\t\t   mem_flags,\n\t\t\t\t   format,\n\t\t\t\t   &desc,\n\t\t\t\t   host_ptr,\n\t\t\t\t   err);\n}\n\n\nstatic ERL_NIF_TERM ecl_create_image3d(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_context;\n    size_t width;\n    size_t height;\n    size_t depth;\n    size_t row_pitch;\n    size_t slice_pitch;\n    cl_image_format format;\n    cl_mem_flags mem_flags;\n    cl_mem mem;\n    ErlNifBinary bin;\n    void* host_ptr = 0;\n    cl_int err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))\n\treturn enif_make_badarg(env);\n    if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags))\n\treturn enif_make_badarg(env);\n\n    if (!get_image_format(env, argv[2], &format))\n\treturn enif_make_badarg(env);\n\n    if (!ecl_get_sizet(env, argv[3], &width))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[4], &height))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[5], 
&depth))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[6], &row_pitch))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[7], &slice_pitch))\n\treturn enif_make_badarg(env);\n\n    if (!enif_inspect_iolist_as_binary(env, argv[8], &bin))\n\treturn enif_make_badarg(env);\n    // How do we keep binary data (CL_MEM_USE_HOST_PTR)  (read_only)\n    // we must be able to reference count the binary object!\n    if (bin.size > 0) {\n\thost_ptr = bin.data;\n\tmem_flags |= CL_MEM_COPY_HOST_PTR;\n    }\n    else if (width && height && depth)\n\tmem_flags |= CL_MEM_ALLOC_HOST_PTR;\n    mem = ECL_CALL(clCreateImage3D)(o_context->context, mem_flags, &format,\n\t\t\t\t    width, height, depth, row_pitch, \n\t\t\t\t    slice_pitch,\n\t\t\t\t    host_ptr, &err);\n    if (mem) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_object(env, &mem_r,(void*) mem, o_context);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n\n//\n// cl:create_image(Context, MemFlags, ImageFormat, ImageDesc, Data) ->\n//\n//\n#if CL_VERSION_1_2 == 1\nstatic ERL_NIF_TERM ecl_create_image(ErlNifEnv* env, int argc,\n\t\t\t\t     const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_context;\n    cl_image_format format;\n    cl_image_desc   desc;\n    cl_mem_flags mem_flags;\n    cl_mem mem;\n    ErlNifBinary bin;\n    void* host_ptr = 0;\n    cl_int err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))\n\treturn enif_make_badarg(env);\n    if (!get_bitfields(env, argv[1], &mem_flags, kv_mem_flags))\n\treturn enif_make_badarg(env);\n\n    if (!get_image_format(env, argv[2], &format))\n\treturn enif_make_badarg(env);\n\n    if (!get_image_desc(env, argv[3], &desc))\n\treturn enif_make_badarg(env);\n\n    if (!enif_inspect_iolist_as_binary(env, argv[4], &bin))\n\treturn enif_make_badarg(env);\n\n    if (bin.size > 0) {\n\thost_ptr = bin.data;\n\tmem_flags |= CL_MEM_COPY_HOST_PTR;\n    }\n    else if 
(desc.image_width && desc.image_height && desc.image_depth)\n\tmem_flags |= CL_MEM_ALLOC_HOST_PTR;\n\n    mem = ECL_CALL(clCreateImage)(o_context->context,\n\t\t\t\t  mem_flags,\n\t\t\t\t  &format,\n\t\t\t\t  &desc,\n\t\t\t\t  host_ptr,\n\t\t\t\t  &err);\n    if (mem) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_object(env, &mem_r,(void*) mem, o_context);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n#endif\n\nstatic ERL_NIF_TERM ecl_get_supported_image_formats(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t    const ERL_NIF_TERM argv[])\n{\n    cl_context context;\n    cl_mem_flags flags;\n    cl_mem_object_type image_type;\n    cl_image_format image_format[MAX_IMAGE_FORMATS];\n    cl_uint num_image_formats;\n    cl_int err;\n    UNUSED(argc);\n\n    if (!get_object(env, argv[0], &context_r, false, (void**) &context))\n\treturn enif_make_badarg(env);\n    if (!get_bitfields(env, argv[1], &flags, kv_mem_flags))\n\treturn enif_make_badarg(env);\t\n    if (!get_enum(env, argv[2], &image_type, kv_mem_object_type))\n\treturn enif_make_badarg(env);\t\n    err = ECL_CALL(clGetSupportedImageFormats)(context, flags, image_type,\n\t\t\t\t\t       MAX_IMAGE_FORMATS,\n\t\t\t\t\t       image_format,\n\t\t\t\t\t       &num_image_formats);\n    if (!err) {\n\tint i = (int) num_image_formats;\n\tERL_NIF_TERM list = enif_make_list(env, 0);\n\n\twhile(i) {\n\t    ERL_NIF_TERM channel_order, channel_type;\n\t    ERL_NIF_TERM elem;\n\t    i--;\n\t    channel_order = make_enum(env,\n\t\t\t\t      image_format[i].image_channel_order, \n\t\t\t\t      kv_channel_order);\n\t    channel_type = make_enum(env,\n\t\t\t\t     image_format[i].image_channel_data_type,\n\t\t\t\t     kv_channel_type);\n\t    elem = enif_make_tuple2(env, channel_order, channel_type);\n\t    list = enif_make_list_cell(env, elem, list);\n\t}\n\treturn enif_make_tuple2(env, ATOM(ok), list);\n    }\n    return ecl_make_error(env, err);\n}\n\n\nstatic ERL_NIF_TERM 
ecl_get_mem_object_info(ErlNifEnv* env, int argc, \n\t\t\t\t\t    const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_mem;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &mem_r, false, &o_mem))\n\treturn enif_make_badarg(env);\n    return make_object_info(env, argv[1], o_mem, \n\t\t\t    (info_fn_t*) ECL_FUNC_PTR(clGetMemObjectInfo),\n\t\t\t    mem_info,\n\t\t\t    sizeof_array(mem_info));\n}\n\nstatic ERL_NIF_TERM ecl_get_image_info(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_mem;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &mem_r, false, &o_mem))\n\treturn enif_make_badarg(env);\n    return make_object_info(env, argv[1], o_mem,\n\t\t\t    (info_fn_t*) ECL_FUNC_PTR(clGetImageInfo),\n\t\t\t    image_info, \n\t\t\t    sizeof_array(image_info));\n}\n\n//\n// cl:create_sampler(Context::cl_context(),Normalized::boolean(),\n//\t\t     AddressingMode::cl_addressing_mode(),\n//\t\t     FilterMode::cl_filter_mode()) -> \n//    {'ok', cl_sampler()} | {'error', cl_error()}.\n//\n\nstatic ERL_NIF_TERM ecl_create_sampler(ErlNifEnv* env, int argc,\n\t\t\t\t       const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_context;\n    cl_bool normalized_coords;\n    cl_addressing_mode addressing_mode;\n    cl_filter_mode filter_mode;\n    cl_sampler sampler;\n    cl_int err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))\n\treturn enif_make_badarg(env);\n    if (!get_bool(env, argv[1], &normalized_coords))\n\treturn enif_make_badarg(env);\n    if (!get_enum(env, argv[2], &addressing_mode, kv_addressing_mode))\n\treturn enif_make_badarg(env);\n    if (!get_enum(env, argv[3], &filter_mode, kv_filter_mode))\n\treturn enif_make_badarg(env);\n\n    sampler = ECL_CALL(clCreateSampler)(o_context->context,\n\t\t\t\t\tnormalized_coords, addressing_mode, filter_mode,\n\t\t\t\t\t&err);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_object(env, &sampler_r,(void*) sampler, 
o_context);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n\n\nstatic ERL_NIF_TERM ecl_get_sampler_info(ErlNifEnv* env, int argc, \n\t\t\t\t\t const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_sampler;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &sampler_r, false, &o_sampler))\n\treturn enif_make_badarg(env);\n    return make_object_info(env, argv[1], o_sampler,\n\t\t\t    (info_fn_t*) ECL_FUNC_PTR(clGetSamplerInfo),\n\t\t\t    sampler_info,\n\t\t\t    sizeof_array(sampler_info));\n}\n\n//\n// cl:create_program_with_source(Context::cl_context(), Source::iodata()) ->\n//   {'ok', cl_program()} | {'error', cl_error()}\n//\nstatic ERL_NIF_TERM ecl_create_program_with_source(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t   const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_context;\n    cl_program program;\n    ErlNifBinary source;\n    char* strings[1];\n    size_t lengths[1];\n    cl_int err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))\n\treturn enif_make_badarg(env);\n    if (!enif_inspect_iolist_as_binary(env, argv[1], &source))\n\treturn enif_make_badarg(env);\n    strings[0] = (char*) source.data;\n    lengths[0] = source.size;\n    program = ECL_CALL(clCreateProgramWithSource)(o_context->context,\n\t\t\t\t\t\t  1,\n\t\t\t\t\t\t  (const char**) strings,\n\t\t\t\t\t\t  lengths,\n\t\t\t\t\t\t  &err);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_object(env, &program_r,(void*) program, o_context);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n\n//\n//  cl:create_program_with_binary(Context::cl_context(),\n//                                  DeviceList::[cl_device_id()],\n//                                  BinaryList::[binary()]) ->\n//    {'ok', cl_program()} | {'error', cl_error()}\n//\nstatic ERL_NIF_TERM ecl_create_program_with_binary(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t   const ERL_NIF_TERM 
argv[])\n{\n    ecl_object_t* o_context;\n    cl_program     program;\n    cl_device_id   device_list[MAX_DEVICES];\n    cl_uint        num_devices = MAX_DEVICES;\n    ErlNifBinary   binary_list[MAX_DEVICES];\n    size_t         num_binaries = MAX_DEVICES;\n    size_t         lengths[MAX_DEVICES];\n    unsigned char* data[MAX_DEVICES];\n    cl_uint        i;\n    cl_int         status[MAX_DEVICES];\n    cl_int         err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[1], &device_r, false,\n\t\t\t (void**) device_list, &num_devices))\n\treturn enif_make_badarg(env);\n    if (!get_binary_list(env, argv[2], binary_list, &num_binaries))\n\treturn enif_make_badarg(env);\n    if (num_binaries != num_devices)\n\treturn enif_make_badarg(env);\n\t\n    for (i = 0; i < num_devices; i++) {\n\tlengths[i] = binary_list[i].size;\n\tdata[i]    = binary_list[i].data;\n    }\n    program = ECL_CALL(clCreateProgramWithBinary)(o_context->context,\n\t\t\t\t\t\t  num_devices,\n\t\t\t\t\t\t  (const cl_device_id*) device_list,\n\t\t\t\t\t\t  (const size_t*) lengths,\n\t\t\t\t\t\t  (const unsigned char**) data,\n\t\t\t\t\t\t  status,\n\t\t\t\t\t\t  &err);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_object(env, &program_r,(void*) program, o_context);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    // FIXME: handle the value in the status array\n    // In cases of error we can then detect which binary was corrupt...\n    return ecl_make_error(env, err);\n}\n\n\n//\n//  cl:create_program_with_builtin_kernels(Context::cl_context(),\n//                                  DeviceList::[cl_device_id()],\n//                                  KernelNames::string()) ->\n//    {'ok', cl_program()} | {'error', cl_error()}\n//\n#if CL_VERSION_1_2 == 1\n\nstatic ERL_NIF_TERM ecl_create_program_with_builtin_kernels(\n    ErlNifEnv* env, int argc, const ERL_NIF_TERM 
argv[])\n{\n    ecl_object_t* o_context;\n    cl_program     program;\n    cl_device_id   device_list[MAX_DEVICES];\n    cl_uint        num_devices = MAX_DEVICES;\n    char kernel_names[MAX_KERNEL_NAME];\n    cl_int         err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[1], &device_r, false,\n\t\t\t (void**) device_list, &num_devices))\n\treturn enif_make_badarg(env);\n    if (!enif_get_string(env, argv[2], kernel_names, sizeof(kernel_names),\n\t\t\t ERL_NIF_LATIN1))\n\treturn enif_make_badarg(env);\n\n    program = ECL_CALL(clCreateProgramWithBuiltInKernels)(\n\to_context->context,\n\tnum_devices,\n\t(const cl_device_id*) device_list,\n\tkernel_names,\n\t&err);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_object(env, &program_r,(void*) program, o_context);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n#endif\n\n#if CL_VERSION_2_1 == 1\n//\n// cl:create_program_with_il(Context::cl_context(), IL::iodata()) ->\n//   {'ok', cl_program()} | {'error', cl_error()}\n//\nstatic ERL_NIF_TERM ecl_create_program_with_il(ErlNifEnv* env, int argc, \n\t\t\t\t\t       const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_context;\n    cl_program program;\n    ErlNifBinary il;\n    cl_int err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))\n\treturn enif_make_badarg(env);\n    if (!enif_inspect_iolist_as_binary(env, argv[1], &il))\n\treturn enif_make_badarg(env);\n    program = ECL_CALL(clCreateProgramWithIL)(o_context->context,\n\t\t\t\t\t      1,\n\t\t\t\t\t      (const void*) il.data,\n\t\t\t\t\t      il.size,\n\t\t\t\t\t      &err);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_object(env, &program_r,(void*) program, o_context);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n#endif\n\n//\n// @spec 
async_build_program(Program::cl_program(),\n//                     DeviceList::[cl_device_id()],\n//                     Options::string()) ->\n//  {'ok',Ref} | {'error', cl_error()}\n//\n//\n// Notification function for clBuildProgram\n// Passed to main thread by sending an async response\n// FIXME: lock needed?\n//\ntypedef struct {\n    ErlNifPid        sender;  // sender pid\n    ErlNifEnv*        s_env;  // sender's message environment (ref, bin's etc)\n    ErlNifEnv*        r_env;  // receiver message environment (ref, bin's etc)\n    ErlNifTid           tid;  // Calling thread\n    ERL_NIF_TERM        ref;  // ref (in env!)\n    ecl_object_t*  program;\n} ecl_build_data_t;\n\nvoid CL_CALLBACK ecl_build_notify(cl_program program, void* user_data)\n{\n    ecl_build_data_t* bp = user_data;\n    ERL_NIF_TERM reply;\n    ErlNifEnv*        s_env;\n    int res;\n    UNUSED(program);\n    UNUSED(res);\n\n    DBG(\"ecl_build_notify: done program=%p, user_data=%p\",\n\tprogram, user_data);\n\n    // FIXME: check all devices for build_status!\n    // clGetProgramBuildInfo(bp->program->program, CL_PROGRAM_BUILD_STATUS,\n\n    // reply = !err ? 
ATOM(ok) : ecl_make_error(bp->env, err);\n\n    if(enif_equal_tids(bp->tid, enif_thread_self()))\n       s_env = bp->s_env;\n    else\n       s_env = 0;\n\n    reply = ATOM(ok);\n    res = enif_send(s_env, &bp->sender, bp->r_env, \n\t\t    enif_make_tuple3(bp->r_env,\n\t\t\t\t     ATOM(cl_async),\n\t\t\t\t     bp->ref,\n\t\t\t\t     reply));\n    DBG(\"ecl_build_notify: send r=%d\", res);\n    enif_free_env(bp->r_env);\n    if (bp->program)\n\tenif_release_resource(bp->program);\n    enif_free(bp);\n}\n\n\nstatic ERL_NIF_TERM ecl_async_build_program(ErlNifEnv* env, int argc, \n\t\t\t\t\t    const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_program;\n    cl_device_id     device_list[MAX_DEVICES];\n    cl_uint          num_devices = MAX_DEVICES;\n    char             options[MAX_OPTION_LIST];\n    ERL_NIF_TERM     ref;\n    ecl_build_data_t* bp;\n    cl_int           err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[1], &device_r, false,\n\t\t\t (void**) device_list, &num_devices))\n\treturn enif_make_badarg(env);\n    if (!enif_get_string(env, argv[2], options, sizeof(options),ERL_NIF_LATIN1))\n\treturn enif_make_badarg(env);\n    if (!(bp = enif_alloc(sizeof(ecl_build_data_t))))\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\n\n    if (!(bp->r_env = enif_alloc_env())) {\n\tenif_free(bp);\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\n    }\n    ref = enif_make_ref(env);\n    (void) enif_self(env, &bp->sender);\n    bp->ref    = enif_make_copy(bp->r_env, ref);\n    bp->program = o_program;\n    bp->s_env = env;\n    bp->tid = enif_thread_self();\n    enif_keep_resource(o_program);    // keep while operation is running\n\n    err = ECL_CALL(clBuildProgram)(o_program->program,\n\t\t\t\t   num_devices,\n\t\t\t\t   device_list,\n\t\t\t\t   (const char*) options,\n\t\t\t\t   ecl_build_notify,\n\t\t\t\t   bp);\n    
DBG(\"ecl_async_build_program: err=%d user_data=%p\", err, bp);\n\n    if ((err==CL_SUCCESS) ||\n\t// This should not be returned, it is not according to spec!!!!\n\t(err==CL_BUILD_PROGRAM_FAILURE))\n\treturn enif_make_tuple2(env, ATOM(ok), ref);\n    else { \n        enif_free_env(bp->r_env);\n\tenif_release_resource(bp->program);\n\tenif_free(bp);\n\treturn ecl_make_error(env, err);\n    }\n}\n\n#if CL_VERSION_1_2 == 1\nstatic ERL_NIF_TERM ecl_unload_platform_compiler(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t const ERL_NIF_TERM argv[])\n{\n    cl_int err;\n    cl_platform_id   platform;\n    ecl_env_t* ecl = enif_priv_data(env);\n    UNUSED(argc);\n\n    if(ecl->icd_version < 12)\n\treturn ecl_make_error(env, CL_INVALID_OPERATION);\n    if (!get_object(env, argv[0], &platform_r, true,(void**)&platform))\n\treturn enif_make_badarg(env);\n    err = ECL_CALL(clUnloadPlatformCompiler)(platform);\n    // err = eclUnloadPlatformCompiler(platform);\n    if (err)\n\treturn ecl_make_error(env, err);\n    return ATOM(ok);    \n}\n#endif\n\n#if CL_VERSION_1_2 == 1\n// -spec compile_program(Program::cl_program(),\n//\t\t      DeviceList::[cl_device_id()],\n//\t\t      Options::string(),\n//\t\t      Headers::[cl_program()],\n//\t\t      Names::[string()]) ->\n//    'ok' | {'error', cl_error()}.\n\n#define MAX_HEADERS 128\n\nstatic ERL_NIF_TERM ecl_async_compile_program(ErlNifEnv* env, int argc,\n\t\t\t\t\t      const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_program;\n    cl_device_id     device_list[MAX_DEVICES];\n    cl_uint          num_devices = MAX_DEVICES;\n    char             options[MAX_OPTION_LIST];\n    cl_uint          num_input_headers = MAX_HEADERS;\n    cl_program       input_headers[MAX_HEADERS];\n    size_t           num_header_include_names = MAX_HEADERS;\n    char*            header_include_names[MAX_HEADERS];\n    ERL_NIF_TERM     ref;\n    ecl_build_data_t* bp = NULL;\n    cl_int           err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, 
argv[0], &program_r, false, &o_program))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[1], &device_r, false,\n\t\t\t (void**) device_list, &num_devices))\n\treturn enif_make_badarg(env);\n    if (!enif_get_string(env, argv[2], options, sizeof(options),ERL_NIF_LATIN1))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[3], &program_r, false,\n\t\t\t (void**) input_headers, &num_input_headers))\n\treturn enif_make_badarg(env);\n    num_header_include_names = num_input_headers;\n    if (!get_string_list(env, argv[4], header_include_names,\n\t\t\t &num_header_include_names))\n\treturn enif_make_badarg(env);\n\n    if (!(bp = enif_alloc(sizeof(ecl_build_data_t)))) {\n\terr =  CL_OUT_OF_RESOURCES;\n\tgoto error;\n    }\n    if (!(bp->r_env = enif_alloc_env())) {\n\terr =  CL_OUT_OF_RESOURCES;\n\tgoto error;\n    }\n\n    ref = enif_make_ref(env);\n    (void) enif_self(env, &bp->sender);\n    bp->ref    = enif_make_copy(bp->r_env, ref);\n    bp->program = o_program;\n    bp->s_env = env;\n    bp->tid = enif_thread_self();\n    enif_keep_resource(o_program);    // keep while operation is running\n\n    DBG(\"ecl_async_compile_program: program: %p, num_input_headers: %d, bp=%p\",\n\to_program->program, num_input_headers, bp);\n\n    err = ECL_CALL(clCompileProgram)(o_program->program,\n\t\t\t\t     num_devices,\n\t\t\t\t     device_list,\n\t\t\t\t     (const char*) options,\n\t\t\t\t     num_input_headers,\n\t\t\t\t     num_input_headers ? 
input_headers : NULL,\n\t\t\t\t     num_input_headers ?\n\t\t\t\t     (const char**)header_include_names : NULL,\n\t\t\t\t     ecl_build_notify,\n\t\t\t\t     bp);\n    DBG(\"ecl_async_compile_program: err=%d user_data=%p\", err, bp);\n\n    if ((err==CL_SUCCESS) || (err==CL_BUILD_PROGRAM_FAILURE)) {\n\t// check if we need to save this until complete!\n\tfree_string_list(header_include_names, num_header_include_names);\n\treturn enif_make_tuple2(env, ATOM(ok), ref);\n    }\n\nerror:\n    free_string_list(header_include_names, num_header_include_names);\n    if (bp) {\n\tif (bp->program) enif_release_resource(bp->program);\n\tif (bp->r_env) enif_free_env(bp->r_env);\n\tenif_free(bp);\n    }\n    return ecl_make_error(env, err);\n}\n#endif\n\n#if CL_VERSION_1_2 == 1\n// -spec link_program(Context::cl_context(),\n//\t\t   DeviceList::[cl_device_id()],\n//\t\t   Options::string(),\n//\t\t   Programs::[cl_program()]) ->\n//    {'ok',cl_program()} | {'error', cl_error()}.\n\n#define MAX_INPUT_PROGRAMS 128\n\nstatic ERL_NIF_TERM ecl_async_link_program(ErlNifEnv* env, int argc,\n\t\t\t\t\t   const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_context;\n    cl_program       program;\n    cl_device_id     device_list[MAX_DEVICES];\n    cl_uint          num_devices = MAX_DEVICES;\n    char             options[MAX_OPTION_LIST];\n    cl_uint          num_input_programs = MAX_INPUT_PROGRAMS;\n    cl_program       input_programs[MAX_INPUT_PROGRAMS];\n    ERL_NIF_TERM     ref;\n    ERL_NIF_TERM     prog;\n    ecl_build_data_t* bp;\n    cl_int           err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[1], &device_r, false,\n\t\t\t (void**) device_list, &num_devices))\n\treturn enif_make_badarg(env);\n    if (!enif_get_string(env, argv[2], options, sizeof(options),ERL_NIF_LATIN1))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[3], &program_r, 
false,\n\t\t\t (void**) input_programs, &num_input_programs))\n\treturn enif_make_badarg(env);\n\n    if (!(bp = enif_alloc(sizeof(ecl_build_data_t))))\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);\n    if (!(bp->r_env = enif_alloc_env())) {\n\tenif_free(bp);\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\n    }\n\n    ref = enif_make_ref(env);\n    (void) enif_self(env, &bp->sender);\n    bp->ref    = enif_make_copy(bp->r_env, ref);\n    bp->program = NULL;\n    bp->s_env = env;\n    bp->tid = enif_thread_self();\n\n    DBG(\"ecl_async_link_program: context: %p, num_input_programs %d, bp=%p\",\n\to_context->context, num_input_programs, bp);\n\n    // lock callback in order to avoid a race?\n    program = ECL_CALL(clLinkProgram)(o_context->context,\n\t\t\t\t      num_devices,\n\t\t\t\t      num_devices ? device_list : NULL,\n\t\t\t\t      (const char*) options,\n\t\t\t\t      num_input_programs,\n\t\t\t\t      input_programs,\n\t\t\t    ecl_build_notify,\n\t\t\t\t      bp,\n\t\t\t\t      &err);\n    DBG(\"ecl_async_link_program: err=%d program %p, user_data=%p\",\n\terr, program, bp);\n\n    if (program == NULL) {\n\tenif_free_env(bp->r_env);\n\tenif_free(bp);\n\treturn ecl_make_error(env, err);\n    }\n    prog = ecl_make_object(env, &program_r,(void*) program, o_context);\n    return enif_make_tuple2(env, ATOM(ok),\n\t\t\t    enif_make_tuple2(env, ref, prog));\n}\n\n#endif\n\nstatic ERL_NIF_TERM ecl_unload_compiler(ErlNifEnv* env, int argc, \n\t\t\t\t\tconst ERL_NIF_TERM argv[])\n{\n    cl_int err;\n    ecl_env_t* ecl = enif_priv_data(env);\n\n    UNUSED(argc);\n    UNUSED(argv);\n\n    if (ecl->icd_version >= 12) {\n\tecl_env_t* ecl = enif_priv_data(env);\n\tcl_platform_id platform;\n\tif (ecl->nplatforms <= 0)\n\t    return ecl_make_error(env, CL_INVALID_VALUE);\n\tplatform = (cl_platform_id) ecl->platform[0].o_platform->opaque;\n\terr = ECL_CALL(clUnloadPlatformCompiler)(platform);\n\t// err = eclUnloadPlatformCompiler(platform);\n    } else 
{\n\terr = ECL_CALL(clUnloadCompiler)();\n    }\n    if (err)\n\treturn ecl_make_error(env, err);\n    return ATOM(ok);\n}\n\n// Special (workaround) for checking if program may have binaries\nstatic int program_may_have_binaries(cl_program program)\n{\n    cl_int num_devices;\n    size_t returned_size;\n    cl_device_id devices[MAX_DEVICES];\n    int i;\n\n    if (ECL_CALL(clGetProgramInfo)\n\t(program,\n\t CL_PROGRAM_NUM_DEVICES,\n\t sizeof(num_devices),\n\t &num_devices,\n\t &returned_size) != CL_SUCCESS)\n\treturn 0;\n\n    if (ECL_CALL(clGetProgramInfo)(program, CL_PROGRAM_DEVICES,\n\t\t\t\t   num_devices*sizeof(cl_device_id),\n\t\t\t\t   devices, NULL) != CL_SUCCESS)\n\treturn 0;\n\n    for (i = 0; i < num_devices; i++) {\n\tcl_build_status build_status = CL_BUILD_NONE;\n        if (ECL_CALL(clGetProgramBuildInfo)\n\t    (program, devices[i], CL_PROGRAM_BUILD_STATUS,\n\t     sizeof(build_status), \n\t     &build_status, NULL) != CL_SUCCESS)\n\t    return 0;\n\tif (build_status != CL_BUILD_SUCCESS) return 0;\n    }\n    return 1;\n}\n\n// Special util to extract program binary_sizes\nstatic ERL_NIF_TERM make_program_binary_sizes(ErlNifEnv* env,\n\t\t\t\t\t      cl_program program)\n{\n    cl_int err;\n    ERL_NIF_TERM list;\n    size_t returned_size;\n    cl_uint num_devices;\n    size_t size[MAX_DEVICES];\n    int i;\n\n    memset(size, 0,     sizeof(size));\n\n    if ((err = ECL_CALL(clGetProgramInfo)\n\t (program,\n\t  CL_PROGRAM_NUM_DEVICES,\n\t  sizeof(num_devices),\n\t  &num_devices,\n\t  &returned_size)))\n\treturn ecl_make_error(env, err);\n\n    if (program_may_have_binaries(program)) {\n\tif ((err = ECL_CALL(clGetProgramInfo)\n\t     (program,\n\t      CL_PROGRAM_BINARY_SIZES,\n\t      num_devices*sizeof(size_t),\n\t      &size[0],\n\t      &returned_size)))\n\t    return ecl_make_error(env, err);\n    }\n    list = enif_make_list(env, 0);\n    for (i = num_devices-1; i >= 0; i--) {\n\tERL_NIF_TERM elem = ecl_make_sizet(env, size[i]);\n\tlist = 
enif_make_list_cell(env, elem, list);\n    }\n    return enif_make_tuple2(env, ATOM(ok), list);\n}\n\n\n// Special util to extract program binaries\nstatic ERL_NIF_TERM make_program_binaries(ErlNifEnv* env, cl_program program)\n{\n    cl_int err;\n    ERL_NIF_TERM list;\n    size_t returned_size;\n    cl_uint num_devices;\n    int i;\n\n    if ((err = ECL_CALL(clGetProgramInfo)\n\t (program,\n\t  CL_PROGRAM_NUM_DEVICES,\n\t  sizeof(num_devices),\n\t  &num_devices,\n\t  &returned_size)))\n\treturn ecl_make_error(env, err);\n\n    if (!program_may_have_binaries(program)) {\n\tErlNifBinary empty;\n\tenif_alloc_binary(0, &empty);\n\n\tlist = enif_make_list(env, 0);\n\tfor (i = num_devices-1; i >= 0; i--) {\n\t    ERL_NIF_TERM elem;\n\t    elem = enif_make_binary(env, &empty);\n\t    list = enif_make_list_cell(env, elem, list);\n\t}\n\tenif_release_binary(&empty);\n\treturn enif_make_tuple2(env, ATOM(ok), list);\n    }\n    else {\n\tsize_t size[MAX_DEVICES];\n\tErlNifBinary binary[MAX_DEVICES];\n\tunsigned char* data[MAX_DEVICES];\n\n\tmemset(size, 0,     sizeof(size));\n\tmemset(binary, 0,   sizeof(binary));\n\n\tif ((err = ECL_CALL(clGetProgramInfo)\n\t     (program,\n\t      CL_PROGRAM_BINARY_SIZES,\n\t      num_devices*sizeof(size_t),\n\t      &size[0],\n\t      &returned_size)))\n\t    return ecl_make_error(env, err);\n\ti = 0;\n\twhile (i < (int) num_devices) {\n\t    if (!enif_alloc_binary(size[i], &binary[i])) {\n\t\terr = CL_OUT_OF_HOST_MEMORY;\n\t\tgoto cleanup;\n\t    }\n\t    data[i] = binary[i].data;\n\t    i++;\n\t}\n\tif ((err = ECL_CALL(clGetProgramInfo)\n\t     (program,\n\t      CL_PROGRAM_BINARIES,\n\t      sizeof(unsigned char*)*num_devices,\n\t      data,\n\t      &returned_size)))\n\t    goto cleanup;\n\n\tlist = enif_make_list(env, 0);\n\tfor (i = num_devices-1; i >= 0; i--) {\n\t    ERL_NIF_TERM elem = enif_make_binary(env, &binary[i]);\n\t    list = enif_make_list_cell(env, elem, list);\n\t}\n\treturn enif_make_tuple2(env, ATOM(ok), list);\n\n  
  cleanup:\n\twhile(i > 0) {\n\t    i--;\n\t    enif_release_binary(&binary[i]);\n\t}\n\treturn ecl_make_error(env, err);\n    }\n}\n\nstatic ERL_NIF_TERM ecl_get_program_info(ErlNifEnv* env, int argc, \n\t\t\t\t\t const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_program;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))\n\treturn enif_make_badarg(env);\n\n    if (argv[1] == ATOM(binaries))\n\treturn make_program_binaries(env, o_program->program);\n    else if (argv[1] == ATOM(binary_sizes))\n\treturn make_program_binary_sizes(env, o_program->program);\n    else\n\treturn make_object_info(env, argv[1], o_program,\n\t\t\t\t(info_fn_t*) ECL_FUNC_PTR(clGetProgramInfo),\n\t\t\t\tprogram_info,\n\t\t\t\tsizeof_array(program_info));\n}\n\nstatic ERL_NIF_TERM ecl_get_program_build_info(ErlNifEnv* env, int argc, \n\t\t\t\t\t       const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_program;\n    ecl_object_t* o_device;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))\n\treturn enif_make_badarg(env);\n    if (!get_ecl_object(env, argv[1], &device_r, false, &o_device))\n\treturn enif_make_badarg(env);\n    return make_object_info2(env, argv[2], o_program, o_device->opaque,\n\t\t\t     (info2_fn_t*) ECL_FUNC_PTR(clGetProgramBuildInfo),\n\t\t\t     build_info,\n\t\t\t     sizeof_array(build_info));\n}\n\n\nstatic ERL_NIF_TERM ecl_create_kernel(ErlNifEnv* env, int argc, \n\t\t\t\t      const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_program;\n    cl_kernel kernel;\n    char kernel_name[MAX_KERNEL_NAME];\n    cl_int err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))\n\treturn enif_make_badarg(env);\n    if (!enif_get_string(env, argv[1], kernel_name, sizeof(kernel_name),\n\t\t\t ERL_NIF_LATIN1))\n\treturn enif_make_badarg(env);\n\n    kernel = ECL_CALL(clCreateKernel)(o_program->program,kernel_name, &err);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt 
= ecl_make_kernel(env, kernel, o_program);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n\n\n//\n// @spec create_kernels_in_program(Program::cl_program()) ->\n//    {'ok', [cl_kernel()]} | {'error', cl_error()}\n//\nstatic ERL_NIF_TERM ecl_create_kernels_in_program(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t  const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_program;\n    ERL_NIF_TERM kernv[MAX_KERNELS];\n    ERL_NIF_TERM kernel_list;\n    cl_kernel kernel[MAX_KERNELS];\n    cl_uint num_kernels_ret;\n    cl_uint i;\n    cl_int err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &program_r, false, &o_program))\n\treturn enif_make_badarg(env);\n\n    err = ECL_CALL(clCreateKernelsInProgram)\n\t(o_program->program,\n\t MAX_KERNELS,\n\t kernel,\n\t &num_kernels_ret);\n    if (err)\n\treturn ecl_make_error(env, err);\n    for (i = 0; i < num_kernels_ret; i++) {\n\t// FIXME: handle out of memory\n\tkernv[i] = ecl_make_kernel(env, kernel[i], o_program);\n    }\n    kernel_list = enif_make_list_from_array(env, kernv, num_kernels_ret);\n    return enif_make_tuple2(env, ATOM(ok), kernel_list);\n}\n\n\n//\n// cl:set_kernel_arg(Kernel::cl_kernel(), Index::non_neg_integer(),\n//                   Argument::cl_kernel_arg()) -> \n// {Type,Value}\n// {'size',Value}\n// {ecl_object,Handle,<<Res>>}   object (special for sampler)\n// integer()   ==  {'int', Value}\n// float()     ==  {'float', Value}\n// list        ==  Raw data\n// binary      ==  Raw data\n//\nstatic ERL_NIF_TERM ecl_set_kernel_arg(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[])\n{\n    ecl_kernel_t* o_kernel;\n    unsigned char arg_buf[16*sizeof(double)]; // vector type buffer\n    cl_uint arg_index;\n    size_t  arg_size;\n    void*   arg_value;\n    const ERL_NIF_TERM* array;\n    double   fval;\n    int      ival;\n    long     lval;\n    unsigned long luval;\n    size_t   sval;\n    ErlNifUInt64 u64val;\n    ErlNifSInt64 
i64val;\n    ErlNifBinary bval;\n    cl_int   int_arg;\n    cl_float float_arg;\n    void*    ptr_arg = 0;\n    int      arity;\n    cl_int   err;\n    int      arg_type = KERNEL_ARG_OTHER;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env,argv[0],&kernel_r,false,(ecl_object_t**)&o_kernel))\n\treturn enif_make_badarg(env);\n    if (!enif_get_uint(env, argv[1], &arg_index))\n\treturn enif_make_badarg(env);\n    if (enif_get_tuple(env, argv[2], &arity, &array)) {\n\tif (arity == 3) {\n\t    if (array[0] == ATOM(mem_t)) {\n\t\tif (!get_object(env,argv[2],&mem_r,true,&ptr_arg))\n\t\t    return enif_make_badarg(env);\n\t\targ_type = KERNEL_ARG_MEM;\n\t\targ_value = &ptr_arg;\n\t\targ_size = sizeof(cl_mem);\n\t\tgoto do_kernel_arg;\n\t    }\n\t    else if (array[0] == ATOM(sampler_t)) {\n\t\tif (!get_object(env,argv[2],&sampler_r,false,&ptr_arg))\n\t\t    return enif_make_badarg(env);\n\t\targ_type = KERNEL_ARG_SAMPLER;\n\t\targ_value = &ptr_arg;\n\t\targ_size = sizeof(cl_sampler);\n\t\tgoto do_kernel_arg;\n\t    }\n\t    return enif_make_badarg(env);\n\t}\n\telse if (arity == 2) {\n\t    cl_uint typen;\n\t    ocl_type_t base_type;\n\t    size_t     base_size;\n\t    int       vec_size;\n\t    int value_arity;\n\t    const ERL_NIF_TERM* values;\n\t    unsigned char* ptr = arg_buf;\n\t    int i;\n\n\t    if (!get_enum(env, array[0], &typen, kv_cl_type))\n\t\treturn enif_make_badarg(env);\n\t    vec_size = typen >> 16;\n\t    base_type = typen & 0xFFFF;\n\t    base_size = ecl_sizeof(base_type);\n\t    if ((vec_size == 1) && !enif_is_tuple(env, array[1])) {\n\t\tvalue_arity = 1;\n\t\tvalues = &array[1];\n\t    }\n\t    else if (!enif_get_tuple(env, array[1], &value_arity, &values))\n\t\treturn enif_make_badarg(env);\n\t    if (value_arity != vec_size)\n\t\treturn enif_make_badarg(env);\n\t    for (i = 0; i < vec_size; i++) {\n\t\tswitch(base_type) {\n\t\tcase OCL_CHAR:\n\t\t    if (!enif_get_long(env, values[i], &lval))\n\t\t\treturn enif_make_badarg(env);\n\t\t    
*((cl_char*)ptr) = (cl_char) lval;\n\t\t    break;\n\t\tcase OCL_UCHAR:\n\t\t    if (!enif_get_ulong(env, values[i], &luval))\n\t\t\treturn enif_make_badarg(env);\n\t\t    *((cl_uchar*)ptr) = (cl_uchar) luval;\n\t\t    break;\n\t\tcase OCL_SHORT:\n\t\t    if (!enif_get_long(env, values[i], &lval))\n\t\t\treturn enif_make_badarg(env);\n\t\t    *((cl_short*)ptr) = (cl_short) lval;\n\t\t    break;\n\t\tcase OCL_USHORT:\n\t\t    if (!enif_get_ulong(env, values[i], &luval))\n\t\t\treturn enif_make_badarg(env);\n\t\t    *((cl_ushort*)ptr) = (cl_ushort) luval;\n\t\t    break;\n\t\tcase OCL_INT:\n\t\t    if (!enif_get_long(env, values[i], &lval))\n\t\t\treturn enif_make_badarg(env);\n\t\t    *((cl_int*)ptr) = (cl_int) lval;\n\t\t    break;\n\t\tcase OCL_UINT:\n\t\t    if (!enif_get_ulong(env, values[i], &luval))\n\t\t\treturn enif_make_badarg(env);\n\t\t    *((cl_uint*)ptr) = (cl_uint) luval;\n\t\t    break;\n\t\tcase OCL_LONG:\n\t\t    if (!enif_get_int64(env, values[i], &i64val))\n\t\t\treturn enif_make_badarg(env);\n\t\t    *((cl_long*)ptr) = i64val;\n\t\t    break;\n\t\tcase OCL_ULONG:\n\t\t    if (!enif_get_uint64(env, values[i], &u64val))\n\t\t\treturn enif_make_badarg(env);\n\t\t    *((cl_ulong*)ptr) = u64val;\n\t\t    break;\n\t\tcase OCL_HALF:\n\t\t    if (!enif_get_ulong(env, values[i], &luval))\n\t\t\treturn enif_make_badarg(env);\n\t\t    *((cl_half*)ptr) = (cl_half) luval;\n\t\t    break;\n\t\tcase OCL_FLOAT:\n\t\t    if (!enif_get_double(env, values[i], &fval))\n\t\t\treturn enif_make_badarg(env);\n\t\t    *((cl_float*)ptr) = (cl_float) fval;\n\t\t    break;\n\n\t\tcase OCL_DOUBLE:\n\t\t    if (!enif_get_double(env, values[i], &fval))\n\t\t\treturn enif_make_badarg(env);\n\t\t    *((cl_double*)ptr) = fval;\n\t\t    break;\n\t\tcase OCL_SIZE:\n\t\t    if (!ecl_get_sizet(env, values[i], &sval))\n\t\t\treturn enif_make_badarg(env);\n\t\t    *((size_t*)ptr) = sval;\n\t\t    break;\n\t\tcase OCL_BOOL:\n\t\tcase OCL_STRING:\n\t\tcase OCL_ENUM:\n\t\tcase 
OCL_BITFIELD:\n\t\tcase OCL_POINTER:\n\t\tcase OCL_PLATFORM:\n\t\tcase OCL_DEVICE: \n\t\tcase OCL_CONTEXT:\n\t\tcase OCL_PROGRAM:\n\t\tcase OCL_COMMAND_QUEUE:\n\t\tcase OCL_IMAGE_FORMAT:\n\t\tcase OCL_DEVICE_PARTITION:\n\t\tcase OCL_NUM_TYPES:\n\t\tdefault:\n\t\t    return enif_make_badarg(env);\n\t\t}\n\t\tptr += base_size;\n\t    }\n\t    arg_value = arg_buf;\n\t    arg_size  = base_size*vec_size;\n\t    goto do_kernel_arg;\n\t}\n\treturn enif_make_badarg(env);\n    }\n    else if (enif_get_int(env, argv[2], &ival)) {\n\tint_arg = ival;\n\targ_value = &int_arg;\n\targ_size = sizeof(int_arg);\n\tgoto do_kernel_arg;\n    }\n    else if (enif_get_double(env, argv[2], &fval)) {\n\tfloat_arg = (float) fval;\n\targ_value = &float_arg;\n\targ_size = sizeof(float_arg);\n\tgoto do_kernel_arg;\n    }\n    else if (enif_inspect_iolist_as_binary(env, argv[2], &bval)) {\n\t// rule your own case \n\targ_value = bval.data;\n\targ_size  = bval.size;\n\tgoto do_kernel_arg;\n    }\n    return enif_make_badarg(env);\n\ndo_kernel_arg:\n    err = ECL_CALL(clSetKernelArg)\n\t(o_kernel->obj.kernel,\n\t arg_index,\n\t arg_size,\n\t arg_value);\n    if (!err) {\n\tset_kernel_arg(o_kernel, arg_index, arg_type, ptr_arg);\n\treturn ATOM(ok);\n    }\n    return ecl_make_error(env, err);    \n}\n\n// cl:set_kernel_arg_size(Kernel::cl_kernel(), Index::non_neg_integer(),\n//                        Size::non_neg_integer()) ->\n//    'ok' | {'error', cl_error()}\n//\n// cl special to set kernel arg with size only (local mem etc)\n//\nstatic ERL_NIF_TERM ecl_set_kernel_arg_size(ErlNifEnv* env, int argc, \n\t\t\t\t\t    const ERL_NIF_TERM argv[])\n{\n    ecl_kernel_t* o_kernel;\n    cl_uint arg_index;\n    size_t  arg_size;\n    unsigned char* arg_value = 0;\n    cl_int  err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env,argv[0],&kernel_r,false,(ecl_object_t**)&o_kernel))\n\treturn enif_make_badarg(env);\n    if (!enif_get_uint(env, argv[1], &arg_index))\n\treturn enif_make_badarg(env);\n    if 
(!ecl_get_sizet(env, argv[2], &arg_size))\n\treturn enif_make_badarg(env);\n\n    err = ECL_CALL(clSetKernelArg)\n\t(o_kernel->obj.kernel,\n\t arg_index,\n\t arg_size,\n\t arg_value);\n    if (!err) {\n\tset_kernel_arg(o_kernel, arg_index, KERNEL_ARG_OTHER, (void*) 0);\n\treturn ATOM(ok);\n    }\n    return ecl_make_error(env, err);\n\n}\n\nstatic ERL_NIF_TERM ecl_get_kernel_info(ErlNifEnv* env, int argc, \n\t\t\t\t\tconst ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_kernel;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &kernel_r, false, &o_kernel))\n\treturn enif_make_badarg(env);\n    return make_object_info(env, argv[1], o_kernel,\n\t\t\t    (info_fn_t*) ECL_FUNC_PTR(clGetKernelInfo),\n\t\t\t    kernel_info,\n\t\t\t    sizeof_array(kernel_info));\n}\n\nstatic ERL_NIF_TERM ecl_get_kernel_workgroup_info(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t  const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_kernel;\n    ecl_object_t* o_device;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &kernel_r, false, &o_kernel))\n\treturn enif_make_badarg(env);\n    if (!get_ecl_object(env, argv[1], &device_r, false, &o_device))\n\treturn enif_make_badarg(env);\n    return make_object_info2(env, argv[2], o_kernel, o_device->opaque,\n\t\t\t     (info2_fn_t*) ECL_FUNC_PTR(clGetKernelWorkGroupInfo),\n\t\t\t     workgroup_info,\n\t\t\t     sizeof_array(workgroup_info));\n}\n\n#if CL_VERSION_1_2 == 1\nstatic ERL_NIF_TERM ecl_get_kernel_arg_info(ErlNifEnv* env, int argc,\n\t\t\t\t\t    const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_kernel;\n    cl_uint arg_index;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &kernel_r, false, &o_kernel))\n\treturn enif_make_badarg(env);\n    if (!enif_get_uint(env, argv[1], &arg_index))\n\treturn enif_make_badarg(env);\n    return make_object_info2(env, argv[2], o_kernel,\n\t\t\t     (void*) (size_t) arg_index,\n\t\t\t     (info2_fn_t*) ECL_FUNC_PTR(clGetKernelArgInfo),\n\t\t\t     arg_info,\n\t\t\t     
sizeof_array(arg_info));\n}\n#endif\n\n//\n// cl:enqueue_task(Queue::cl_queue(), Kernel::cl_kernel(),\n//                   WaitList::[cl_event()], WantEvent::boolean()) ->\n//    'ok' | {'ok', cl_event()} | {'error', cl_error()}\n//\nstatic ERL_NIF_TERM ecl_enqueue_task(ErlNifEnv* env, int argc, \n\t\t\t\t     const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_kernel        kernel;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    cl_event         event;\n    cl_int           err;\n    cl_bool          want_event;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &kernel_r, false,(void**)&kernel))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[2], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n    if (!get_bool(env, argv[3], &want_event))\n\treturn enif_make_badarg(env);\n\n    err = ECL_CALL(clEnqueueTask)\n\t(o_queue->queue, \n\t kernel,\n\t num_events,\n\t num_events ? wait_list : NULL,\n\t want_event ? 
&event : NULL);\n    if (!err) {\n\tif (want_event) {\n\t    ERL_NIF_TERM t;\n\t    t = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\t    return enif_make_tuple2(env, ATOM(ok), t);\n\t}\n\treturn ATOM(ok);\n    }\n    return ecl_make_error(env, err);\n}\n//\n// cl:enqueue_nd_range_kernel(Queue::cl_queue(), Kernel::cl_kernel(),\n//                            Global::[non_neg_integer()],\n//                            Local::[non_neg_integer()],\n//                            WaitList::[cl_event()], WantEvent::boolean()) ->\n//    'ok' | {'ok', cl_event()} | {'error', cl_error()}\n//\nstatic ERL_NIF_TERM ecl_enqueue_nd_range_kernel(ErlNifEnv* env, int argc, \n\t\t\t\t\t\tconst ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_kernel     kernel;\n    cl_event      wait_list[MAX_WAIT_LIST];\n    cl_uint       num_events = MAX_WAIT_LIST;\n    size_t        global_work_size[MAX_WORK_SIZE];\n    size_t        local_work_size[MAX_WORK_SIZE];\n    size_t        work_dim = MAX_WORK_SIZE;\n    size_t        temp_dim = MAX_WORK_SIZE;\n    cl_event      event;\n    cl_int        err;\n    cl_bool       want_event;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &kernel_r, false, (void**) &kernel))\n\treturn enif_make_badarg(env);\n    if (!get_sizet_list(env, argv[2], global_work_size, &work_dim))\n\treturn enif_make_badarg(env);\t\n    if (!get_sizet_list(env, argv[3], local_work_size, &temp_dim))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[4], &event_r, false, \n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n    if (!get_bool(env, argv[5], &want_event))\n\treturn enif_make_badarg(env);\n\n    if (work_dim == 0) {\n\treturn enif_make_badarg(env);\n    }\n\n    if ((temp_dim > 0) && (work_dim != temp_dim)) {\n\treturn enif_make_badarg(env);\n    }\n\n    err = 
ECL_CALL(clEnqueueNDRangeKernel)\n\t(o_queue->queue, kernel,\n\t (cl_uint) work_dim,\n\t 0, // global_work_offset,\n\t global_work_size,\n\t temp_dim ? local_work_size : NULL,\n\t num_events, \n\t num_events ? wait_list : NULL,\n\t want_event ? &event : NULL);\n    if (!err) {\n\tif (want_event) {\n\t    ERL_NIF_TERM t;\n\t    t = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\t    return enif_make_tuple2(env, ATOM(ok), t);\n\t}\n\treturn ATOM(ok);\n    }\n    return ecl_make_error(env, err);    \n}\n\n// 1.2 -> 1.1 wrapper: clEnqueueMarkerWithWaitList implement clEnqueueMarker\ncl_int CL_CALLBACK eclEnqueueMarker(cl_command_queue queue,\n\t\t\tcl_event * event)\n{\n    return ECL_CALL(clEnqueueMarkerWithWaitList)(queue,0, NULL,event);\n}\n\nstatic ERL_NIF_TERM ecl_enqueue_marker(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_event event;\n    cl_int err;\n    ERL_NIF_TERM t;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (o_queue->version >= 12) {\n\terr = eclEnqueueMarker(o_queue->queue, &event);\n    } else { // deprecated in 1.2 available in 1.1\n\terr = ECL_CALL(clEnqueueMarker)(o_queue->queue, &event);\n    }\n    if (!err) {\n\tt = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n\n// 1.2 -> 1.1 wrapper: clEnqueueMarkerWithWaitList implement clEnqueueWaitForEvents\ncl_int CL_CALLBACK eclEnqueueWaitForEvents(cl_command_queue queue,\n\t\t\t       cl_uint num_events,\n\t\t\t       const cl_event * event_list)\n{\n    return ECL_CALL(clEnqueueMarkerWithWaitList)(queue,\n\t\t\t\t\t\t num_events,\n\t\t\t\t\t\t num_events ? 
event_list : NULL,\n\t\t\t\t\t\t NULL);\n}\n\n//\n// cl:enqueue_wait_for_events(Queue::cl_queue(), WaitList::[cl_event()]) ->\n//    'ok' | {'error', cl_error()}\n//\nstatic ERL_NIF_TERM ecl_enqueue_wait_for_events(ErlNifEnv* env, int argc, \n\t\t\t\t\t\tconst ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_queue;\n    cl_event      wait_list[MAX_WAIT_LIST];\n    cl_uint       num_events = MAX_WAIT_LIST;\n    cl_int        err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[1], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n    if(o_queue->version >= 12) {\n\terr = eclEnqueueWaitForEvents(o_queue->queue,\n\t\t\t\t      num_events,\n\t\t\t\t      num_events ? wait_list : NULL);\n    } else {\n\terr = ECL_CALL(clEnqueueWaitForEvents)(o_queue->queue,\n\t\t\t\t\t       num_events,\n\t\t\t\t\t       num_events ? wait_list : NULL);\n    }\n    if (!err)\n\treturn ATOM(ok);\n    return ecl_make_error(env, err);    \n}\n//\n// cl:enqueue_read_buffer(Queue::cl_queue(), Buffer::cl_mem(),\n//                        Offset::non_neg_integer(), \n//                           Size::non_neg_integer(), \n//                           WaitList::[cl_event()]) ->\n//    {'ok', cl_event()} | {'error', cl_error()}\nstatic ERL_NIF_TERM ecl_enqueue_read_buffer(ErlNifEnv* env, int argc,\n\t\t\t\t\t    const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           buffer;\n    size_t           offset;\n    size_t           size;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    cl_event         event;\n    ErlNifBinary*    bin;\n    cl_int           err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))\n\treturn 
enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[2], &offset))\n\treturn enif_make_badarg(env);\t\n    if (!ecl_get_sizet(env, argv[3], &size))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[4], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n    if (!(bin = enif_alloc(sizeof(ErlNifBinary))))\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\n    if (!enif_alloc_binary(size, bin)) {\n\tenif_free(bin);\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\t\n    }\n    err = ECL_CALL(clEnqueueReadBuffer)\n\t(o_queue->queue, buffer,\n\t CL_FALSE,\n\t offset,\n\t size,\n\t bin->data,\n\t num_events,\n\t num_events ? wait_list : 0,\n\t &event);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_event(env, event, true, false, 0, bin, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    else {\n\tenif_free(bin);\n\treturn ecl_make_error(env, err);    \n    }\n}\n//\n// cl:enqueue_write_buffer(Queue::cl_queue(), Buffer::cl_mem(),\n//                         Offset::non_neg_integer(), \n//                         Size::non_neg_integer(), \n//                         Data::binary(),\n//                         WaitList::[cl_event()],\n//                         WantEvent::boolean()\n//                       ) ->\n//    {'ok', cl_event()} | {'error', cl_error()}\n//\nstatic ERL_NIF_TERM ecl_enqueue_write_buffer(ErlNifEnv* env, int argc, \n\t\t\t\t\t     const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           buffer;\n    size_t           offset;\n    size_t           size;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    cl_event         event;\n    ErlNifBinary     bin;\n    ErlNifEnv*       bin_env;\n    cl_int           err;\n    cl_bool          want_event;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn 
enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[2], &offset))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[3], &size))\n\treturn enif_make_badarg(env);\n    /*  Check argv[4] (bin) last */\n    if (!get_object_list(env, argv[5], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n    if (!get_bool(env, argv[6], &want_event))\n\treturn enif_make_badarg(env);\n\n    if (!(bin_env = enif_alloc_env())) {  // create binary environment\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\n    }\n    if (!ecl_make_binary(env, argv[4], bin_env, &bin)) {\n\tenif_free_env(bin_env);\n\treturn enif_make_badarg(env);\n    }\n\n    // handle binary and iolist as binary\n    if (bin.size < size) {   // FIXME: handle offset!\n\treturn enif_make_badarg(env);\n    }\n\n    err = ECL_CALL(clEnqueueWriteBuffer)(o_queue->queue, buffer,\n\t\t\t       !want_event, // FALSE for async\n\t\t\t       offset,\n\t\t\t       size,\n\t\t\t       bin.data,\n\t\t\t       num_events,\n\t\t\t       num_events ? wait_list : NULL,\n\t\t\t       want_event ? 
&event : NULL);\n    if (!err) {\n\tif (want_event) {\n\t    ERL_NIF_TERM t;\n\t    t = ecl_make_event(env, event, false, true, bin_env, NULL, o_queue);\n\t    return enif_make_tuple2(env, ATOM(ok), t);\n\t} else {\n\t    enif_free_env(bin_env);\n\t}\n\treturn ATOM(ok);\n    }\n    else {\n\tenif_free_env(bin_env);\n\treturn ecl_make_error(env, err);\n    }\n}\n\n//\n// enqueue_read_image(_Queue, _Image, _Origin, _Region, _RowPitch, _SlicePitch,\n//\t\t   _WaitList) -> {'ok',Event} | {error,Error}\n//\nstatic ERL_NIF_TERM ecl_enqueue_read_image(ErlNifEnv* env, int argc, \n\t\t\t\t\t   const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           buffer;\n    size_t           origin[3];\n    size_t           region[3];\n    size_t           row_pitch;\n    size_t           slice_pitch;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    size_t           num_origin = 3;\n    size_t           num_region = 3;\n    size_t           psize;\n    size_t           size;\n    cl_event         event;\n    ErlNifBinary*    bin;\n    cl_int           err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))\n\treturn enif_make_badarg(env);\n    origin[0] = origin[1] = origin[2] = 0;\n    if (!get_sizet_list(env, argv[2], origin, &num_origin))\n\treturn enif_make_badarg(env);\n    region[0] = region[1] = region[2] = 1;\n    if (!get_sizet_list(env, argv[3], region, &num_region))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[4], &row_pitch))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[5], &slice_pitch))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[6], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n    if (!(bin = 
enif_alloc(sizeof(ErlNifBinary))))\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\n\n    // calculate the read size of the image, FIXME: check error return\n    ECL_CALL(clGetImageInfo)(buffer, CL_IMAGE_ELEMENT_SIZE,\n\t\t\t     sizeof(psize), &psize, 0);\n    size = region[0]*region[1]*region[2]*psize;\n    if (!enif_alloc_binary(size, bin)) {\n\tenif_free(bin);\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\t\n    }\n    err = ECL_CALL(clEnqueueReadImage)(o_queue->queue, buffer,\n\t\t\t     CL_FALSE,\n\t\t\t     origin,\n\t\t\t     region,\n\t\t\t     row_pitch,\n\t\t\t     slice_pitch,\n\t\t\t     bin->data,\n\t\t\t     num_events,\n\t\t\t     num_events ? wait_list : 0,\n\t\t\t     &event);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_event(env, event, true, false, 0, bin, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    else {\n\tenif_free(bin);\n\treturn ecl_make_error(env, err);\n    }\n}\n\n//\n// enqueue_read_buffer_rect(_Queue, _Buffer,\n//    BufferOrigin, HostOrigin, Region,\n//    BufferRowPitch, BufferSlicePitch,\n//    HostRowPitch, HostSlicePitch,\n//    WaitList) -> {'ok',Event} | {error,Error}\n//\n#if CL_VERSION_1_1 == 1\nstatic ERL_NIF_TERM ecl_enqueue_read_buffer_rect(ErlNifEnv* env, int argc,\n\t\t\t\t\t\t const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           buffer;\n    size_t           buffer_origin[3];\n    size_t           host_origin[3];\n    size_t           region[3];\n    size_t           buffer_row_pitch;\n    size_t           buffer_slice_pitch;\n    size_t           host_row_pitch;\n    size_t           host_slice_pitch;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    size_t           num_buffer_origin = 3;\n    size_t           num_host_origin = 3;\n    size_t           num_region = 3;\n    size_t           size;\n    cl_event         event;\n    ErlNifBinary*    bin;\n    cl_int           
err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))\n\treturn enif_make_badarg(env);\n    buffer_origin[0] = buffer_origin[1] = buffer_origin[2] = 0;\n    if (!get_sizet_list(env, argv[2], buffer_origin, &num_buffer_origin))\n\treturn enif_make_badarg(env);\n    host_origin[0] = host_origin[1] = host_origin[2] = 0;\n    if (!get_sizet_list(env, argv[3], host_origin, &num_host_origin))\n\treturn enif_make_badarg(env);\n    region[0] = region[1] = region[2] = 1;\n    if (!get_sizet_list(env, argv[4], region, &num_region))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[5], &buffer_row_pitch))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[6], &buffer_slice_pitch))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[7], &host_row_pitch))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[8], &host_slice_pitch))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[9], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n    if (!(bin = enif_alloc(sizeof(ErlNifBinary))))\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\n\n    // calculate the read size of the image, FIXME: check error return\n    size = (host_origin[0]+region[0])*(host_origin[1]+region[1])*\n\t(host_origin[2]+region[2]);\n    if (!enif_alloc_binary(size, bin)) {\n\tenif_free(bin);\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\n    }\n    err = ECL_CALL(clEnqueueReadBufferRect)(o_queue->queue, buffer,\n\t\t\t\t  CL_FALSE,\n\t\t\t\t  buffer_origin,\n\t\t\t\t  host_origin,\n\t\t\t\t  region,\n\t\t\t\t  buffer_row_pitch,\n\t\t\t\t  buffer_slice_pitch,\n\t\t\t\t  host_row_pitch,\n\t\t\t\t  host_slice_pitch,\n\t\t\t\t  bin->data,\n\t\t\t\t  num_events,\n\t\t\t\t  num_events ? 
wait_list : 0,\n\t\t\t\t  &event);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_event(env, event, true, false, 0, bin, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    else {\n\tenif_free(bin);\n\treturn ecl_make_error(env, err);\n    }\n}\n#endif\n\n\n//\n// enqueue_write_buffer_rect(_Queue, _Buffer,\n//    BufferOrigin, HostOrigin, Region,\n//    BufferRowPitch, BufferSlicePitch,\n//    HostRowPitch, HostSlicePitch,\n//    Data::binary(),\n//    WaitList) -> {'ok',Event} | {error,Error}\n//\n#if CL_VERSION_1_1 == 1\nstatic ERL_NIF_TERM ecl_enqueue_write_buffer_rect(ErlNifEnv* env, int argc,\n\t\t\t\t\t\t  const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           buffer;\n    size_t           buffer_origin[3];\n    size_t           host_origin[3];\n    size_t           region[3];\n    size_t           buffer_row_pitch;\n    size_t           buffer_slice_pitch;\n    size_t           host_row_pitch;\n    size_t           host_slice_pitch;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    size_t           num_buffer_origin = 3;\n    size_t           num_host_origin = 3;\n    size_t           num_region = 3;\n    size_t           size;\n    cl_event         event;\n    ErlNifBinary     bin;\n    ErlNifEnv*       bin_env;\n    cl_int           err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))\n\treturn enif_make_badarg(env);\n    buffer_origin[0] = buffer_origin[1] = buffer_origin[2] = 0;\n    if (!get_sizet_list(env, argv[2], buffer_origin, &num_buffer_origin))\n\treturn enif_make_badarg(env);\n    host_origin[0] = host_origin[1] = host_origin[2] = 0;\n    if (!get_sizet_list(env, argv[3], host_origin, &num_host_origin))\n\treturn enif_make_badarg(env);\n    region[0] = region[1] = region[2] = 1;\n    if 
(!get_sizet_list(env, argv[4], region, &num_region))\n\treturn enif_make_badarg(env);\n\n    if (!ecl_get_sizet(env, argv[5], &buffer_row_pitch))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[6], &buffer_slice_pitch))\n\treturn enif_make_badarg(env);\n\n    if (!ecl_get_sizet(env, argv[7], &host_row_pitch))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[8], &host_slice_pitch))\n\treturn enif_make_badarg(env);\n    /*  Check argv[9] (bin) last */\n    if (!get_object_list(env, argv[10], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n\n    if (!(bin_env = enif_alloc_env())) {  // create binary environment\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\n    }\n    if (!ecl_make_binary(env, argv[9], bin_env, &bin)) {\n\tenif_free_env(bin_env);\n\treturn enif_make_badarg(env);\n    }\n    size = (host_origin[0]+region[0])*(host_origin[1]+region[1])*\n\t(host_origin[2]+region[2]);\n    // handle binary and iolist as binary\n    if (bin.size < size) {   // FIXME: handle offset!\n\t// bin_env is still owned by this function here; free it so the\n\t// rejected call does not leak the environment\n\tenif_free_env(bin_env);\n\treturn enif_make_badarg(env);\n    }\n    err = ECL_CALL(clEnqueueWriteBufferRect)(o_queue->queue, buffer,\n\t\t\t\t   CL_FALSE,\n\t\t\t\t   buffer_origin,\n\t\t\t\t   host_origin,\n\t\t\t\t   region,\n\t\t\t\t   buffer_row_pitch,\n\t\t\t\t   buffer_slice_pitch,\n\t\t\t\t   host_row_pitch,\n\t\t\t\t   host_slice_pitch,\n\t\t\t\t   bin.data,\n\t\t\t\t   num_events,\n\t\t\t\t   num_events ? 
wait_list : 0,\n\t\t\t\t   &event);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_event(env, event, false, true, bin_env, NULL, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    else {\n\tenif_free_env(bin_env);\n\treturn ecl_make_error(env, err);\n    }\n}\n#endif\n\n//\n// cl:enqueue_fill_buffer(Queue, Buffer, Pattern, Offset, Size, WaitList) ->\n//   {ok,Event} | {error,Reason}\n//\n#if CL_VERSION_1_2 == 1\nstatic ERL_NIF_TERM ecl_enqueue_fill_buffer(ErlNifEnv* env, int argc,\n\t\t\t\t\t    const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           buffer;\n    ErlNifBinary     pattern;\n    size_t           offset;\n    size_t           size;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    cl_event         event;\n    cl_int           err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))\n\treturn enif_make_badarg(env);\n    if (!enif_inspect_binary(env, argv[2], &pattern))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[3], &offset))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[4], &size))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[5], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n\n    // Note: pattern must not be retained, it can be freed after this call\n    // according to spec.\n    err = ECL_CALL(clEnqueueFillBuffer)(o_queue->queue, buffer,\n\t\t\t      pattern.data,\n\t\t\t      pattern.size,\n\t\t\t      offset,\n\t\t\t      size,\n\t\t\t      num_events,\n\t\t\t      num_events ? 
wait_list : 0,\n\t\t\t      &event);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n#endif\n\n\n//\n// enqueue_write_image(_Queue, _Image, _Origin, _Region, _RowPitch, _SlicePitch,\n//\t\t    _Data, _WaitList, _WantEvent) ->\n//\nstatic ERL_NIF_TERM ecl_enqueue_write_image(ErlNifEnv* env, int argc, \n\t\t\t\t\t    const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           buffer;\n    size_t           origin[3];\n    size_t           region[3];\n    size_t           row_pitch;\n    size_t           slice_pitch;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    size_t           num_origin = 3;\n    size_t           num_region = 3;\n    size_t           psize;\n    size_t           size;\n    cl_event         event;\n    ErlNifBinary     bin;\n    ErlNifEnv*       bin_env;\n    cl_int           err;\n    cl_bool          want_event;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))\n\treturn enif_make_badarg(env);\n    origin[0] = origin[1] = origin[2] = 0;\n    if (!get_sizet_list(env, argv[2], origin, &num_origin))\n\treturn enif_make_badarg(env);\n    region[0] = region[1] = region[2] = 1;\n    if (!get_sizet_list(env, argv[3], region, &num_region))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[4], &row_pitch))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[5], &slice_pitch))\n\treturn enif_make_badarg(env);\n    /*  Check argv[6] (bin) last */\n    if (!get_object_list(env, argv[7], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n    if (!get_bool(env, argv[8], &want_event))\n        return 
enif_make_badarg(env);\n    if (!(bin_env = enif_alloc_env())) {  // create binary environment\n        return ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\n    }\n    if (!ecl_make_binary(env, argv[6], bin_env, &bin)) {\n       enif_free_env(bin_env);\n       return enif_make_badarg(env);\n    }\n\n    // calculate the read size of the image FIXME: check error return\n    ECL_CALL(clGetImageInfo)(buffer, CL_IMAGE_ELEMENT_SIZE,\n\t\t\t     sizeof(psize), &psize, 0);\n    size = region[0]*region[1]*region[2]*psize;\n    if (bin.size < size) {\n\treturn enif_make_badarg(env);\n    }\n\n    err = ECL_CALL(clEnqueueWriteImage)(o_queue->queue, buffer,\n\t\t\t      !want_event, // FALSE for ASYNC\n\t\t\t      origin,\n\t\t\t      region,\n\t\t\t      row_pitch,\n\t\t\t      slice_pitch,\n\t\t\t      bin.data,\n\t\t\t      num_events,\n\t\t\t      num_events ? wait_list : NULL,\n\t\t\t      want_event ? &event : NULL );\n    if (!err) {\n\tif (want_event) {\n\t    ERL_NIF_TERM t;\n\t    t = ecl_make_event(env, event, false, true, bin_env, NULL, o_queue);\n\t    return enif_make_tuple2(env, ATOM(ok), t);\n\t} else {\n\t    enif_free_env(bin_env);\n\t}\n\treturn ATOM(ok);\n    }\n    else {\n\tenif_free_env(bin_env);\t\n\treturn ecl_make_error(env, err);\n    }\n}\n\n//\n// cl:enqueue_copy_buffer(Queue, SrcBuffer, DstBuffer,\n//                        SrcOffset, DstOffset, Cb,\n//                        WaitList) ->\n//\nstatic ERL_NIF_TERM ecl_enqueue_copy_buffer(ErlNifEnv* env, int argc,\n\t\t\t\t\t    const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           src_buffer;\n    cl_mem           dst_buffer;\n    size_t           src_offset;\n    size_t           dst_offset;\n    size_t           cb;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    cl_event         event;\n    cl_int           err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, 
&o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&src_buffer))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_buffer))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[3], &src_offset))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[4], &dst_offset))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[5], &cb))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[6], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n    err = ECL_CALL(clEnqueueCopyBuffer)(o_queue->queue,\n\t\t\t      src_buffer,\n\t\t\t      dst_buffer,\n\t\t\t      src_offset,\n\t\t\t      dst_offset,\n\t\t\t      cb,\n\t\t\t      num_events,\n\t\t\t      num_events ? wait_list : 0,\n\t\t\t      &event);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n\n//\n// enqueue_copy_buffer_rect(_Queue, _SrcBuffer, _DstBuffer,\n//    SrcOrigin, DstOrigin, Region,\n//    SrcRowPitch, SrcSlicePitch,\n//    DstRowPitch, DstSlicePitch,\n//    WaitList) -> {'ok',Event} | {error,Error}\n//\n#if CL_VERSION_1_1 == 1\nstatic ERL_NIF_TERM ecl_enqueue_copy_buffer_rect(ErlNifEnv* env, int argc,\n\t\t\t\t\t\t const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           src_buffer;\n    cl_mem           dst_buffer;\n    size_t           src_origin[3];\n    size_t           dst_origin[3];\n    size_t           region[3];\n    size_t           src_row_pitch;\n    size_t           src_slice_pitch;\n    size_t           dst_row_pitch;\n    size_t           dst_slice_pitch;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    size_t           num_src_origin = 3;\n    size_t           num_dst_origin = 
3;\n    size_t           num_region = 3;\n    cl_event         event;\n    cl_int           err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&src_buffer))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_buffer))\n\treturn enif_make_badarg(env);\n    src_origin[0] = src_origin[1] = src_origin[2] = 0;\n    if (!get_sizet_list(env, argv[3], src_origin, &num_src_origin))\n\treturn enif_make_badarg(env);\n    dst_origin[0] = dst_origin[1] = dst_origin[2] = 0;\n    if (!get_sizet_list(env, argv[4], dst_origin, &num_dst_origin))\n\treturn enif_make_badarg(env);\n    region[0] = region[1] = region[2] = 1;\n    if (!get_sizet_list(env, argv[5], region, &num_region))\n\treturn enif_make_badarg(env);\n\n    if (!ecl_get_sizet(env, argv[6], &src_row_pitch))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[7], &src_slice_pitch))\n\treturn enif_make_badarg(env);\n\n    if (!ecl_get_sizet(env, argv[8], &dst_row_pitch))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[9], &dst_slice_pitch))\n\treturn enif_make_badarg(env);\n\n    if (!get_object_list(env, argv[10], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n\n    // A buffer-to-buffer copy moves no data to the host, so no result\n    // binary is allocated for the event.\n    err = ECL_CALL(clEnqueueCopyBufferRect)(o_queue->queue,\n\t\t\t\t  src_buffer, dst_buffer,\n\t\t\t\t  src_origin, dst_origin,\n\t\t\t\t  region,\n\t\t\t\t  src_row_pitch, src_slice_pitch,\n\t\t\t\t  dst_row_pitch, dst_slice_pitch,\n\t\t\t\t  num_events,\n\t\t\t\t  num_events ? 
wait_list : 0,\n\t\t\t\t  &event);\n    if (!err) {\n\tERL_NIF_TERM t;\n\t// Same event shape as ecl_enqueue_copy_buffer: nothing attached.\n\t// The previous code attached an ErlNifBinary header that was never\n\t// initialised with enif_alloc_binary().\n\tt = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n#endif\n\n//\n// cl:enqueue_copy_image(Queue, SrcImage, DstImage,\n//                       SrcOrigin, DstOrigin, Region,\n//                       WaitList) ->\n//    {'ok', cl_event()} | {'error', cl_error()}\n//\nstatic ERL_NIF_TERM ecl_enqueue_copy_image(ErlNifEnv* env, int argc, \n\t\t\t\t\t   const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           src_image;\n    cl_mem           dst_image;\n    size_t           src_origin[3];\n    size_t           dst_origin[3];\n    size_t           region[3];\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    size_t           num_src_origin = 3;\n    size_t           num_dst_origin = 3;\n    size_t           num_region = 3;\n    cl_event         event;\n    cl_int           err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&src_image))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_image))\n\treturn enif_make_badarg(env);\n    src_origin[0] = src_origin[1] = src_origin[2] = 0;\n    if (!get_sizet_list(env, argv[3], src_origin, &num_src_origin))\n\treturn enif_make_badarg(env);\n    dst_origin[0] = dst_origin[1] = dst_origin[2] = 0;\n    if (!get_sizet_list(env, argv[4], dst_origin, &num_dst_origin))\n\treturn enif_make_badarg(env);\n    region[0] = region[1] = region[2] = 1;\n    if (!get_sizet_list(env, argv[5], region, &num_region))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[6], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n    err = ECL_CALL(clEnqueueCopyImage)(o_queue->queue, src_image, dst_image,\n\t\t\t     src_origin,\n\t\t\t     dst_origin,\n\t\t\t     region,\n\t\t\t     num_events,\n\t\t\t     num_events ? 
wait_list : 0,\n\t\t\t     &event);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);    \n}\n\n//\n//  cl:enqueue_fill_image(Queue,Image,FillColor,Origin,Region,WaitList) ->\n//  FillColor = <<R:32/unsigned,G:32/unsigned,B:32/unsigned,A:32/unsigned>>\n//            | <<R:32/signed,G:32/signed,B:32/signed,A:32/signed>>\n//            | <<R:32/float,G:32/float,B:32/float,A:32/float>>\n//            Use device endian! check device_info(D, endian_little)\n//\n//\n#if CL_VERSION_1_2 == 1\n\nstatic ERL_NIF_TERM ecl_enqueue_fill_image(ErlNifEnv* env, int argc,\n\t\t\t\t\t   const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           image;\n    ErlNifBinary     fill_color;\n    size_t           origin[3];\n    size_t           region[3];\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    size_t           num_origin = 3;\n    size_t           num_region = 3;\n    cl_event         event;\n    cl_int           err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&image))\n\treturn enif_make_badarg(env);\n    if (!enif_inspect_binary(env, argv[2], &fill_color))\n\treturn enif_make_badarg(env);\n    if (fill_color.size != 4*4)\n\treturn enif_make_badarg(env);\n    origin[0] = origin[1] = origin[2] = 0;\n    if (!get_sizet_list(env, argv[3], origin, &num_origin))\n\treturn enif_make_badarg(env);\n    region[0] = region[1] = region[2] = 1;\n    if (!get_sizet_list(env, argv[4], region, &num_region))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[5], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n\n    err = ECL_CALL(clEnqueueFillImage)(o_queue->queue, 
image,\n\t\t\t     fill_color.data, // validate size etc!\n\t\t\t     origin,\n\t\t\t     region,\n\t\t\t     num_events,\n\t\t\t     num_events ? wait_list : 0,\n\t\t\t     &event);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n#endif\n\n// cl:enqueue_copy_image_to_buffer(_Queue, _SrcImage, _DstBuffer, \n//                                 _Origin, _Region,\n//\t\t\t           _DstOffset, _WaitList) ->\nstatic ERL_NIF_TERM ecl_enqueue_copy_image_to_buffer(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t     const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           src_image;\n    cl_mem           dst_buffer;\n    size_t           origin[3];\n    size_t           region[3];\n    size_t           dst_offset;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    size_t           num_src_origin = 3;\n    size_t           num_region = 3;\n    cl_event         event;\n    cl_int           err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&src_image))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_buffer))\n\treturn enif_make_badarg(env);\n    origin[0] =  origin[1] = origin[2] = 0;\n    if (!get_sizet_list(env, argv[3], origin, &num_src_origin))\n\treturn enif_make_badarg(env);\n    region[0] = region[1] = region[2] = 1;\n    if (!get_sizet_list(env, argv[4], region, &num_region))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[5], &dst_offset))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[6], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n    err = 
ECL_CALL(clEnqueueCopyImageToBuffer)(o_queue->queue, \n\t\t\t\t     src_image,\n\t\t\t\t     dst_buffer,\n\t\t\t\t     origin,\n\t\t\t\t     region,\n\t\t\t\t     dst_offset,\n\t\t\t\t     num_events,\n\t\t\t\t     num_events ? wait_list : 0,\n\t\t\t\t     &event);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);    \n}\n//\n// cl:enqueue_copy_buffer_to_image(_Queue, _SrcBuffer, _DstImage,\n//                                  _SrcOffset, _DstOrigin, \n//                                _Region, _WaitList) ->\n//\nstatic ERL_NIF_TERM ecl_enqueue_copy_buffer_to_image(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t     const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           src_buffer;\n    cl_mem           dst_image;\n    size_t           src_offset;\n    size_t           origin[3];\n    size_t           region[3];\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    size_t           num_src_origin = 3;\n    size_t           num_region = 3;\n    cl_event         event;\n    cl_int           err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&src_buffer))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[2], &mem_r, false, (void**)&dst_image))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[3], &src_offset))\n\treturn enif_make_badarg(env);\n    origin[0] =  origin[1] = origin[2] = 0;\n    if (!get_sizet_list(env, argv[4], origin, &num_src_origin))\n\treturn enif_make_badarg(env);\n    region[0] = region[1] = region[2] = 1;\n    if (!get_sizet_list(env, argv[5], region, &num_region))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[6], &event_r, false,\n\t\t\t (void**) 
wait_list, &num_events))\n\treturn enif_make_badarg(env);\n    err = ECL_CALL(clEnqueueCopyBufferToImage)(o_queue->queue, \n\t\t\t\t     src_buffer,\n\t\t\t\t     dst_image,\n\t\t\t\t     src_offset,\n\t\t\t\t     origin,\n\t\t\t\t     region,\n\t\t\t\t     num_events,\n\t\t\t\t     num_events ? wait_list : 0,\n\t\t\t\t     &event);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);    \n}\n\nstatic ERL_NIF_TERM ecl_enqueue_map_buffer(ErlNifEnv* env, int argc, \n\t\t\t\t\t   const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           buffer;\n    cl_map_flags     map_flags;\n    size_t           offset;\n    size_t           size;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    cl_event         event;\n    cl_int           err;\n    void*            ptr;\n    UNUSED(argc);\n    UNUSED(ptr);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&buffer))\n\treturn enif_make_badarg(env);\n    if (!get_bitfields(env, argv[2], &map_flags, kv_map_flags))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[3], &offset))\n\treturn enif_make_badarg(env);\n    if (!ecl_get_sizet(env, argv[4], &size))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[5], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n\n    ptr = ECL_CALL(clEnqueueMapBuffer)(o_queue->queue,\n\t\t\t\t       buffer,\n\t\t\t\t       CL_FALSE,\n\t\t\t\t       map_flags,\n\t\t\t\t       offset,\n\t\t\t\t       size,\n\t\t\t\t       num_events,\n\t\t\t\t       num_events ? 
wait_list : 0,\n\t\t\t\t       &event,\n\t\t\t\t       &err);\n    if (!err) {\n\tERL_NIF_TERM t;\n\t// FIXME: how should we handle ptr????\n\tt = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n\n//\n// enqueue_map_image(_Queue, _Image, _MapFlags, _Origin, _Region, _WaitList) ->\n//\nstatic ERL_NIF_TERM ecl_enqueue_map_image(ErlNifEnv* env, int argc, \n\t\t\t\t\t  const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           image;\n    cl_map_flags     map_flags;\n    size_t           origin[3];\n    size_t           region[3];\n    size_t           row_pitch;\n    size_t           slice_pitch;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    size_t           num_origin = 3;\n    size_t           num_region = 3;\n    cl_event         event;\n    cl_int           err;\n    void*            ptr;\n    UNUSED(argc);\n    UNUSED(ptr);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&image))\n\treturn enif_make_badarg(env);\n    if (!get_bitfields(env, argv[2], &map_flags, kv_map_flags))\n\treturn enif_make_badarg(env);\n    origin[0] = origin[1] = origin[2] = 0;\n    if (!get_sizet_list(env, argv[3], origin, &num_origin))\n\treturn enif_make_badarg(env);\n    region[0] = region[1] = region[2] = 1;\n    if (!get_sizet_list(env, argv[4], region, &num_region))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[5], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n\n    ptr = ECL_CALL(clEnqueueMapImage)(o_queue->queue,\n\t\t\t\t      image,\n\t\t\t\t      CL_FALSE,\n\t\t\t\t      map_flags,\n\t\t\t\t      origin,\n\t\t\t\t      region,\n\t\t\t\t      &row_pitch,\n\t\t\t\t      &slice_pitch,\n\t\t\t\t      
num_events,\n\t\t\t\t      num_events ? wait_list : 0,\n\t\t\t\t      &event,\n\t\t\t\t      &err);\n    if (!err) {\n\tERL_NIF_TERM t;\n\t// FIXME: send binary+event to event thread\n\tt = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);    \n}\n\n//\n//  enqueue_unmap_mem_object(_Queue, _Mem, _WaitList) ->    \n//    \n//\nstatic ERL_NIF_TERM ecl_enqueue_unmap_mem_object(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_mem           memobj;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    cl_event         event;\n    void* mapped_ptr;\n    cl_int err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object(env, argv[1], &mem_r, false, (void**)&memobj))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[3], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n    mapped_ptr = 0;  // FIXME!!!!\n    \n    err = ECL_CALL(clEnqueueUnmapMemObject)(o_queue->queue, memobj,\n\t\t\t\t  mapped_ptr,\n\t\t\t\t  num_events,\n\t\t\t\t  num_events ? 
wait_list : 0,\n\t\t\t\t  &event);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);    \n}\n\n#if CL_VERSION_1_2 == 1\n//\nstatic ERL_NIF_TERM ecl_enqueue_migrate_mem_objects(ErlNifEnv* env, int argc,\n\t\t\t\t\t\t    const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_uint          num_mem_objects = MAX_MEM_OBJECTS;\n    cl_mem           mem_objects[MAX_MEM_OBJECTS];\n    cl_mem_migration_flags flags = 0;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    cl_event         event;\n    cl_int err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[1], &mem_r, false,\n\t\t\t (void**) mem_objects, &num_mem_objects))\n\treturn enif_make_badarg(env);\n    if (!get_bitfields(env, argv[2], &flags, kv_migration_flags))\n\treturn enif_make_badarg(env);\n\n    if (!get_object_list(env, argv[3], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n\n    err = ECL_CALL(clEnqueueMigrateMemObjects)(o_queue->queue,\n\t\t\t\t     num_mem_objects,\n\t\t\t\t     num_mem_objects ? mem_objects : NULL,\n\t\t\t\t     flags,\n\t\t\t\t     num_events,\n\t\t\t\t     num_events ? 
wait_list : 0,\n\t\t\t\t     &event);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n#endif\n\ncl_int eclEnqueueBarrier(cl_command_queue queue)\n{\n    return ECL_CALL(clEnqueueBarrierWithWaitList)(queue,0,NULL,NULL);\n}\n\nstatic ERL_NIF_TERM ecl_enqueue_barrier(ErlNifEnv* env, int argc, \n\t\t\t\t\tconst ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_queue;\n    cl_int           err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if(o_queue->version >= 12) {\n\tif (!(err = eclEnqueueBarrier(o_queue->queue))) {\n\t    return ATOM(ok);\n\t}\n    } else {  // deprecated in 1.2, available in 1.1\n\tif (!(err = ECL_CALL(clEnqueueBarrier)(o_queue->queue))) {\n\t    return ATOM(ok);\n\t}\n    }\n    return ecl_make_error(env, err);    \n}\n\n#if CL_VERSION_1_2 == 1\n//\n// cl:enqueue_barrier_with_wait_list(Queue::cl_queue(),\n//                                   WaitList::[cl_event()]) ->\n//    {'ok',cl_event()} | {'error', cl_error()}\n//\nstatic ERL_NIF_TERM ecl_enqueue_barrier_with_wait_list(ErlNifEnv* env,\n\t\t\t\t\t\t       int argc, \n\t\t\t\t\t\t       const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_event         wait_list[MAX_WAIT_LIST];\n    cl_uint          num_events = MAX_WAIT_LIST;\n    cl_event         event;\n    cl_int           err;\n    cl_bool          want_event = true;  // make this an arg?\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[1], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n    err = ECL_CALL(clEnqueueBarrierWithWaitList)(o_queue->queue,num_events,\n\t\t\t\t\t\t num_events ? wait_list : NULL,\n\t\t\t\t\t\t want_event ? 
&event : NULL );\n    if (!err) {\n\tif (want_event) {\n\t    ERL_NIF_TERM t;\n\t    t = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\t    return enif_make_tuple2(env, ATOM(ok), t);\n\t}\n\treturn ATOM(ok);\n    }\n    return ecl_make_error(env, err);\n}\n\n//\n// cl:enqueue_marker_with_wait_list(Queue::cl_queue(),\n//                                   WaitList::[cl_event()]) ->\n//    {'ok',cl_event()} | {'error', cl_error()}\n//\nstatic ERL_NIF_TERM ecl_enqueue_marker_with_wait_list(ErlNifEnv* env,\n\t\t\t\t\t\t      int argc, \n\t\t\t\t\t\t      const ERL_NIF_TERM argv[])\n{\n    ecl_object_t*    o_queue;\n    cl_event      wait_list[MAX_WAIT_LIST];\n    cl_uint        num_events = MAX_WAIT_LIST;\n    cl_int           err;\n    cl_event         event;\n    cl_bool          want_event = true;  // make this an arg?\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!get_object_list(env, argv[1], &event_r, false,\n\t\t\t (void**) wait_list, &num_events))\n\treturn enif_make_badarg(env);\n    err = ECL_CALL(clEnqueueMarkerWithWaitList)(o_queue->queue,num_events,\n\t\t\t\t\t\tnum_events ? wait_list : NULL,\n\t\t\t\t\t\twant_event ? 
&event : NULL );\n    if (!err) {\n\tif (want_event) {\n\t    ERL_NIF_TERM t;\n\t    t = ecl_make_event(env, event, false, false, 0, 0, o_queue);\n\t    return enif_make_tuple2(env, ATOM(ok), t);\n\t}\n\treturn ATOM(ok);\n    }\n    return ecl_make_error(env, err);\n}\n#endif\n\n\n//\n// cl:async_flush(Queue::cl_queue()) -> reference()\n//\nstatic ERL_NIF_TERM ecl_async_flush(ErlNifEnv* env, int argc, \n\t\t\t\t    const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_queue;\n    ecl_context_t* o_context;\n    ecl_message_t m;\n    ERL_NIF_TERM ref;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!(o_context = (ecl_context_t*) o_queue->parent)) // must have context\n\treturn enif_make_badarg(env);\n    if (!(m.env = enif_alloc_env()))\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\n    ref = enif_make_ref(env);\n\n    m.type   = ECL_MESSAGE_FLUSH;\n    (void) enif_self(env, &m.sender);\n    m.ref    = enif_make_copy(m.env, ref);\n    m.queue  = o_queue;\n    // keep while operation is running, release after operation in the thread\n    enif_keep_resource(o_queue);\n    ecl_message_send(o_context->thr, &m);\n    return enif_make_tuple2(env, ATOM(ok), ref);\n}\n\n//\n// cl:async_finish(Queue::cl_queue()) -> reference()\n//\nstatic ERL_NIF_TERM ecl_async_finish(ErlNifEnv* env, int argc, \n\t\t\t\t     const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_queue;\n    ecl_context_t* o_context;\n    ecl_message_t m;\n    ERL_NIF_TERM ref;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &command_queue_r, false, &o_queue))\n\treturn enif_make_badarg(env);\n    if (!(o_context = (ecl_context_t*) o_queue->parent)) // must have context\n\treturn enif_make_badarg(env);\n    if (!(m.env = enif_alloc_env()))\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\n    ref = enif_make_ref(env);\n\n    m.type   =  ECL_MESSAGE_FINISH;\n    (void) enif_self(env, 
&m.sender);\n    m.ref    = enif_make_copy(m.env, ref);\n    m.queue  = o_queue;\n    // keep while operation is running, release after operation in the thread\n    enif_keep_resource(o_queue);   \n    ecl_message_send(o_context->thr, &m);\n    return enif_make_tuple2(env, ATOM(ok), ref);\n}\n//\n// cl:async_wait_for_event(Event) -> {ok,Ref} | {error,Reason}\n// async reply {cl_event, Ref, Result}\n//\nstatic ERL_NIF_TERM ecl_async_wait_for_event(ErlNifEnv* env, int argc, \n\t\t\t\t\t     const ERL_NIF_TERM argv[])\n{\n    ecl_event_t* o_event;\n    ecl_object_t* o_queue;\n    ecl_context_t* o_context;\n    ecl_message_t m;\n    ERL_NIF_TERM ref;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0],&event_r,false,(ecl_object_t**)&o_event))\n\treturn enif_make_badarg(env);\n    if (!(o_queue = o_event->obj.parent))  // queue not found !\n\treturn enif_make_badarg(env);\n    if (!(o_context = (ecl_context_t*) o_queue->parent)) // must have context\n\treturn enif_make_badarg(env);\n    if (!(m.env = enif_alloc_env()))\n\treturn ecl_make_error(env, CL_OUT_OF_RESOURCES);  // enomem?\n    ref = enif_make_ref(env);\n\n    m.type   = ECL_MESSAGE_WAIT_FOR_EVENT;\n    (void) enif_self(env, &m.sender);\n    m.ref    = enif_make_copy(m.env, ref);\n    m.event  = o_event;\n    // keep while operation is running, release after operation in the thread\n    enif_keep_resource(o_event);\n    ecl_message_send(o_context->thr, &m);\n    return enif_make_tuple2(env, ATOM(ok), ref);\n}\n\n// return event info\nstatic ERL_NIF_TERM ecl_get_event_info(ErlNifEnv* env, int argc, \n\t\t\t\t       const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_event;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &event_r, false, &o_event))\n\treturn enif_make_badarg(env);\n    return make_object_info(env, argv[1], o_event,\n\t\t\t    (info_fn_t*) ECL_FUNC_PTR(clGetEventInfo),\n\t\t\t    event_info,\n\t\t\t    sizeof_array(event_info));\n}\n\n// return event profiling info\nstatic 
ERL_NIF_TERM ecl_get_event_profiling_info(ErlNifEnv* env, int argc, \n\t\t\t\t\t\t const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_event;\n    UNUSED(argc);\n    \n    if (!get_ecl_object(env, argv[0], &event_r, false, &o_event))\n\treturn enif_make_badarg(env);\n    return make_object_info(env, argv[1], o_event,\n\t\t\t    (info_fn_t*) ECL_FUNC_PTR(clGetEventProfilingInfo),\n\t\t\t    event_profile_info,\n\t\t\t    sizeof_array(event_profile_info));    \n}\n\n\n#if CL_VERSION_2_0 == 1\n\nstatic ERL_NIF_TERM ecl_create_pipe(ErlNifEnv* env, int argc,\n\t\t\t\t    const ERL_NIF_TERM argv[])\n{\n    ecl_object_t* o_context;\n    cl_mem_flags flags;\n    cl_mem mem;\n    cl_uint pipe_packet_size;\n    cl_uint pipe_max_packets;\n    cl_int err;\n    UNUSED(argc);\n\n    if (!get_ecl_object(env, argv[0], &context_r, false, &o_context))\n\treturn enif_make_badarg(env);\n    if (!get_bitfields(env, argv[1], &flags, kv_mem_flags))\n\treturn enif_make_badarg(env);\n    if (!enif_get_uint(env, argv[2], &pipe_packet_size))\n\treturn enif_make_badarg(env);\n    if (!enif_get_uint(env, argv[3], &pipe_max_packets))\n\treturn enif_make_badarg(env);\n\n    DBG(\"context version: %d\", o_context->version);\n    if (o_context->version < 20)\n\terr = CL_INVALID_CONTEXT;\n    else\n\tmem = ECL_CALL(clCreatePipe)(o_context->context,\n\t\t\t\t     flags,\n\t\t\t\t     pipe_packet_size,\n\t\t\t\t     pipe_max_packets,\n\t\t\t\t     NULL, &err);\n    if (!err) {\n\tERL_NIF_TERM t;\n\tt = ecl_make_object(env, &mem_r,(void*) mem, o_context);\n\treturn enif_make_tuple2(env, ATOM(ok), t);\n    }\n    return ecl_make_error(env, err);\n}\n\n#endif\n\nstatic cl_uint get_version(char *version)\n{\n    cl_uint ver = 0;\n    version += 7;\n    if(*version >= '0' && *version <= '9')\n\tver += (*version-'0')*10;\n    version++;\n    if(*version == '.') {\n\tversion++;\n\tif(*version >= '0' && *version <= '9')\n\t    ver += (*version-'0');\n    }\n    /* fprintf(stderr, \"V3 %s %d\\r\\n\", version, 
ver); */\n    return ver;\n}\n\n// pre-Load Platform Ids and Device Ids, this will make the \n// internal IDs kind of static for the application code. The IDs\n// can then be used in matching etc.\n\nstatic int ecl_pre_load(ErlNifEnv* env, ecl_env_t* ecl, cl_int* rerr)\n{\n    cl_platform_id   platform_id[MAX_PLATFORMS];\n    cl_int           platform_ver[MAX_PLATFORMS];\n    cl_uint          num_platforms;\n    cl_uint          i;\n    cl_int           err;\n    \n    if ((err = ECL_CALL(clGetPlatformIDs)\n\t (MAX_PLATFORMS, platform_id, &num_platforms))) {\n\t*rerr = err;\n\treturn -1;\n    }\n\n    ecl->platform = enif_alloc(num_platforms*sizeof(ecl_platform_t*));\n    ecl->nplatforms = num_platforms;\n    ecl->icd_version = 11;\n\n    // first calculate the icd_version (as max of platform versions)\n    for (i = 0; i < num_platforms; i++) {\n\tchar             version[128];\n\tif(CL_SUCCESS == ECL_CALL(clGetPlatformInfo)\n\t   (platform_id[i], CL_PLATFORM_VERSION, 64, version, NULL)) {\n\t    platform_ver[i] = get_version(version);\n\t    \n\t    if (platform_ver[i] >  ecl->icd_version)\n\t\tecl->icd_version = platform_ver[i];\n\t}\n    }\n\n    for (i = 0; i < num_platforms; i++) {\n\tecl_object_t* obj;\n\tcl_device_id     device_id[MAX_DEVICES];\n\tcl_uint          num_devices;\n\tcl_uint          j;\n\tchar             version[128];\n\tcl_int           ver = -1;\n\n\tobj = ecl_new(env, &platform_r,platform_id[i],0,platform_ver[i]);\n\tecl->platform[i].o_platform = obj;\n\n\tif ((err = ECL_CALL(clGetDeviceIDs)\n\t     (platform_id[i], CL_DEVICE_TYPE_ALL,\n\t      MAX_DEVICES, device_id, &num_devices))) {\n\t    *rerr = err;\n\t    return -1;\n\t}\n\tDBG(\"platform: %d, ver=%d\", i, platform_ver[i]);\n\tecl->platform[i].o_device=enif_alloc(num_devices*sizeof(ecl_object_t));\n\tecl->platform[i].ndevices = num_devices;\n\tfor (j = 0; j < num_devices; j++) {\n\t    ver = ecl->icd_version; // assumed version\n\t    if(CL_SUCCESS == ECL_CALL(clGetDeviceInfo)\n\t    
   (device_id[j], CL_DEVICE_VERSION, 64, version, NULL)) {\n\t\tver = get_version(version);\n\t    }\n\t    obj = ecl_new(env, &device_r, device_id[j],0, ver);\n\t    ecl->platform[i].o_device[j] = obj;\n\t    DBG(\"  device:%d, ver=%d\", j, ver);\n\t}\n    }\n    DBG(\"icd: ver=%d\", ecl->icd_version);\n    return 0;\n}\n\nstatic int  ecl_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)\n{\n    ErlNifResourceFlags tried;\n    ecl_env_t* ecl;\n    cl_int err;\n    lhash_func_t func = { ref_hash, ref_cmp, ref_release, 0 };\n    ErlNifSysInfo sys_info;\n    UNUSED(env);\n    UNUSED(load_info);\n\n    DBG(\"ecl_load\");\n\n    if (!(ecl = enif_alloc(sizeof(ecl_env_t))))\n\treturn -1;\n    ecl->ref_count = 1;\n    ecl->context_list = NULL;\n\n    if (!(ecl->ref_lock = enif_rwlock_create(\"ref_lock\")))\n\treturn -1;\n    if (!(ecl->context_list_lock = enif_rwlock_create(\"context_list_lock\")))\n\treturn -1;\n    if (ecl_queue_init(&ecl->q) < 0)\n\treturn -1;\n    lhash_init(&ecl->ref, \"ref\", 2, &func);\n\n    DBG(\"ecl_load: ecl=%p\", ecl);\n    DBG(\"ecl_load: ecl->context_list_lock=%p\", ecl->context_list_lock);\n\n#if (ERL_NIF_MAJOR_VERSION > 2) || ((ERL_NIF_MAJOR_VERSION == 2) && (ERL_NIF_MINOR_VERSION >= 7))    \n    enif_system_info(&sys_info, sizeof(sys_info));\n    ecl->dirty_scheduler_support = sys_info.dirty_scheduler_support;\n#else\n    ecl->dirty_scheduler_support = 0;\n#endif\n    DBG(\"dirty_scheduler_support = %d\", ecl->dirty_scheduler_support);\n    \n    // load OpenCL functions\n    if (ecl_load_dynfunctions(ecl) < 0)\n\treturn -1;\n\n    // Load atoms\n\n    // General atoms\n    LOAD_ATOM(ok);\n    LOAD_ATOM(error);\n    LOAD_ATOM(unknown);\n    LOAD_ATOM(undefined);\n    LOAD_ATOM(true);\n    LOAD_ATOM(false);\n\n    // async messages\n    LOAD_ATOM(cl_async);\n    LOAD_ATOM(cl_event);\n    \n    // Type names\n    LOAD_ATOM(platform_t);\n    LOAD_ATOM(device_t);\n    LOAD_ATOM(context_t);\n    LOAD_ATOM(command_queue_t);\n    
LOAD_ATOM(mem_t);\n    LOAD_ATOM(sampler_t);\n    LOAD_ATOM(program_t);\n    LOAD_ATOM(kernel_t);\n    LOAD_ATOM(event_t);\n\n    LOAD_ATOM(char);\n    LOAD_ATOM(char2);\n    LOAD_ATOM(char4);\n    LOAD_ATOM(char8);\n    LOAD_ATOM(char16);\n\n    LOAD_ATOM(uchar);\n    LOAD_ATOM(uchar2);\n    LOAD_ATOM(uchar4);\n    LOAD_ATOM(uchar8);\n    LOAD_ATOM(uchar16);\n\n    LOAD_ATOM(short);\n    LOAD_ATOM(short2);\n    LOAD_ATOM(short4);\n    LOAD_ATOM(short8);\n    LOAD_ATOM(short16);\n\n    LOAD_ATOM(ushort);\n    LOAD_ATOM(ushort2);\n    LOAD_ATOM(ushort4);\n    LOAD_ATOM(ushort8);\n    LOAD_ATOM(ushort16);\n\n    LOAD_ATOM(int);\n    LOAD_ATOM(int2);\n    LOAD_ATOM(int4);\n    LOAD_ATOM(int8);\n    LOAD_ATOM(int16);\n\n    LOAD_ATOM(uint);\n    LOAD_ATOM(uint2);\n    LOAD_ATOM(uint4);\n    LOAD_ATOM(uint8);\n    LOAD_ATOM(uint16);\n\n    LOAD_ATOM(long);\n    LOAD_ATOM(long2);\n    LOAD_ATOM(long4);\n    LOAD_ATOM(long8);\n    LOAD_ATOM(long16);\n\n    LOAD_ATOM(ulong);\n    LOAD_ATOM(ulong2);\n    LOAD_ATOM(ulong4);\n    LOAD_ATOM(ulong8);\n    LOAD_ATOM(ulong16);\n\n    LOAD_ATOM(half);\n\n    LOAD_ATOM(float);\n    LOAD_ATOM(float2);\n    LOAD_ATOM(float4);\n    LOAD_ATOM(float8);\n    LOAD_ATOM(float16);\n\n    LOAD_ATOM(double);\n    LOAD_ATOM(double2);\n    LOAD_ATOM(double4);\n    LOAD_ATOM(double8);\n    LOAD_ATOM(double16);\n\n    // records\n    LOAD_ATOM(cl_image_desc);\n    LOAD_ATOM(cl_image_format);\n\n    // channel type\n    LOAD_ATOM(snorm_int8);\n    LOAD_ATOM(snorm_int16);\n    LOAD_ATOM(unorm_int8);\n    LOAD_ATOM(unorm_int16);\n    LOAD_ATOM(unorm_int24);\n    LOAD_ATOM(unorm_short_565);\n    LOAD_ATOM(unorm_short_555);\n    LOAD_ATOM(unorm_int_101010);\n    LOAD_ATOM(signed_int8);\n    LOAD_ATOM(signed_int16);\n    LOAD_ATOM(signed_int32);\n    LOAD_ATOM(unsigned_int8);\n    LOAD_ATOM(unsigned_int16);\n    LOAD_ATOM(unsigned_int32);\n    LOAD_ATOM(half_float);\n\n    // channel order\n    LOAD_ATOM(r);\n    LOAD_ATOM(a);\n    LOAD_ATOM(rg);\n    
LOAD_ATOM(ra);\n    LOAD_ATOM(rgb);\n    LOAD_ATOM(rgba);\n    LOAD_ATOM(bgra);\n    LOAD_ATOM(argb);\n    LOAD_ATOM(intensity);\n    LOAD_ATOM(luminance);\n    LOAD_ATOM(rx);\n    LOAD_ATOM(rgx);\n    LOAD_ATOM(rgbx);\n    LOAD_ATOM(depth);\n    LOAD_ATOM(depth_stencil);\n\n    // partition_property \n    LOAD_ATOM(equally);\n    LOAD_ATOM(by_counts);\n    LOAD_ATOM(by_counts_list_end);\n    LOAD_ATOM(by_affinity_domain);\n\n    // affinity_domain\n    LOAD_ATOM(numa);\n    LOAD_ATOM(l4_cache);\n    LOAD_ATOM(l3_cache);\n    LOAD_ATOM(l2_cache);\n    LOAD_ATOM(l1_cache);\n    LOAD_ATOM(next_partitionable);\n\n    // Load options & flags\n\n    // Device info\n    LOAD_ATOM(type);\n    LOAD_ATOM(vendor_id);\n    LOAD_ATOM(max_compute_units);\n    LOAD_ATOM(max_work_item_dimensions);\n    LOAD_ATOM(max_work_group_size);\n    LOAD_ATOM(max_work_item_sizes);\n    LOAD_ATOM(preferred_vector_width_char);\n    LOAD_ATOM(preferred_vector_width_short);\n    LOAD_ATOM(preferred_vector_width_int);\n    LOAD_ATOM(preferred_vector_width_long);\n    LOAD_ATOM(preferred_vector_width_float);\n    LOAD_ATOM(preferred_vector_width_double);\n    LOAD_ATOM(max_clock_frequency);\n    LOAD_ATOM(address_bits);\n    LOAD_ATOM(max_read_image_args);\n    LOAD_ATOM(max_write_image_args);\n    LOAD_ATOM(max_read_write_image_args);\n    LOAD_ATOM(il_version);    \n    LOAD_ATOM(max_mem_alloc_size);\n    LOAD_ATOM(image2d_max_width);\n    LOAD_ATOM(image2d_max_height);\n    LOAD_ATOM(image3d_max_width);\n    LOAD_ATOM(image3d_max_height);\n    LOAD_ATOM(image3d_max_depth);\n    LOAD_ATOM(image_support);\n    LOAD_ATOM(max_parameter_size);\n    LOAD_ATOM(max_samplers);\n    LOAD_ATOM(mem_base_addr_align);\n    LOAD_ATOM(min_data_type_align_size);\n    LOAD_ATOM(single_fp_config);\n    LOAD_ATOM(global_mem_cache_type);\n    LOAD_ATOM(global_mem_cacheline_size);\n    LOAD_ATOM(global_mem_cache_size);\n    LOAD_ATOM(global_mem_size);\n    LOAD_ATOM(max_constant_buffer_size);\n    
LOAD_ATOM(max_constant_args);\n    LOAD_ATOM(local_mem_type);\n    LOAD_ATOM(local_mem_size);\n    LOAD_ATOM(error_correction_support);\n    LOAD_ATOM(profiling_timer_resolution);\n    LOAD_ATOM(endian_little);\n    LOAD_ATOM(available);\n    LOAD_ATOM(compiler_available);\n    LOAD_ATOM(execution_capabilities);\n    LOAD_ATOM(queue_properties);\n    LOAD_ATOM(name);\n    LOAD_ATOM(vendor);\n    LOAD_ATOM(driver_version);\n    LOAD_ATOM(profile);\n    LOAD_ATOM(version);\n    LOAD_ATOM(extensions);\n    LOAD_ATOM(platform);\n\n    LOAD_ATOM(double_fp_config);\n    LOAD_ATOM(half_fp_config);\n    LOAD_ATOM(preferred_vector_width_half);\n    LOAD_ATOM(host_unified_memory);\n    LOAD_ATOM(native_vector_width_char);\n    LOAD_ATOM(native_vector_width_short);\n    LOAD_ATOM(native_vector_width_int);\n    LOAD_ATOM(native_vector_width_long);\n    LOAD_ATOM(native_vector_width_float);\n    LOAD_ATOM(native_vector_width_double);\n    LOAD_ATOM(native_vector_width_half);\n    LOAD_ATOM(opencl_c_version);\n    LOAD_ATOM(linker_available);\n    LOAD_ATOM(built_in_kernels);\n    LOAD_ATOM(image_max_buffer_size);\n    LOAD_ATOM(image_max_array_size);\n    LOAD_ATOM(parent_device);\n    LOAD_ATOM(partition_max_sub_devices);\n    LOAD_ATOM(partition_properties);\n    LOAD_ATOM(partition_affinity_domain);\n    LOAD_ATOM(partition_type);\n    LOAD_ATOM(reference_count);\n    LOAD_ATOM(preferred_interop_user_sync);\n    LOAD_ATOM(printf_buffer_size);\n    LOAD_ATOM(image_pitch_alignment);\n    LOAD_ATOM(image_base_address_alignment);\n    // cl_nv_device_attribute_query extension\n    LOAD_ATOM(compute_capability_major_nv);\n    LOAD_ATOM(compute_capability_minor_nv);\n    LOAD_ATOM(registers_per_block_nv);\n    LOAD_ATOM(warp_size_nv);\n    LOAD_ATOM(gpu_overlap_nv);\n    LOAD_ATOM(kernel_exec_timeout_nv);\n    LOAD_ATOM(device_integrated_memory_nv);\n\n     // Platform info\n    LOAD_ATOM(profile);\n    LOAD_ATOM(version);\n    LOAD_ATOM(name);\n    LOAD_ATOM(vendor);\n    
LOAD_ATOM(extensions);\n\n     // Context info\n    LOAD_ATOM(reference_count);\n    LOAD_ATOM(devices);\n    LOAD_ATOM(properties);\n\n    // Queue info\n    LOAD_ATOM(context);\n    LOAD_ATOM(num_devices);\n    LOAD_ATOM(device);\n    LOAD_ATOM(reference_count);\n    LOAD_ATOM(properties);\n\n    // Mem info\n    LOAD_ATOM(object_type);\n    LOAD_ATOM(flags);\n    LOAD_ATOM(size);\n    LOAD_ATOM(host_ptr);\n    LOAD_ATOM(map_count);\n    LOAD_ATOM(reference_count); \n    LOAD_ATOM(context);\n\n    // Image info\n    LOAD_ATOM(format);\n    LOAD_ATOM(element_size);\n    LOAD_ATOM(row_pitch);\n    LOAD_ATOM(slice_pitch);\n    LOAD_ATOM(width);\n    LOAD_ATOM(height);\n    LOAD_ATOM(depth);\n\n    // Sampler info\n    LOAD_ATOM(reference_count);\n    LOAD_ATOM(context);\n    LOAD_ATOM(normalized_coords);\n    LOAD_ATOM(addressing_mode);\n    LOAD_ATOM(filter_mode);\n\n    // Program info\n    LOAD_ATOM(reference_count);\n    LOAD_ATOM(context);\n    LOAD_ATOM(num_decices);\n    LOAD_ATOM(devices);\n    LOAD_ATOM(source); \n    LOAD_ATOM(binary_sizes);\n    LOAD_ATOM(binaries);\n\n    // Build Info\n    LOAD_ATOM(status);\n    LOAD_ATOM(options);\n    LOAD_ATOM(log);\n    LOAD_ATOM(binary_type);\n\n    // Kernel Info\n    LOAD_ATOM(function_name);\n    LOAD_ATOM(num_args);\n    LOAD_ATOM(reference_count);\n    LOAD_ATOM(context);\n    LOAD_ATOM(program);\n\n    // Event Info\n    LOAD_ATOM(command_queue);\n    LOAD_ATOM(command_type);\n    LOAD_ATOM(reference_count);\n    LOAD_ATOM(execution_status);\n\n    // Event Profile Info\n    LOAD_ATOM(command_queued);\n    LOAD_ATOM(command_submit);\n    LOAD_ATOM(command_start);\n    LOAD_ATOM(command_end);\n    LOAD_ATOM(command_complete);\n\n    // Workgroup info\n    LOAD_ATOM(work_group_size);\n    LOAD_ATOM(compile_work_group_size);\n    LOAD_ATOM(local_mem_size);\n    LOAD_ATOM(preferred_work_group_size_multiple);\n    LOAD_ATOM(private_mem_size);\n    LOAD_ATOM(global_work_size);\n\n    // Error codes\n    
LOAD_ATOM(device_not_found);\n    LOAD_ATOM(device_not_available);\n    LOAD_ATOM(compiler_not_available);\n    LOAD_ATOM(mem_object_allocation_failure);\n    LOAD_ATOM(out_of_resources);\n    LOAD_ATOM(out_of_host_memory);\n    LOAD_ATOM(profiling_info_not_available);\n    LOAD_ATOM(mem_copy_overlap);\n    LOAD_ATOM(image_format_mismatch);\n    LOAD_ATOM(image_format_not_supported);\n    LOAD_ATOM(build_program_failure);\n    LOAD_ATOM(map_failure);\n    LOAD_ATOM(invalid_value);\n    LOAD_ATOM(invalid_device_type);\n    LOAD_ATOM(invalid_platform);\n    LOAD_ATOM(invalid_device);\n    LOAD_ATOM(invalid_context);\n    LOAD_ATOM(invalid_queue_properties);\n    LOAD_ATOM(invalid_command_queue);\n    LOAD_ATOM(invalid_host_ptr);\n    LOAD_ATOM(invalid_mem_object);\n    LOAD_ATOM(invalid_image_format_descriptor);\n    LOAD_ATOM(invalid_image_size);\n    LOAD_ATOM(invalid_sampler);\n    LOAD_ATOM(invalid_binary);\n    LOAD_ATOM(invalid_build_options);\n    LOAD_ATOM(invalid_program);\n    LOAD_ATOM(invalid_program_executable);\n    LOAD_ATOM(invalid_kernel_name);\n    LOAD_ATOM(invalid_kernel_definition);\n    LOAD_ATOM(invalid_kernel);\n    LOAD_ATOM(invalid_arg_index);\n    LOAD_ATOM(invalid_arg_value);\n    LOAD_ATOM(invalid_arg_size);\n    LOAD_ATOM(invalid_kernel_args);\n    LOAD_ATOM(invalid_work_dimension);\n    LOAD_ATOM(invalid_work_group_size);\n    LOAD_ATOM(invalid_work_item_size);\n    LOAD_ATOM(invalid_global_offset);\n    LOAD_ATOM(invalid_event_wait_list);\n    LOAD_ATOM(invalid_event);\n    LOAD_ATOM(invalid_operation);\n    LOAD_ATOM(invalid_gl_object);\n    LOAD_ATOM(invalid_buffer_size);\n    LOAD_ATOM(invalid_mip_level);\n    LOAD_ATOM(invalid_global_work_size);\n    LOAD_ATOM(device_partition_failed);\n    LOAD_ATOM(invalid_device_partition_count);\n\n    // cl_device_type\n    LOAD_ATOM(all);\n    LOAD_ATOM(default);\n    LOAD_ATOM(cpu);\n    LOAD_ATOM(gpu);\n    LOAD_ATOM(accelerator);\n    LOAD_ATOM(custom);\n\n    // fp_config\n    
LOAD_ATOM(denorm);\n    LOAD_ATOM(inf_nan);\n    LOAD_ATOM(round_to_nearest);\n    LOAD_ATOM(round_to_zero);\n    LOAD_ATOM(round_to_inf);\n    LOAD_ATOM(fma);\n    LOAD_ATOM(soft_float);\n    LOAD_ATOM(correctly_rounded_divide_sqrt);\n\n    // mem_cache_type\n    LOAD_ATOM(none);\n    LOAD_ATOM(read_only);\n    LOAD_ATOM(read_write);\n\n    // local_mem_type\n    LOAD_ATOM(local);\n    LOAD_ATOM(global);\n\n    // exec capability\n    LOAD_ATOM(kernel);\n    LOAD_ATOM(native_kernel);\n\n    // command_queue_properties\n    LOAD_ATOM(out_of_order_exec_mode_enable);\n    LOAD_ATOM(profiling_enable);\n\n    // mem_flags\n    LOAD_ATOM(read_write);\n    LOAD_ATOM(write_only);\n    LOAD_ATOM(read_only);\n    LOAD_ATOM(use_host_ptr);\n    LOAD_ATOM(alloc_host_ptr);\n    LOAD_ATOM(copy_host_ptr);\n\n    // migration_flags\n    LOAD_ATOM(host);\n    LOAD_ATOM(content_undefined);\n\n    // mem_object_type\n    LOAD_ATOM(buffer);\n    LOAD_ATOM(image2d);\n    LOAD_ATOM(image3d);\n    LOAD_ATOM(image2d_array);\n    LOAD_ATOM(image1d);\n    LOAD_ATOM(image1d_array);\n    LOAD_ATOM(image1d_buffer);\n    LOAD_ATOM(pipe);\n\n    // addressing_mode\n    LOAD_ATOM(none);\n    LOAD_ATOM(clamp_to_edge);\n    LOAD_ATOM(clamp);\n    LOAD_ATOM(repeat);\n\n    // filter_mode\n    LOAD_ATOM(nearest);\n    LOAD_ATOM(linear);\n\n    // map_flags\n    LOAD_ATOM(read);\n    LOAD_ATOM(write);\n\n    // build_status\n    LOAD_ATOM(success);\n    LOAD_ATOM(none);\n    LOAD_ATOM(error);\n    LOAD_ATOM(in_progress);\n\n    // program_binary_type\n    LOAD_ATOM(none);\n    LOAD_ATOM(compiled_object);\n    LOAD_ATOM(library);\n    LOAD_ATOM(executable);\n\n    // command_type\n    LOAD_ATOM(ndrange_kernel);\n    LOAD_ATOM(task);\n    LOAD_ATOM(native_kernel);\n    LOAD_ATOM(read_buffer);\n    LOAD_ATOM(write_buffer);\n    LOAD_ATOM(copy_buffer);\n    LOAD_ATOM(read_image);\n    LOAD_ATOM(write_image);\n    LOAD_ATOM(copy_image);\n    LOAD_ATOM(copy_image_to_buffer);\n    
LOAD_ATOM(copy_buffer_to_image);\n    LOAD_ATOM(map_buffer);\n    LOAD_ATOM(map_image);\n    LOAD_ATOM(unmap_mem_object);\n    LOAD_ATOM(marker);\n    LOAD_ATOM(aquire_gl_objects);\n    LOAD_ATOM(release_gl_objects);\n    LOAD_ATOM(migreate_mem_objects);\n    LOAD_ATOM(fill_buffer);\n    LOAD_ATOM(fill_image);\n\n    // execution_status\n    LOAD_ATOM(complete);\n    LOAD_ATOM(running);\n    LOAD_ATOM(submitted);\n    LOAD_ATOM(queued);\n\n    // arguments\n    LOAD_ATOM(region);\n\n    LOAD_ATOM(global);\n    LOAD_ATOM(local);\n    LOAD_ATOM(constant);\n    LOAD_ATOM(private);\n\n    LOAD_ATOM(read_only);\n    LOAD_ATOM(write_only);\n    LOAD_ATOM(read_write);\n    LOAD_ATOM(none);\n\n    LOAD_ATOM(none);\n    LOAD_ATOM(const);\n    LOAD_ATOM(restrict);\n    LOAD_ATOM(volatile);\n\n    LOAD_ATOM(address_qualifier);\n    LOAD_ATOM(access_qualifier);\n    LOAD_ATOM(type_name);\n    LOAD_ATOM(type_qualifier);\n    LOAD_ATOM(name);\n\n    // Create resource types\n    ecl_resource_init(env, &platform_r, \"platform_t\", \n\t\t      sizeof(ecl_object_t),\n\t\t      ecl_platform_dtor,\n\t\t      ERL_NIF_RT_CREATE, &tried);\n    ecl_resource_init(env, &device_r, \"device_t\",\n\t\t      sizeof(ecl_object_t),\n\t\t      ecl_device_dtor,\n\t\t      ERL_NIF_RT_CREATE, &tried);\n    ecl_resource_init(env, &context_r, \"context_t\",\n\t\t      sizeof(ecl_context_t),     // NOTE! 
specialized!\n\t\t      ecl_context_dtor,\n\t\t      ERL_NIF_RT_CREATE, &tried);\n    ecl_resource_init(env, &command_queue_r, \"command_queue_t\",\n\t\t      sizeof(ecl_object_t),\n\t\t      ecl_queue_dtor,\n\t\t      ERL_NIF_RT_CREATE, &tried);\n    ecl_resource_init(env, &mem_r, \"mem_t\", \n\t\t      sizeof(ecl_object_t),\n\t\t      ecl_mem_dtor,\n\t\t      ERL_NIF_RT_CREATE, &tried);\n    ecl_resource_init(env, &sampler_r, \"sampler_t\",\n\t\t      sizeof(ecl_object_t),\n\t\t      ecl_sampler_dtor,\n\t\t      ERL_NIF_RT_CREATE, &tried);\n    ecl_resource_init(env, &program_r, \"program_t\",\n\t\t      sizeof(ecl_object_t),\n\t\t      ecl_program_dtor,\n\t\t      ERL_NIF_RT_CREATE, &tried);\n    ecl_resource_init(env, &kernel_r, \"kernel_t\",\n\t\t      sizeof(ecl_kernel_t),   // NOTE! specialized!\n\t\t      ecl_kernel_dtor,\n\t\t      ERL_NIF_RT_CREATE, &tried);\n    ecl_resource_init(env, &event_r, \"event_t\",\n\t\t      sizeof(ecl_event_t),    // NOTE! specialized!\n\t\t      ecl_event_dtor,\n\t\t      ERL_NIF_RT_CREATE, &tried);\n    *priv_data = ecl;\n\n    if (ecl_pre_load(env, ecl, &err) < 0) {\n\tCL_ERROR(\"ecl_pre_load: error code = %d\", err);\n    }\n\n    return 0;\n}\n\n#ifdef WIN32\n#define RTLD_LAZY 0\n#define OPENCL_LIB \"opencl.dll\"\ntypedef HMODULE DL_LIB_P;\nvoid * dlsym(HMODULE Lib, const char *func) {\n    return (void *) GetProcAddress(Lib, func);\n}\n\nHMODULE dlopen(const CHAR *DLL, int unused) {\n  UNUSED(unused);\n  return LoadLibrary(DLL);\n}\n#else\ntypedef void * DL_LIB_P;\n# ifdef DARWIN\n#   define OPENCL_LIB \"/System/Library/Frameworks/OpenCL.framework/OpenCL\"\n# else\n#   define OPENCL_LIB \"libOpenCL.so\"\n# endif\n#endif\n\nstatic int ecl_load_dynfunctions(ecl_env_t* ecl)\n{\n    DL_LIB_P handle;\n//    if(ecl->icd_version < 12)\n//\treturn;\n    if ((handle = dlopen(OPENCL_LIB, RTLD_LAZY))) {\n\tint i = 0;\n\n\twhile(ecl_function[i].name != NULL) {\n\t    if (ecl_function[i].func != NULL) {\n\t\tfprintf(stderr, 
\"function %s already loaded\\r\\n\",\n\t\t\tecl_function[i].name);\n\t    }\n\t    else {\n\t\tecl_function[i].func = dlsym(handle, ecl_function[i].name);\n\t\tif (ecl_function[i].func == NULL) {\n#ifdef DEBUG\n\t\t    fprintf(stderr, \"unabled to load function %s\\r\\n\",\n\t\t\t    ecl_function[i].name);\n#endif\n\t\t}\n\t\telse {\n#ifdef DEBUG\n\t\t    fprintf(stderr, \"load function %s/%d.%d @ %p\\r\\n\",\n\t\t\t    ecl_function[i].name,\n\t\t\t    ecl_function[i].version / 10,\n\t\t\t    ecl_function[i].version % 10,\n\t\t\t    ecl_function[i].func);\n#endif\n\t\t}\n\t    }\n\t    i++;\n\t}\n\tif (ecl_function[i_clCreateImage].func == NULL)\n\t    ecl_function[i_clCreateImage].func = e_clCreateImage;\n\n\t// patch functions not present or deprecated functions when possible\n\tif (ecl->icd_version >= 12) {\n\t    ecl_function[i_clCreateImage2D].func = eclCreateImage2D;\n\t    ecl_function[i_clCreateImage3D].func = eclCreateImage3D;\n\t}\n\treturn 0;\n    }\n    fprintf(stderr, \"Failed open OpenCL dynamic library\\r\\n\");\n    return -1;\n}\n\nstatic int ecl_upgrade(ErlNifEnv* env, void** priv_data, void** old_priv_data,\n\t\t\tERL_NIF_TERM load_info)\n{\n    ErlNifResourceFlags tried;\n    ecl_context_t* ctx;\n    ecl_env_t* ecl = (ecl_env_t*) *old_priv_data;\n    int sync_count;\n    UNUSED(load_info);\n\n    ecl->ref_count++;\n    DBG(\"ecl_upgrade: ecl=%p\", ecl, ecl->ref_count);\n\n    // upgrade resource types\n    ecl_resource_init(env, &platform_r, \"platform_t\", \n\t\t      sizeof(ecl_object_t),\n\t\t      ecl_platform_dtor,\n\t\t      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);\n    ecl_resource_init(env, &device_r, \"device_t\",\n\t\t      sizeof(ecl_object_t),\n\t\t      ecl_device_dtor,\n\t\t      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);\n\n    ecl_resource_init(env, &command_queue_r, \"command_queue_t\",\n\t\t      sizeof(ecl_object_t),\n\t\t      ecl_queue_dtor,\n\t\t      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);\n    
ecl_resource_init(env, &mem_r, \"mem_t\", \n\t\t      sizeof(ecl_object_t),\n\t\t      ecl_mem_dtor,\n\t\t      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);\n    ecl_resource_init(env, &sampler_r, \"sampler_t\",\n\t\t      sizeof(ecl_object_t),\n\t\t      ecl_sampler_dtor,\n\t\t      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);\n    ecl_resource_init(env, &program_r, \"program_t\",\n\t\t      sizeof(ecl_object_t),\n\t\t      ecl_program_dtor,\n\t\t      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);\n    ecl_resource_init(env, &kernel_r, \"kernel_t\",\n\t\t      sizeof(ecl_kernel_t),   // NOTE! specialized!\n\t\t      ecl_kernel_dtor,\n\t\t      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);\n    ecl_resource_init(env, &event_r, \"event_t\",\n\t\t      sizeof(ecl_event_t),    // NOTE! specialized!\n\t\t      ecl_event_dtor,\n\t\t      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);\n\n    ecl_resource_init(env, &context_r, \"context_t\",\n\t\t      sizeof(ecl_context_t),     // NOTE! 
specialized!\n\t\t      ecl_context_dtor,\n\t\t      ERL_NIF_RT_CREATE|ERL_NIF_RT_TAKEOVER, &tried);\n\n    // Scan through all contexts and initiate upgrade & sync of the threads\n    DBG(\"ecl_upgrade: upgrade and sync ecl=%p\", ecl);\n    DBG(\"ecl_upgrade: upgrade and sync ecl->context_list_lock=%p\",\n\tecl->context_list_lock);\n    sync_count = 0;\n    enif_rwlock_rwlock(ecl->context_list_lock);\n    for (ctx = ecl->context_list; ctx != NULL; ctx = ctx->next) {\n\tecl_message_t m;\n\tDBG(\"ecl_upgrade: ctx=%p\", ctx);\n\tm.type   = ECL_MESSAGE_UPGRADE;\n\tm.upgrade = ecl_context_main;\n\tDBG(\"ecl_upgrade: send upgrade func=%p to %p\", \n\t    ecl_context_main, ctx->thr);\n\tecl_message_send(ctx->thr, &m);\n\n\tm.type   = ECL_MESSAGE_SYNC;\n\tDBG(\"ecl_upgrade: send sync to %p\", ctx->thr);\n\tecl_message_send(ctx->thr, &m);\n\tsync_count++;\n    }\n    enif_rwlock_rwunlock(ecl->context_list_lock);\n    \n    while(sync_count) {\n\tecl_message_t m;\n\tint r;\n\tif ((r = ecl_queue_get(&ecl->q, &m)) < 0)\n\t    return -1;\n\tif (m.type != ECL_MESSAGE_SYNC_ACK)\n\t    return -1;\n\tsync_count--;\n    }\n\n    *priv_data = *old_priv_data;\n    return 0;\n}\n\nstatic void ecl_unload(ErlNifEnv* env, void* priv_data)\n{\n    ecl_env_t* ecl = priv_data;\n    UNUSED(env);\n\n    ecl->ref_count--;\n    DBG(\"ecl_unload: ecl=%p ref_count=%d\", ecl, ecl->ref_count);\n    if (ecl->ref_count == 0) {\n\tcl_uint i;\n\tcl_uint j;\n\n\tfor (i = 0; i < ecl->nplatforms; i++) {\n\t    ecl_object_t* obj;\n\n\t    for (j = 0; j < ecl->platform[i].ndevices; j++) {\n\t\tobj = ecl->platform[i].o_device[j];\n\t\tenif_release_resource(obj);\n\t    }\n\t    enif_free(ecl->platform[i].o_device);\n\t    \n\t    obj = ecl->platform[i].o_platform;\n\t    
enif_release_resource(obj);\n\t}\n\tenif_free(ecl->platform);\n\n\tenif_rwlock_rwlock(ecl->ref_lock);\n\tlhash_delete(&ecl->ref);\n\tenif_rwlock_rwunlock(ecl->ref_lock);\n\n\tenif_rwlock_destroy(ecl->ref_lock);\n\n\tenif_rwlock_rwlock(ecl->context_list_lock);\n\tDBG(\"ecl->context_list = %p\", ecl->context_list);\n\tenif_rwlock_rwunlock(ecl->context_list_lock);\n\n\tenif_rwlock_destroy(ecl->context_list_lock);\n\n\tenif_free(ecl);\n    }\n}\n\n/*\n#warning \"testing only, REMOVE before release\"\n#define ERL_NIF_INIT_BODY\t\t\t\\\n     DBG(\"erl_nif_init\")\n*/\n\nERL_NIF_INIT(cl, ecl_funcs, \n\t     ecl_load, NULL,\n\t     ecl_upgrade, ecl_unload)\n"
  },
  {
    "path": "c_src/ecl_types.h",
    "content": "//\n// definition of types needed to implement cl functions \n// for a range of different version.\n//\n#ifndef __ECL_TYPES_H__\n#define __ECL_TYPES_H__\n\n//\n#if !defined(CL_VERSION_1_2)\ntypedef struct _cl_image_desc {\n    cl_mem_object_type      image_type;\n    size_t                  image_width;\n    size_t                  image_height;\n    size_t                  image_depth;\n    size_t                  image_array_size;\n    size_t                  image_row_pitch;\n    size_t                  image_slice_pitch;\n    cl_uint                 num_mip_levels;\n    cl_uint                 num_samples;\n    cl_mem                  buffer;\n} cl_image_desc;\n#endif\n\n// Function types t_<functionName>\n\ntypedef cl_int (CL_CALLBACK * t_clGetPlatformIDs)(cl_uint, cl_platform_id *, cl_uint *);\n\ntypedef cl_int (CL_CALLBACK * t_clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t , void *, size_t *);\n\ntypedef cl_int (CL_CALLBACK * t_clGetDeviceIDs)(cl_platform_id,\n cl_device_type, cl_uint, cl_device_id *, cl_uint *);\n\ntypedef cl_int (CL_CALLBACK * t_clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void *,\n size_t *);\n \ntypedef cl_int (CL_CALLBACK * t_clCreateSubDevices)(cl_device_id, const cl_device_partition_property *, cl_uint, cl_device_id *, cl_uint *);\n\ntypedef cl_int (CL_CALLBACK * t_clRetainDevice)(cl_device_id );\n \ntypedef cl_int (CL_CALLBACK * t_clReleaseDevice)(cl_device_id );\n \n\ntypedef cl_context (CL_CALLBACK * t_clCreateContext)(const cl_context_properties *,cl_uint,const cl_device_id *,void (CL_CALLBACK *)(const char *, const void *, size_t, void *),void *,cl_int *);\n\ntypedef cl_context (CL_CALLBACK * t_clCreateContextFromType)(const cl_context_properties *,cl_device_type,void (CL_CALLBACK *)(const char *, const void *, size_t, void *),void *,cl_int *);\n\ntypedef cl_int (CL_CALLBACK * t_clRetainContext)(cl_context );\n\ntypedef cl_int (CL_CALLBACK * t_clReleaseContext)(cl_context );\n\ntypedef 
cl_int (CL_CALLBACK * t_clGetContextInfo)(cl_context, cl_context_info, size_t, void *, size_t *);\n\n\ntypedef cl_command_queue (CL_CALLBACK * t_clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties,cl_int *);\n\ntypedef cl_int (CL_CALLBACK * t_clRetainCommandQueue)(cl_command_queue );\n\ntypedef cl_int (CL_CALLBACK * t_clReleaseCommandQueue)(cl_command_queue );\n\ntypedef cl_int (CL_CALLBACK * t_clGetCommandQueueInfo)(cl_command_queue,cl_command_queue_info,size_t,void *,size_t *);\n\n\ntypedef cl_mem (CL_CALLBACK * t_clCreateBuffer)(cl_context,cl_mem_flags,size_t,void *,cl_int *);\n\ntypedef cl_mem (CL_CALLBACK * t_clCreateSubBuffer)(cl_mem,cl_mem_flags,cl_buffer_create_type,const void *,cl_int *);\n\ntypedef cl_mem (CL_CALLBACK * t_clCreateImage)(cl_context,cl_mem_flags,const cl_image_format *,const cl_image_desc *, void *,cl_int *);\n \ntypedef cl_int (CL_CALLBACK * t_clRetainMemObject)(cl_mem );\n\ntypedef cl_int (CL_CALLBACK * t_clReleaseMemObject)(cl_mem );\n\ntypedef cl_int (CL_CALLBACK * t_clGetSupportedImageFormats)(cl_context,cl_mem_flags,cl_mem_object_type,cl_uint,cl_image_format *,cl_uint *);\n \ntypedef cl_int (CL_CALLBACK * t_clGetMemObjectInfo)(cl_mem,cl_mem_info, size_t,void *,size_t *);\n\ntypedef cl_int (CL_CALLBACK * t_clGetImageInfo)(cl_mem,cl_image_info, size_t,void *,size_t *);\n\ntypedef cl_int (CL_CALLBACK * t_clSetMemObjectDestructorCallback)( cl_mem, void (CL_CALLBACK *)( cl_mem, void*), void *) ; \n\n\ntypedef cl_sampler (CL_CALLBACK * t_clCreateSampler)(cl_context,cl_bool, cl_addressing_mode, cl_filter_mode,cl_int *);\ntypedef cl_int (CL_CALLBACK * t_clRetainSampler)(cl_sampler );\n\ntypedef cl_int (CL_CALLBACK * t_clReleaseSampler)(cl_sampler );\n\ntypedef cl_int (CL_CALLBACK * t_clGetSamplerInfo)(cl_sampler,cl_sampler_info,size_t,void *,size_t *);\n \n\ntypedef cl_program (CL_CALLBACK * t_clCreateProgramWithSource)(cl_context,cl_uint,const char **,const size_t *,cl_int *);\n\ntypedef cl_program (CL_CALLBACK * 
t_clCreateProgramWithBinary)(cl_context,cl_uint,const cl_device_id *,const size_t *,const unsigned char **,cl_int *,cl_int *);\n\ntypedef cl_program (CL_CALLBACK * t_clCreateProgramWithBuiltInKernels)(cl_context,cl_uint,const cl_device_id *,const char *,cl_int *);\n\ntypedef cl_program (CL_CALLBACK * t_clCreateProgramWithIL)(cl_context,cl_uint,const void *,const size_t,cl_int *);\n\ntypedef cl_int (CL_CALLBACK * t_clRetainProgram)(cl_program );\n\ntypedef cl_int (CL_CALLBACK * t_clReleaseProgram)(cl_program );\n\ntypedef cl_int (CL_CALLBACK * t_clBuildProgram)(cl_program,cl_uint,const cl_device_id *,const char *, void (CL_CALLBACK *)(cl_program, void *),void *);\n\ntypedef cl_int (CL_CALLBACK * t_clCompileProgram)(cl_program,cl_uint,const cl_device_id *,const char *, cl_uint,const cl_program *,const char **,void (CL_CALLBACK *)(cl_program, void *),void *);\n\ntypedef cl_program (CL_CALLBACK * t_clLinkProgram)(cl_context,cl_uint,const cl_device_id *,const char *, cl_uint,const cl_program *,void (CL_CALLBACK *)(cl_program, void *),void *,cl_int *);\n\n\ntypedef cl_int (CL_CALLBACK * t_clUnloadPlatformCompiler)(cl_platform_id );\n\ntypedef cl_int (CL_CALLBACK * t_clGetProgramInfo)(cl_program,cl_program_info,size_t,void *,size_t *);\n\ntypedef cl_int (CL_CALLBACK * t_clGetProgramBuildInfo)(cl_program,cl_device_id,cl_program_build_info,size_t,void *,size_t *);\n \n\ntypedef cl_kernel (CL_CALLBACK * t_clCreateKernel)(cl_program,const char *,cl_int *);\n\ntypedef cl_int (CL_CALLBACK * t_clCreateKernelsInProgram)(cl_program,cl_uint,cl_kernel *,cl_uint *);\n\ntypedef cl_int (CL_CALLBACK * t_clRetainKernel)(cl_kernel );\n\ntypedef cl_int (CL_CALLBACK * t_clReleaseKernel)(cl_kernel );\n\ntypedef cl_int (CL_CALLBACK * t_clSetKernelArg)(cl_kernel,cl_uint,size_t,const void *);\n\ntypedef cl_int (CL_CALLBACK * t_clGetKernelInfo)(cl_kernel,cl_kernel_info,size_t,void *,size_t *);\n\ntypedef cl_int (CL_CALLBACK * t_clGetKernelArgInfo)(cl_kernel,cl_uint,cl_kernel_arg_info,size_t,void 
*,size_t *);\n\ntypedef cl_int (CL_CALLBACK * t_clGetKernelWorkGroupInfo)(cl_kernel,cl_device_id,cl_kernel_work_group_info,size_t,void *,size_t *);\n\n\ntypedef cl_int (CL_CALLBACK * t_clWaitForEvents)(cl_uint,const cl_event *);\n\ntypedef cl_int (CL_CALLBACK * t_clGetEventInfo)(cl_event,cl_event_info,size_t,void *,size_t *);\n \ntypedef cl_event (CL_CALLBACK * t_clCreateUserEvent)(cl_context,cl_int *); \n \ntypedef cl_int (CL_CALLBACK * t_clRetainEvent)(cl_event );\n\ntypedef cl_int (CL_CALLBACK * t_clReleaseEvent)(cl_event );\n\ntypedef cl_int (CL_CALLBACK * t_clSetUserEventStatus)(cl_event,cl_int );\n \ntypedef cl_int (CL_CALLBACK * t_clSetEventCallback)( cl_event,cl_int,void (CL_CALLBACK *)(cl_event, cl_int, void *),void *);\n\ntypedef cl_int (CL_CALLBACK * t_clGetEventProfilingInfo)(cl_event,cl_profiling_info,size_t,void *,size_t *);\n\ntypedef cl_int (CL_CALLBACK * t_clFlush)(cl_command_queue );\n\ntypedef cl_int (CL_CALLBACK * t_clFinish)(cl_command_queue );\n\ntypedef cl_int (CL_CALLBACK * t_clEnqueueReadBuffer)(cl_command_queue,cl_mem,cl_bool,size_t,size_t, void *,cl_uint,const cl_event *,cl_event *);\n \ntypedef cl_int (CL_CALLBACK * t_clEnqueueReadBufferRect)(cl_command_queue,cl_mem,cl_bool,const size_t *,const size_t *, const size_t *,size_t,size_t,size_t,size_t, void *,cl_uint,const cl_event *,cl_event *);\n \ntypedef cl_int (CL_CALLBACK * t_clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *);\n \ntypedef cl_int (CL_CALLBACK * t_clEnqueueWriteBufferRect)(cl_command_queue,cl_mem,cl_bool,const size_t *,const size_t *, const size_t *,size_t,size_t,size_t,size_t, const void *,cl_uint,const cl_event *,cl_event *);\n \ntypedef cl_int (CL_CALLBACK * t_clEnqueueFillBuffer)(cl_command_queue,cl_mem, const void *, size_t, size_t, size_t, cl_uint, const cl_event *, cl_event *);\n \ntypedef cl_int (CL_CALLBACK * t_clEnqueueCopyBuffer)(cl_command_queue, cl_mem,cl_mem, size_t,size_t,size_t, 
cl_uint,const cl_event *,cl_event *);\n \ntypedef cl_int (CL_CALLBACK * t_clEnqueueCopyBufferRect)(cl_command_queue, cl_mem,cl_mem, const size_t *,const size_t *,const size_t *, size_t,size_t,size_t,size_t,cl_uint,const cl_event *,cl_event *);\n \ntypedef cl_int (CL_CALLBACK * t_clEnqueueReadImage)(cl_command_queue,cl_mem,cl_bool, const size_t *,const size_t *,size_t,size_t, void *,cl_uint,const cl_event *,cl_event *);\n\ntypedef cl_int (CL_CALLBACK * t_clEnqueueWriteImage)(cl_command_queue,cl_mem,cl_bool, const size_t *,const size_t *,size_t,size_t, const void *,cl_uint,const cl_event *,cl_event *);\n\ntypedef cl_int (CL_CALLBACK * t_clEnqueueFillImage)(cl_command_queue,cl_mem, const void *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);\n \ntypedef cl_int (CL_CALLBACK * t_clEnqueueCopyImage)(cl_command_queue,cl_mem,cl_mem, const size_t *,const size_t *,const size_t *, cl_uint,const cl_event *,cl_event *);\n\ntypedef cl_int (CL_CALLBACK * t_clEnqueueCopyImageToBuffer)(cl_command_queue,cl_mem,cl_mem, const size_t *,const size_t *, size_t,cl_uint,const cl_event *,cl_event *);\n\ntypedef cl_int (CL_CALLBACK * t_clEnqueueCopyBufferToImage)(cl_command_queue,cl_mem,cl_mem, size_t,const size_t *,const size_t *, cl_uint,const cl_event *,cl_event *);\n\ntypedef void * (CL_CALLBACK * t_clEnqueueMapBuffer)(cl_command_queue,cl_mem,cl_bool, cl_map_flags,size_t,size_t,cl_uint,const cl_event *,cl_event *,cl_int *);\n\ntypedef void * (CL_CALLBACK * t_clEnqueueMapImage)(cl_command_queue,cl_mem, cl_bool, cl_map_flags, const size_t *,const size_t *,size_t *,size_t *,cl_uint,const cl_event *,cl_event *,cl_int *);\n\ntypedef cl_int (CL_CALLBACK * t_clEnqueueUnmapMemObject)(cl_command_queue,cl_mem,void *,cl_uint,const cl_event *,cl_event *);\n\ntypedef cl_int (CL_CALLBACK * t_clEnqueueMigrateMemObjects)(cl_command_queue,cl_uint,const cl_mem *,cl_mem_migration_flags,cl_uint,const cl_event *,cl_event *);\n\ntypedef cl_int (CL_CALLBACK * 
t_clEnqueueNDRangeKernel)(cl_command_queue,cl_kernel,cl_uint,const size_t *,const size_t *,const size_t *,cl_uint,const cl_event *,cl_event *);\n\ntypedef cl_int (CL_CALLBACK * t_clEnqueueTask)(cl_command_queue,cl_kernel,cl_uint,const cl_event *,cl_event *);\n\ntypedef cl_int (CL_CALLBACK * t_clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK *)(void *), void *,size_t, cl_uint,const cl_mem *,const void **,cl_uint,const cl_event *,cl_event *);\n\ntypedef cl_int (CL_CALLBACK * t_clEnqueueMarkerWithWaitList)(cl_command_queue,cl_uint,const cl_event *,cl_event *);\n\ntypedef cl_int (CL_CALLBACK * t_clEnqueueBarrierWithWaitList)(cl_command_queue,cl_uint,const cl_event *,cl_event *);\n\ntypedef void * (CL_CALLBACK * t_clGetExtensionFunctionAddressForPlatform)(cl_platform_id, const char *);\n \ntypedef cl_mem (CL_CALLBACK * t_clCreateImage2D)(cl_context,cl_mem_flags,const cl_image_format *,size_t,size_t,size_t, void *, cl_int *);\n\ntypedef cl_mem (CL_CALLBACK * t_clCreateImage3D)(cl_context,cl_mem_flags,const cl_image_format *,size_t, size_t,size_t, size_t, size_t, void *,cl_int *);\n \ntypedef cl_int (CL_CALLBACK * t_clEnqueueMarker)(cl_command_queue, cl_event *);\n \ntypedef cl_int (CL_CALLBACK * t_clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event *);\ntypedef cl_int (CL_CALLBACK * t_clEnqueueBarrier)(cl_command_queue );\ntypedef cl_int (CL_CALLBACK * t_clUnloadCompiler)(void);\ntypedef void * (CL_CALLBACK * t_clGetExtensionFunctionAddress)(const char *);\n\ntypedef cl_mem (CL_CALLBACK * t_clCreatePipe)(cl_context,cl_mem_flags,cl_uint,cl_uint, void*, cl_int*);\n#endif\n"
  },
  {
    "path": "doc/.gitignore",
    "content": "*.html\n*.css\nedoc-info\nerlang.png\n"
  },
  {
    "path": "doc/overview.edoc",
    "content": "@author  Tony Rogvall <tony@rogvall.se>\n@version 1.0\n@title Erlang binding to OpenCL 1.0.\n@doc This is a binding to OpenCL. You can find\nthe PDF specification at http://www.khronos.org/opencl. This API will\nconform as close as possible to the C specification. In cases where the API \nhas special quirks or features, this will be noted.\n\n\n\n"
  },
  {
    "path": "ebin/.gitignore",
    "content": "*.beam\n*.app\n"
  },
  {
    "path": "examples/Makefile",
    "content": "\nMODULES = \\\n\tcl_basic \\\n\tcl_square_float \\\n\tcl_map \\\n\tcl_binary_test \\\n\tcl_bandwidth \\\n\tcl_mul \\\n\tcl_test \\\n\tcl_buffer \\\n\tcl_image \\\n\tcl_compile\n\n\nEBIN = .\nERLC = erlc\n\noverride ERLC_FLAGS = -W -pa ../../cl/ebin\n\nOBJS = $(MODULES:%=$(EBIN)/%.beam)\n\nTARGET_FILES = $(OBJS)\n\ndebug: ERLC_FLAGS += -Ddebug \n\nall: $(TARGET_FILES)\n\ndebug: all\n\nrelease: all\n\ndepend:\n\tedep -MM -o . $(ERLC_FLAGS) $(MODULES:%=%.erl) > depend.mk\n\ndialyze:\n\tdialyzer --src -o dia.out $(ERLC_FLAGS) -c $(MODULES:%=%.erl) \n\nclean:\n\trm -f $(OBJS)\n\n-include depend.mk\n\n$(EBIN)/%.beam:\t%.erl\n\t$(ERLC) $(ERLC_FLAGS) -o $(EBIN) $<\n"
  },
  {
    "path": "examples/cc_subdiv.cl",
    "content": "// -*- c++ -*-\n// @Author: <dgud@users.sf.net>\n// @copyright (C) 2010\n// @doc Catmull Clark subdivision\n\ntypedef struct {\n  int start;\n  int len;\n} FaceIndex;\n\ntypedef struct {\n  int start;\n  int len;\n  int vab;\n} VabIndex;\n\n\nvoid find_faces(int V0, int V1, FaceIndex Fi, __global int *Fs, \n\t\tint * F1, int *F2, int *CCW);\n\n__kernel void gen_faces(\n\t\t\t__global float4 *VsIn,\n\t\t\t__global int *FsIn,\n\t\t\t__global FaceIndex *FiIn,\n\t\t\t__global float4 *VsOut,\n\t\t\t__global int4 *FsOut,\n\t\t\t//__global int *locks,\n\t\t\tconst uint noFs,\n\t\t\tconst uint noVs\n\t\t\t)\n{\n  int i;\n  const int face_id = get_global_id(0);\n  if (face_id >= noFs)\n      return;\n  const FaceIndex fi = FiIn[face_id];\n  float4 center = {0.0,0.0,0.0,0.0};\n  \n  for(i=0; i < fi.len; i++) {\n      center.xyz += VsIn[FsIn[fi.start+i]].xyz;\n  }\n\n  center /= (float) i;\n  // Create new center vertex\n  const uint ov_id = noVs + face_id;\n  center.w = fi.len*4.0;  // Valance = faceVs and hard_edge count = 0 (Valance << 2)\n  VsOut[ov_id] = center;\n\n  center.w = 0.0;\n\n  for(i=0; i < fi.len; i++) {\n    int id = fi.start+i;\n    int v_id = FsIn[id];\n    // Add center to all face verts\n\n    //lock(v_id, locks); VsOut[v_id] += center; unlock(v_id, locks);\n    // locking doesn't work (for me) do it in a separate pass \n    // single threaded\n    \n    // Create Faces    \n    FsOut[id].x = v_id;\n    FsOut[id].y = -5; \n    FsOut[id].z = ov_id; \n    FsOut[id].w = -5;\n  }\n}\n\n__kernel void add_center(\n\t\t\t __global int *FsIn,\n\t\t\t __global FaceIndex *FiIn,\n\t\t\t __global float4 *VsOut,\n\t\t\t const uint noFs,\n\t\t\t const uint noVs\n\t\t\t )\n{\n  int i, face_id;\n  const int id = get_global_id(0);\n  if (id >= 1) return;  // Should only run by one \"thread\"\n\n  FaceIndex fi;\n  uint v_id, ov_id;\n  float4 center;\n  float4 zero = {0.0,0.0,0.0,0.0}; \n\n  for(face_id=0; face_id < noFs; face_id++) {\n      FaceIndex fi = 
FiIn[face_id];\n      ov_id = noVs + face_id;\n      center = VsOut[ov_id];\n      center.w = 0.0;\n      for(i=0; i < fi.len; i++) {\n\t  int v_id = FsIn[fi.start+i];\t  \n\t  float4 v = VsOut[v_id];\n\t  uint he_c = trunc(v.w);\n\t  he_c = he_c % 4;\n\t  if(he_c < 2) {\n\t      VsOut[v_id] = v + center;\n\t  } else if(he_c == 2) {\n\t      zero.w = v.w;\n\t      VsOut[v_id] = zero;\n\t  };\n      }\n  }\n}\n\n__kernel void gen_edges(__global float4 *VsIn,\n\t\t\t__global int *FsIn,\n\t\t\t__global int4 *EsIn,\n\t\t\t__global FaceIndex *FiIn,\n\t\t\t__global float4 *VsOut,\n\t\t\t__global int  *FsOut,\n\t\t\t__global int4 *EsOut,\n\t\t\tconst uint noFs,\n\t\t\tconst uint noVs,\n\t\t\tconst uint noEs)\n{\n  int i;\n  const int edge_id = get_global_id(0);\n  if (edge_id >= noEs)\n      return;\n  float4 center = {0.0,0.0,0.0,0.0};\n  int4 edge = EsIn[edge_id];\n  int hard = 0;\n  int ov_id = noVs+noFs+edge_id;\n  int hov_id = ov_id;\n  const int4 hole_edge = {-1,-1,-1,-1};\n\n  if(edge.y < 0) { // Indicates edge in hole\n      const int oe_id = edge_id*4;\n      EsOut[oe_id+0] = hole_edge;\n      EsOut[oe_id+1] = hole_edge;\n      EsOut[oe_id+2] = hole_edge;\n      EsOut[oe_id+3] = hole_edge;\n      return;\n  }\n\n  if(edge.x < 0) {  // Indicates hard edge\n      hard = 1;\n      edge.x = -1-edge.x;\n      hov_id = -1-ov_id;\n  }\n\n  center += VsIn[edge.x];  // V0\n  center += VsIn[edge.y];  // V1\n  if(hard) {\n      center /= 2.0;\n      center.w = 18.0; // Valance 4 and 2 hard edges ((4 << 2) | 2)\n  } else {\n      center += VsOut[noVs+edge.z]; // F1 Center\n      center += VsOut[noVs+edge.w]; // F2 Center\n      center /= 4.0;\n      center.w = 16.0; // Valance 4 and 0 hard edges ((4 << 2) | 2)\n  }\n  \n  // New vertex at edge center position\n  VsOut[ov_id] = center;\n  // Complete faces \n  int F11=-1,F12=-1,F21=-1,F22=-1, CCW1,CCW2;\n  const int oe_id = edge_id*4;\n  // Be sure to create faces with the correct order   \n  if(edge.z >= 0) { // Edge is not 
a border\n      FaceIndex IF1 = FiIn[edge.z];\n      find_faces(edge.x,edge.y,IF1,FsIn,&F11,&F12,&CCW1);\n      const int4 e0 = {ov_id,noVs+edge.z,F11,F12};\n      EsOut[oe_id+0] = e0;\n      if(CCW1) {\n\t  FsOut[F11*4+1] = ov_id;\n\t  FsOut[F12*4+3] = ov_id;\n      } else {\n\t  FsOut[F11*4+3] = ov_id;\n\t  FsOut[F12*4+1] = ov_id;\n      }\n  } else {\n      EsOut[oe_id+0] = hole_edge;\n  }\n  if(edge.w >= 0) { // Edge is not a border\n      FaceIndex IF2 = FiIn[edge.w];\n      find_faces(edge.x,edge.y,IF2,FsIn,&F21,&F22,&CCW2);\n      const int4 e1 = {ov_id,noVs+edge.w,F21,F22};\n      EsOut[oe_id+1] = e1;\n      if(CCW2) {\n\t  FsOut[F21*4+1] = ov_id;\n\t  FsOut[F22*4+3] = ov_id;\n      } else {\n\t  FsOut[F21*4+3] = ov_id;\n\t  FsOut[F22*4+1] = ov_id;\n      }\n  } else {\n      EsOut[oe_id+1] = hole_edge;\n  }\n  // Hmm init only when declaring var on nvidia? \n  const int4 e2 = {hov_id,edge.x,F11,F21};\n  EsOut[oe_id+2] = e2;\n  const int4 e3 = {hov_id,edge.y,F12,F22};\n  EsOut[oe_id+3] = e3;\n}\n\n\n__kernel void add_edge_verts(\n\t\t\t     __global float4 *VsIn,\n\t\t\t     __global float4 *VsOut,\n\t\t\t     __global int4 *EsIn,\n\t\t\t     const uint noEs\n\t\t\t     )\n{\n  const int thread = get_global_id(0);\n  if (thread >= 1) return;  // Should only run by one \"thread\"\n  \n  int id;\n  int4 edge;\n  float4 v0,v1;\n  int hard_v0=0, hard_v1=0;\n\n  for(id=0; id < noEs; id++) {\n      edge = EsIn[id];\n      if(edge.y >= 0) {\n\t  if(edge.x < 0) { // Hard edge\n\t      edge.x = -1-edge.x;\n\t      v0 = VsIn[edge.x];\n\t      v0.w = 0.0;\n\t      VsOut[edge.y] += v0;\n\t      v1 = VsIn[edge.y];\n\t      v1.w = 0.0;\n\t      VsOut[edge.x] += v1;\n\t  } else { // Only add soft edges if vertex have <2 hardedges\n\t      v0 = VsIn[edge.x];\n\t      v1 = VsIn[edge.y];\n\t      \n\t      hard_v0 = trunc(v0.w);\t  \n\t      hard_v1 = trunc(v1.w);\n\t      hard_v0 = hard_v0 % 4;\n\t      hard_v1 = hard_v1 % 4;\n\t      if(hard_v1 < 2) {\n\t\t  v0.w = 
0.0;\n\t\t  VsOut[edge.y] += v0;\n\t      }\n\t      if(hard_v0 < 2) {\n\t\t  v1.w = 0.0;\t  \n\t\t  VsOut[edge.x] += v1;\n\t      }\n\t  }\n      }    \n  }\n}\n\n__kernel void move_verts(\n\t\t\t __global float4 *VsIn,\n\t\t\t __global float4 *VsOut,\n\t\t\t const uint noInVs,\n\t\t\t const uint noOutVs\n\t\t\t )\n{\n  const int v_id = get_global_id(0);\n  if(v_id >= noOutVs)\n    return;\n  if(v_id >= noInVs) {  \n    // Copy buffer VsIn and VsOut should be equal\n    // after this pass\n    VsIn[v_id] = VsOut[v_id];\n    return;\n  }\n  float4 v_in  = VsIn[v_id];\n  float4 v_out = VsOut[v_id];\n  uint hc = trunc(v_in.w);  \n  uint vc = hc; \n  hc = hc % 4;\n  vc = vc / 4;\n  if(hc < 2) {\n    float a = 1.0/(vc*vc);\n    float b = (vc-2.0)/vc;\n    //  We started with Inpos remove it\n    v_out -= v_in;\n    v_out *= a;\n    v_out += (v_in * b);\n    v_out.w = v_in.w;\n    VsOut[v_id] = v_out;\n    VsIn[v_id] = v_out;\n  } else if(hc == 2) {\n    v_out += v_in * 6.0;\n    v_out *= 1.0f/8.0f;\n    v_out.w = v_in.w;\n    VsOut[v_id] = v_out;\n    VsIn[v_id] = v_out;\n  } else {\n    VsOut[v_id] = v_in;\n  }\n}\n\n__kernel void create_vab(\n\t\t\t __global float4 *VsIn,\n\t\t\t __global int4 *FsIn,\n\t\t\t __global VabIndex *FiIn,\n\t\t\t __global float *Vab,\n\t\t\t const int noFs\n\t\t\t )\n{\n    const int id = get_global_id(0);\n    if(id >= noFs)\n\treturn;\n    VabIndex fi = FiIn[id];\n    const int f_sz;\n    int4 face;\n    float4 v1, v2, v3, v4, normal;\n    int vab, out = fi.vab*24;\n    for(int i=0; i < fi.len; i++) {\n\tface = FsIn[fi.start+i];\n\tvab = out+i*24;\n\tv1 = VsIn[face.x];\n\tv2 = VsIn[face.y];\n\tv3 = VsIn[face.z];\n\tv4 = VsIn[face.w];\n\tnormal = normalize(cross(v3-v1,v4-v2));\n\t// Output V1\n\tVab[vab+0] = v1.x;  Vab[vab+3] = normal.x;\n\tVab[vab+1] = v1.y;  Vab[vab+4] = normal.y;\n\tVab[vab+2] = v1.z;  Vab[vab+5] = normal.z;   \n\t// Output V2        \n\tVab[vab+6] = v2.x;  Vab[vab+9]  = normal.x;\n\tVab[vab+7] = v2.y;  Vab[vab+10] = 
normal.y;\n\tVab[vab+8] = v2.z;  Vab[vab+11] = normal.z;\n\t// Output V3\n\tVab[vab+12] = v3.x; Vab[vab+15] = normal.x;\n\tVab[vab+13] = v3.y; Vab[vab+16] = normal.y;\n\tVab[vab+14] = v3.z; Vab[vab+17] = normal.z;\n\t// Output V4\n\tVab[vab+18] = v4.x; Vab[vab+21] = normal.x;\n\tVab[vab+19] = v4.y; Vab[vab+22] = normal.y;\n\tVab[vab+20] = v4.z; Vab[vab+23] = normal.z;    \n    }\n}\n__kernel void collect_face_info(\n\t\t\t\t__global float4 *Vs,\n\t\t\t\t__global int4 *Fs,\n\t\t\t\t__global float *Vab,\n\t\t\t\tconst uint noFs\n\t\t\t\t)\n{\n    const int id = get_global_id(0);\n    if(id >= noFs) \n\treturn;\n    const int f_sz = 4*6;\n    int4 face = Fs[id];\n    float4 v1, v2, v3, v4, normal;\n    v1 = Vs[face.x];\n    v2 = Vs[face.y];\n    v3 = Vs[face.z];\n    v4 = Vs[face.w];\n    normal = normalize(cross(v3-v1,v4-v2));\n    // Output V1    \n    Vab[id*f_sz+0] = v1.x;  Vab[id*f_sz+3] = normal.x;\n    Vab[id*f_sz+1] = v1.y;  Vab[id*f_sz+4] = normal.y;\n    Vab[id*f_sz+2] = v1.z;  Vab[id*f_sz+5] = normal.z;   \n    // Output V2        \n    Vab[id*f_sz+6] = v2.x;  Vab[id*f_sz+9]  = normal.x;\n    Vab[id*f_sz+7] = v2.y;  Vab[id*f_sz+10] = normal.y;\n    Vab[id*f_sz+8] = v2.z;  Vab[id*f_sz+11] = normal.z;\n    // Output V3\n    Vab[id*f_sz+12] = v3.x; Vab[id*f_sz+15] = normal.x;\n    Vab[id*f_sz+13] = v3.y; Vab[id*f_sz+16] = normal.y;\n    Vab[id*f_sz+14] = v3.z; Vab[id*f_sz+17] = normal.z;\n    // Output V4\n    Vab[id*f_sz+18] = v4.x; Vab[id*f_sz+21] = normal.x;\n    Vab[id*f_sz+19] = v4.y; Vab[id*f_sz+22] = normal.y;\n    Vab[id*f_sz+20] = v4.z; Vab[id*f_sz+23] = normal.z;    \n}\n\n\n// Helpers\n// Find the order of faces so that vertices for a face\n// comes in the ccw order\nvoid find_faces(int V0, int V1, FaceIndex Fi, __global int *Fs, \n\t\tint * F1, int *F2, int *CCW)\n{\n    int fva,fvb;\n    fva = Fs[Fi.start];\n    for(int i=Fi.start; i < (Fi.start+Fi.len); i++) {\n\tfvb = Fs[i+1];\n\tif(V0==fva) {\n\t    *F1 = i;\n\t    if(V1==fvb) {\n\t\t*F2 = 
i+1;\n\t\t*CCW = 1;\n\t    } else {\n\t\t*F2 = i+Fi.len-1;\n\t\t*CCW = 0;\n\t    }\n\t    return;\n\t}\n\tif(V1==fva) {\n\t    *F2 = i;\n\t    if(V0==fvb) {\n\t\t*F1 = i+1;\n\t\t*CCW = 0;\n\t    } else {\n\t\t*F1 = i+Fi.len-1;\n\t\t*CCW = 1;\n\t    }\n\t    return;\n\t}\n\tfva = fvb;\n    };\n    *F1 = -1;\n    *F2 = -2;\n    *CCW = 1;\n}\n\n// void lock(int v_id, __global int *locks) {\n//   int pos = v_id % LOCK_SZ;\n//   __global int * semaphor = &(locks[pos]);\n//   int occupied = atom_xchg(semaphor, 1);\n//   while(occupied > 0) {\n//     occupied = atom_xchg(semaphor, 1);\n//   }\n// }\n\n// void unlock(int v_id, __global int *locks) {\n//   int pos = v_id % LOCK_SZ;\n//   __global int * semaphor = &(locks[pos]);\n//   atom_xchg(semaphor, 0);\n// }\n"
  },
  {
    "path": "examples/cc_subdiv.erl",
    "content": "%%%-------------------------------------------------------------------\n%%% File    : cc_subdiv.erl\n%%% Author  : Dan Gudmundsson\n%%% Description : Catmull Clark subdivision in OpenCL \n%%%               The example is the same as I will use in wings3D\n%%% Created : 8 Feb 2011\n%%%-------------------------------------------------------------------\n-module(cc_subdiv).\n-compile(export_all).\n\n-include_lib(\"wx/include/wx.hrl\"). \n-include_lib(\"wx/include/gl.hrl\"). \n-include_lib(\"cl/include/cl.hrl\").\n\n-record(cli,      {context, kernels, q, cl, device, \n\t\t   %% CL temp buffers and respective sizes\n\t\t   vab, vab_sz=0, fl, fl_sz=0, fi, fi_sz=0}).\n-record(cl_mem,   {v, v_no, f, fs_no, e, e_no, fi, fi0}).\n-record(kernel,   {name, id, wg}).\n\n-record(base, {v,    %% array of {x,y,z, {Valance, HardEdges}} nv\n\t       f,    %% array of [v0,v1..,vn]   nf\n\t       e,    %% array of v0,v1,f1,f2    ne\n\t       level %% Subdiv levels\n\t      }).\n-define(I32,  32/signed-native).\n\n-record(state, {f,    % wxFrame\n\t\tcl,   % CL record above\n\t\tgl,   % wxGLCanvas\n\t\torig, % Orig Mesh \n\t\tsd    % Sub Mesh\n\t       }).\n\nstart() ->\n    WX = wx:new(),\n    Frame   = wxFrame:new(WX,1,\"OpenCL does CC subdivision\",[{size, {800,600}}]),\n    ok = wxFrame:connect(Frame, close_window),\n    wxFrame:createStatusBar(Frame,[]),\n    setup_menus(Frame),\n    GLAttrs = [?WX_GL_RGBA,?WX_GL_DOUBLEBUFFER,0],\n    Canvas = wxGLCanvas:new(Frame, [{attribList, GLAttrs},{size, {800,600}}]),\n    Self = self(),\n    Redraw = fun(_Ev,_) ->   \n\t\t     DC = wxPaintDC:new(Canvas),\n \t\t     Self ! 
repaint,\n \t\t     wxPaintDC:destroy(DC)\n \t     end,\n    wxFrame:connect(Canvas, paint, [{callback, Redraw}]),\n    wxWindow:show(Frame),    %% Must show to initialize context.\n    wxGLCanvas:setCurrent(Canvas), %% Init context\n    Base = #base{v=verts(), f=faces(), e=edges(), level=4},\n    initGL(Canvas),\n    CL0 = initCL(),\n    {In, Out, CL} = cl_allocate(Base, CL0),\n    Wait0 = cl_write_input(Base, In, Out, CL),\n    OrigMesh = setup_gl_buff(gen_va(size(faces()) div 16, In, Wait0, CL)),\n    Wait1 = cl_write_input(Base, In, Out, CL),\n    SDMesh   = setup_gl_buff(subdiv(4, In, Out, Wait1, CL)),\n    gl:clear(?GL_COLOR_BUFFER_BIT bor ?GL_DEPTH_BUFFER_BIT),\n    draw_buff(OrigMesh),\n    wxGLCanvas:swapBuffers(Canvas),\n    R = loop(0, #state{f=Frame, cl=CL, gl=Canvas, orig=OrigMesh, sd=SDMesh}),\n    wx:destroy(),\n    R.\n\nloop(R, S = #state{f=Frame, cl=CL}) ->\n    receive \n\t#wx{event=#wxClose{}} ->\n\t    quit;\n\t#wx{id=?wxID_EXIT} ->\n\t    quit;\n\t#wx{id=?wxID_ABOUT} ->\n\t    about_box(Frame, CL),\n\t    loop(R, S);\n\t_Msg ->\n\t    draw(R, S),\n\t    loop(R, S)\n    after 10 ->\n\t    draw(R, S),\n\t    _ = wxWindow:getSize(Frame),\n\t    loop(R+1, S)\n    end.\n\ndraw(R, #state{gl=Canvas, orig=OrigMesh, sd=SDMesh}) ->\n    gl:clear(?GL_COLOR_BUFFER_BIT bor ?GL_DEPTH_BUFFER_BIT),\n    gl:matrixMode(?GL_MODELVIEW),\n    gl:loadIdentity(),  \n    glu:lookAt(15,15,15, 0,0,0, 0,1,0),    \n    drawBox(R),\n    gl:disable(?GL_BLEND),\n    gl:color4f(1.0,1.0,0.0,1.0),\n    draw_buff(SDMesh),\n    gl:enable(?GL_BLEND),\n    gl:color4f(0.5,0.5,0.5,0.5),\n    draw_buff(OrigMesh),\n    wxGLCanvas:swapBuffers(Canvas).\n\ngen_va(NoFs, #cl_mem{v=Vs, f=Fs}, Wait, CL=#cli{q=Q, vab=Vab}) ->\n    WVab = cl_apply(collect_face_info,[Vs,Fs,Vab,NoFs], NoFs, Wait,CL),\n    {ok, WData} = cl:enqueue_read_buffer(Q,Vab,0,NoFs*4*6*4,[WVab]),\n    {ok, Bin} = cl:wait(WData),\n    Bin.\n\nsetup_gl_buff(Data) ->\n    [Buff] = gl:genBuffers(1),\n    
gl:bindBuffer(?GL_ARRAY_BUFFER,Buff),\n    gl:bufferData(?GL_ARRAY_BUFFER, size(Data), Data, ?GL_STATIC_DRAW),\n    <<_:3/unit:32,Ns/bytes>> = Data,\n    {Buff, Ns, size(Data) div (6*4)}.\n\ndraw_buff(Data = {Buff,_Ns,NoVs}) ->\n    gl:bindBuffer(?GL_ARRAY_BUFFER,Buff),\n    gl:vertexPointer(3, ?GL_FLOAT, 6*4, 0),\n    gl:normalPointer(?GL_FLOAT, 6*4, 3*4),\n    gl:enableClientState(?GL_VERTEX_ARRAY),\n    gl:enableClientState(?GL_NORMAL_ARRAY),\n    gl:drawArrays(?GL_QUADS, 0, NoVs),\n    Data.\n\nsubdiv(N, In, Out, Wait0, CL) ->\n    {Res, Wait} = subdiv_1(N, In, Out, CL, Wait0),\n    gen_va(Res#cl_mem.fs_no, Res, Wait, CL).\n\nsubdiv_1(N,\n\t  In = #cl_mem{v=VsIn, f=FsIn, fi=FiIn, e=EsIn,\n\t\t       v_no=NoVs, fs_no=NoFs, e_no=NoEs},\n\t  Out= #cl_mem{v=VsOut, f=FsOut, e=EsOut, fi=FiOut,\n\t\t       v_no=NoVs1,fs_no=NoFs1, e_no=NoEs1},\n\t  CL, Wait0)\n  when N > 0 ->\n    Args1 = [VsIn, FsIn, FiIn, VsOut, FsOut, NoFs, NoVs],\n    W0 = cl_apply(gen_faces, Args1, NoFs, Wait0, CL),\n    [cl:release_event(Ev) || Ev <- Wait0],\n    Args2 = [FsIn, FiIn, VsOut, NoFs, NoVs],\n    W1 = cl_apply(add_center, Args2, 1, [W0], CL),\n\n    Args3 = [VsIn, FsIn, EsIn, FiIn, \n\t     VsOut, FsOut, EsOut, \n\t     NoFs, NoVs, NoEs],\n    W2 = cl_apply(gen_edges, Args3, NoEs, [W1], CL),\n    Args4 = [VsIn, VsOut, EsIn, NoEs],\n    W3 = cl_apply(add_edge_verts, Args4, 1, [W2], CL),\n\n    Args5 = [VsIn,VsOut,NoVs,NoVs1],\n    Wait = cl_apply(move_verts, Args5, NoVs1, [W3], CL),\n    %% cl_vs(\"cvs_out3\", N, VsOut, NoVs1, CL, Wait),\n    [cl:release_event(Ev) || Ev <- [W0,W1,W2,W3]],\n    subdiv_1(N-1, Out, \n\t      In#cl_mem{fi=FiOut, v_no=NoVs1+NoFs1+NoEs1,\n\t\t\tfs_no=NoFs1*4, e_no=NoEs1*4},\n\t      CL, [Wait]);\nsubdiv_1(_C, ResultBuffs, _OutBuffs, _, Wait) ->\n    {ResultBuffs,Wait}.\n\ninitCL() ->\n    Opts = [],\n    Prefered = proplists:get_value(cl_type, Opts, cpu),\n    Other = [gpu,cpu] -- [Prefered],\n    CL = case clu:setup(Prefered) of \n\t     {error, _} -> 
\n\t\t case clu:setup(Other) of\n\t\t     {error, R} -> \n\t\t\t exit({no_opencl_device, R});\n\t\t     Cpu -> Cpu\n\t\t end;\n\t     Gpu ->\n\t\t Gpu\n\t end,\n    [Device|_] = CL#cl.devices,\n    {ok,Queue} = cl:create_queue(CL#cl.context,Device,[]),\n    %%% Compile\n    Dir = filename:join(code:lib_dir(cl),\"examples\"),\n    Bin = case file:read_file(filename:join([Dir, \"cc_subdiv.cl\"])) of\n\t      {ok, B} -> B;\n\t      {error, _} ->\n\t\t  io:format(\"OpenCL code not found run: erl -pa ABS_PATH/cl/ebin~n\", []),\n\t\t  exit({file_not_found, Dir})\n\t  end,\n    case clu:build_source(CL, Bin) of\n\t{error, {Err={error,build_program_failure}, _}} ->\n\t    %% io:format(\"~s\", [Str]),\n\t    exit(Err);\n\t{ok, Program} -> \n\t    {ok, MaxWGS} = cl:get_device_info(Device, max_work_group_size),\n\t    {ok, Kernels0} = cl:create_kernels_in_program(Program),\n\t    Kernels = [kernel_info(K,Device, MaxWGS) || K <- Kernels0],\n\t    %% io:format(\"Kernels ~p~n\",[Kernels]),\n\t    CLI = #cli{context=CL#cl.context,kernels=Kernels,\n\t\t       q=Queue, device=Device, cl=CL},\n\t    cl:release_program(Program),\n\t    CLI\n    end.\n\nkernel_info(K,Device, MaxWGS) ->\n    {ok, WG} = cl:get_kernel_workgroup_info(K, Device, work_group_size),\n    {ok, Name} = cl:get_kernel_info(K, function_name),\n    #kernel{name=list_to_atom(Name), wg=min(WG,MaxWGS), id=K}.\n\ncl_apply(Name, Args, No, Wait, #cli{q=Q, kernels=Ks}) ->\n    #kernel{id=K, wg=WG0} = lists:keyfind(Name, 2, Ks),\n    try clu:apply_kernel_args(K, Args) of\n\tok -> ok\n    catch error:Reason ->\n\t    io:format(\"Bad args ~p: ~p~n\",[Name, Args]),\n\t    erlang:raise(error,Reason, erlang:get_stacktrace())\n    end,\n    {GWG,WG} = if  No > WG0  -> \n\t\t       {(1+(No div WG0))*WG0, WG0};\n\t\t   true -> {No,No}\n\t       end,\n    {ok, Event} = cl:enqueue_nd_range_kernel(Q,K,[GWG],[WG],Wait),\n    Event.\n\n%% OpenCL Memory allocation\ncl_allocate(Base, CL0=#cli{context=Ctxt}) ->\n    
{NoFs,NoEs,NoVs,NoFs1,MaxFs,MaxEs,MaxVs} = verify_size(Base, CL0),\n    {ok,FsIn}  = cl:create_buffer(Ctxt, [], MaxFs*16),\n    {ok,EsIn}  = cl:create_buffer(Ctxt, [], MaxEs*16),\n    {ok,VsIn}  = cl:create_buffer(Ctxt, [], MaxVs*16),\n    \n    {ok,FsOut} = cl:create_buffer(Ctxt, [], MaxFs*16),\n    {ok,EsOut} = cl:create_buffer(Ctxt, [], MaxEs*16),\n    {ok,VsOut} = cl:create_buffer(Ctxt, [], MaxVs*16),\n   \n    CL = #cli{fi=FiOut} = check_temp_buffs(CL0, MaxFs),\n    FiIn = FiOut,\n    {#cl_mem{v=VsIn, f=FsIn, e=EsIn, fi=FiIn, fi0=FiIn,\n\t     v_no=NoVs, fs_no=NoFs, e_no=NoEs},\n     #cl_mem{v=VsOut, f=FsOut, e=EsOut, fi=FiOut, fi0=FiIn,\n\t     v_no=NoVs+NoFs+NoEs, fs_no=NoFs1, e_no=NoEs*4},\n     CL}.\n\ncl_write_input(#base{f=Fs,e=Es,v=Vs}, \n\t       #cl_mem{v=VsIn,f=FsIn,e=EsIn}, #cl_mem{v=VsOut}, \n\t       #cli{q=Q}) ->\n    {ok, W1} = cl:enqueue_write_buffer(Q,  VsIn, 0, byte_size(Vs), Vs, []),\n    {ok, W2} = cl:enqueue_write_buffer(Q, VsOut, 0, byte_size(Vs), Vs, []),\n    {ok, W3} = cl:enqueue_write_buffer(Q,  FsIn, 0, byte_size(Fs), Fs, []),\n    {ok, W4} = cl:enqueue_write_buffer(Q,  EsIn, 0, byte_size(Es), Es, []),\n    [W1,W2,W3,W4].\n    \ncl_release(#cl_mem{v=Vs,f=Fs,e=Es, fi0=Fi0}, All) ->\n    Vs /= undefined andalso cl:release_mem_object(Vs),\n    Fs /= undefined andalso cl:release_mem_object(Fs),\n    Es /= undefined andalso cl:release_mem_object(Es),\n    All andalso cl:release_mem_object(Fi0).\n\ncheck_temp_buffs(CL=#cli{context=Ctxt, \n\t\t\t vab=Vab0, vab_sz=VabSz0, \n\t\t\t fl=FL0, fl_sz=FLSz0, \n\t\t\t fi=Fi0, fi_sz=FiSz0}, MaxFs0) ->\n    MaxFs = trunc(MaxFs0*1.5),  \n    %% Overallocate so we don't need new buffers all the time\n    GenFi = fun() -> \n\t\t    << <<(C*4):?I32, 4:?I32>> || \n\t\t\tC <- lists:seq(0, MaxFs-1) >> \n\t    end,\n    {Vab,VabSz} = check_temp(Vab0,VabSz0,MaxFs*(3+3)*4*4,\n\t\t\t     Ctxt,[write_only],none),\n    {FL,FLSz} = check_temp(FL0,FLSz0,MaxFs*3*4,\n\t\t\t   Ctxt,[read_only],none),\n    {Fi,FiSz} = 
check_temp(Fi0,FiSz0,MaxFs*2*4,\n\t\t\t   Ctxt,[read_only],GenFi),\n    CLI = CL#cli{vab=Vab, vab_sz=VabSz, \n\t\t fl=FL, fl_sz=FLSz, \n\t\t fi=Fi, fi_sz=FiSz},\n    put({?MODULE, cl}, CLI),\n    CLI.\n\ncheck_temp(Buff, Current, Req, _, _, _) \n  when Current >= Req ->\n    {Buff, Current};\ncheck_temp(undefined, _, Req, Ctxt, Opt, none) ->\n    {ok, Buff} = cl:create_buffer(Ctxt, Opt, Req),\n    {Buff, Req};\ncheck_temp(undefined, _, Req, Ctxt, Opt, Fun) ->\n    {ok,Buff} = cl:create_buffer(Ctxt, Opt, Req, Fun()),\n    {Buff, Req};\ncheck_temp(Buff0, _, Req, Ctxt, Opt, Data) ->\n    cl:release_mem_object(Buff0),\n    check_temp(undefined, 0, Req, Ctxt, Opt, Data).\n\nverify_size(#base{f=Fs, e=Es, v=Vs, level=N}, #cli{device=Device}) ->\n    NoFs = size(Fs) div 16,\n    NoEs = size(Es) div 16,\n    NoVs = size(Vs) div 16,\n    \n    {ok, DevTotal} = cl:get_device_info(Device, max_mem_alloc_size),\n    Res = verify_size_1(N-1, N, NoFs*4, NoEs*4, NoVs+NoEs+NoFs, DevTotal),\n    case Res of\n\tfalse -> \n\t    io:format(\"Can not subdivide, out of memory~n\",[]),\n\t    exit(out_of_memory);\n\t{MaxFs, MaxEs, MaxVs} ->\n\t    {NoFs, NoEs, NoVs, NoFs*4, MaxFs, MaxEs, MaxVs}\n    end.\n\t\nverify_size_1(N, No, Fs, Es, Vs, CardMax) ->\n    VertexSz = (3+3)*4*4,\n    Total = Fs*VertexSz+2*(Fs*16+Es*16+Vs*16),\n    case Total < CardMax of\n\ttrue when N == 0 ->\n\t    {Fs,Es,Vs};\n\ttrue -> \n\t    case verify_size_1(N-1, No, Fs*4, Es*4, Vs+Fs+Es, CardMax) of\n\t\tfalse -> \n\t\t    io:format(\"Out of memory, does not meet the number of sub-division\"\n\t\t\t      \"levels ~p(~p)~n\",[No-N,No]),\n\t\t    {Fs,Es,Vs};\n\t\tOther -> Other\n\t    end;\n\tfalse ->\n\t    false\n    end.\n\n%%%%% OpenGL\n\ninitGL(Canvas) ->\n    {W,H} = wxWindow:getClientSize(Canvas),\n    io:format(\"Size ~p ~n\",[{W,H}]),\n    gl:viewport(0,0,W,H),\n\n    gl:matrixMode(?GL_PROJECTION),\n    gl:loadIdentity(),\n    gl:ortho( -10.0, 10.0, -10.0*H/W, 10.0*H/W, -100.0, 100.0),\n    \n    
gl:enable(?GL_DEPTH_TEST),\n    gl:depthFunc(?GL_LESS),\n    gl:clearColor(0.8,0.8,0.8,1.0),\n    gl:shadeModel(?GL_SMOOTH),\n    gl:disable(?GL_CULL_FACE),\n    %% Nowadays you should really use a shader to do the lighting but I'm lazy.\n    gl:enable(?GL_COLOR_MATERIAL),\n    gl:enable(?GL_LIGHTING),\n    gl:lightfv(?GL_LIGHT0, ?GL_DIFFUSE,  {1,1,1,1}), \n    gl:lightfv(?GL_LIGHT0, ?GL_SPECULAR, {0.5,0.5,0.5,1}),\n    gl:lightfv(?GL_LIGHT0, ?GL_POSITION, {0.71,0.71,0.0,0.0}),\n    gl:enable(?GL_LIGHT0),\n    gl:enable(?GL_BLEND),\n    gl:blendFunc(?GL_SRC_ALPHA, ?GL_ONE_MINUS_SRC_ALPHA),\n    ok.\n\n-define(VS, {{ 0.5,  0.5, -0.5},  %1\n\t     { 0.5, -0.5, -0.5},  %2\n\t     {-0.5, -0.5, -0.5},   \n\t     {-0.5,  0.5, -0.5},  %4\n\t     {-0.5,  0.5,  0.5},\n\t     { 0.5,  0.5,  0.5},  %6\n\t     { 0.5, -0.5,  0.5}, \n\t     {-0.5, -0.5,  0.5}}).%8\n\n-define(FS, \n\t%% Faces    Normal   \n\t[{{1,2,3,4},{0,0,-1} },   % \n\t {{3,8,5,4},{-1,0,0}},   %\n\t {{1,6,7,2},{1,0,0} },   %\n\t {{6,5,8,7},{0,0,1} },   %\n\t {{6,1,4,5},{0,1,0} },   %\n\t {{7,8,3,2},{0,-1,0}}]).\n\ndrawBox(Deg) ->\n    gl:matrixMode(?GL_MODELVIEW),\n    gl:loadIdentity(),\n    gl:rotatef(Deg, 0.0, 1.0, 0.3),\n    gl:rotatef(20, 1.0, 0.0, 1.0),\n    gl:'begin'(?GL_QUADS),    \n    lists:foreach(fun(Face) -> drawFace(Face,?VS) end, ?FS),\n    gl:'end'().\n\ndrawFace({{V1,V2,V3,V4},N={N1,N2,N3}}, Cube) ->\n    gl:normal3fv(N),\n    gl:color3f(abs(N1),abs(N2),abs(N3)),\n    gl:texCoord2f(0.0, 1.0), gl:vertex3fv(element(V1, Cube)),\n    gl:texCoord2f(0.0, 0.0), gl:vertex3fv(element(V2, Cube)),\n    gl:texCoord2f(1.0, 0.0), gl:vertex3fv(element(V3, Cube)),\n    gl:texCoord2f(1.0, 1.0), gl:vertex3fv(element(V4, Cube)).\n\nsetup_menus(Frame) ->\n    MenuBar = wxMenuBar:new(),\n    Menu    = wxMenu:new([]),\n    true = wxMenuBar:append(MenuBar, Menu, \"File\"),\n    wxMenu:append(Menu, ?wxID_ABOUT,\"About\"),\n    wxMenu:append(Menu, ?wxID_EXIT, \"Quit\"),\n    \n    ok = wxFrame:connect(Frame, 
command_menu_selected), \n    ok = wxFrame:setMenuBar(Frame,MenuBar).\n\nabout_box(Frame, #cli{device=Device}) ->\n    Env = wx:get_env(),\n    OsInfo = [wx_misc:getOsDescription(),gl:getString(?GL_VENDOR),\n\t      gl:getString(?GL_RENDERER),gl:getString(?GL_VERSION)],\n\n    DeviceInfo = [{Type, cl:get_device_info(Device, Type)} \n\t\t  || Type <- [name, vendor, version]],\n\n    spawn(fun() ->\n\t\t  wx:set_env(Env),\n\t\t  Str = \"An OpenGL demo showing how to combine \"\n\t\t      \" OpenCL and OpenGL, Catmull-Clark subdivision is done in OpenCL\\n\"\n\t\t      \" The transparent \\\"box\\\" is the original mesh and the subdivided\"\n\t\t      \" yellow pipes is the result of the subdivision\\n\\n\",\n\t\t  \n\t\t  Info = io_lib:format(\"Os:         ~s~n~nGL Vendor:     ~s~n\"\n\t\t\t\t       \"GL Renderer:  ~s~nGL Version:    ~s~n\",\n\t\t\t\t       OsInfo), \n\t\t  CLInfo = [io_lib:format(\"~-25.w   ~s~n\",[Type,I]) ||\n\t\t\t       {Type, {ok, I}} <- DeviceInfo],\n\n\t\t  MD = wxMessageDialog:new(Frame, [Str, Info, \"\\nOpenCL info:\\n\",CLInfo], \n\t\t\t\t\t   [{style, ?wxOK}, \n\t\t\t\t\t    {caption, \"Opengl Example\"}]),\n\t\t  wxDialog:showModal(MD),\n\t\t  wxDialog:destroy(MD)\n\t  end),\n    ok.\n\nfaces() ->\n    <<1,0,0,0,3,0,0,0,2,0,0,0,0,0,0,0,4,0,0,0,5,0,0,0,3,0,0,0,1,0,0,0,6,0,0,0,\n      7,0,0,0,5,0,0,0,4,0,0,0,0,0,0,0,2,0,0,0,7,0,0,0,6,0,0,0,4,0,0,0,9,0,0,0,\n      8,0,0,0,6,0,0,0,10,0,0,0,11,0,0,0,9,0,0,0,4,0,0,0,12,0,0,0,13,0,0,0,11,\n      0,0,0,10,0,0,0,6,0,0,0,8,0,0,0,13,0,0,0,12,0,0,0,1,0,0,0,15,0,0,0,14,0,\n      0,0,4,0,0,0,16,0,0,0,17,0,0,0,15,0,0,0,1,0,0,0,10,0,0,0,18,0,0,0,17,0,0,\n      0,16,0,0,0,4,0,0,0,14,0,0,0,18,0,0,0,10,0,0,0,6,0,0,0,20,0,0,0,19,0,0,0,\n      0,0,0,0,12,0,0,0,21,0,0,0,20,0,0,0,6,0,0,0,22,0,0,0,23,0,0,0,21,0,0,0,12,\n      0,0,0,0,0,0,0,19,0,0,0,23,0,0,0,22,0,0,0,22,0,0,0,25,0,0,0,24,0,0,0,16,0,\n      0,0,12,0,0,0,26,0,0,0,25,0,0,0,22,0,0,0,10,0,0,0,27,0,0,0,26,0,0,0,12,0,\n      
0,0,16,0,0,0,24,0,0,0,27,0,0,0,10,0,0,0,0,0,0,0,29,0,0,0,28,0,0,0,1,0,0,\n      0,22,0,0,0,30,0,0,0,29,0,0,0,0,0,0,0,16,0,0,0,31,0,0,0,30,0,0,0,22,0,0,\n      0,1,0,0,0,28,0,0,0,31,0,0,0,16,0,0,0,29,0,0,0,33,0,0,0,32,0,0,0,28,0,0,\n      0,30,0,0,0,34,0,0,0,33,0,0,0,29,0,0,0,31,0,0,0,35,0,0,0,34,0,0,0,30,0,0,\n      0,28,0,0,0,32,0,0,0,35,0,0,0,31,0,0,0,25,0,0,0,37,0,0,0,36,0,0,0,24,0,0,\n      0,26,0,0,0,38,0,0,0,37,0,0,0,25,0,0,0,27,0,0,0,39,0,0,0,38,0,0,0,26,0,0,\n      0,24,0,0,0,36,0,0,0,39,0,0,0,27,0,0,0,20,0,0,0,41,0,0,0,40,0,0,0,19,0,0,\n      0,21,0,0,0,42,0,0,0,41,0,0,0,20,0,0,0,23,0,0,0,43,0,0,0,42,0,0,0,21,0,0,\n      0,19,0,0,0,40,0,0,0,43,0,0,0,23,0,0,0,15,0,0,0,45,0,0,0,44,0,0,0,14,0,0,\n      0,17,0,0,0,46,0,0,0,45,0,0,0,15,0,0,0,18,0,0,0,47,0,0,0,46,0,0,0,17,0,0,\n      0,14,0,0,0,44,0,0,0,47,0,0,0,18,0,0,0,9,0,0,0,49,0,0,0,48,0,0,0,8,0,0,0,\n      11,0,0,0,50,0,0,0,49,0,0,0,9,0,0,0,13,0,0,0,51,0,0,0,50,0,0,0,11,0,0,0,\n      8,0,0,0,48,0,0,0,51,0,0,0,13,0,0,0,3,0,0,0,53,0,0,0,52,0,0,0,2,0,0,0,5,\n      0,0,0,54,0,0,0,53,0,0,0,3,0,0,0,7,0,0,0,55,0,0,0,54,0,0,0,5,0,0,0,2,0,\n      0,0,52,0,0,0,55,0,0,0,7,0,0,0>>.\n\nedges() ->\n    <<1,0,0,0,0,0,0,0,20,0,0,0,0,0,0,0,1,0,0,0,4,0,0,0,1,0,0,0,8,0,0,0,1,0,0,\n      0,16,0,0,0,9,0,0,0,23,0,0,0,0,0,0,0,6,0,0,0,12,0,0,0,3,0,0,0,0,0,0,0,22,\n      0,0,0,21,0,0,0,15,0,0,0,6,0,0,0,4,0,0,0,4,0,0,0,2,0,0,0,6,0,0,0,12,0,0,\n      0,13,0,0,0,7,0,0,0,4,0,0,0,10,0,0,0,5,0,0,0,11,0,0,0,16,0,0,0,22,0,0,0,\n      16,0,0,0,22,0,0,0,16,0,0,0,10,0,0,0,10,0,0,0,19,0,0,0,22,0,0,0,12,0,0,0,\n      17,0,0,0,14,0,0,0,12,0,0,0,10,0,0,0,18,0,0,0,6,0,0,0,3,0,0,0,2,0,0,0,0,\n      0,0,0,44,0,0,0,0,0,0,0,2,0,0,0,3,0,0,0,0,0,0,0,5,0,0,0,3,0,0,0,1,0,0,0,\n      45,0,0,0,1,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,7,0,0,0,5,0,0,0,2,0,0,0,46,0,0,\n      0,4,0,0,0,5,0,0,0,1,0,0,0,2,0,0,0,2,0,0,0,7,0,0,0,3,0,0,0,47,0,0,0,6,0,0,\n      0,7,0,0,0,2,0,0,0,3,0,0,0,9,0,0,0,8,0,0,0,4,0,0,0,40,0,0,0,6,0,0,0,8,0,0,\n      
0,7,0,0,0,4,0,0,0,11,0,0,0,9,0,0,0,5,0,0,0,41,0,0,0,4,0,0,0,9,0,0,0,4,0,\n      0,0,5,0,0,0,13,0,0,0,11,0,0,0,6,0,0,0,42,0,0,0,10,0,0,0,11,0,0,0,5,0,0,0,\n      6,0,0,0,8,0,0,0,13,0,0,0,7,0,0,0,43,0,0,0,12,0,0,0,13,0,0,0,6,0,0,0,7,0,\n      0,0,15,0,0,0,14,0,0,0,8,0,0,0,36,0,0,0,4,0,0,0,14,0,0,0,11,0,0,0,8,0,0,\n      0,17,0,0,0,15,0,0,0,9,0,0,0,37,0,0,0,1,0,0,0,15,0,0,0,8,0,0,0,9,0,0,0,\n      18,0,0,0,17,0,0,0,10,0,0,0,38,0,0,0,16,0,0,0,17,0,0,0,9,0,0,0,10,0,0,0,\n      14,0,0,0,18,0,0,0,11,0,0,0,39,0,0,0,10,0,0,0,18,0,0,0,10,0,0,0,11,0,0,0,\n      20,0,0,0,19,0,0,0,12,0,0,0,32,0,0,0,0,0,0,0,19,0,0,0,15,0,0,0,12,0,0,0,\n      21,0,0,0,20,0,0,0,13,0,0,0,33,0,0,0,6,0,0,0,20,0,0,0,12,0,0,0,13,0,0,0,\n      23,0,0,0,21,0,0,0,14,0,0,0,34,0,0,0,12,0,0,0,21,0,0,0,13,0,0,0,14,0,0,0,\n      19,0,0,0,23,0,0,0,15,0,0,0,35,0,0,0,22,0,0,0,23,0,0,0,14,0,0,0,15,0,0,0,\n      25,0,0,0,24,0,0,0,16,0,0,0,28,0,0,0,16,0,0,0,24,0,0,0,19,0,0,0,16,0,0,0,\n      26,0,0,0,25,0,0,0,17,0,0,0,29,0,0,0,22,0,0,0,25,0,0,0,16,0,0,0,17,0,0,0,\n      27,0,0,0,26,0,0,0,18,0,0,0,30,0,0,0,12,0,0,0,26,0,0,0,17,0,0,0,18,0,0,0,\n      24,0,0,0,27,0,0,0,19,0,0,0,31,0,0,0,10,0,0,0,27,0,0,0,18,0,0,0,19,0,0,0,\n      29,0,0,0,28,0,0,0,20,0,0,0,24,0,0,0,1,0,0,0,28,0,0,0,23,0,0,0,20,0,0,0,\n      30,0,0,0,29,0,0,0,21,0,0,0,25,0,0,0,0,0,0,0,29,0,0,0,20,0,0,0,21,0,0,0,\n      31,0,0,0,30,0,0,0,22,0,0,0,26,0,0,0,22,0,0,0,30,0,0,0,21,0,0,0,22,0,0,0,\n      28,0,0,0,31,0,0,0,23,0,0,0,27,0,0,0,16,0,0,0,31,0,0,0,22,0,0,0,23,0,0,0,\n      222,255,255,255,32,0,0,0,24,0,0,0,255,255,255,255,28,0,0,0,32,0,0,0,27,0,\n      0,0,24,0,0,0,221,255,255,255,33,0,0,0,25,0,0,0,255,255,255,255,29,0,0,0,\n      33,0,0,0,24,0,0,0,25,0,0,0,220,255,255,255,34,0,0,0,26,0,0,0,255,255,255,\n      255,30,0,0,0,34,0,0,0,25,0,0,0,26,0,0,0,223,255,255,255,35,0,0,0,27,0,0,\n      0,255,255,255,255,31,0,0,0,35,0,0,0,26,0,0,0,27,0,0,0,218,255,255,255,\n      36,0,0,0,28,0,0,0,255,255,255,255,24,0,0,0,36,0,0,0,31,0,0,0,28,0,0,0,\n      
217,255,255,255,37,0,0,0,29,0,0,0,255,255,255,255,25,0,0,0,37,0,0,0,28,\n      0,0,0,29,0,0,0,216,255,255,255,38,0,0,0,30,0,0,0,255,255,255,255,26,0,0,\n      0,38,0,0,0,29,0,0,0,30,0,0,0,219,255,255,255,39,0,0,0,31,0,0,0,255,255,\n      255,255,27,0,0,0,39,0,0,0,30,0,0,0,31,0,0,0,214,255,255,255,40,0,0,0,32,\n      0,0,0,255,255,255,255,19,0,0,0,40,0,0,0,35,0,0,0,32,0,0,0,213,255,255,\n      255,41,0,0,0,33,0,0,0,255,255,255,255,20,0,0,0,41,0,0,0,32,0,0,0,33,0,\n      0,0,212,255,255,255,42,0,0,0,34,0,0,0,255,255,255,255,21,0,0,0,42,0,0,\n      0,33,0,0,0,34,0,0,0,215,255,255,255,43,0,0,0,35,0,0,0,255,255,255,255,\n      23,0,0,0,43,0,0,0,34,0,0,0,35,0,0,0,210,255,255,255,44,0,0,0,36,0,0,0,\n      255,255,255,255,14,0,0,0,44,0,0,0,39,0,0,0,36,0,0,0,209,255,255,255,45,\n      0,0,0,37,0,0,0,255,255,255,255,15,0,0,0,45,0,0,0,36,0,0,0,37,0,0,0,208,\n      255,255,255,46,0,0,0,38,0,0,0,255,255,255,255,17,0,0,0,46,0,0,0,37,0,0,\n      0,38,0,0,0,211,255,255,255,47,0,0,0,39,0,0,0,255,255,255,255,18,0,0,0,\n      47,0,0,0,38,0,0,0,39,0,0,0,206,255,255,255,48,0,0,0,40,0,0,0,255,255,\n      255,255,8,0,0,0,48,0,0,0,43,0,0,0,40,0,0,0,205,255,255,255,49,0,0,0,41,\n      0,0,0,255,255,255,255,9,0,0,0,49,0,0,0,40,0,0,0,41,0,0,0,204,255,255,\n      255,50,0,0,0,42,0,0,0,255,255,255,255,11,0,0,0,50,0,0,0,41,0,0,0,42,0,\n      0,0,207,255,255,255,51,0,0,0,43,0,0,0,255,255,255,255,13,0,0,0,51,0,0,\n      0,42,0,0,0,43,0,0,0,202,255,255,255,52,0,0,0,44,0,0,0,255,255,255,255,\n      2,0,0,0,52,0,0,0,47,0,0,0,44,0,0,0,201,255,255,255,53,0,0,0,45,0,0,0,\n      255,255,255,255,3,0,0,0,53,0,0,0,44,0,0,0,45,0,0,0,200,255,255,255,54,\n      0,0,0,46,0,0,0,255,255,255,255,5,0,0,0,54,0,0,0,45,0,0,0,46,0,0,0,203,\n      255,255,255,55,0,0,0,47,0,0,0,255,255,255,255,7,0,0,0,55,0,0,0,46,0,0,\n      0,47,0,0,0>>.\n\nverts() -> \n    <<0,0,128,191,0,0,128,63,0,0,128,63,0,0,192,65,0,0,128,191,0,0,128,191,0,\n      0,128,63,0,0,192,65,0,0,128,191,0,0,128,63,205,204,140,63,0,0,128,65,0,\n      
0,128,191,0,0,128,191,205,204,140,63,0,0,128,65,0,0,128,63,0,0,128,191,\n      0,0,128,63,0,0,192,65,0,0,128,63,0,0,128,191,205,204,140,63,0,0,128,65,\n      0,0,128,63,0,0,128,63,0,0,128,63,0,0,192,65,0,0,128,63,0,0,128,63,205,\n      204,140,63,0,0,128,65,205,204,140,63,0,0,128,63,0,0,128,63,0,0,128,65,\n      205,204,140,63,0,0,128,191,0,0,128,63,0,0,128,65,0,0,128,63,0,0,128,191,\n      0,0,128,191,0,0,192,65,205,204,140,63,0,0,128,191,0,0,128,191,0,0,128,\n      65,0,0,128,63,0,0,128,63,0,0,128,191,0,0,192,65,205,204,140,63,0,0,128,\n      63,0,0,128,191,0,0,128,65,0,0,128,63,205,204,140,191,0,0,128,63,0,0,128,\n      65,0,0,128,191,205,204,140,191,0,0,128,63,0,0,128,65,0,0,128,191,0,0,\n      128,191,0,0,128,191,0,0,192,65,0,0,128,191,205,204,140,191,0,0,128,191,\n      0,0,128,65,0,0,128,63,205,204,140,191,0,0,128,191,0,0,128,65,0,0,128,\n      191,205,204,140,63,0,0,128,63,0,0,128,65,0,0,128,63,205,204,140,63,0,0,\n      128,63,0,0,128,65,0,0,128,63,205,204,140,63,0,0,128,191,0,0,128,65,0,0,\n      128,191,0,0,128,63,0,0,128,191,0,0,192,65,0,0,128,191,205,204,140,63,0,\n      0,128,191,0,0,128,65,0,0,128,191,0,0,128,191,205,204,140,191,0,0,128,65,\n      0,0,128,191,0,0,128,63,205,204,140,191,0,0,128,65,0,0,128,63,0,0,128,63,\n      205,204,140,191,0,0,128,65,0,0,128,63,0,0,128,191,205,204,140,191,0,0,\n      128,65,205,204,140,191,0,0,128,191,0,0,128,63,0,0,128,65,205,204,140,\n      191,0,0,128,63,0,0,128,63,0,0,128,65,205,204,140,191,0,0,128,63,0,0,128,\n      191,0,0,128,65,205,204,140,191,0,0,128,191,0,0,128,191,0,0,128,65,51,51,\n      163,192,0,0,128,191,0,0,128,63,0,0,96,65,51,51,163,192,0,0,128,63,0,0,\n      128,63,0,0,96,65,51,51,163,192,0,0,128,63,0,0,128,191,0,0,96,65,51,51,\n      163,192,0,0,128,191,0,0,128,191,0,0,96,65,0,0,128,191,0,0,128,191,51,\n      51,163,192,0,0,96,65,0,0,128,191,0,0,128,63,51,51,163,192,0,0,96,65,0,\n      0,128,63,0,0,128,63,51,51,163,192,0,0,96,65,0,0,128,63,0,0,128,191,51,\n      
51,163,192,0,0,96,65,0,0,128,191,51,51,163,64,0,0,128,63,0,0,96,65,0,\n      0,128,63,51,51,163,64,0,0,128,63,0,0,96,65,0,0,128,63,51,51,163,64,0,\n      0,128,191,0,0,96,65,0,0,128,191,51,51,163,64,0,0,128,191,0,0,96,65,0,\n      0,128,63,51,51,163,192,0,0,128,63,0,0,96,65,0,0,128,191,51,51,163,192,\n      0,0,128,63,0,0,96,65,0,0,128,191,51,51,163,192,0,0,128,191,0,0,96,65,\n      0,0,128,63,51,51,163,192,0,0,128,191,0,0,96,65,51,51,163,64,0,0,128,\n      63,0,0,128,63,0,0,96,65,51,51,163,64,0,0,128,191,0,0,128,63,0,0,96,\n      65,51,51,163,64,0,0,128,191,0,0,128,191,0,0,96,65,51,51,163,64,0,0,\n      128,63,0,0,128,191,0,0,96,65,0,0,128,191,0,0,128,63,51,51,163,64,0,\n      0,96,65,0,0,128,191,0,0,128,191,51,51,163,64,0,0,96,65,0,0,128,63,\n      0,0,128,191,51,51,163,64,0,0,96,65,0,0,128,63,0,0,128,63,51,51,163,\n      64,0,0,96,65>>.\n"
  },
  {
    "path": "examples/cl_bandwidth.erl",
    "content": "%%\n%% SquareFloat program adpoted from \"Hello World\" OpenCL examples by apple\n%%\n-module(cl_bandwidth).\n\n-compile(export_all).\n\n-import(lists, [map/2]).\n\n-include(\"../include/cl.hrl\").\n\n-define(DATA_SIZE, 1*1024*1024).\n\ntest_data(Length) ->\n    << <<X:32/native-float>> || X <- lists:duplicate(Length, 1) >>.\n\ntest() ->\n    test(all).\n    \ntest(DevType) ->\n    %% Create binary with floating points 1.0 ... 1024.0\n    Data = test_data(?DATA_SIZE),\n    run(Data, DevType).\n\ntest(Length, DevType) when is_number(Length) ->\n    Data = test_data(Length),\n    run(Data, DevType).    \n\n%%\n%% execute a kernel that squares floating point numbers\n%% now only one device is used (We run on cpu for debugging)\n%%\nrun(Data, DevType) ->\n    E = clu:setup(DevType),\n    io:format(\"platform created\\n\"),\n\n    N = byte_size(Data), %% number of bytes in indata\n\n    io:format(\"Testing with byte size: ~p \\n\", [N]),\n\n    %% Create input data memory (implicit copy_host_ptr)\n    {ok,Input} = cl:create_buffer(E#cl.context,[read_only],N),\n    io:format(\"input memory created\\n\"),\n\n    %% Create the command queue for the first device\n    {ok,Queue} = cl:create_queue(E#cl.context,hd(E#cl.devices),[]),\n    io:format(\"queue created\\n\"),\n\n    %% run benchmark on data messuring best write time\n    {WriteTotal, WriteQueueTotal} =\n\twrite_loop(1000, Queue, Input, Data, N),\n\n    io:format(\"Bandwidth tested with write size: ~p bytes\\n\\n\", [N]),\n    \n    io:format(\"Write total milliseconds: ~p\\n\", [WriteTotal]),\n    io:format(\"Bandwidth rate: ~p KB per second\\n\\n\", [trunc((N / (WriteTotal/1000))/1024)]),\n\n    io:format(\"Queue total milliseconds: ~p\\n\", [WriteQueueTotal]),\n    io:format(\"Bandwidth rate: ~p KB per second\\n\\n\", [trunc((N / (WriteQueueTotal/1000))/1024)]),\n    %%\n    cl:release_mem_object(Input),\n    cl:release_queue(Queue),\n\n    clu:teardown(E).\n\nwrite_loop(Max, Queue, Mem, Data, N) 
->\n    write_loop(Max, Queue, Mem, Data, N, undefined, 0.0).\n\nwrite_loop(0, _Queue, _Mem, _Data, _N, TBest, TQBest) ->\n    {TBest, TQBest};\nwrite_loop(I, Queue, Mem, Data, N, TBest, TQBest) ->\n    WriteQueueStart = erlang:now(),\n    {ok,E1} = cl:enqueue_write_buffer(Queue, Mem, 0, N, Data, []),\n    WriteQueueEnd = erlang:now(),\n    WQT = timer:now_diff(WriteQueueEnd, WriteQueueStart)/1000,\n\n    WriteStart = erlang:now(),\n    ok = cl:flush(Queue),\n    {ok,completed} = cl:wait(E1),\n    WriteEnd = erlang:now(),\n    WT = timer:now_diff(WriteEnd, WriteStart)/1000,\n    if TBest =:= undefined; WT < TBest ->\n\t    write_loop(I-1, Queue, Mem, Data, N, WT, WQT);\n       true ->\n\t    write_loop(I-1, Queue, Mem, Data, N, TBest, TQBest)\n    end.\n\n\t    \n\t    \n\n\n    \n    \n"
  },
  {
    "path": "examples/cl_compile.erl",
    "content": "%%% @author Tony Rogvall <tony@rogvall.se>\n%%% @copyright (C) 2014, Tony Rogvall\n%%% @doc\n%%%    A opencl compiler wrapper\n%%% @end\n%%% Created :  9 May 2014 by Tony Rogvall <tony@rogvall.se>\n\n-module(cl_compile).\n\n-compile(export_all).\n\n%% compile File into binary,\n\nfile(File) ->  file(File,all).\nfile(File,Type) ->\n    case lists:member({1,2}, cl:versions()) of\n\ttrue ->\n\t    file(File,Type,\"-cl-kernel-arg-info\");\n\tfalse ->\n\t    file(File,Type,\"\")\n    end.\n\nfile(File,Type,Options) ->\n    Clu = clu:setup(Type),\n    case clu:build_source_file(Clu, File, Options) of\n\tErr = {error,_} ->\n\t    Err;\n\t{ok,Program} ->\n\t    info(Program)\n    end.\n\ninfo(Program) ->\n    {ok,Ds} = cl:get_program_info(Program, devices),\n    {ok,Bs} = cl:get_program_info(Program, binaries),\n    lists:foreach(fun(Device) -> build_info(Program, Device)  end, Ds),\n    program_info(Program),\n    {ok,Kernels} = cl:create_kernels_in_program(Program),\n    lists:foreach(\n      fun(Kernel) ->\n\t      {ok,KernelInfo} = cl:get_kernel_info(Kernel),\n\t      io:format(\"KernelInfo: ~p\\n\", [KernelInfo]),\n\t      lists:foreach(\n\t\tfun(Device) ->\n\t\t\t{ok,I}=cl:get_kernel_workgroup_info(Kernel,Device),\n\t\t\tio:format(\"KernelWorkGroupInfo: ~p\\n\", [I])\n\t\tend, Ds),\n\t      case lists:member({1,2}, cl:versions()) of\n\t\t  true ->\n\t\t      {ok,ArgInfo} = cl:get_kernel_arg_info(Kernel),\n\t\t      io:format(\"arg_info: ~p\\n\", [ArgInfo]);\n\t\t  false ->\n\t\t      ok\n\t      end\n      end, Kernels),\n    {ok,Bs}.\n\n\nprogram_info(Program) ->\n    io:format(\"ProgramInfo:\\n\", []),\n    lists:foreach(\n      fun(Attr) ->\n\t      case cl:get_program_info(Program,Attr) of\n\t\t  {ok,Value} ->\n\t\t      io:format(\"  ~s: ~p\\n\", [Attr,Value]);\n\t\t  {error,Reason} ->\n\t\t      io:format(\"InfoError: ~s [~p]\\n\", \n\t\t\t\t[Attr,Reason])\n\t      end\n      end, cl:program_info()).\n\nbuild_info(Program, Device) ->\n    
io:format(\"BuildInfo @ ~w\\n\", [Device]),\n    {ok,BuildInfo} = cl:get_program_build_info(Program,Device),\n    lists:foreach(\n      fun({Attr,Value}) ->\n\t      io:format(\"  ~s: ~p\\n\", [Attr,Value])\n      end, BuildInfo),\n    case lists:member({1,2}, cl:versions()) of\n\ttrue ->\n\t    %% fixme: version handle program_build_info \n\t    case cl:get_program_build_info(Program,Device,binary_type) of\n\t\t{ok,BinaryInfo} ->\n\t\t    io:format(\"  ~s: ~p\\n\", [binary_type,BinaryInfo]);\n\t\t{error,Reason} ->\n\t\t    io:format(\"InfoError: ~s [~p]\\n\", \n\t\t\t      [binary_type,Reason])\n\t    end;\n\tfalse ->\n\t    ok\n    end.\n\n%% compile & link with openCL version 1.2\n\ninc1() -> \"\n#define FOO 5\n\".\n\ninc2() -> \"\n#define BAR 7\n\".\n\nprog1() -> \"\n#include \\\"inc1.h\\\"\\n\n#include \\\"inc2.h\\\"\\n\n\n__kernel void sum(int x, int y, __global int* z)\n{\n  int i = get_global_id(0);\n  z[i] = x + y + FOO + BAR + BAZ;\n}\n\".\n\nprog2() -> \"\n#define FOO 5\n#define BAR 7\n\n__kernel void prod(int x, int y, __global int* z)\n{\n  int i = get_global_id(0);\n  z[i] = x*y*FOO*BAR + BAZ;\n}\n\".\n\nmake_prog(Clu,prog1) ->\n    {ok,Program} = cl:create_program_with_source(clu:context(Clu), prog1()),\n    {ok,Inc1} = cl:create_program_with_source(clu:context(Clu), inc1()),\n    {ok,Inc2} = cl:create_program_with_source(clu:context(Clu), inc2()),\n    {Program, [Inc1,Inc2], [\"inc1.h\", \"inc2.h\"]};\nmake_prog(Clu,prog2) ->\n    {ok,Program} = cl:create_program_with_source(clu:context(Clu), prog2()),\n    {Program, [], []}.\n\n%% MackBookPro, mac os x 10.9 with GEForce 9400M test_12(gpu,prog1)\n%% fail with an error saying that the compiler can not find include\n%% files 'inc1.h'\ntest_12() ->\n    test_12(prog1, cpu).\n\ntest_12(Prog, Type) ->\n    true = lists:member({1,2}, cl:versions()),\n    Clu = clu:setup(Type),\n    compile_12(Clu, Prog).\n\ncompile_12(Clu, Prog) ->\n    {Program,Includes,IncludeNames} = make_prog(Clu,Prog),\n    Ds = 
clu:device_list(Clu),\n    case cl:compile_program(Program,Ds,\"-DBAZ=11\",\n\t\t\t    Includes, IncludeNames) of\n\tok ->\n\t    Status = [get_build_status(Program, Dev) || Dev <- Ds],\n\t    case lists:any(fun(success) -> true;\n\t\t\t      (_) -> false end, Status) of\n\t\ttrue ->\n\t\t    {ok,Program};\n\t\tfalse ->\n\t\t    Logs = get_program_logs(Program),\n\t\t    io:format(\"Logs: ~s\\n\", [Logs]),\n\t\t    {error,{Status,Logs}}\n\t    end;\n\tError ->\n\t    Logs = get_program_logs(Program),\n\t    io:format(\"Logs: ~s\\n\", [Logs]),\n\t    cl:release_program(Program),\n\t    {error,{Error,Logs}}\n    end.\n\nlink_12(Type) ->\n    link_12(prog1,Type).\n\nlink_12(Prog,Type) ->\n    true = lists:member({1,2}, cl:versions()),\n    Clu = clu:setup(Type),\n    {ok,Prog1} = compile_12(Clu, Prog),\n    io:format(\"Prog1 = ~p\\n\", [Prog1]),\n%%    {ok,Prog2} = compile_12(Clu, prog2),\n%%    io:format(\"Prog2 = ~p\\n\", [Prog2]),\n    case cl:link_program(clu:context(Clu),\n\t\t\t clu:device_list(Clu),\n\t\t\t \"\",\n\t\t\t [Prog1]) of\n\t{ok, Program} ->\n\t    %% check status & logs\n\t    get_program_binaries(Program);\n\tError ->\n\t    Error\n    end.\n\nget_build_status(Program, Device) ->\n    {ok,Status} = cl:get_program_build_info(Program, Device, status),\n    {ok,BinaryType} = cl:get_program_build_info(Program, Device, binary_type),\n    io:format(\"status: ~p, binary_type=~p\\n\", [Status, BinaryType]),\n    Status.\n\nget_program_logs(Program) ->\n    {ok,DeviceList} = cl:get_program_info(Program, devices),\n    lists:map(\n      fun(Device) ->\n\t      {ok,Log} = cl:get_program_build_info(Program,Device,log),\n\t      Log\n      end, DeviceList).\n\nget_program_binaries(Program) ->\n    {ok,DeviceList} = cl:get_program_info(Program, devices),\n    {ok,BinaryList} = cl:get_program_info(Program, binaries),\n    {ok,{DeviceList, BinaryList}}.\n"
  },
  {
    "path": "examples/cl_map.erl",
    "content": "-module(cl_map).\n\n-include_lib(\"cl/include/cl.hrl\").\n\n-compile(export_all).\n-import(lists, [map/2, foreach/2, foldl/3]).\n\n-record(kwork,\n\t{\n\t  queue,   %% the queue\n\t  local,   %% kernel work_group_size\n\t  freq,    %% device max_clock_frequenct\n\t  units,   %% device max_compute_units\n\t  weight,  %% weight [0..1]\n\t  e1,e2,e3, %% events (fixme)\n\t  imem,    %% input memory object\n\t  omem,    %% output memory object\n\t  isize,   %% item size\n\t  idata    %% input data\n\t }).\n\ntest() ->\n    Args = << <<X:32/native-float>> || X <- lists:seq(1, 1024) >>,\n    ResultList = run(\"fun(<<X/cl_float>>) -> X*X+1 end\", Args),\n    lists:flatmap(\n      fun(Result) ->\n\t      [ X || <<X:32/native-float>> <= Result ]\n      end, ResultList).\n\t\n%% \n%% Run a map operation over data\n%% Restrictions: the output must currently equal the size of\n%%\n%% \nrun(Function, Data) ->\n    E = clu:setup(all),  %% gpu needs more work\n    {_NArgs,ItemSize,Source} = p_program(Function),\n    io:format(\"Program:\\n~s\\n\", [Source]),\n    {ok,Program} = clu:build_source(E, Source),\n    {ok,Kernel} = cl:create_kernel(Program, \"example\"),\n\n    Kws =\n\tmap(\n\t  fun(Device) ->\n\t\t  {ok,Queue} = cl:create_queue(E#cl.context,Device,[]),\n\t\t  {ok,Local} = cl:get_kernel_workgroup_info(Kernel,Device,\n\t\t\t\t\t\t\t    work_group_size),\n\t\t  {ok,Freq} = cl:get_device_info(Device,max_clock_frequency),\n\t\t  {ok,K} = cl:get_device_info(Device, max_compute_units),\n\t\t  #kwork{ queue=Queue, local=Local, freq=Freq, units=K,\n\t\t\t  isize=ItemSize }\n\t  end, E#cl.devices),\n    io:format(\"Kws = ~p\\n\", [Kws]),\n\n    %% Sum the weights and scale to [0..1]\n    Tw = foldl(fun(K,Sum) -> Sum + K#kwork.freq*K#kwork.units end,\n\t       0, Kws),\n    Kws1 = map(fun(K) ->\n\t\t       K#kwork { weight = (K#kwork.freq*K#kwork.units)/Tw }\n\t       end, Kws),\n    io:format(\"Kws1 = ~p\\n\", [Kws1]),\n    \n    %% Split data according to 
Weights but start with data\n    %% That have hard requirements on work_group_size\n    Kws11 = lists:reverse(lists:keysort(#kwork.local,Kws1)),\n    Kws2 = kwork_set_data(Kws11,  Data),\n    io:format(\"Kws2 = ~p\\n\", [Kws2]),\n\n    %% Create memory objects\n    Kws3 = map(\n\t     fun(K) ->\n\t\t     Nk = byte_size(K#kwork.idata),\n\t\t     {ok,I}  = cl:create_buffer(E#cl.context,[read_only],Nk),\n\t\t     {ok,O} = cl:create_buffer(E#cl.context,[write_only],Nk),\n\t\t     K#kwork { imem=I, omem=O }\n\t     end, Kws2),\n    io:format(\"Kws3 = ~p\\n\", [Kws3]),\n    \n    %% Enque input data\n    Kws4 = map(\n\tfun(K) ->\n\t\tNk = byte_size(K#kwork.idata),\n\t\tCount = Nk div K#kwork.isize,\n\t\t{ok,E1} = cl:enqueue_write_buffer(K#kwork.queue,\n\t\t\t\t\t\t  K#kwork.imem, \n\t\t\t\t\t\t  0, Nk, \n\t\t\t\t\t\t  K#kwork.idata, []),\n\t\t%% Set kernel arguments\n\t\tok = cl:set_kernel_arg(Kernel, 0, K#kwork.imem),\n\t\tok = cl:set_kernel_arg(Kernel, 1, K#kwork.omem),\n\t\tok = cl:set_kernel_arg(Kernel, 2, Count),\n\t      \n\t\t%% Enqueue the kernel\n\t\tGlobal = Count,\n\t\tio:format(\"Global=~w, Local=~w\\n\", [Global,K#kwork.local]),\n\t\t{ok,E2} = cl:enqueue_nd_range_kernel(K#kwork.queue,\n\t\t\t\t\t\t     Kernel,\n\t\t\t\t\t\t     [Global], [K#kwork.local],\n\t\t\t\t\t\t     [E1]),\n\t\t%% Enqueue the read from device memory (wait for kernel to finish)\n\t\t{ok,E3} = cl:enqueue_read_buffer(K#kwork.queue,\n\t\t\t\t\t\t K#kwork.omem,0,Nk,[E2]),\n\t\t%% Now flush the queue to make things happend \n\t\tok = cl:flush(K#kwork.queue),\n\t\t%% FIXME: here we should release E1,E2\n\t\tK#kwork { e1=E1,e2=E2,e3=E3 }\n\tend, Kws3),\n    io:format(\"Kws4 = ~p\\n\", [Kws4]),\n\n    %% Wait for Result buffer to be written\n    Bs = map(\n\t   fun(K) ->\n\t\t   io:format(\"E1 = ~p\\n\", [cl:wait(K#kwork.e1)]),\n\t\t   io:format(\"E2 = ~p\\n\", [cl:wait(K#kwork.e2)]),\n\t\t   {ok,Bin} = cl:wait(K#kwork.e3),\n\t\t   cl:release_mem_object(K#kwork.imem),\n\t\t   
cl:release_mem_object(K#kwork.omem),\n\t\t   cl:release_queue(K#kwork.queue),\n\t\t   %% Release built into cl:wait!\n\t\t   %% cl:release_event(K#kwork.e1),\n\t\t   %% cl:release_event(K#kwork.e2),\n\t\t   %% cl:release_event(K#kwork.e3),\n\t\t   Bin\n\t   end, Kws4),\n    \n\n    cl:release_kernel(Kernel),\n    cl:release_program(Program),\n    clu:teardown(E),\n    Bs.\n%%\n%% Assume at least one kwork\n%% Data must be a multiple of local (work_group_size)\n%% FIXME: This must be reworked to handle all cases\n%%\nkwork_set_data([K], Data) ->\n    [K#kwork { idata = Data }];\nkwork_set_data([K|Ks], Data) ->\n    N = byte_size(Data) div K#kwork.isize,\n    M = trunc(K#kwork.weight * N),  %% make a multiple of local\n    L = K#kwork.local,\n    R = ((L - (M rem L)) rem L),\n    ML = M + R,\n    io:format(\"N=~w, M=~w, L=~w, R=~w, ML=~w\\n\", [N,M,L,R,ML]),\n    if ML =< N ->\n\t    Md = ML*K#kwork.isize,\n\t    <<Data1:Md/binary, Data2/binary>> = Data,\n\t    [K#kwork { idata = Data1 } | kwork_set_data(Ks, Data2)];\n       true ->\n\t    Rd = R*K#kwork.isize,\n\t    [K#kwork { idata = <<Data/binary, 0:Rd/unit:8>> } | Ks]\n    end.\n    \n%%\n%% Function:\n%%     fun(<<X:32/T>>,P1,..,Pn) -> \n%%         F(X,P1,...Pn)\n%%\n%% Translates to\n%%     __kernel main(__global T0* input, __global T0* output,\n%%                   const unsigned int item_count,\n%%                   T1 p1, T2 p2 .. 
Tn Pn)\n%%     {\n%%         int i = get_global_id(0);\n%%         if (i < item_count) {\n%%             output[i] = F(input[i],p1,..Pn)\n%%         }\n%%     }\n%%\n%%\n%%\np_program(Function) ->\n    case erl_scan:string(Function) of\n\t{ok,Ts,_Ln} ->\n\t    case erl_parse:parse_exprs(add_dot(Ts)) of\n\t\t{ok, Exprs} ->\n\t\t    p_fun(Exprs);\n\t\tError ->\n\t\t    Error\n\t    end;\n\tError ->\n\t    Error\n    end.\n\nadd_dot(Ts) ->\n    case lists:last(Ts) of\n\t{dot,_} -> Ts;\n\tE -> \n\t    Ts ++ [{dot,element(2,E)}]\n    end.\n\t    \n\np_fun([{'fun',_Ln1,{clauses,[{clause,_Ln3,H,[],B}]}}]) ->\n    As = p_header(H),\n    NArgs = length(As),\n    {_MainVar,MainType} = hd(As),\n    ItemSize = sizeof(MainType),\n    {NArgs,ItemSize,\n     lists:flatten([g_header(As), g_body(As,B)])};\np_fun(Fs) ->\n    io:format(\"Fs=~p\\n\", [Fs]),\n    erlang:error(not_supported).\n\np_header(Params) ->\n    map(fun p_arg/1, Params).\n\ng_header([{V,T}|Ps]) ->\n    [\"__kernel void example(\",\n     \"__global \", g_type(T), \"*\", \"in\", \",\",\n     \"__global \", g_type(T), \"*\", \"out\",\",\",\n     \"const uint n\",\n     map(fun({X,Tx}) ->\n\t\t [\",\", \"const \", g_type(Tx), \" \",\n\t\t  atom_to_list(X)]\n\t end, Ps),\n     \")\\n\",\n     \"{\",\n     \"  int i = get_global_id(0);\\n\",\n     \"  if (i < n) {\\n\"\n     \"  \", g_type(T), \" \", atom_to_list(V), \"= in[i];\\n\"\n    ].\n\ng_body(Vs,[E]) ->\n    [\"out[i] = \", p_expr(Vs, E),\";\\n\",\n     \"  }\\n\",\n     \"}\\n\"];\ng_body(Vs,[E|Es]) ->\n    [p_expr(Vs,E),\";\\n\",\n     g_body(Vs, Es)];\ng_body(_Vs,[]) ->\n    [\"  }\\n\",\n     \"}\\n\"].\n\np_arg({bin,_,[{bin_element,_,{var,_,V},Size,[Type]}]}) ->\n    S = t_vector_size(Size),\n    T = t_type(S,Type),\n    {V,T}.\n\np_expr(Vs, {var,_,V}) ->\n    true = lists:keymember(V, 1, Vs),\n    [atom_to_list(V)];\np_expr(_Vs, {integer,_,I}) ->\n    [integer_to_list(I)];\np_expr(_Vs, {float,_,F}) ->\n    io_lib:format(\"~f\", [F]);\np_expr(Vs, 
{op,_Ln,Op,L,R}) ->\n    [p_expr(Vs,L),atom_to_list(Op),p_expr(Vs,R)];\np_expr(Vs, {op,_Ln,Op,M}) ->\n    [atom_to_list(Op),p_expr(Vs,M)];\np_expr(Vs, {match,_Ln,L,R}) ->\n    [p_expr(Vs,L),\"=\",p_expr(Vs,R)];\np_expr(Vs, {record_field,_Ln,{var,_,V},{atom,_,Selector}}) ->\n    true = lists:keymember(V, 1, Vs),\n    [atom_to_list(V),\".\",atom_to_list(Selector)];\np_expr(Vs, {record_field,_Ln,Expr,{atom,_,Selector}}) ->\n    E = p_expr(Vs, Expr),\n    %% fixme: normalize vector selector and check that\n    %% the permutation is valid.\n    [E,\".\",atom_to_list(Selector)];\np_expr(Vs, {call,_Ln,{atom,_,F},As}) ->\n    Ps = map(fun(A) -> p_expr(Vs, A) end, As),\n    [atom_to_list(F),\"(\", g_args(Ps), \")\"].\n\n\nt_vector_size(default) ->\n    default;\nt_vector_size({integer,_,Sz}) ->\n    Sz.\n\ng_args([]) -> [];\ng_args([A]) ->  [A];\ng_args([A|As]) ->  [A,\",\" | g_args(As)].\n\ng_type({T,S}) when is_atom(T), is_integer(S) ->\n    [atom_to_list(T),integer_to_list(S)];\ng_type(T) when is_atom(T) ->\n    [atom_to_list(T)].\n\n%% size scalar type\nsizeof('char') -> 1;\nsizeof('uchar') -> 1;\nsizeof('short') -> 2;\nsizeof('ushort') -> 2;\nsizeof('int') -> 4;\nsizeof('uint') -> 4;\nsizeof('long') -> 8;\nsizeof('ulong') -> 8;\nsizeof('float') -> 4;\nsizeof('half') -> 2;\nsizeof({T,default}) -> sizeof(T);\nsizeof({T,S}) -> S*sizeof(T).\n\n%% scalar types (api -> opencl)\nt_type(Size,Type) ->\n    Scalar = t_type(Type),\n    if Size == default -> Scalar;\n       Size == 1 -> Scalar;\n       Scalar == 'half' ->\n\t    erlang:error({bad_vector_type,Scalar,Size});\n       Size == 2 -> {Scalar,2};\n       Size == 4 -> {Scalar,4};\n       Size == 8 -> {Scalar,8};\n       Size == 16 -> {Scalar,16};\n       true -> erlang:error({bad_vector_type,Scalar,Size})\n    end.\n    \nt_type(cl_char)   -> 'char';\nt_type(cl_uchar)  -> 'uchar';\nt_type(cl_short)  -> 'short';\nt_type(cl_ushort) -> 'ushort';\nt_type(cl_int)    -> 'int';\nt_type(cl_uint)   -> 'uint';\nt_type(cl_long)   -> 
'long';\nt_type(cl_ulong)  -> 'ulong';\nt_type(cl_float)  -> 'float';\nt_type(cl_half)   -> 'half';\nt_type(T) ->\n    erlang:error({bad_type,T}).\n\n"
  },
  {
    "path": "examples/cl_mul.erl",
    "content": "%%% File    : cl_mul.erl\n%%% Author  : Tony Rogvall <tony@rogvall.se>\n%%% Description : Multiply matrix with list of matrices\n%%% Created : 16 Nov 2009 by Tony Rogvall <tony@rogvall.se>\n\n-module(cl_mul).\n\n-compile(export_all).\n\n-import(lists, [map/2]).\n\n-include(\"../include/cl.hrl\").\n\n-define(DATA_SIZE, 1024).\n-define(ITEM_SIZE, (16*4)).\n\nencode_matrix({float16,M}) -> encode_matrix(M);\nencode_matrix({ X1, X2, X3, X4\n               , X5, X6, X7, X8\n               , X9, X10,X11,X12\n               , X13,X14,X15,X16}) ->\n    <<?cl_float16( X1, X2, X3, X4\n                 , X5, X6, X7, X8\n                 , X9, X10,X11,X12\n                 , X13,X14,X15,X16)>>.\n\ndecode_matrix(Data) ->\n    case Data of\n    <<\n     ?cl_float(A11), ?cl_float(A12), ?cl_float(A13), ?cl_float(A14),\n     ?cl_float(A21), ?cl_float(A22), ?cl_float(A23), ?cl_float(A24),\n     ?cl_float(A31), ?cl_float(A32), ?cl_float(A33), ?cl_float(A34),\n     ?cl_float(A41), ?cl_float(A42), ?cl_float(A43), ?cl_float(A44),\n     Rest/binary\n     >> ->\n\t    [{A11,A12,A13,A14,\n\t      A21,A22,A23,A24,\n\t      A31,A32,A33,A34,\n\t      A41,A42,A43,A44} | decode_matrix(Rest)];\n\t<<>> ->\n\t    []\n    end.\n\nid_matrix() ->\n    {float16,{1,0,0,0,\n\t      0,1,0,0,\n\t      0,0,1,0,\n\t      0,0,0,1}}.\n\nzero_matrix() ->\n    {float16,{0,0,0,0,\n\t      0,0,0,0,\n\t      0,0,0,0,\n\t      0,0,0,0}}.\n\nr() -> random:uniform().\n\nrandom_matrices(N) ->\n    list_to_binary(\n      lists:map(\n\tfun(_I) ->\n\t\tM = {r(),r(),r(),r(),\n\t\t     r(),r(),r(),r(),\n\t\t     r(),r(),r(),r(),\n\t\t     r(),r(),r(),r()},\n\t\tencode_matrix(M)\n\tend, lists:seq(1, N))).\n\ntest_data() ->\n    random_matrices(4).\n\ndump_data(Bin) ->\n    io:format(\"data=~p\\n\", [decode_matrix(Bin)]).\n\ntest() ->\n    test(all).\n    \ntest(DevType) ->\n    %% Create binary with floating points 1.0 ... 
1024.0\n    Data = test_data(),\n    run(Data, DevType).\n\nexamples_dir() ->\n    filename:join(code:lib_dir(cl), \"examples\").\n\n%%\n%% execute a kernel that multiplies a constant 4x4 matrix with each 4x4 matrix in Data\n%% now only one device is used (We run on cpu for debugging)\n%%\nrun(Data, DevType) ->\n    E = clu:setup(DevType),\n    io:format(\"platform created\\n\"),\n    \n    Filename = filename:join(examples_dir(),\"mul4x4.cl\"),\n    io:format(\"build: ~s\\n\", [Filename]),\n    {ok,Program} = clu:build_source_file(E, Filename),\n    io:format(\"program built\\n\"),\n\n    N = byte_size(Data),       %% number of bytes in indata\n    Count = N div ?ITEM_SIZE,  %% number of matrices in indata\n\n    %% Create input data memory (implicit copy_host_ptr)\n    {ok,Input} = cl:create_buffer(E#cl.context,[read_only],N),\n    io:format(\"input memory created\\n\"),\n\n    %% Create the output memory\n    {ok,Output} = cl:create_buffer(E#cl.context,[write_only],N),\n    io:format(\"output memory created\\n\"),\n\n    %% Create the command queue for the first device\n    {ok,Queue} = cl:create_queue(E#cl.context,hd(E#cl.devices),[]),\n    io:format(\"queue created\\n\"),\n\n    %% Create the mul4x4 kernel object\n    {ok,Kernel} = cl:create_kernel(Program, \"mul4x4\"),\n    io:format(\"kernel created: ~p\\n\", [Kernel]),\n\n    dump_data(Data),\n\n    %% Write data into input array \n    {ok,Event1} = cl:enqueue_write_buffer(Queue, Input, 0, N, Data, []),\n    io:format(\"write data enqueued\\n\"),\n    erlang:display_string(\"enqueu write\\n\"),\n\n    %% Set kernel arguments\n    clu:apply_kernel_args(Kernel, [Input,Output,encode_matrix(id_matrix()),{uint,Count}]),\n    io:format(\"kernel args set\\n\"),\n\n    Device = hd(E#cl.devices),\n    {ok,Local} = cl:get_kernel_workgroup_info(Kernel, Device, work_group_size),\n    io:format(\"work_group_size = ~p\\n\", [Local]),\n\n    %% Enqueue the kernel\n    Global = Count,\n    if Local > Count ->  LocalWork = Count;\n       true ->   \t 
LocalWork = Local\n    end,\n    {ok,Event2} = cl:enqueue_nd_range_kernel(Queue, Kernel,\n\t\t\t\t\t     [Global], [LocalWork], [Event1]),\n    io:format(\"nd range [~w, ~w] kernel enqueued\\n\",\n\t      [[Global],[LocalWork]]),\n    \n    %% Enqueue the read from device memory (wait for kernel to finish)\n    {ok,Event3} = cl:enqueue_read_buffer(Queue,Output,0,N,[Event2]),\n    io:format(\"read buffer enqueued\\n\"),\n\n    %% Now flush the queue to make things happend \n    ok = cl:flush(Queue),\n    io:format(\"flushed\\n\"),\n\n    %% Wait for Result buffer to be written\n    io:format(\"wait\\n\"),\n    io:format(\"Event1 = ~p\\n\", [cl:wait(Event1,1000)]),\n    io:format(\"Event2 = ~p\\n\", [cl:wait(Event2,1000)]),\n    Event3Res = cl:wait(Event3,1000),\n    io:format(\"Event3 = ~p\\n\", [Event3Res]),\n\n    %%\n    cl:release_mem_object(Input),\n    cl:release_mem_object(Output),\n    cl:release_queue(Queue),\n    cl:release_kernel(Kernel),\n    cl:release_program(Program),\n\n    clu:teardown(E),\n    case Event3Res of\n\t{ok,ResData} ->\n\t    dump_data(ResData);\n\t_ ->\n\t    ok\n    end,\n    Event3Res.\n"
  },
  {
    "path": "examples/cl_square_float.erl",
    "content": "%%\n%% SquareFloat program adpoted from \"Hello World\" OpenCL examples by apple\n%%\n-module(cl_square_float).\n\n-compile(export_all).\n\n-import(lists, [map/2]).\n\n-include(\"../include/cl.hrl\").\n\n-define(DATA_SIZE, 1024).\n\nsource() ->\n\"\n__kernel void square( __global float* input, \n                      __global float* output,\n                      const unsigned int count)\n{\n   int i = get_global_id(0);\n   if (i < count)\n      output[i] = input[i]*input[i];\n}\n\".\n\ntest_data() ->\n    << <<X:32/native-float>> || X <- lists:seq(1,?DATA_SIZE) >>.\n\ndump_data(Bin) ->\n    io:format(\"data=~p\\n\", [[ X || <<X:32/native-float>> <= Bin ]]).\n\ntest() ->\n    test(all).\n    \ntest(DevType) ->\n    %% Create binary with floating points 1.0 ... 1024.0\n    Data = test_data(),\n    run(Data, DevType).\n\n%%\n%% execute a kernel that squares floating point numbers\n%% now only one device is used (We run on cpu for debugging)\n%%\nrun(Data, DevType) ->\n    E = clu:setup(DevType),\n    io:format(\"platform created\\n\"),\n    {ok,Program} = clu:build_source(E, source()),\n    io:format(\"program built\\n\"),\n\n    N = byte_size(Data), %% number of bytes in indata\n    Count = N div 4,     %% number of floats in indata\n\n    %% Create input data memory (implicit copy_host_ptr)\n    {ok,Input} = cl:create_buffer(E#cl.context,[read_only],N),\n    io:format(\"input memory created\\n\"),\n\n    %% Create the output memory\n    {ok,Output} = cl:create_buffer(E#cl.context,[write_only],N),\n    io:format(\"output memory created\\n\"),\n\n    %% Create the command queue for the first device\n    {ok,Queue} = cl:create_queue(E#cl.context,hd(E#cl.devices),[]),\n    io:format(\"queue created\\n\"),\n\n    %% Create the squre kernel object\n    {ok,Kernel} = cl:create_kernel(Program, \"square\"),\n    io:format(\"kernel created: ~p\\n\", [Kernel]),\n\n    clu:apply_kernel_args(Kernel, [Input, Output, Count]),\n    io:format(\"kernel args 
set\\n\"),\n\n    %% Write data into input array \n    {ok,Event1} = cl:enqueue_write_buffer(Queue, Input, 0, N, Data, []),\n    io:format(\"write data enqueued\\n\"),\n    erlang:display_string(\"enqueu write\\n\"),\n\n    Device = hd(E#cl.devices),\n    {ok,Local} = cl:get_kernel_workgroup_info(Kernel, Device, work_group_size),\n    io:format(\"work_group_size = ~p\\n\", [Local]),\n\n    %% Enqueue the kernel\n    Global = Count,\n    {ok,Event2} = cl:enqueue_nd_range_kernel(Queue, Kernel,\n\t\t\t\t\t     [Global], [Local], [Event1]),\n    io:format(\"nd range [~p, ~p] kernel enqueued\\n\",\n\t      [[Global],[Local]]),\n    \n    %% Enqueue the read from device memory (wait for kernel to finish)\n    {ok,Event3} = cl:enqueue_read_buffer(Queue,Output,0,N,[Event2]),\n    io:format(\"read buffer enqueued\\n\"),\n\n    %% Now flush the queue to make things happend \n    ok = cl:flush(Queue),\n    io:format(\"flushed\\n\"),\n\n    %% Wait for Result buffer to be written\n    io:format(\"wait\\n\"),\n    io:format(\"Event1 = ~p\\n\", [cl:wait(Event1)]),\n    io:format(\"Event2 = ~p\\n\", [cl:wait(Event2)]),\n    Event3Res = cl:wait(Event3),\n    io:format(\"Event3 = ~p\\n\", [Event3Res]),\n\n    %%\n    cl:release_mem_object(Input),\n    cl:release_mem_object(Output),\n    cl:release_queue(Queue),\n    cl:release_kernel(Kernel),\n    cl:release_program(Program),\n\n    clu:teardown(E),\n    {ok,EventResData} = Event3Res,\n    dump_data(EventResData).\n"
  },
  {
    "path": "examples/mul4x4.cl",
    "content": "//\n// Multiply count 4x4 matrices with a constant matrix\n//\n\n__kernel void mul4x4(__global float* input,\n\t\t     __global float* output,\n\t\t     const float16 aa,\n\t\t     const unsigned int count)\n{\n    size_t ix;\n    __global float* b;\n    __global float* c;\n    float *a = (float*)&aa;\n    \n    ix = get_global_id(0);\n    if (ix < count) {\n        int i,j,k;\n\n\tb = input  + ix*16;\n\tc = output + ix*16;\n\n\tfor (i=0; i<4; i++) {\n\t    for (j=0; j<4; j++) {\n\t        float s1 = 0.0f;\n\t\tfor (k=0; k<4; k++) {\n\t\t    float t1 = a[4*i+k];\n\t\t    float t2 = b[4*k+j];\n\t\t    s1 += (t1*t2);\n\t\t}\n    \t\tc[4*i+j] = s1;\n\t    }\n\t}\n\t\n    }\n}\n\n\n\t \n"
  },
  {
    "path": "examples/z2.cl",
    "content": "//\n// Calculate mandelbrot\n// f(0) = x+yi\n// f(n) = f(n)^2 + c\n//\n\n__kernel void z2(const float x, const float y, \n\t\t const float xs, const float ys, \n\t\t const unsigned int n,\n\t\t __global unsigned int* out)\n{\n    int i = get_global_id(0);\n    int j = get_global_id(1);\n    if ((i < n) && (j < n)) {\n\tint k = 0;\n\tfloat cx = x + i*xs;\n\tfloat cy = y + j*ys;\n\tfloat a = 0, b = 0;\n\tfloat a2 = 0, b2 = 0;\n\n\twhile ((k < n) && ((a2 + b2) < 4)) {\n\t    a = a2-b2 + cx;\n\t    b = 2*a*b + cy;\n\t    a2 = a*a;\n\t    b2 = b*b;\n\t    k++;\n\t}\n\tout[i*n + j] = k;\n    }\n}\n"
  },
  {
    "path": "include/cl.hrl",
    "content": "%%%---- BEGIN COPYRIGHT -------------------------------------------------------\n%%%\n%%% Copyright (C) 2007 - 2012, Rogvall Invest AB, <tony@rogvall.se>\n%%%\n%%% This software is licensed as described in the file COPYRIGHT, which\n%%% you should have received as part of this distribution. The terms\n%%% are also available at http://www.rogvall.se/docs/copyright.txt.\n%%%\n%%% You may opt to use, copy, modify, merge, publish, distribute and/or sell\n%%% copies of the Software, and permit persons to whom the Software is\n%%% furnished to do so, under the terms of the COPYRIGHT file.\n%%%\n%%% This software is distributed on an \"AS IS\" basis, WITHOUT WARRANTY OF ANY\n%%% KIND, either express or implied.\n%%%\n%%%---- END COPYRIGHT ---------------------------------------------------------\n%%\n%% Definitions used here and there\n%%\n-ifndef(__CL_HRL__).\n-define(__CL_HRL__, true).\n\n-define(POINTER_SIZE, 64).  %% casted by driver\n-define(SIZE_SIZE,    64).  %% casted by driver\n\n%% transport types\n-define(u_int8_t(X),    X:8/native-unsigned-integer).\n-define(u_int16_t(X),   X:16/native-unsigned-integer).\n-define(u_int32_t(X),   X:32/native-unsigned-integer).\n-define(u_int64_t(X),   X:64/native-unsigned-integer).\n-define(int8_t(X),      X:8/native-signed-integer).\n-define(int16_t(X),     X:16/native-signed-integer).\n-define(int32_t(X),     X:32/native-signed-integer).\n-define(int64_t(X),     X:64/native-signed-integer).\n-define(float_t(X),     X:32/native-float).\n-define(double_t(X),    X:64/native-float).\n-define(pointer_t(X),   X:?POINTER_SIZE/native-unsigned-integer).\n-define(size_t(X),      X:?SIZE_SIZE/native-unsigned-integer).\n\n%% scalar types\n%% @type cl_char() = integer()\n%% @type cl_uchar() = non_neg_integer()\n%% @type cl_short() = integer()\n%% @type cl_ushort() = non_neg_integer()\n%% @type cl_int() = integer()\n%% @type cl_uint() = non_neg_integer()\n%% @type cl_long() = integer()\n%% @type cl_ulong() = 
non_neg_integer()\n%% @type cl_half() = float()\n%% @type cl_float() = float()\n%% @type cl_double() = float()\n\n-type cl_char() :: integer().\n-type cl_uchar() :: non_neg_integer().\n-type cl_short() :: integer().\n-type cl_ushort() :: non_neg_integer().\n-type cl_int() :: integer().\n-type cl_uint() :: non_neg_integer().\n-type cl_long() :: integer().\n-type cl_ulong() :: non_neg_integer().\n-type cl_half() :: float().\n-type cl_float() :: float().\n-type cl_double() :: float().\n\n\n-define(cl_char(X),     X:8/native-signed-integer).\n-define(cl_uchar(X),    X:8/native-unsigned-integer).\n-define(cl_short(X),    X:16/native-signed-integer).\n-define(cl_ushort(X),   X:16/native-unsigned-integer).\n-define(cl_int(X),      X:32/native-signed-integer).\n-define(cl_uint(X),     X:32/native-unsigned-integer).\n-define(cl_long(X),     X:64/native-signed-integer).\n-define(cl_ulong(X),    X:64/native-unsigned-integer).\n-define(cl_half(X),     X:16/native-unsigned-integer).\n-define(cl_float(X),    X:32/native-float).\n-define(cl_double(X),   X:64/native-float).\n\n-define(cl_pointer(X),  X:?POINTER_SIZE/native-unsigned-integer).\n-define(cl_size(X),     X:?SIZE_SIZE/native-unsigned-integer).\n\n%% vector types,  OpenCL requires that all types be naturally aligned. 
\n-define(cl_char2(X1,X2), ?cl_char(X1), ?cl_char(X2)).\n-define(cl_char4(X1,X2,X3,X4),\n\t?cl_char(X1), ?cl_char(X2), ?cl_char(X3), ?cl_char(X4)).\n-define(cl_char8(X1,X2,X3,X4,X5,X6,X7,X8), \n\t?cl_char(X1), ?cl_char(X2), ?cl_char(X3), ?cl_char(X4),\n\t?cl_char(X5), ?cl_char(X6), ?cl_char(X7), ?cl_char(X8)).\n-define(cl_char16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), \n\t?cl_char(X1), ?cl_char(X2), ?cl_char(X3), ?cl_char(X4),\n\t?cl_char(X5), ?cl_char(X6), ?cl_char(X7), ?cl_char(X8),\n\t?cl_char(X9), ?cl_char(X10), ?cl_char(X11), ?cl_char(X12),\n\t?cl_char(X13), ?cl_char(X14), ?cl_char(X15), ?cl_char(X16)).\n\n-define(cl_uchar2(X1,X2), ?cl_uchar(X1), ?cl_uchar(X2)).\n-define(cl_uchar4(X1,X2,X3,X4),\n\t?cl_uchar(X1), ?cl_uchar(X2), ?cl_uchar(X3), ?cl_uchar(X4)).\n-define(cl_uchar8(X1,X2,X3,X4,X5,X6,X7,X8), \n\t?cl_uchar(X1), ?cl_uchar(X2), ?cl_uchar(X3), ?cl_uchar(X4),\n\t?cl_uchar(X5), ?cl_uchar(X6), ?cl_uchar(X7), ?cl_uchar(X8)).\n-define(cl_uchar16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), \n\t?cl_uchar(X1), ?cl_uchar(X2), ?cl_uchar(X3), ?cl_uchar(X4),\n\t?cl_uchar(X5), ?cl_uchar(X6), ?cl_uchar(X7), ?cl_uchar(X8),\n\t?cl_uchar(X9), ?cl_uchar(X10), ?cl_uchar(X11), ?cl_uchar(X12),\n\t?cl_uchar(X13), ?cl_uchar(X14), ?cl_uchar(X15), ?cl_uchar(X16)).\n\n-define(cl_short2(X1,X2), ?cl_short(X1), ?cl_short(X2)).\n-define(cl_short4(X1,X2,X3,X4),\n\t?cl_short(X1), ?cl_short(X2), ?cl_short(X3), ?cl_short(X4)).\n-define(cl_short8(X1,X2,X3,X4,X5,X6,X7,X8), \n\t?cl_short(X1), ?cl_short(X2), ?cl_short(X3), ?cl_short(X4),\n\t?cl_short(X5), ?cl_short(X6), ?cl_short(X7), ?cl_short(X8)).\n-define(cl_short16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), \n\t?cl_short(X1), ?cl_short(X2), ?cl_short(X3), ?cl_short(X4),\n\t?cl_short(X5), ?cl_short(X6), ?cl_short(X7), ?cl_short(X8),\n\t?cl_short(X9), ?cl_short(X10), ?cl_short(X11), ?cl_short(X12),\n\t?cl_short(X13), ?cl_short(X14), ?cl_short(X15), ?cl_short(X16)).\n\n-define(cl_ushort2(X1,X2), 
?cl_ushort(X1), ?cl_ushort(X2)).\n-define(cl_ushort4(X1,X2,X3,X4),\n\t?cl_ushort(X1), ?cl_ushort(X2), ?cl_ushort(X3), ?cl_ushort(X4)).\n-define(cl_ushort8(X1,X2,X3,X4,X5,X6,X7,X8), \n\t?cl_ushort(X1), ?cl_ushort(X2), ?cl_ushort(X3), ?cl_ushort(X4),\n\t?cl_ushort(X5), ?cl_ushort(X6), ?cl_ushort(X7), ?cl_ushort(X8)).\n-define(cl_ushort16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), \n\t?cl_ushort(X1), ?cl_ushort(X2), ?cl_ushort(X3), ?cl_ushort(X4),\n\t?cl_ushort(X5), ?cl_ushort(X6), ?cl_ushort(X7), ?cl_ushort(X8),\n\t?cl_ushort(X9), ?cl_ushort(X10), ?cl_ushort(X11), ?cl_ushort(X12),\n\t?cl_ushort(X13), ?cl_ushort(X14), ?cl_ushort(X15), ?cl_ushort(X16)).\n\n-define(cl_int2(X1,X2), ?cl_int(X1), ?cl_int(X2)).\n-define(cl_int4(X1,X2,X3,X4),\n\t?cl_int(X1), ?cl_int(X2), ?cl_int(X3), ?cl_int(X4)).\n-define(cl_int8(X1,X2,X3,X4,X5,X6,X7,X8), \n\t?cl_int(X1), ?cl_int(X2), ?cl_int(X3), ?cl_int(X4),\n\t?cl_int(X5), ?cl_int(X6), ?cl_int(X7), ?cl_int(X8)).\n-define(cl_int16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), \n\t?cl_int(X1), ?cl_int(X2), ?cl_int(X3), ?cl_int(X4),\n\t?cl_int(X5), ?cl_int(X6), ?cl_int(X7), ?cl_int(X8),\n\t?cl_int(X9), ?cl_int(X10), ?cl_int(X11), ?cl_int(X12),\n\t?cl_int(X13), ?cl_int(X14), ?cl_int(X15), ?cl_int(X16)).\n\n-define(cl_uint2(X1,X2), ?cl_uint(X1), ?cl_uint(X2)).\n-define(cl_uint4(X1,X2,X3,X4),\n\t?cl_uint(X1), ?cl_uint(X2), ?cl_uint(X3), ?cl_uint(X4)).\n-define(cl_uint8(X1,X2,X3,X4,X5,X6,X7,X8), \n\t?cl_uint(X1), ?cl_uint(X2), ?cl_uint(X3), ?cl_uint(X4),\n\t?cl_uint(X5), ?cl_uint(X6), ?cl_uint(X7), ?cl_uint(X8)).\n-define(cl_uint16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), \n\t?cl_uint(X1), ?cl_uint(X2), ?cl_uint(X3), ?cl_uint(X4),\n\t?cl_uint(X5), ?cl_uint(X6), ?cl_uint(X7), ?cl_uint(X8),\n\t?cl_uint(X9), ?cl_uint(X10), ?cl_uint(X11), ?cl_uint(X12),\n\t?cl_uint(X13), ?cl_uint(X14), ?cl_uint(X15), ?cl_uint(X16)).\n\n-define(cl_long2(X1,X2), ?cl_long(X1), 
?cl_long(X2)).\n-define(cl_long4(X1,X2,X3,X4),\n\t?cl_long(X1), ?cl_long(X2), ?cl_long(X3), ?cl_long(X4)).\n-define(cl_long8(X1,X2,X3,X4,X5,X6,X7,X8), \n\t?cl_long(X1), ?cl_long(X2), ?cl_long(X3), ?cl_long(X4),\n\t?cl_long(X5), ?cl_long(X6), ?cl_long(X7), ?cl_long(X8)).\n-define(cl_long16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), \n\t?cl_long(X1), ?cl_long(X2), ?cl_long(X3), ?cl_long(X4),\n\t?cl_long(X5), ?cl_long(X6), ?cl_long(X7), ?cl_long(X8),\n\t?cl_long(X9), ?cl_long(X10), ?cl_long(X11), ?cl_long(X12),\n\t?cl_long(X13), ?cl_long(X14), ?cl_long(X15), ?cl_long(X16)).\n\n-define(cl_ulong2(X1,X2), ?cl_ulong(X1), ?cl_ulong(X2)).\n-define(cl_ulong4(X1,X2,X3,X4),\n\t?cl_ulong(X1), ?cl_ulong(X2), ?cl_ulong(X3), ?cl_ulong(X4)).\n-define(cl_ulong8(X1,X2,X3,X4,X5,X6,X7,X8), \n\t?cl_ulong(X1), ?cl_ulong(X2), ?cl_ulong(X3), ?cl_ulong(X4),\n\t?cl_ulong(X5), ?cl_ulong(X6), ?cl_ulong(X7), ?cl_ulong(X8)).\n-define(cl_ulong16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), \n\t?cl_ulong(X1), ?cl_ulong(X2), ?cl_ulong(X3), ?cl_ulong(X4),\n\t?cl_ulong(X5), ?cl_ulong(X6), ?cl_ulong(X7), ?cl_ulong(X8),\n\t?cl_ulong(X9), ?cl_ulong(X10), ?cl_ulong(X11), ?cl_ulong(X12),\n\t?cl_ulong(X13), ?cl_ulong(X14), ?cl_ulong(X15), ?cl_ulong(X16)).\n\n-define(cl_float2(X1,X2), ?cl_float(X1), ?cl_float(X2)).\n-define(cl_float4(X1,X2,X3,X4),\n\t?cl_float(X1), ?cl_float(X2), ?cl_float(X3), ?cl_float(X4)).\n-define(cl_float8(X1,X2,X3,X4,X5,X6,X7,X8), \n\t?cl_float(X1), ?cl_float(X2), ?cl_float(X3), ?cl_float(X4),\n\t?cl_float(X5), ?cl_float(X6), ?cl_float(X7), ?cl_float(X8)).\n-define(cl_float16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), \n\t?cl_float(X1), ?cl_float(X2), ?cl_float(X3), ?cl_float(X4),\n\t?cl_float(X5), ?cl_float(X6), ?cl_float(X7), ?cl_float(X8),\n\t?cl_float(X9), ?cl_float(X10), ?cl_float(X11), ?cl_float(X12),\n\t?cl_float(X13), ?cl_float(X14), ?cl_float(X15), ?cl_float(X16)).\n\n-define(cl_double2(X1,X2), ?cl_double(X1), 
?cl_double(X2)).\n-define(cl_double4(X1,X2,X3,X4),\n\t?cl_double(X1), ?cl_double(X2), ?cl_double(X3), ?cl_double(X4)).\n-define(cl_double8(X1,X2,X3,X4,X5,X6,X7,X8), \n\t?cl_double(X1), ?cl_double(X2), ?cl_double(X3), ?cl_double(X4),\n\t?cl_double(X5), ?cl_double(X6), ?cl_double(X7), ?cl_double(X8)).\n-define(cl_double16(X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14,X15,X16), \n\t?cl_double(X1), ?cl_double(X2), ?cl_double(X3), ?cl_double(X4),\n\t?cl_double(X5), ?cl_double(X6), ?cl_double(X7), ?cl_double(X8),\n\t?cl_double(X9), ?cl_double(X10), ?cl_double(X11), ?cl_double(X12),\n\t?cl_double(X13), ?cl_double(X14), ?cl_double(X15), ?cl_double(X16)).\n\n%% @type cl_platform_id() = { {'object', 1, non_neg_integer() } }\n%% @type cl_device_id()   = { {'object', 2, non_neg_integer() } }\n%% @type cl_context()     = { {'object', 3, non_neg_integer() } }\n%% @type cl_queue()       = { {'object', 4, non_neg_integer() } }\n%% @type cl_mem()         = { {'object', 5, non_neg_integer() } }\n%% @type cl_sampler()     = { {'object', 6, non_neg_integer() } }\n%% @type cl_program()     = { {'object', 7, non_neg_integer() } }\n%% @type cl_kernel()      = { {'object', 8, non_neg_integer() } }\n%% @type cl_event()       = { {'object', 9, non_neg_integer() } }\n\n-type cl_platform_id() ::  {Type::atom(), 1, non_neg_integer() } .\n-type cl_device_id()   ::  {Type::atom(), 2, non_neg_integer() } .\n-type cl_context()     ::  {Type::atom(), 3, non_neg_integer() } .\n-type cl_queue()       ::  {Type::atom(), 4, non_neg_integer() } .\n-type cl_mem()         ::  {Type::atom(), 5, non_neg_integer() } .\n-type cl_sampler()     ::  {Type::atom(), 6, non_neg_integer() } .\n-type cl_program()     ::  {Type::atom(), 7, non_neg_integer() } .\n-type cl_kernel()      ::  {Type::atom(), 8, non_neg_integer() } .\n-type cl_event()       ::  {Type::atom(), 9, non_neg_integer() } .\n\n%% @type cl_error()  =  {\n%%     'device_not_found' |\n%%     'device_not_available' |\n%%     'compiler_not_available' 
|\n%%     'mem_object_allocation_failure' |\n%%     'out_of_resources' |\n%%     'out_of_host_memory' |\n%%     'profiling_info_not_available' |\n%%     'mem_copy_overlap' |\n%%     'image_format_mismatch' |\n%%     'image_format_not_supported' |\n%%     'build_program_failure' |\n%%     'map_failure' |\n%%     'invalid_value' |\n%%     'invalid_device type' |\n%%     'invalid_platform' |\n%%     'invalid_device' |\n%%     'invalid_context' |\n%%     'invalid_queue_properties' |\n%%     'invalid_command_queue' |\n%%     'invalid_host_ptr' |\n%%     'invalid_mem_object' |\n%%     'invalid_image_format_descriptor' |\n%%     'invalid_image_size' |\n%%     'invalid_sampler' |\n%%     'invalid_binary' |\n%%     'invalid_build_options' |\n%%     'invalid_program' |\n%%     'invalid_program_executable' |\n%%     'invalid_kernel_name' |\n%%     'invalid_kernel_definition' |\n%%     'invalid_kernel' |\n%%     'invalid_arg_index' |\n%%     'invalid_arg_value' |\n%%     'invalid_arg_size' |\n%%     'invalid_kernel_args' |\n%%     'invalid_work_dimension' |\n%%     'invalid_work_group_size' |\n%%     'invalid_work_item size' |\n%%     'invalid_global_offset' |\n%%     'invalid_event_wait_list' |\n%%     'invalid_event' |\n%%     'invalid_operation' |\n%%     'invalid_gl_object' |\n%%     'invalid_buffer_size' |\n%%     'invalid_mip_level' |\n%%     'unknown' }.\n\n-type cl_error()  ::  \n    'device_not_found' |\n    'device_not_available' |\n    'compiler_not_available' |\n    'mem_object_allocation_failure' |\n    'out_of_resources' |\n    'out_of_host_memory' |\n    'profiling_info_not_available' |\n    'mem_copy_overlap' |\n    'image_format_mismatch' |\n    'image_format_not_supported' |\n    'build_program_failure' |\n    'map_failure' |\n    'invalid_value' |\n    'invalid_device type' |\n    'invalid_platform' |\n    'invalid_device' |\n    'invalid_context' |\n    'invalid_queue_properties' |\n    'invalid_command_queue' |\n    'invalid_host_ptr' |\n    
'invalid_mem_object' |\n    'invalid_image_format_descriptor' |\n    'invalid_image_size' |\n    'invalid_sampler' |\n    'invalid_binary' |\n    'invalid_build_options' |\n    'invalid_program' |\n    'invalid_program_executable' |\n    'invalid_kernel_name' |\n    'invalid_kernel_definition' |\n    'invalid_kernel' |\n    'invalid_arg_index' |\n    'invalid_arg_value' |\n    'invalid_arg_size' |\n    'invalid_kernel_args' |\n    'invalid_work_dimension' |\n    'invalid_work_group_size' |\n    'invalid_work_item size' |\n    'invalid_global_offset' |\n    'invalid_event_wait_list' |\n    'invalid_event' |\n    'invalid_operation' |\n    'invalid_gl_object' |\n    'invalid_buffer_size' |\n    'invalid_mip_level' |\n    'unknown' .\n\n-define(cl_platform_id(X),   ?cl_pointer(X)).\n-define(cl_device_id(X),     ?cl_pointer(X)).\n-define(cl_context(X),       ?cl_pointer(X)).\n-define(cl_command_queue(X), ?cl_pointer(X)).\n-define(cl_mem(X),           ?cl_pointer(X)).\n-define(cl_program(X),       ?cl_pointer(X)).\n-define(cl_kernel(X),        ?cl_pointer(X)).\n-define(cl_event(X),         ?cl_pointer(X)).\n-define(cl_sampler(X),       ?cl_pointer(X)).\n\n-define(cl_bool(X),         ?cl_uint(X)).\n-define(cl_bitfield(X),     ?cl_ulong(X)).\n-define(cl_device_type(X),  ?cl_bitfield(X)).\n-define(cl_platform_info(X),  ?cl_uint(X)).\n-define(cl_device_info(X),  ?cl_uint(X)).\n-define(cl_device_address_info(X),  ?cl_bitfield(X)).\n-define(cl_device_fp_config(X),  ?cl_bitfield(X)).\n-define(cl_device_mem_cache_type(X),  ?cl_uint(X)).\n-define(cl_device_local_mem_type(X),  ?cl_uint(X)).\n-define(cl_device_exec_capabilities(X),  ?cl_bitfield(X)).\n-define(cl_command_queue_properties(X),  ?cl_bitfield(X)).\n\n%% -define(cl_context_properties(X),  ?intptr_t(X)).\n-define(cl_context_info(X),        ?cl_uint(X)).\n-define(cl_command_queue_info(X),  ?cl_uint(X)).\n-define(cl_channel_order(X),       ?cl_uint(X)).\n-define(cl_channel_type(X),        
?cl_uint(X)).\n-define(cl_mem_flags(X),           ?cl_bitfield(X)).\n-define(cl_mem_object_type(X),     ?cl_uint(X)).\n-define(cl_mem_info(X),            ?cl_uint(X)).\n-define(cl_image_info(X),          ?cl_uint(X)).\n-define(cl_addressing_mode(X),     ?cl_uint(X)).\n-define(cl_filter_mode(X),         ?cl_uint(X)).\n-define(cl_sampler_info(X),        ?cl_uint(X)).\n-define(cl_map_flags(X),           ?cl_bitfield(X)).\n-define(cl_program_info(X),        ?cl_uint(X)).\n-define(cl_program_build_info(X),  ?cl_uint(X)).\n-define(cl_build_status(X),        ?cl_int(X)).\n-define(cl_kernel_info(X),         ?cl_uint(X)).\n-define(cl_kernel_work_group_info(X),  ?cl_uint(X)).\n-define(cl_event_info(X),          ?cl_uint(X)).\n-define(cl_command_type(X),        ?cl_uint(X)).\n-define(cl_profiling_info(X),      ?cl_uint(X)).\n\n-define(CL_CHAR_BIT,        8).\n-define(CL_SCHAR_MAX,       127).\n-define(CL_SCHAR_MIN,       (-127-1)).\n-define(CL_CHAR_MAX,        ?CL_SCHAR_MAX).\n-define(CL_CHAR_MIN,        ?CL_SCHAR_MIN).\n-define(CL_UCHAR_MAX,       255).\n-define(CL_SHRT_MAX,        32767).\n-define(CL_SHRT_MIN,        (-32767-1)).\n-define(CL_USHRT_MAX,       65535).\n-define(CL_INT_MAX,         2147483647).\n-define(CL_INT_MIN,         (-2147483647-1)).\n-define(CL_UINT_MAX,        16#ffffffff).\n-define(CL_LONG_MAX,        16#7FFFFFFFFFFFFFFF).\n-define(CL_LONG_MIN,        (-16#7FFFFFFFFFFFFFFF-1)).\n-define(CL_ULONG_MAX,       16#FFFFFFFFFFFFFFFF).\n\n-define(CL_FLT_DIG,          6).\n-define(CL_FLT_MANT_DIG,     24).\n-define(CL_FLT_MAX_10_EXP,   38).\n-define(CL_FLT_MAX_EXP,      128).\n-define(CL_FLT_MIN_10_EXP,   -37).\n-define(CL_FLT_MIN_EXP,      -125).\n-define(CL_FLT_RADIX,        2).\n-define(CL_FLT_MAX,          3.40282347e+38).\n-define(CL_FLT_MIN,          1.17549435e-38).\n-define(CL_FLT_EPSILON,      1.19209290e-07).\n\n-define(CL_DBL_DIG,          15).\n-define(CL_DBL_MANT_DIG,     53).\n-define(CL_DBL_MAX_10_EXP,   308).\n-define(CL_DBL_MAX_EXP,      
1024).\n-define(CL_DBL_MIN_10_EXP,   -307).\n-define(CL_DBL_MIN_EXP,      -1021).\n-define(CL_DBL_RADIX,        2).\n-define(CL_DBL_MAX,          1.7976931348623157e+308).\n-define(CL_DBL_MIN,          2.2250738585072014e-308).\n-define(CL_DBL_EPSILON,      2.2204460492503131e-16).\n\n-type cl_channel_order() :: \n\tr | a | rg | ra | rgb | rgba | rgba | bgra | argb |\n\tintensity | luminance | rx | rgx | rgbx |\n\t%% 1.2\n\tdepth | depth_stencil.\n\t\n-type cl_channel_type() :: \n\tsnorm_int8 | snorm_int16 | unorm_int8 | unorm_int16 |\n\tunorm_short_565 | unorm_short_555 | unorm_int_101010 |\n\tsigned_int8 | signed_int16 | signed_int32 | unsigned_int8 |\n\tunsigned_int16 | unsigned_int32 | half_float | float |\n\t%% 1.2 \n\tunorm_int24.\n\n-type cl_mem_object_type() ::\n\tbuffer | image2d | image3d |\n\t%% 1.2\n\timage2d_array | image1d | image1d_array | image1d_buffer.\n\n-record(cl_image_format,\n\t{\n\t  cl_channel_order :: cl_channel_order(),\n\t  cl_channel_type  :: cl_channel_type()\n\t}).\n\n%% 1.2 \n\n-record(cl_image_desc,\n\t{\n\t  image_type  :: cl_mem_object_type(),\n\t  image_width :: non_neg_integer(),\n\t  image_height :: non_neg_integer(),\n\t  image_depth  :: non_neg_integer(),\n\t  image_array_size :: non_neg_integer(),\n\t  image_row_pitch ::  non_neg_integer(),\n\t  image_slice_pitch = 1 ::  non_neg_integer(),\n\t  num_mip_levels  = 0 ::  non_neg_integer(),\n\t  num_samples  = 0 ::  non_neg_integer(),\n\t  buffer :: cl_mem() %% when CL_MEM_OBJECT_IMAGE1D_BUFFER\n\t}).\n\n%% cl platform & default context\n-record(cl,\n\t{\n\t  platform,  %% one platform !\n\t  devices,   %% devices selected\n\t  context    %% context for devices\n\t }).\n\n-endif.\n\n\n"
  },
  {
    "path": "rebar.config",
    "content": "%% -*- erlang -*-\n%% Config file for cl-application\n{deps, []}.\n{erl_opts, [debug_info, fail_on_warning]}.\n\n{provider_hooks, [{post, [{ct, edoc}, {ct, dialyzer}]}]}.\n\n{pre_hooks,\n [{\"(linux|darwin|solaris|win32)\", compile, \"make -C c_src\"},\n  {\"(freebsd)\", compile, \"gmake -C c_src\"}\n ]}.\n\n%% Assumes bash (even on windows)\n{post_hooks,\n [%% Temporary hack for equal usage on rebar and rebar3\n  {\"(linux|darwin|solaris|freebsd)\", compile,\n   \"cp _build/default/lib/cl/ebin/* ebin 2> /dev/null | :\"},\n  {\"win32\", compile,\n   \"xcopy _build\\\\default\\\\lib\\\\cl\\\\ebin\\\\*.* ebin\\\\ /c /q /i /y | echo ok\"},\n  %%\n  {\"\", clean, \"rm -f test/*.beam\"},\n  {\"\", clean, \"rm -rf logs\"},\n  {\"\", clean, \"rm -rf doc/*.html\"},\n  {\"\", clean, \"rm -rf ebin/*\"},\n  {\"(linux|darwin|solaris|win32)\", clean, \"make -C c_src clean\"},\n  {\"(freebsd)\", clean, \"gmake -C c_src clean\"}\n ]}.\n\n%% Make old-rebar avoid building cl_drv.so\n{port_specs, [{\"priv/cl_nif.so\", []}]}.\n"
  },
  {
    "path": "src/.gitignore",
    "content": ".*.d\n"
  },
  {
    "path": "src/Makefile",
    "content": "#@BEGIN-ERL_SRC-DEFAULT-RULES@\nERLC=\"$(shell which erlc)\"\nERLC_FLAGS=-MMD -MP -MF .$<.d -I ../.. +debug_info\nYRL_SRC=$(wildcard *.yrl)\nXRL_SRC=$(wildcard *.xrl)\nERL_SOURCES=$(wildcard *.erl) $(YRL_SRC:%.yrl=%.erl) $(XRL_SRC:%.xrl=%.erl)\nERL_OBJECTS=$(ERL_SOURCES:%.erl=../ebin/%.beam)\nALL_OBJECTS=$(ERL_OBJECTS)\n\n.PRECIOUS: $(YRL_SRC:%.yrl=%.erl) $(XRL_SRC:%.xrl=%.erl)\n\nall: $(ALL_OBJECTS)\n\nclean:\n\trm -f $(ALL_OBJECTS) *.core .*.d\n\n../ebin/%.beam: %.erl\n\t$(ERLC) $(ERLC_FLAGS) -o ../ebin $<\n\n%.erl: %.yrl\n\t$(ERLC) $<\n\n%.erl: %.xrl\n\t$(ERLC) $<\n\n.%.d: ;\n\n-include .*.d\n#@END-ERL_SRC-DEFAULT-RULES@\n"
  },
  {
    "path": "src/cl.app.src",
    "content": "{application, cl,\n [{description, \"OpenCL binding for Erlang\"},\n  {vsn, \"1.2.4\"},\n  {modules, [cl,cl10,cl11,cl12,cl13,clu]},\n  {env, []},\n  {applications,[kernel,stdlib]},\n  {maintainers, [\"Tony Rogvall\", \"Dan Gudmundsson\"]},\n  {licenses, [\"BSD\"]},\n  {links, [{\"Github\", \"https://github.com/tonyrog/cl\"}]},\n  %% Arrg hex auto pick up derivates, specify files instead\n  {files, [\"rebar.config\", \"README\", \"COPYRIGHT\",\n\t   \"src\", \"include\",\n\t   \"c_src/*.[ch]\", \"c_src/Makefile\"\n\t  ]}\n  ]}.\n"
  },
  {
    "path": "src/cl.erl",
    "content": "%%%---- BEGIN COPYRIGHT -------------------------------------------------------\n%%%\n%%% Copyright (C) 2007 - 2012, Rogvall Invest AB, <tony@rogvall.se>\n%%%\n%%% This software is licensed as described in the file COPYRIGHT, which\n%%% you should have received as part of this distribution. The terms\n%%% are also available at http://www.rogvall.se/docs/copyright.txt.\n%%%\n%%% You may opt to use, copy, modify, merge, publish, distribute and/or sell\n%%% copies of the Software, and permit persons to whom the Software is\n%%% furnished to do so, under the terms of the COPYRIGHT file.\n%%%\n%%% This software is distributed on an \"AS IS\" basis, WITHOUT WARRANTY OF ANY\n%%% KIND, either express or implied.\n%%%\n%%%---- END COPYRIGHT ---------------------------------------------------------\n%%% File    : cl.erl\n%%% Author  : Tony Rogvall <tony@rogvall.se>\n%%% Description : Erlang OpenCL  interface\n%%% Created : 25 Oct 2009 by Tony Rogvall <tony@rogvall.se>\n\n%% @doc The erlang api for <a href=\"http://www.khronos.org/opencl/\">OpenCL</a>.\n%%\n%% OpenCL (Open Computing Language) is an open royalty-free standard\n%% for general purpose parallel programming across CPUs, GPUs and\n%% other processors, giving software developers portable and efficient\n%% access to the power of these heterogeneous processing platforms.\n%%\n%% OpenCL supports a wide range of applications, ranging from embedded\n%% and consumer software to HPC solutions, through a low-level,\n%% high-performance, portable abstraction. By creating an efficient,\n%% close-to-the-metal programming interface, OpenCL will form the\n%% foundation layer of a parallel computing ecosystem of\n%% platform-independent tools, middleware and applications.\n%%\n%% OpenCL consists of an API for coordinating parallel computation\n%% across heterogeneous processors; and a cross-platform programming\n%% language with a well-specified computation environment. 
The OpenCL\n%% standard:\n%%\n%% <li> Supports both data- and task-based parallel programming models</li>\n%% <li> Utilizes a subset of ISO C99 with extensions for parallelism </li>\n%% <li> Defines consistent numerical requirements based on IEEE 754</li>\n%% <li> Defines a configuration profile for handheld and embedded devices</li>\n%% <li> Efficiently interoperates with OpenGL, OpenGL ES, and other graphics APIs</li>\n%%\n%% The specification is divided into a core specification that any\n%% OpenCL compliant implementation must support; a handheld/embedded\n%% profile which relaxes the OpenCL compliance requirements for\n%% handheld and embedded devices; and a set of optional extensions\n%% that are likely to move into the core specification in later\n%% revisions of the OpenCL specification.\n%%\n%% The documentation is re-used with the following copyright:\n%%\n%% Copyright © 2007-2009 The Khronos Group Inc. Permission is hereby\n%% granted, free of charge, to any person obtaining a copy of this\n%% software and/or associated documentation files (the \"Materials\"),\n%% to deal in the Materials without restriction, including without\n%% limitation the rights to use, copy, modify, merge, publish,\n%% distribute, sublicense, and/or sell copies of the Materials, and to\n%% permit persons to whom the Materials are furnished to do so,\n%% subject to the condition that this copyright notice and permission\n%% notice shall be included in all copies or substantial portions of\n%% the Materials.\n%%\n%% @headerfile \"../include/cl.hrl\"\n%%\n-module(cl).\n\n-on_load(init/0).\n\n-export([start/0, start/1, stop/0]).\n-export([noop/0, noop_/0, dirty_noop/0]).\n-export([versions/0]).\n%% Platform\n-export([get_platform_ids/0]).\n-export([platform_info/0]).\n-export([get_platform_info/1,get_platform_info/2]).\n%% Devices\n-export([get_device_ids/0, 
get_device_ids/2]).\n-export([create_sub_devices/2]).\n-export([release_device/1]).\n-export([retain_device/1]).\n-export([device_info/0]).\n-export([device_info_10/1]).\n-export([device_info_11/1]).\n-export([device_info_12/1]).\n-export([get_device_info/1,get_device_info/2]).\n%% Context\n-export([create_context/1]).\n-export([create_context_from_type/1]).\n-export([release_context/1]).\n-export([retain_context/1]).\n-export([context_info/0]).\n-export([get_context_info/1,get_context_info/2]).\n%% Command queue\n-export([create_queue/3]).\n-export([set_queue_property/3]).\n-export([release_queue/1]).\n-export([retain_queue/1]).\n-export([queue_info/0]).\n-export([get_queue_info/1,get_queue_info/2]).\n%% Memory object\n-export([create_buffer/3, create_buffer/4]).\n-export([create_sub_buffer/4]).\n-export([release_mem_object/1]).\n-export([retain_mem_object/1]).\n-export([mem_object_info/0]).\n-export([get_mem_object_info/1,get_mem_object_info/2]).\n-export([image_info/0]).\n-export([get_image_info/1,get_image_info/2]).\n-export([get_supported_image_formats/3]).\n-export([create_image/5]).\n-export([create_image2d/7]).\n-export([create_image3d/9]).\n%% pipe\n-export([create_pipe/4]).\n%% Sampler \n-export([create_sampler/4]).\n-export([release_sampler/1]).\n-export([retain_sampler/1]).\n-export([sampler_info/0]).\n-export([get_sampler_info/1,get_sampler_info/2]).\n%% Program\n-export([create_program_with_source/2]).\n-export([create_program_with_binary/3]).\n-export([create_program_with_builtin_kernels/3]).\n-export([create_program_with_il/2]). 
%% 2.1!\n-export([release_program/1]).\n-export([retain_program/1]).\n-export([build_program/3, async_build_program/3]).\n-export([unload_compiler/0]).\n-export([unload_platform_compiler/1]).\n-export([compile_program/5, async_compile_program/5]).\n-export([link_program/4, async_link_program/4]).\n-export([program_info/0]).\n-export([get_program_info/1,get_program_info/2]).\n-export([program_build_info/0]).\n-export([get_program_build_info/2,get_program_build_info/3]).\n%% Kernel\n-export([create_kernel/2]).\n-export([create_kernels_in_program/1]).\n-export([set_kernel_arg/3]).\n-export([set_kernel_arg_size/3]).\n-export([release_kernel/1]).\n-export([retain_kernel/1]).\n-export([kernel_info/0]).\n-export([get_kernel_info/1,get_kernel_info/2]).\n-export([kernel_workgroup_info/0]).\n-export([get_kernel_workgroup_info/2,get_kernel_workgroup_info/3]).\n-export([kernel_arg_info/0]).\n-export([get_kernel_arg_info/1, get_kernel_arg_info/2,get_kernel_arg_info/3]).\n%% Events\n-export([enqueue_task/3, 
enqueue_task/4]).\n-export([nowait_enqueue_task/3]).\n-export([enqueue_nd_range_kernel/5]).\n-export([enqueue_nd_range_kernel/6]).\n-export([nowait_enqueue_nd_range_kernel/5]).\n-export([enqueue_marker/1]).\n-export([enqueue_barrier/1]).\n-export([enqueue_marker_with_wait_list/2]).\n-export([enqueue_barrier_with_wait_list/2]).\n-export([enqueue_wait_for_events/2]).\n-export([enqueue_read_buffer/5]).\n-export([enqueue_read_buffer_rect/10]).\n-export([enqueue_write_buffer/6]).\n-export([enqueue_write_buffer/7]).\n-export([nowait_enqueue_write_buffer/6]).\n-export([enqueue_fill_buffer/6]).\n-export([enqueue_write_buffer_rect/11]).\n-export([enqueue_read_image/7]).\n-export([enqueue_write_image/8]).\n-export([enqueue_write_image/9]).\n-export([nowait_enqueue_write_image/8]).\n-export([enqueue_copy_buffer/7]).\n-export([enqueue_copy_buffer_rect/11]).\n-export([enqueue_copy_image/7]).\n-export([enqueue_fill_image/6]).\n-export([enqueue_copy_image_to_buffer/7]).\n-export([enqueue_copy_buffer_to_image/7]).\n-export([enqueue_map_buffer/6]).\n-export([enqueue_map_image/6]).\n-export([enqueue_unmap_mem_object/3]).\n-export([enqueue_migrate_mem_objects/4]).\n-export([release_event/1]).\n-export([retain_event/1]).\n-export([event_info/0]).\n-export([get_event_info/1, get_event_info/2]).\n-export([event_profiling_info/0]).\n-export([get_event_profiling_info/1, get_event_profiling_info/2]).\n-export([wait/1, wait/2]).\n-export([wait_for_events/1]).\n\n-export([async_flush/1, flush/1]).\n-export([async_finish/1, finish/1]).\n-export([async_wait_for_event/1, wait_for_event/1]).\n\n-import(lists, [map/2, reverse/1]).\n\n-include(\"../include/cl.hrl\").\n\n-define(is_platform(X), element(1,X) =:= platform_t).\n-define(is_device(X), element(1,X) =:= device_t).\n-define(is_context(X), element(1,X) =:= context_t).\n-define(is_queue(X), element(1,X) =:= command_queue_t).\n-define(is_mem(X), element(1,X) =:= mem_t).\n-define(is_sampler(X), element(1,X) =:= 
sampler_t).\n-define(is_program(X), element(1,X) =:= program_t).\n-define(is_kernel(X), element(1,X) =:= kernel_t).\n-define(is_event(X), element(1,X) =:= event_t).\n\n-ifdef(debug).\n-define(DBG(F,A), io:format((F),(A))).\n-else.\n-define(DBG(F,A), ok).\n-endif.\n\n-define(nif_stub,nif_stub_error(?LINE)).\nnif_stub_error(Line) ->\n    erlang:nif_error({nif_not_loaded,module,?MODULE,line,Line}).\n\ninit() ->\n    Nif = filename:join([code:priv_dir(cl), \"cl_nif\"]),\n    ?DBG(\"Loading: ~s\\n\", [Nif]),\n    erlang:load_nif(Nif, 0).\n\n%%\n%% @type start_arg() = { {'debug',boolean()} }\n%%\n-type start_arg() ::  {'debug',boolean()} .\n\n%%\n%% @spec start([start_arg()]) -> 'ok' | {'error', term()}\n%%\n%% @doc Start the OpenCL application\n%% \n-spec start(Args::[start_arg()]) -> 'ok' | {'error', term()}.\n\nstart(_Args) ->\n    ok.\n\n%%\n%% @spec start() -> 'ok' | {'error', term()}\n%%\n%% @doc Start the OpenCL application\n%%\n%% @equiv start([])\n%%\n-spec start() -> 'ok' | {'error', term()}.\n\nstart() -> \n    start([]).\n\n%%\n%% @spec stop() -> 'ok' | {'error', term()}\n%%\n%% @doc Stop the OpenCL application\n%%\n%% @equiv application:stop(cl)\n%%\n-spec stop() -> 'ok' | {'error', term()}.\n\nstop()  -> \n    ok.\n\n%%\n%% @spec noop() -> 'ok' | {'error', cl_error()}\n%%\n%% @doc Run a no operation towards the NIF object. This call can be used\n%% to measure the call overhead to the NIF object.\n%%\n-spec noop() -> 'ok' | {'error', cl_error()}.\n\nnoop() ->\n    ?nif_stub.\n\nnoop_() ->  %% dynamic dirty noop\n    ?nif_stub.\n\ndirty_noop() ->  %% dirty noop\n    ?nif_stub.\n\n%%\n%% @spec versions() -> [{Major::integer(),Minor::integer()}]\n%%\n%% @doc Return the list of version tuples reported by the NIF object. 
Each entry has the\n%% form {Major,Minor}; device_info/0 folds over this list to select info keys.\n%%\n-spec versions() -> [{Major::integer(),Minor::integer()}].\n\nversions() ->\n    ?nif_stub.\n\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n%% Platform\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n\n%%\n%% @type cl_platform_info_key() =\n%%    'profile' | 'name' | 'vendor' | 'extensions'.\n\n-type cl_platform_info_key() ::\n\t'profile' | 'name' | 'vendor' | 'extensions'.\n%%\n%% @type cl_platform_info() =\n%%      {'profile',string()} |\n%%      {'version', string()} |\n%%      {'name',string()} |\n%%      {'vendor',string()} |\n%%      {'extensions',string()}.\n\n-type cl_platform_info() ::\n\t{'profile',string()} |\n\t{'version',string()} |\n\t{'name',string()} |\n\t{'vendor',string()} |\n\t{'extensions',string()}.\n\n%%\n%% @spec get_platform_ids() ->\n%%    {'ok',[cl_platform_id()]} | {'error', cl_error()}\n%% @doc Obtain the list of platforms available.\n-spec get_platform_ids() ->\n    {'ok',[cl_platform_id()]} | {'error', cl_error()}.\n    \nget_platform_ids() ->\n    ?nif_stub.\n%%\n%% @spec platform_info() ->\n%%    [cl_platform_info_key()]\n%% @doc Returns a list of the possible platform info keys.\n-spec platform_info() ->\n    [cl_platform_info_key()].\n\nplatform_info() ->\n    [profile,\n     version,\n     name,\n     vendor,\n     extensions].\n\n%%\n%% @spec get_platform_info(Platform :: cl_platform_id(), \n%%\t\t\tInfo :: cl_platform_info_key()) ->\n%%    {'ok',term()} | {'error', cl_error()}\n%% @doc Get specific information about the OpenCL platform.\n%% <dl>\n%%\n%% <dt>name</dt>     <dd>Platform name string.</dd>\n%%\n%% <dt>vendor</dt>   <dd>Platform vendor string.</dd>\n%%\n%% <dt>profile</dt>  \n%%        <dd> OpenCL profile string. Returns the profile name\n%%        supported by the implementation. 
The profile name returned\n%%        can be one of the following strings:\n%%\n%%        FULL_PROFILE - if the implementation supports the OpenCL\n%%        specification (functionality defined as part of the core\n%%        specification and does not require any extensions to be supported).\n%%\n%%        EMBEDDED_PROFILE - if the implementation supports the OpenCL\n%%        embedded profile. The embedded profile is defined to be a subset for\n%%        each version of OpenCL.</dd>\n%%\n%% <dt>version</dt>   \n%%       <dd>OpenCL version string. Returns the OpenCL version supported by the implementation.</dd>\n%%\n%% <dt>extensions</dt> <dd>Returns a space-separated list of extension\n%% names (the extension names themselves do not contain any spaces)\n%% supported by the platform. Extensions defined here must be\n%% supported by all devices associated with this platform. </dd> \n%%</dl>\n-spec get_platform_info(Platform :: cl_platform_id(), \n\t\t\tInfo :: cl_platform_info_key()) ->\n    {'ok',term()} | {'error', cl_error()}.\n\nget_platform_info(_Platform, _Info) ->\n    ?nif_stub.\n\n%%\n%% @spec get_platform_info(Platform::cl_platform_id()) ->\n%%     {'ok', [cl_platform_info()]} | {'error', cl_error()}\n%% @doc Get all information about the OpenCL platform.\n%% @see get_platform_info/2\n-spec get_platform_info(Platform::cl_platform_id()) ->\n    {'ok', [cl_platform_info()]} | {'error', cl_error()}.\n\nget_platform_info(Platform) when ?is_platform(Platform) ->\n    get_info_list(Platform, platform_info(), fun get_platform_info/2).\n\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n%% Devices\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n\n%%\n%% @type cl_device_type() =\n%%   {'gpu' | 'cpu' | 'accelerator' | 'all' | 'default' }\n%%\n-type cl_device_type() :: 'gpu' | 'cpu' | 'accelerator' | 'all' | 'default'.\n%%\n%%\n%% @type cl_device_types() = {cl_device_type() | 
[cl_device_type()]}\n%%\n-type cl_device_types() :: cl_device_type() | [cl_device_type()].\n\n%%\n%%\n%% @type cl_device_info_key() = { 'type' | 'vendor_id' | 'max_compute_units' |\n%%  'max_work_item_dimensions' | 'max_work_group_size' |\n%%  'max_work_item_sizes' |\n%%  'preferred_vector_width_char' | 'preferred_vector_width_short' |\n%%  'preferred_vector_width_int' | 'preferred_vector_width_long' |\n%%  'preferred_vector_width_float' | 'preferred_vector_width_double' |\n%%  'max_clock_frequency' | 'address_bits' | 'max_read_image_args' |\n%%  'max_write_image_args' | 'max_mem_alloc_size' | \n%%  'image2d_max_width' | 'image2d_max_height' | 'image3d_max_width' |\n%%  'image3d_max_height' | 'image3d_max_depth' | \n%%  'image_support' |\n%%  'max_parameter_size' | 'max_samplers' |\n%%  'mem_base_addr_align' | 'min_data_type_align_size' |\n%%  'single_fp_config' |  'global_mem_cache_type' |\n%%  'global_mem_cacheline_size' | 'global_mem_cache_size' | 'global_mem_size' |\n%%  'max_constant_buffer_size' | 'max_constant_args' |\n%%  'local_mem_type' | 'local_mem_size' | 'error_correction_support' |\n%%  'profiling_timer_resolution' | 'endian_little' | 'available' |\n%%  'compiler_available' | 'execution_capabilities' | 'queue_properties' |\n%%  'name' | 'vendor' | 'driver_version' | 'profile' | 'version' |\n%%  'extensions' | 'platform' }\n%%\n-type cl_device_info_key() :: 'type' | 'vendor_id' | 'max_compute_units' |\n 'max_work_item_dimensions' | 'max_work_group_size' |\n 'max_work_item_sizes' |\n 'preferred_vector_width_char' | 'preferred_vector_width_short' |\n 'preferred_vector_width_int' | 'preferred_vector_width_long' |\n 'preferred_vector_width_float' | 'preferred_vector_width_double' |\n 'max_clock_frequency' | 'address_bits' | 'max_read_image_args' |\n 'max_write_image_args' | 'max_mem_alloc_size' | \n 'image2d_max_width' | 'image2d_max_height' | 'image3d_max_width' |\n 'image3d_max_height' | 'image3d_max_depth' | \n 'image_support' |\n 'max_parameter_size' | 
'max_samplers' |\n 'mem_base_addr_align' | 'min_data_type_align_size' |\n 'single_fp_config' |  'global_mem_cache_type' |\n 'global_mem_cacheline_size' | 'global_mem_cache_size' | 'global_mem_size' |\n 'max_constant_buffer_size' | 'max_constant_args' |\n 'local_mem_type' | 'local_mem_size' | 'error_correction_support' |\n 'profiling_timer_resolution' | 'endian_little' | 'available' |\n 'compiler_available' | 'execution_capabilities' | 'queue_properties' |\n 'name' | 'vendor' | 'driver_version' | 'profile' | 'version' |\n 'extensions' | 'platform'.\n\n%%\n%% @type cl_device_info() = {cl_device_info_key(), term()}\n%% @todo specify all info types\n-type cl_device_info() :: {cl_device_info_key(), term()}.\n\n%%\n%% @spec get_device_ids() -> {'ok',[cl_device_id()]} | {'error',cl_error()}\n%%\n%% @equiv get_device_ids(undefined, all)\n%%\n-spec get_device_ids() -> {'ok',[cl_device_id()]} | {'error',cl_error()}.\n    \nget_device_ids() ->\n    get_device_ids(undefined, all).\n\n%%\n%% @spec get_device_ids(Platform::cl_platform_id(),Type::cl_device_types()) ->\n%%     {'ok',[cl_device_id()]} | {'error',cl_error()}\n%% @doc Obtain the list of devices available on a platform.\n%% <dl> <dt>Platform</dt> <dd>\n%%\n%% Refers to the platform ID returned by <c>get_platform_ids</c> or can be\n%% NULL. If platform is NULL, the behavior is implementation-defined. </dd>\n%% \n%% <dt>Type</dt> <dd>\n%% \n%% A list that identifies the type of OpenCL device. The\n%% device_type can be used to query specific OpenCL devices or all\n%% OpenCL devices available. </dd>\n%%\n%% </dl> \n%%\n%%  get_device_ids/2 may return all or a subset of the actual\n%%  physical devices present in the platform and that match\n%%  device_type.\n%%\n%% The application can query specific capabilities of the OpenCL\n%% device(s) returned by get_device_ids/2. 
This can be used by the\n%% application to determine which device(s) to use.\n%%\n-spec get_device_ids(undefined|cl_platform_id(),Type::cl_device_types()) ->\n    {'ok',[cl_device_id()]} | {'error',cl_error()}.\n\nget_device_ids(_Platform, _Type) ->\n    ?nif_stub.\n\n-spec create_sub_devices(Device::cl_device_id(),\n\t\t\t Property::\n\t\t\t   {equally|non_neg_integer()} |\n\t\t\t   {by_counts,[non_neg_integer()]} |\n\t\t\t   {by_affinity_domain,\n\t\t\t    numa|l4_cache|l3_cache|l2_cache|l1_cache|\n\t\t\t    next_partitionable}) ->\n    {'ok',[cl_device_id()]} | {'error',cl_error()}.\n\ncreate_sub_devices(_Device, _Properties) ->\n    ?nif_stub.\n\n-spec release_device(Device::cl_device_id()) ->\n\t\t\t    'ok' | {'error', cl_error()}.\nrelease_device(_Device) ->\n    ok.\n\n-spec retain_device(Device::cl_device_id()) ->\n\t\t\t   'ok' | {'error', cl_error()}.\nretain_device(_Device) ->\n    ok.\n\n%%\n%% @spec device_info() -> [cl_device_info_key()]\n%% @doc Return a list of possible device info queries.\n%% @see get_device_info/2\n-spec device_info() -> [cl_device_info_key()].\n    \ndevice_info() ->\n    lists:foldl(\n      fun({1,2},Acc) -> device_info_12(Acc);\n\t ({1,1},Acc) -> device_info_11(Acc);\n\t ({1,0},Acc) -> device_info_10(Acc);\n\t ({2,1},Acc) -> device_info_21(Acc);\n\t (_, Acc) -> Acc\n      end, [], versions()).\n\n\t\ndevice_info_10(L) ->\n    [\n     type, \n     vendor_id, \n     max_compute_units,\n     max_work_item_dimensions,\n     max_work_group_size,\n     max_work_item_sizes,\n     preferred_vector_width_char,\n     preferred_vector_width_short,\n     preferred_vector_width_int,\n     preferred_vector_width_long,\n     preferred_vector_width_float,\n     preferred_vector_width_double,\n     max_clock_frequency,\n     address_bits,\n     max_read_image_args,\n     max_write_image_args,\n     max_mem_alloc_size,\n     image2d_max_width,\n     image2d_max_height,\n     image3d_max_width,\n     image3d_max_height,\n     
image3d_max_depth,\n     image_support,\n     max_parameter_size,\n     max_samplers,\n     mem_base_addr_align,\n     min_data_type_align_size,\n     single_fp_config,\n     global_mem_cache_type,\n     global_mem_cacheline_size,\n     global_mem_cache_size,\n     global_mem_size,\n     max_constant_buffer_size,\n     max_constant_args,\n     local_mem_type,\n     local_mem_size,\n     error_correction_support,\n     profiling_timer_resolution,\n     endian_little,\n     available,\n     compiler_available,\n     execution_capabilities,\n     queue_properties,\n     name,\n     vendor,\n     driver_version,\n     profile,\n     version,\n     extensions,\n     platform | L\n    ].\n\ndevice_info_11(L) ->\n    [\n     preferred_vector_width_half,\n     host_unified_memory,\n     native_vector_width_char,\n     native_vector_width_short,\n     native_vector_width_int,\n     native_vector_width_long,\n     native_vector_width_float,\n     native_vector_width_double,\n     native_vector_width_half,\n     opencl_c_version | L\n    ].\n\ndevice_info_12(L) ->\n    [\n     double_fp_config,\n     linker_available,\n     built_in_kernels,\n     image_max_buffer_size,\n     image_max_array_size,\n     parent_device,\n     partition_max_sub_devices,\n     partition_properties,\n     partition_affinity_domain,\n     partition_type,\n     reference_count,\n     preferred_interop_user_sync,\n     printf_buffer_size | L\n%%     image_pitch_alignment,\n%%     image_base_address_alignment\n    ].\n\ndevice_info_21(_L) ->\n    [max_read_write_image_args,\n     il_version].\n\n%%\n%% @spec get_device_info(DevID::cl_device_id(), Info::cl_device_info_key()) ->\n%%   {'ok', term()} | {'error', cl_error()}\n%% @doc Get information about an OpenCL device.\n%% \n%% <dl> <dt>'type' </dt> <dd> <p>The OpenCL device type. 
Currently\n%% supported values are one of or a combination of: CL_DEVICE_TYPE_CPU,\n%% CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_ACCELERATOR, or\n%% CL_DEVICE_TYPE_DEFAULT.</p></dd>\n%%\n%% <dt>'vendor_id'</dt> <dd> <p>A unique device vendor identifier. An\n%% example of a unique device identifier could be the PCIe ID.</p> </dd>\n%%\n%% <dt>'max_compute_units'</dt> <dd> <p>The number of parallel compute\n%% cores on the OpenCL device. The minimum value is 1.</p> </dd>\n%%\n%% <dt>'max_work_item_dimensions'</dt> <dd> <p>Maximum dimensions that\n%% specify the global and local work-item IDs used by the data parallel\n%% execution model. (@see enqueue_nd_range_kernel/5). The\n%% minimum value is 3.</p></dd>\n%%\n%% <dt>'max_work_group_size'</dt> <dd> <p>Maximum number of\n%% work-items in a work-group executing a kernel using the data parallel\n%% execution model. (@see enqueue_nd_range_kernel/5). The minimum value\n%% is 1.</p> </dd> \n%%\n%% <dt>'max_work_item_sizes'</dt> <dd> <p>Maximum number of work-items\n%% that can be specified in each dimension of the work-group to enqueue_nd_range_kernel/5.</p>\n%% <p>Returns <code class=\"varname\">n</code> entries, where <code\n%% class=\"varname\">n</code> is the value returned by the query for\n%% CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS. The minimum value is (1, 1,\n%% 1).</p></dd>\n%%\n%% <dt>'preferred_vector_width_TYPE'</dt> <dd> <p>Preferred native vector\n%% width size for built-in scalar types that can be put into vectors. The\n%% vector width is defined as the number of scalar elements that can be\n%% stored in the vector.</p> <p>If the <c>cl_khr_fp64</c> extension is\n%% not supported, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE must return\n%% 0.</p></dd>\n%%\n%% <dt>'max_clock_frequency'</dt> <dd> <p>Maximum configured clock\n%% frequency of the device in MHz.</p>\n%%\n%% </dd> <dt>'address_bits'</dt> <dd> The default compute device address\n%% space size specified as an unsigned integer value in bits. 
Currently\n%% supported values are 32 or 64 bits. </dd>\n%%\n%% <dt>'max_read_image_args'</dt> <dd> <p>Max number of simultaneous\n%% image objects that can be read by a kernel. The minimum value is 128\n%% if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.</p></dd>\n%%\n%% <dt>'max_write_image_args'</dt> <dd> <p>Max number of\n%% simultaneous image objects that can be written to by a kernel. The\n%% minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.</p> </dd>\n%%\n%% <dt>'max_mem_alloc_size'</dt> <dd> <p>Max size of memory object\n%% allocation in bytes. The minimum value is max (1/4th of\n%% CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024)</p></dd>\n%%\n%% <dt>'image2d_max_width'</dt> <dd> <p>Max width of 2D image in\n%% pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is\n%% CL_TRUE.</p> </dd>\n%%\n%% <dt>'image2d_max_height'</dt> <dd> <p>Max height of 2D image in\n%% pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is\n%% CL_TRUE.</p> </dd>\n%%\n%% <dt>'image3d_max_width'</dt> <dd> <p>Max width of 3D image in\n%% pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is\n%% CL_TRUE.</p> </dd> \n%%\n%% <dt>'image3d_max_height'</dt> <dd> <p>Max height of 3D image in\n%% pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is\n%% CL_TRUE.</p> </dd>\n%%\n%% <dt>'image3d_max_depth'</dt> <dd> <p>Max depth of 3D image in\n%% pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is\n%% CL_TRUE.</p> </dd>\n%%\n%% <dt>'image_support'</dt> <dd> <p>Is CL_TRUE if images are supported by\n%% the OpenCL device and CL_FALSE otherwise.</p> </dd>\n%%\n%% <dt>'max_parameter_size'</dt> <dd> <p>Max size in bytes of the\n%% arguments that can be passed to a kernel. The minimum value is\n%% 256.</p> </dd>\n%%\n%% <dt>'max_samplers'</dt> <dd> <p>Maximum number of samplers that can be\n%% used in a kernel. 
The minimum value is 16 if CL_DEVICE_IMAGE_SUPPORT\n%% is CL_TRUE.</p> </dd>\n%%\n%% <dt>'mem_base_addr_align'</dt> <dd> <p>Describes the alignment in bits\n%% of the base address of any allocated memory object.</p> </dd>\n%%\n%% <dt>'min_data_type_align_size'</dt> <dd> <p>The smallest alignment in\n%% bytes which can be used for any data type.</p> </dd>\n%% <dt>'single_fp_config'</dt> <dd> <p>Describes single precision\n%% floating-point capability of the device. This is a bit-field that\n%% describes one or more of the following values:</p> <p>CL_FP_DENORM -\n%% denorms are supported</p> <p>CL_FP_INF_NAN - INF and quiet NaNs are\n%% supported</p> <p>CL_FP_ROUND_TO_NEAREST - round to nearest even\n%% rounding mode supported</p>\n%% <p>CL_FP_ROUND_TO_ZERO - round to zero rounding mode supported</p>\n%% <p>CL_FP_ROUND_TO_INF - round to +ve and -ve infinity rounding modes supported</p>\n%% <p>CL_FP_FMA - IEEE754-2008 fused multiply-add is supported</p>\n%% <p>The mandated minimum floating-point capability is CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN.</p>\n%% </dd>\n%%\n%% <dt>'global_mem_cache_type'</dt> <dd> <p>Return type:\n%% cl_device_mem_cache_type</p> <p>Type of global memory cache\n%% supported. Valid values are: CL_NONE, CL_READ_ONLY_CACHE, and\n%% CL_READ_WRITE_CACHE.</p> </dd>\n%%\n%% <dt>'global_mem_cacheline_size'</dt> <dd>\n%% <p>Size of global memory cache line in bytes.</p>\n%% </dd>\n%%\n%% <dt>'global_mem_cache_size'</dt> <dd>\n%% <p>Size of global memory cache in bytes.</p>\n%% </dd>\n%%\n%% <dt>'global_mem_size'</dt> <dd>\n%% <p>Size of global device memory in bytes.</p>\n%% </dd>\n%%\n%% <dt>'max_constant_buffer_size'</dt> <dd>\n%% <p>Max size in bytes of a constant buffer allocation. The minimum value is 64 KB.</p></dd>\n%%\n%%  <dt>'max_constant_args'</dt> <dd> <p>Max number of arguments\n%% declared with the <c>__constant</c> qualifier in a kernel. 
The minimum\n%% value is 8.</p> </dd>\n%%\n%% <dt>'local_mem_type'</dt> <dd> <p>Type of local memory\n%% supported. This can be set to CL_LOCAL implying dedicated local memory\n%% storage such as SRAM, or CL_GLOBAL.</p> </dd>\n%%\n%% <dt>'local_mem_size'</dt> <dd> <p>Size of local memory arena in\n%% bytes. The minimum value is 16 KB.</p></dd>\n%%\n%% <dt>'error_correction_support'</dt> <dd> Is CL_TRUE if the device\n%% implements error correction for the memories, caches, registers\n%% etc. in the device. Is CL_FALSE if the device does not implement error\n%% correction. This can be a requirement for certain clients of\n%% OpenCL.</dd>\n%%\n%% <dt>'profiling_timer_resolution'</dt> <dd> <p>Describes the resolution\n%% of device timer. This is measured in nanoseconds.</p> </dd>\n%%\n%% <dt>'endian_little'</dt> <dd> Is CL_TRUE if the OpenCL device is a\n%% little endian device and CL_FALSE otherwise.  </dd>\n%%\n%% <dt>'available'</dt> <dd> Is CL_TRUE if the device is available and\n%% CL_FALSE if the device is not available.  </dd>\n%%\n%% <dt>'compiler_available'</dt> <dd> Is CL_FALSE if the implementation\n%% does not have a compiler available to compile the program source. Is\n%% CL_TRUE if the compiler is available. This can be CL_FALSE for the\n%% embedded platform profile only.  </dd>\n%%\n%% <dt>'execution_capabilities'</dt> <dd> <p>Return type:\n%% cl_device_exec_capabilities</p> <p>Describes the execution\n%% capabilities of the device. This is a bit-field that describes one or\n%% more of the following values:</p> <p>CL_EXEC_KERNEL - The OpenCL\n%% device can execute OpenCL kernels.</p> <p>CL_EXEC_NATIVE_KERNEL - The\n%% OpenCL device can execute native kernels.</p> <p>The mandated minimum\n%% capability is CL_EXEC_KERNEL.</p> </dd>\n%%\n%% <dt>'queue_properties'</dt> <dd> <p>Describes the command-queue\n%% properties supported by the device.  
This is a bit-field that\n%% describes one or more of the following values:</p>\n%% <p>'out_of_order_exec_mode_enable'</p>\n%% <p>'profiling_enable'</p> <p>These properties are described in\n%% the table for create_queue/3 .  The mandated minimum capability is\n%% 'profiling_enable'.</p> </dd>\n%%\n%% <dt>'name'</dt> <dd> <p>Device name string.</p> </dd>\n%%\n%% <dt>'vendor'</dt> <dd><p>Vendor name string.</p></dd>\n%%\n%% <dt>'driver_version'</dt> <dd><p>OpenCL software driver version string</p> </dd>\n%%\n%% <dt>'profile'</dt> <dd> <p>OpenCL profile string. Returns the profile\n%% name supported by the device (see note). The profile name returned can\n%% be one of the following strings:</p>\n%% <p>FULL_PROFILE - if the device supports the OpenCL specification\n%% (functionality defined as part of the core\n%% specification and does not require any extensions\n%% to be supported).</p> <p>EMBEDDED_PROFILE - if\n%% the device supports the OpenCL embedded\n%% profile.</p></dd>\n%%\n%% <dt>'version'</dt> <dd> <p>OpenCL version string.</p> </dd>\n%%\n%% <dt>'extensions' </dt> <dd><p>Returns a space separated list of extension names (the extension\n%% names themselves do not contain any spaces). </p></dd>\n%%\n%% <dt>'platform' </dt> <dd> <p>The platform associated with this device.</p> </dd>\n%%\n%% </dl>\n%%\n%% <c>NOTE</c>: CL_DEVICE_PROFILE: The platform profile returns the profile that is\n%% implemented by the OpenCL framework. If the platform profile\n%% returned is FULL_PROFILE, the OpenCL framework will support devices\n%% that are FULL_PROFILE and may also support devices that are\n%% EMBEDDED_PROFILE. The compiler must be available for all devices\n%% i.e. CL_DEVICE_COMPILER_AVAILABLE is CL_TRUE. 
If the platform\n%% profile returned is EMBEDDED_PROFILE, then devices that are only\n%% EMBEDDED_PROFILE are supported.\n\n-spec get_device_info(Device::cl_device_id(), Info::cl_device_info_key()) ->\n    {'ok', term()} | {'error', cl_error()}.\n\nget_device_info(_Device, _Info) ->\n    ?nif_stub.\n\n%%\n%% @spec get_device_info(Device) ->\n%%    {'ok', [cl_device_info()]} | {'error', cl_error()}\n%% @doc Get all device info.\n%% @see get_device_info/2\n-spec get_device_info(Device::cl_device_id()) ->\n    {'ok', [cl_device_info()]} | {'error', cl_error()}.\n\nget_device_info(Device) ->\n    get_info_list(Device, device_info(), fun get_device_info/2).\n\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n%% Context\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n\n%% @type cl_context_info_key() = {'reference_count' | 'devices' | 'properties'}\n-type cl_context_info_key() :: 'reference_count' | 'devices' | 'properties'.\n\n%% @type cl_context_info() = \n%%  { {'reference_count', cl_uint()},\n%%    {'devices', [cl_device()]},\n%%    {'properties', [cl_int()]} }\n-type cl_context_info() ::\n\t{'reference_count', cl_uint()} |\n\t{'devices', [cl_device_id()]} |\n\t{'properties', [cl_int()]}.\n\n%%\n%% @spec create_context(DeviceList::[cl_device_id()]) ->\n%%    {'ok', cl_context()} | {'error', cl_error()}\n%% @doc Creates an OpenCL context.\n%%\n%% An OpenCL context is created with one or more devices. Contexts are\n%% used by the OpenCL runtime for managing objects such as\n%% command-queues, memory, program and kernel objects and for\n%% executing kernels on one or more devices specified in the context.\n%%\n%% NOTE: create_context/1 and create_context_from_type/1 perform an\n%% implicit retain. This is very helpful for 3rd party libraries,\n%% which typically get a context passed to them by the\n%% application. 
However, it is possible that the application may\n%% delete the context without informing the library. Allowing\n%% functions to attach to (i.e. retain) and release a context solves\n%% the problem of a context being used by a library no longer being\n%% valid.\n\n-spec create_context(DeviceList::[cl_device_id()]) ->\n    {'ok', cl_context()} | {'error', cl_error()}.\n\ncreate_context(_DeviceList) ->\n    ?nif_stub.\n\n%%\n%% @spec create_context_from_type(Type::cl_device_types())->\n%%    {'ok', cl_context()} | {'error', cl_error()}\n%% @doc Create an OpenCL context from a device type that identifies the specific device(s) to use. \n%%\n%% NOTE: \n%% create_context_from_type/1 may return all or a subset of the\n%% actual physical devices present in the platform and that match\n%% device_type.\n%% \n%% create_context/1 and create_context_from_type/1 perform an\n%% implicit retain. This is very helpful for 3rd party libraries,\n%% which typically get a context passed to them by the\n%% application. However, it is possible that the application may\n%% delete the context without informing the library. Allowing\n%% functions to attach to (i.e. retain) and release a context solves\n%% the problem of a context being used by a library no longer being\n%% valid.\n-spec create_context_from_type(Type::cl_device_types())->\n    {'ok', cl_context()} | {'error', cl_error()}.\n\ncreate_context_from_type(Type) ->\n    case get_device_ids(undefined, Type) of\n\t{ok,DeviceList} ->\n\t    create_context(DeviceList);\n\tError ->\n\t    Error\n    end.\n\n%%\n%% @spec release_context(Context::cl_context()) ->\n%%    'ok' | {'error', cl_error()}\n%% @doc Decrement the context reference count. 
\n%% \n%% After the context reference count becomes zero and all the objects\n%% attached to context (such as memory objects, command-queues) are\n%% released, the context is deleted.\n-spec release_context(Context::cl_context()) ->\n    'ok' | {'error', cl_error()}.\n\nrelease_context(Context) when ?is_context(Context) ->\n    ok.\n\n%%\n%% @spec retain_context(Context::cl_context()) ->\n%%     'ok' | {'error', cl_error()}\n%% @doc Increment the context reference count. \n%% @see create_context\n-spec retain_context(Context::cl_context()) ->\n    'ok' | {'error', cl_error()}.\n\nretain_context(Context) when ?is_context(Context) ->\n    ok.\n\n%%\n%% @spec context_info() -> [cl_context_info_key()]\n%% @doc List context info queries.\n-spec context_info() -> [cl_context_info_key()].\n\ncontext_info() ->\n    [\n     reference_count,\n     devices,\n     properties \n    ].\n%%\n%% @spec get_context_info(Context::cl_context(),Info::cl_context_info_key()) ->\n%%   {'ok', term()} | {'error', cl_error()}\n%% @doc  Query information about a context. \n%%\n%% <dl> <dt>reference_count</dt> <dd> Return the context reference\n%% count. The reference count returned should be considered\n%% immediately stale. It is unsuitable for general use in\n%% applications. This feature is provided for identifying memory\n%% leaks. 
</dd>\n%% \n%% <dt>devices</dt> <dd>Return the list of devices in context.</dd>\n%%\n%% <dt>properties</dt> <dd>Return the context properties.</dd>\n%% </dl>\n-spec get_context_info(Context::cl_context(), Info::cl_context_info_key()) ->\n    {'ok', term()} | {'error', cl_error()}.\n\nget_context_info(_Context, _Info) ->\n    ?nif_stub.\n\n\n%% @spec get_context_info(Context::cl_context()) ->\n%%    {'ok', [cl_context_info()]} | {'error', cl_error()}\n%% @doc Get all context info.\n%% @see get_context_info/2\n-spec get_context_info(Context::cl_context()) ->\n    {'ok', [cl_context_info()]} | {'error', cl_error()}.\n\nget_context_info(Context) when ?is_context(Context) ->\n    get_info_list(Context, context_info(), fun get_context_info/2).\n\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n%% Command Queue (Queue)\n%% @type cl_queue_property() = { 'out_of_order_exec_mode_enable' | \n%%\t\t\t         'profiling_enable' }\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n-type cl_queue_property() :: 'out_of_order_exec_mode_enable' | 'profiling_enable'.\n%%\n%% @spec create_queue(Context::cl_context(),Device::cl_device_id(),\n%%                    Properties::[cl_queue_property()]) ->\n%%    {'ok', cl_queue()} | {'error', cl_error()}\n%% @doc Create a command-queue on a specific device.\n%%\n%% <dl> \n%% <dt>'out_of_order_exec_mode_enable'</dt> <dd> Determines\n%% whether the commands queued in the command-queue are executed\n%% in-order or out-of-order. If set, the commands in the command-queue\n%% are executed out-of-order. Otherwise, commands are executed\n%% in-order.</dd>\n%% \n%% <dt>'profiling_enable'</dt> <dd> Enable or disable profiling of\n%% commands in the command-queue. If set, the profiling of commands is\n%% enabled. Otherwise profiling of commands is disabled. 
See\n%% clGetEventProfilingInfo for more information.\n%% </dd>\n%% </dl>\n%% \n%% The OpenCL functions that are submitted to a command-queue are\n%% enqueued in the order the calls are made but can be configured to\n%% execute in-order or out-of-order. The properties argument in\n%% clCreateCommandQueue can be used to specify the execution order.\n%%\n%% If the 'out_of_order_exec_mode_enable' property of a\n%% command-queue is not set, the commands enqueued to a command-queue\n%% execute in order. For example, if an application calls\n%% clEnqueueNDRangeKernel to execute kernel A followed by a\n%% clEnqueueNDRangeKernel to execute kernel B, the application can\n%% assume that kernel A finishes first and then kernel B is\n%% executed. If the memory objects output by kernel A are inputs to\n%% kernel B then kernel B will see the correct data in memory objects\n%% produced by execution of kernel A. If the\n%% 'out_of_order_exec_mode_enable' property of a commandqueue\n%% is set, then there is no guarantee that kernel A will finish before\n%% kernel B starts execution.\n%%\n%% Applications can configure the commands enqueued to a command-queue\n%% to execute out-of-order by setting the\n%% 'out_of_order_exec_mode_enable' property of the\n%% command-queue. This can be specified when the command-queue is\n%% created or can be changed dynamically using\n%% clCreateCommandQueue. In out-of-order execution mode there is no\n%% guarantee that the enqueued commands will finish execution in the\n%% order they were queued. As there is no guarantee that kernels will\n%% be executed in order, i.e. based on when the clEnqueueNDRangeKernel\n%% calls are made within a command-queue, it is therefore possible\n%% that an earlier clEnqueueNDRangeKernel call to execute kernel A\n%% identified by event A may execute and/or finish later than a\n%% clEnqueueNDRangeKernel call to execute kernel B which was called by\n%% the application at a later point in time. 
To guarantee a specific\n%% order of execution of kernels, a wait on a particular event (in\n%% this case event A) can be used. The wait for event A can be\n%% specified in the event_wait_list argument to clEnqueueNDRangeKernel\n%% for kernel B.\n%%\n%% In addition, a wait for events or a barrier command can be enqueued\n%% to the command-queue. The wait for events command ensures that\n%% previously enqueued commands identified by the list of events to\n%% wait for have finished before the next batch of commands is\n%% executed. The barrier command ensures that all previously enqueued\n%% commands in a command-queue have finished execution before the next\n%% batch of commands is executed.\n%%\n%% Similarly, commands to read, write, copy or map memory objects that\n%% are enqueued after clEnqueueNDRangeKernel, clEnqueueTask or\n%% clEnqueueNativeKernel commands are not guaranteed to wait for\n%% kernels scheduled for execution to have completed (if the\n%% 'out_of_order_exec_mode_enable' property is set). 
To ensure\n%% correct ordering of commands, the event object returned by\n%% clEnqueueNDRangeKernel, clEnqueueTask or clEnqueueNativeKernel can\n%% be used to enqueue a wait for event or a barrier command can be\n%% enqueued that must complete before reads or writes to the memory\n%% object(s) occur.\n-spec create_queue(Context::cl_context(),Device::cl_device_id(),\n\t\t   Properties::[cl_queue_property()]) ->\n    {'ok', cl_queue()} | {'error', cl_error()}.\n\ncreate_queue(_Context, _Device, _Properties) ->\n    ?nif_stub.\n\n%%\n%% @spec set_queue_property(Queue::cl_queue(),\n%%                          Properties::[cl_queue_property()],\n%%                          Enable::bool()) ->\n%%    'ok' | {'error', cl_error()}\n%% @doc This function is deprecated and has been removed.\n-spec set_queue_property(_, _, _) -> no_return().\nset_queue_property(_Queue, _Properties, _Enable) ->\n    erlang:error(deprecated).\n\n%%\n%% @spec release_queue(Queue::cl_queue()) ->\n%%    'ok' | {'error', cl_error()}\n%% @doc Decrements the command_queue reference count.\n%%\n%% After the command_queue reference count becomes zero and all\n%% commands queued to command_queue have finished (e.g., kernel\n%% executions, memory object updates, etc.), the command-queue is\n%% deleted.\n-spec release_queue(Queue::cl_queue()) ->\n    'ok' | {'error', cl_error()}.\nrelease_queue(Queue) when ?is_queue(Queue) ->\n    ok.\n\n%%\n%% @spec retain_queue(Queue::cl_queue()) ->\n%%    'ok' | {'error', cl_error()}\n%% @doc Increments the command_queue reference count.\n%%\n%%  create_queue/3 performs an implicit retain. This is very\n%%  helpful for 3rd party libraries, which typically get a\n%%  command-queue passed to them by the application. However, it is\n%%  possible that the application may delete the command-queue without\n%%  informing the library. Allowing functions to attach to\n%%  (i.e. 
retain) and release a command-queue solves the problem of a\n%%  command-queue being used by a library no longer being valid.\n\n-spec retain_queue(Queue::cl_queue()) ->\n    'ok' | {'error', cl_error()}.\n\nretain_queue(Queue) when ?is_queue(Queue) ->\n    ok.\n\n%% @spec queue_info() -> [queue_info_keys()]\n%% @doc Returns the list of possible queue info items.\nqueue_info() ->\n    [\n     context,\n     device,\n     reference_count,\n     properties\n    ].\n\n%% @spec get_queue_info(Queue, Info) -> {ok, term()}\n%% @doc Return the specified queue info\nget_queue_info(_Queue, _Info) ->\n    ?nif_stub.\n\n%% @spec get_queue_info(Queue) -> [queue_info_keys()]\n%% @doc Returns all queue info.\nget_queue_info(Queue) when ?is_queue(Queue) ->\n    get_info_list(Queue, queue_info(), fun get_queue_info/2).\n\n\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n%% Mem\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n\n%%\n%% @type cl_mem_flag() = { 'read_write' | 'write_only' | 'read_only' |\n%%                         'use_host_ptr' | 'alloc_host_ptr' |\n%%                         'copy_host_ptr'}\n%%\n-type cl_mem_flag() :: 'read_write' | 'write_only' | 'read_only' |\n\t\t       'use_host_ptr' | 'alloc_host_ptr' |\n\t\t       'copy_host_ptr'.\n\n%%\n%% @spec create_buffer(Context::cl_context(),Flags::cl_mem_flags(),\n%%                      Size::non_neg_integer()) ->\n%%    {'ok', cl_mem()} | {'error', cl_error()}\n%%\n%% @equiv create_buffer(Context,Flags,Size,<<>>)\n%%    \n-spec create_buffer(Context::cl_context(),Flags::[cl_mem_flag()],\n\t\t    Size::non_neg_integer()) ->\n    {'ok', cl_mem()} | {'error', cl_error()}.\n\ncreate_buffer(Context,Flags,Size) ->\n    create_buffer(Context,Flags,Size,[]).\n\n%%\n%% @spec create_buffer(Context::cl_context(),Flags::[cl_mem_flag()],\n%%                      Size::non_neg_integer(), Data::iolist()) ->\n%%    {'ok', cl_mem()} | {'error', cl_error()}\n%% @doc  
Creates a buffer object. \n%% \n-spec create_buffer(Context::cl_context(),Flags::[cl_mem_flag()],\n\t\t    Size::non_neg_integer(),Data::iodata()) ->\n    {'ok', cl_mem()} | {'error', cl_error()}.\n\ncreate_buffer(_Context,_Flags,_Size,_Data) ->\n    ?nif_stub.\n\n%%\n%% @doc Creates a buffer object\n%%\n-spec create_sub_buffer(Buffer::cl_mem(),Flags::[cl_mem_flag()],\n                         Type::'region', Info::[non_neg_integer()]) ->\n    {'ok', cl_mem()} | {'error', cl_error()}.\n\n%%\ncreate_sub_buffer(_Buffer, _Flags, _Type, _Info) ->\n    ?nif_stub.\n\n%%\n%% @spec release_mem_object(Mem::cl_mem()) ->\n%%    'ok' | {'error', cl_error()}\n%% @doc  Decrements the memory object reference count. \n%%\n%% After the memobj reference count becomes zero and commands queued\n%% for execution on a command-queue(s) that use memobj have finished,\n%% the memory object is deleted.\n-spec release_mem_object(Mem::cl_mem()) ->\n    'ok' | {'error', cl_error()}.\n\nrelease_mem_object(Mem) when ?is_mem(Mem) ->\n    ok.\n\n%%\n%% @spec retain_mem_object(Mem::cl_mem()) ->\n%%    'ok' | {'error', cl_error()}\n%% @doc Increments the memory object reference count. 
\n-spec retain_mem_object(Mem::cl_mem()) ->\n    'ok' | {'error', cl_error()}.\n\nretain_mem_object(Mem) when ?is_mem(Mem) ->\n    ok.\n\n\n-type cl_mem_info_key() :: \n\t'object_type' | 'flags' | 'size' | 'host_ptr' | 'map_count' |\n\t'reference_count' | 'context'.\n\n\n%%\n%% @spec mem_object_info() ->\n%%    [cl_mem_info_keys()]\n%% @doc Returns a list of the possible mem info keys.\n-spec mem_object_info() ->\n    [cl_mem_info_key()].\n\nmem_object_info() ->\n    [\n     object_type,\n     flags,\n     size,\n     host_ptr,\n     map_count,\n     reference_count,\n     context\n    ].\n\n%%\n%% @spec get_mem_object_info(Mem::cl_mem(), InfoType::cl_mem_info_key()) ->\n%%    {'ok', term()} | {'error', cl_error()}\n%%\n%% @doc Used to get <c>InfoType</c> information that is common to all memory objects\n%% (buffer and image objects).\n-spec get_mem_object_info(Mem::cl_mem(), Info::cl_mem_info_key()) ->\n    {'ok', term()} | {'error', cl_error()}.\n\nget_mem_object_info(_Mem, _Info) ->\n    ?nif_stub.\n\n%%\n%% @spec get_mem_object_info(Mem::cl_mem()) ->\n%%    {'ok', term()} | {'error', cl_error()}\n%%\n%% @doc Used to get all information that is common to all memory objects\n%% (buffer and image objects).\nget_mem_object_info(Mem) when ?is_mem(Mem) ->\n    get_info_list(Mem, mem_object_info(), fun get_mem_object_info/2).\n\nimage_info() ->\n    [\n     format,\n     element_size,\n     row_pitch,\n     slice_pitch,\n     width,\n     height,\n     depth\n    ].\n\nget_image_info(_Mem, _Info) ->\n    ?nif_stub.\n\nget_image_info(Mem) when ?is_mem(Mem) ->\n    get_info_list(Mem, image_info(), fun get_image_info/2).\n\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n%% Sample\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n\n%% @type cl_addressing_mode() = {'none'|'clamp_to_edge'|'clamp'|'repeat'}\n%%\n-type cl_addressing_mode() :: 'none'|'clamp_to_edge'|'clamp'|'repeat'.\n\n%% @type cl_filter_mode() 
= {'nearest' | 'linear' }\n-type cl_filter_mode() :: 'nearest' | 'linear'.\n\n%%\n%% @spec create_sampler(Context::cl_context(),Normalized::boolean(),\n%%                      AddressingMode::cl_addressing_mode(),\n%%                      FilterMode::cl_filter_mode()) -> \n%%    {'ok', cl_sampler()} | {'error', cl_error()}\n%% @doc Creates a sampler object. \n%%\n%%  A sampler object describes how to sample an image when the image\n%%  is read in the kernel. The built-in functions to read from an\n%%  image in a kernel take a sampler as an argument. The sampler\n%%  arguments to the image read function can be sampler objects\n%%  created using OpenCL functions and passed as argument values to\n%%  the kernel or can be samplers declared inside a kernel. In this\n%%  section we discuss how sampler objects are created using OpenCL\n%%  functions.\n-spec create_sampler(Context::cl_context(),Normalized::boolean(),\n\t\t     AddressingMode::cl_addressing_mode(),\n\t\t     FilterMode::cl_filter_mode()) -> \n    {'ok', cl_sampler()} | {'error', cl_error()}.\n\ncreate_sampler(_Context, _Normalized, _AddressingMode, _FilterMode) ->\n    ?nif_stub.\n\n%%\n%% @spec release_sampler(Sampler::cl_sampler()) ->\n%%    'ok' | {'error', cl_error()}\n%% @doc Decrements the sampler reference count. \n%%\n%%  The sampler object is deleted after the reference count becomes\n%%  zero and commands queued for execution on a command-queue(s) that\n%%  use sampler have finished.\n-spec release_sampler(Sampler::cl_sampler()) ->\n    'ok' | {'error', cl_error()}.\n\nrelease_sampler(Sampler) when ?is_sampler(Sampler) ->\n    ok.\n\n%%\n%% @spec retain_sampler(Sampler::cl_sampler()) ->\n%%    'ok' | {'error', cl_error()}\n%% @doc Increments the sampler reference count. 
\n-spec retain_sampler(Sampler::cl_sampler()) ->\n    'ok' | {'error', cl_error()}.\n\nretain_sampler(Sampler) when ?is_sampler(Sampler) ->\n    ok.\n\nsampler_info() ->\n    [\n     reference_count,\n     context,\n     normalized_coords,\n     addressing_mode,\n     filter_mode\n    ].\n\n%% @spec get_sampler_info(Sampler::cl_sampler(), InfoType::cl_sampler_info_type()) -> \n%%    {'ok', term()} | {'error', cl_error()}\n%% @doc Returns <c>InfoType</c> information about the sampler object. \nget_sampler_info(_Sampler, _Info) ->\n    ?nif_stub.\n\n\n%% @spec get_sampler_info(Sampler::cl_sampler()) -> {'ok', term()} | {'error', cl_error()}\n%% @doc Returns all information about the sampler object. \n%% @see get_sampler_info/2\nget_sampler_info(Sampler) ->\n    get_info_list(Sampler, sampler_info(), fun get_sampler_info/2).\n\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n%% Program\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n\n%%\n%% @spec create_program_with_source(Context::cl_context(),\n%%                                  Source::iodata()) ->\n%%    {'ok', cl_program()} | {'error', cl_error()}\n%%\n%% @doc Creates a program object for a context, \n%% and loads the source code specified by the text strings in the\n%% strings array into the program object.\n%% \n%%  The devices associated with the program object are the devices associated with context.\n\n%% OpenCL allows applications to create a program object using the\n%% program source or binary and build appropriate program\n%% executables. This allows applications to determine whether they\n%% want to use the pre-built offline binary or load and compile the\n%% program source and use the executable compiled/linked online as the\n%% program executable. This can be very useful as it allows\n%% applications to load and build program executables online on its\n%% first instance for appropriate OpenCL devices in the system. 
These\n%% executables can now be queried and cached by the\n%% application. Future instances of the application launching will no\n%% longer need to compile and build the program executables. The\n%% cached executables can be read and loaded by the application, which\n%% can help significantly reduce the application initialization time.\n\n%% An OpenCL program consists of a set of kernels that are identified\n%% as functions declared with the __kernel qualifier in the program\n%% source. OpenCL programs may also contain auxiliary functions and\n%% constant data that can be used by __kernel functions. The program\n%% executable can be generated online or offline by the OpenCL\n%% compiler for the appropriate target device(s).\n%%\n%% @todo allow iodata and handle multiple binaries in the driver\n%%\n-spec create_program_with_source(Context::cl_context(),\n\t\t\t\t Source::iodata()) ->\n    {'ok', cl_program()} | {'error', cl_error()}.\n\ncreate_program_with_source(_Context, _Source) ->\n    ?nif_stub.\n\n%%\n%% @spec create_program_with_binary(Context::cl_context(),\n%%                                  DeviceList::[cl_device_id()],\n%%                                  BinaryList::[binary()]) ->\n%%    {'ok', cl_program()} | {'error', cl_error()}\n%%\n%% @doc  Creates a program object for a context, and loads specified binary data into the program object. \n%% \n%% OpenCL allows applications to create a program object using the\n%% program source or binary and build appropriate program\n%% executables. This allows applications to determine whether they\n%% want to use the pre-built offline binary or load and compile the\n%% program source and use the executable compiled/linked online as the\n%% program executable. This can be very useful as it allows\n%% applications to load and build program executables online on its\n%% first instance for appropriate OpenCL devices in the system. These\n%% executables can now be queried and cached by the\n%% application. 
Future instances of the application launching will no\n%% longer need to compile and build the program executables. The\n%% cached executables can be read and loaded by the application, which\n%% can help significantly reduce the application initialization time.\n%%\n%%  The binaries and device can be generated by calling:\n%%  <code>\n%%    {ok,P} = cl:create_program_with_source(Context,Source),\n%%    ok = cl:build_program(P, DeviceList, Options),\n%%    {ok,DeviceList} = cl:get_program_info(P, devices),\n%%    {ok,BinaryList} = cl:get_program_info(P, binaries).\n%%  </code>\n%%\n-spec create_program_with_binary(Context::cl_context(),\n\t\t\t\t DeviceList::[cl_device_id()],\n\t\t\t\t BinaryList::[binary()]) ->\n    {'ok', cl_program()} | {'error', cl_error()}.\n\ncreate_program_with_binary(_Context, _DeviceList, _BinaryList) ->\n    ?nif_stub.\n\n-spec create_program_with_builtin_kernels(Context::cl_context(),\n\t\t\t\t\t  DeviceList::[cl_device_id()],\n\t\t\t\t\t  KernelNames::string()) ->\n    {'ok', cl_program()} | {'error', cl_error()}.\n\ncreate_program_with_builtin_kernels(_Context, _DeviceList, _KernelNames) ->\n    ?nif_stub.\n\n-spec create_program_with_il(Context::cl_context(), IL::iodata()) ->\n\t\t\t\t    {'ok', cl_program()} | {'error', cl_error()}.\n\ncreate_program_with_il(_Context, _IL) ->\n    ?nif_stub.\n\n%%\n%% @spec retain_program(Program::cl_program()) ->\n%%    'ok' | {'error', cl_error()}\n%% @doc  Increments the program reference count. \nretain_program(Program) when ?is_program(Program) ->\n    ok.\n\n%%\n%% @spec release_program(Program::cl_program()) ->\n%%    'ok' | {'error', cl_error()}\n%% @doc Decrements the program reference count. 
\n%%\n%% The program object is deleted after all kernel objects associated\n%% with program have been deleted and the program reference count\n%% becomes zero.\nrelease_program(Program) when ?is_program(Program) ->\n    ok.\n\n%%\n%% @spec build_program(Program::cl_program(),\n%%                     DeviceList::[cl_device_id()],\n%%                     Options::string()) ->\n%%  'ok' | {'error', cl_error()}\n%%\n%% @doc Builds (compiles and links) a program executable from the\n%% program source or binary.\n%%\n%% OpenCL allows program executables to be built using the source or the binary.\n%% \n%% The build options are categorized as pre-processor options, options\n%% for math intrinsics, options that control optimization and\n%% miscellaneous options. This specification defines a standard set of\n%% options that must be supported by an OpenCL compiler when building\n%% program executables online or offline. These may be extended by a\n%% set of vendor- or platform-specific options.\n%% \n%% <h4>Preprocessor Options</h4> These options\n%% control the OpenCL preprocessor which is run on each program source\n%% before actual compilation. -D options are processed in the order\n%% they are given in the options argument to\n%% <code>build_program/3</code>.\n%%\n%% <dl>\n%% <dt><span>-D name</span></dt><dd>\n%% <p> Predefine <code>name</code> as a macro, with definition 1.</p></dd>\n%% <dt>-D name=definition</dt><dd><p> The contents of <code>definition</code> \n%% are tokenized and processed as if they appeared during translation phase three in a `#define'\n%% directive. In particular, the definition will be truncated by\n%% embedded newline characters.  </p></dd>\n%% <dt>-I dir</dt><dd> <p>Add the directory <code>dir</code> to the list of directories to be\n%% searched for header files.</p> </dd></dl>\n%% <br />\n%%\n%% <h4>Math Intrinsics Options</h4> These options control compiler\n%% behavior regarding floating-point arithmetic. 
These options trade\n%% off between speed and correctness.\n%% <dl><dt>-cl-single-precision-constant</dt><dd><p> Treat double\n%% precision floating-point constant as single precision constant.\n%% </p></dd><dt>-cl-denorms-are-zero</dt><dd><p> This option controls\n%% how single precision and double precision denormalized numbers are\n%% handled. If specified as a build option, the single precision\n%% denormalized numbers may be flushed to zero and if the optional\n%% extension for double precision is supported, double precision\n%% denormalized numbers may also be flushed to zero. This is intended\n%% to be a performance hint and the OpenCL compiler can choose not to\n%% flush denorms to zero if the device supports single precision (or\n%% double precision) denormalized numbers.  </p><p> This option is\n%% ignored for single precision numbers if the device does not support\n%% single precision denormalized numbers i.e. CL_FP_DENORM bit is not\n%% set in CL_DEVICE_SINGLE_FP_CONFIG.  </p><p> </p><p> This option is\n%% ignored for double precision numbers if the device does not support\n%% double precision or if it does support double precision but\n%% CL_FP_DENORM bit is not set in CL_DEVICE_DOUBLE_FP_CONFIG.  </p><p>\n%% \n%% This flag only applies for scalar and vector single precision\n%% floating-point variables and computations on these floating-point\n%% variables inside a program. It does not apply to reading from or\n%% writing to image objects.  </p><p> </p></dd></dl><p><br />\n%% </p>\n%%\n%% <h4>Optimization Options</h4> These options control various\n%% sorts of optimizations. Turning on optimization flags makes the\n%% compiler attempt to improve the performance and/or code size at the\n%% expense of compilation time and possibly the ability to debug the\n%% program.  <dl><dt>-cl-opt-disable</dt><dd><p> This option\n%% disables all optimizations. The default is optimizations are\n%% enabled.  
</p></dd><dt>-cl-strict-aliasing</dt><dd><p> This option\n%% allows the compiler to assume the strictest aliasing rules.\n%% </p></dd></dl>\n%%<p> The following options control compiler\n%% behavior regarding floating-point arithmetic. These options trade\n%% off between performance and correctness and must be specifically\n%% enabled. These options are not turned on by default since it can\n%% result in incorrect output for programs which depend on an exact\n%% implementation of IEEE 754 rules/specifications for math functions.\n%% </p><dl><dt>-cl-mad-enable</dt><dd><p> Allow <code>a * b + c</code>\n%% to be replaced by a <code>mad</code>. The <code>mad</code> computes\n%% <code>a * b + c</code> with reduced accuracy. For example, some\n%% OpenCL devices implement <code>mad</code> as truncate\n%% the result of <code>a * b</code> before adding it to\n%% <code>c</code>.  </p></dd>\n%% <dt>-cl-no-signed-zeros</dt><dd>\n%% <p> Allow optimizations for floating-point arithmetic that ignore\n%% the signedness of zero. IEEE 754 arithmetic specifies the behavior\n%% of distinct <code>+0.0</code> and <code>-0.0</code> values, which\n%% then prohibits simplification of expressions such as\n%% <code>x+0.0</code> or <code>0.0*x</code> (even with\n%% -cl-finite-math-only). This option implies that the sign of a zero result isn't\n%% significant.  </p></dd>\n%% <dt>-cl-unsafe-math-optimizations</dt><dd><p> Allow optimizations\n%% for floating-point arithmetic that (a) assume that arguments and\n%% results are valid, (b) may violate IEEE 754 standard and (c) may\n%% violate the OpenCL numerical compliance requirements as defined in\n%% section 7.4 for single-precision floating-point, section 9.3.9 for\n%% double-precision floating-point, and edge case behavior in section\n%% 7.5. This option includes the -cl-no-signed-zeros and\n%% -cl-mad-enable options.  
</p></dd>\n%%<dt><span class=\"term\">-cl-finite-math-only</span></dt><dd><p> \n%% Allow optimizations for floating-point arithmetic that assume that arguments and results\n%% are not NaNs or ±infinity. This option may violate the OpenCL numerical compliance\n%% requirements defined in section 7.4 for single-precision floating-point,\n%% section 9.3.9 for double-precision floating-point, and edge case behavior in section 7.5.\n%% </p></dd>\n%%<dt><span class=\"term\">-cl-fast-relaxed-math</span></dt><dd><p> \n%% Sets the optimization options -cl-finite-math-only and -cl-unsafe-math-optimizations.\n%% This allows optimizations for floating-point arithmetic that may violate the IEEE 754\n%% standard and the OpenCL numerical compliance requirements defined in the specification in section 7.4 for single-precision floating-point, section 9.3.9 for double-precision floating-point,\n%% and edge case behavior in section 7.5. This option causes the preprocessor macro\n%%\n%% <code>__FAST_RELAXED_MATH__</code> to be defined in the OpenCL program.\n%% </p></dd></dl><p><br />\n%% </p><h4>Options to Request or Suppress Warnings</h4>\n%% Warnings are diagnostic messages that report constructions which are not inherently erroneous\n%% but which are risky or suggest there may have been an error. 
The following language-independent\n%% options do not enable specific warnings but control the kinds of diagnostics\n%% produced by the OpenCL compiler.\n%% <dl><dt><span class=\"term\">-w</span></dt><dd><p> \n%% Inhibit all warning messages.\n%% </p></dd><dt><span class=\"term\">-Werror</span></dt><dd><p> \n%% Make all warnings into errors.\n%% </p></dd>\n%%</dl>\n\nbuild_program(Program, DeviceList, Options) ->\n    case async_build_program(Program, DeviceList, Options) of\n\t{ok,Ref} ->\n\t    receive\n\t\t{cl_async,Ref,Reply} ->\n\t\t    Reply\n\t    end;\n\tError ->\n\t    Error\n    end.\n\nasync_build_program(_Program, _DeviceList, _Options) ->\n    ?nif_stub.\n\n\n%%\n%% @spec unload_compiler() -> 'ok' | {'error', cl_error()}\n%% @doc Allows the implementation to release the resources allocated by the OpenCL compiler. \n%%\n%% This is a hint from the application and does not guarantee that the\n%% compiler will not be used in the future or that the compiler will\n%% actually be unloaded by the implementation. 
Calls to build_program/3\n%% after unload_compiler/0 will reload the compiler, if necessary, to\n%% build the appropriate program executable.\nunload_compiler() ->   \n    ?nif_stub.\n\n%% @spec unload_platform_compiler(Platform :: cl_platform_id()) ->\n%%   'ok' | {'error', cl_error()}\n-spec unload_platform_compiler(Platform::cl_platform_id()) ->\n    'ok' | {'error', cl_error()}.\nunload_platform_compiler(_Platform) ->\n    ?nif_stub.\n\n-spec compile_program(Program::cl_program(),\n\t\t      DeviceList::[cl_device_id()],\n\t\t      Options::string(),\n\t\t      Headers::[cl_program()],\n\t\t      Names::[string()]) ->\n    'ok' | {'error', cl_error()}.\n\ncompile_program(Program, Devices, Options, Headers, Names) ->\n    case async_compile_program(Program, Devices, Options, Headers, Names) of\n\t{ok,Ref} ->\n\t    receive\n\t\t{cl_async,Ref,Reply} ->\n\t\t    Reply\n\t    end;\n\tError ->\n\t    Error\n    end.\n\nasync_compile_program(_Program, _Devices, _Options, _Headers, _Names) ->\n    ?nif_stub.\n\n\n-spec link_program(Context::cl_context(),\n\t\t   DeviceList::[cl_device_id()],\n\t\t   Options::string(),\n\t\t   Programs::[cl_program()]) ->\n    {'ok',cl_program()} | {'error', cl_error()}.\n\nlink_program(Context, DeviceList, Options, Programs) ->\n    case async_link_program(Context, DeviceList, Options, Programs) of\n\t{ok,{Ref,Program}} ->\n\t    receive\n\t\t{cl_async,Ref,ok} ->\n\t\t    {ok,Program};\n\t\t{cl_async,Ref,Error} ->\n\t\t    Error\n\t    end;\n\tError ->\n\t    Error\n    end.\n\nasync_link_program(_Context, _DeviceList, _Options, _Programs) ->\n    ?nif_stub.\n\n\nprogram_info() ->\n    [\n     reference_count,\n     context,\n     num_devices,\n     devices,\n     source,\n     binary_sizes,\n     binaries\n    ].\n\n%% @doc  Returns specific information about the program object. \nget_program_info(_Program, _Info) ->\n    ?nif_stub.\n\n%% @doc  Returns all information about the program object. 
\nget_program_info(Program) when ?is_program(Program) ->\n    get_info_list(Program, program_info(), fun get_program_info/2).\n\nprogram_build_info() ->\n    [\n     status,\n     options,\n     log\n    ].\n\n%% @doc Returns specific build information for each device in the program object. \nget_program_build_info(_Program, _Device, _Info) ->\n    ?nif_stub.\n\n%% @doc Returns all build information for each device in the program object. \nget_program_build_info(Program, Device) ->\n    get_info_list(Program, program_build_info(),\n\t\t  fun(P, I) ->\n\t\t\t  get_program_build_info(P, Device, I)\n\t\t  end).\n\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n%% Kernel\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n\n%%\n%% @spec create_kernel(Program::cl_program(),Name::string()) ->\n%%    {'ok', cl_kernel()} | {'error', cl_error()}\n%%\n%% @doc  Creates a kernel object. \n%%\n%%  A kernel is a function declared in a program. A kernel is\n%%  identified by the __kernel qualifier applied to any function in a\n%%  program. A kernel object encapsulates the specific __kernel\n%%  function declared in a program and the argument values to be used\n%%  when executing this __kernel function.\ncreate_kernel(_Program, _Name) ->\n    ?nif_stub.\n\n%%\n%% @spec create_kernels_in_program(Program::cl_program()) ->\n%%    {'ok', [cl_kernel()]} | {'error', cl_error()}\n%%\n%% @doc Creates kernel objects for all kernel functions in a program object. \n%%\n%%  Creates kernel objects for all kernel functions in program. 
Kernel\n%%  objects are not created for any __kernel functions in program that\n%%  do not have the same function definition across all devices for\n%%  which a program executable has been successfully built.\n\n%% Kernel objects can only be created once you have a program object\n%% with a valid program source or binary loaded into the program\n%% object and the program executable has been successfully built for\n%% one or more devices associated with program. No changes to the\n%% program executable are allowed while there are kernel objects\n%% associated with a program object. This means that calls to\n%% clBuildProgram return CL_INVALID_OPERATION if there are kernel\n%% objects attached to a program object. The OpenCL context associated\n%% with program will be the context associated with kernel. The list\n%% of devices associated with program are the devices associated with\n%% kernel. Devices associated with a program object for which a valid\n%% program executable has been built can be used to execute kernels\n%% declared in the program object.\ncreate_kernels_in_program(_Program) ->\n    ?nif_stub.\n\n%%\n%% @type cl_kernel_arg() = integer() | float() | binary()\n%%\n%% @spec set_kernel_arg(Kernel::cl_kernel(), Index::non_neg_integer(),\n%%                      Argument::cl_kernel_arg()) -> \n%%    'ok' | {'error', cl_error()}\n%% @doc Used to set the argument value for a specific argument of a kernel. 
\n%% \n%% For now set_kernel_arg handles integer and floats\n%% to set any other type use `<<Foo:Bar/native...>>'\n%% use the macros defined in cl.hrl to get it right (except for padding)\n%% \n%% A kernel object does not update the reference count for objects\n%% such as memory, sampler objects specified as argument values by\n%% set_kernel_arg/3, Users may not rely on a kernel object to retain\n%% objects specified as argument values to the kernel.\n%%\n%% Implementations shall not allow cl_kernel objects to hold reference\n%% counts to cl_kernel arguments, because no mechanism is provided for\n%% the user to tell the kernel to release that ownership right. If the\n%% kernel holds ownership rights on kernel args, that would make it\n%% impossible for the user to tell with certainty when he may safely\n%% release user allocated resources associated with OpenCL objects\n%% such as the cl_mem backing store used with CL_MEM_USE_HOST_PTR.\n\nset_kernel_arg(_Kernel,_Index,_Argument) ->\n    ?nif_stub.\n\n%%\n%% @spec set_kernel_arg_size(Kernel::cl_kernel(), Index::non_neg_integer(),\n%%                           Size::non_neg_integer()) ->\n%%    'ok' | {'error', cl_error()}\n%%\n%% @doc clErlang special to set kernel arg with size only (local mem etc)\n%%\nset_kernel_arg_size(_Kernel,_Index,_Size) ->\n    ?nif_stub.\n\n\n%%\n%% @spec retain_kernel(Context::cl_kernel()) ->\n%%    'ok' | {'error', cl_error()}\n%% @doc  Increments the program kernel reference count. \nretain_kernel(Kernel) when ?is_kernel(Kernel) ->\n    ok.\n\n%%\n%% @spec release_kernel(Context::cl_kernel()) ->\n%%    'ok' | {'error', cl_error()}\n%% @doc  Decrements the kernel reference count. \nrelease_kernel(Kernel) when ?is_kernel(Kernel) ->\n    ok.\n\nkernel_info() ->\n    [\n     function_name,\n     num_args,\n     reference_count,\n     context,\n     program\n    ].\n\n%% @doc Returns specific information about the kernel object. 
\nget_kernel_info(_Kernel, _Info) ->\n    ?nif_stub.\n\n%% @doc Returns all information about the kernel object. \nget_kernel_info(Kernel) when ?is_kernel(Kernel) ->\n    get_info_list(Kernel, kernel_info(), fun get_kernel_info/2).\n\nkernel_workgroup_info() ->\n    [\n     work_group_size,\n     compile_work_group_size,\n     local_mem_size\n    ].\n\n%% @doc Returns specific information about the kernel object that may\n%% be specific to a device.\nget_kernel_workgroup_info(_Kernel, _Device, _Info) ->\n    ?nif_stub.\n\n\n%% @doc Returns all information about the kernel object that may be\n%% specific to a device.\nget_kernel_workgroup_info(Kernel, Device) ->\n    get_info_list(Kernel, kernel_workgroup_info(),\n\t\t  fun(K,I) ->\n\t\t\t  get_kernel_workgroup_info(K,Device,I)\n\t\t  end).\n\n%% @doc Returns specific information about the kernel argument\nget_kernel_arg_info(_Kernel, _ArgIndex, _Info) ->\n    ?nif_stub.\n\nget_kernel_arg_info(Kernel, ArgIndex) ->\n    get_info_list(Kernel, kernel_arg_info(),\n\t\t       fun(K,I) ->\n\t\t\t       get_kernel_arg_info(K,ArgIndex,I)\n\t\t       end).\n\nget_kernel_arg_info(Kernel) ->\n    case get_kernel_info(Kernel, num_args) of\n\t{ok, N} ->\n\t    {ok,\n\t     lists:map(fun(I) ->\n\t\t\t       try get_kernel_arg_info(Kernel, I) of\n\t\t\t\t   {ok,Info} ->\n\t\t\t\t       {I,Info};\n\t\t\t\t   {error,Reason} ->\n\t\t\t\t       {I,{error,Reason}}\n\t\t\t       catch\n\t\t\t\t   error:Reason ->\n\t\t\t\t       {I,{error,Reason}}\n\t\t\t       end\n\t\t       end, lists:seq(0, N-1))};\n\tError ->\n\t    Error\n    end.\n\nkernel_arg_info() ->\n    [address_qualifier,\n     access_qualifier,\n     type_name,\n     type_qualifier,\n     name].\n\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n%% Events\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n\n%% @spec enqueue_task(Queue::cl_queue(), Kernel::cl_kernel(),\n%%                    
WaitList::[cl_event()]) ->\n%%    {'ok', cl_event()} | {'error', cl_error()}\n%%\n%% @doc Enqueues a command to execute a kernel on a device. \n%%\n%% The kernel is executed using a single work-item.\n%% @see enqueue_nd_range_kernel/5\n-spec enqueue_task(Queue::cl_queue(), Kernel::cl_kernel(),\n\t\t   WaitList::[cl_event()]) ->\n    {'ok', cl_event()} | {'error', cl_error()}.\n\nenqueue_task(Queue, Kernel, WaitList) ->\n    enqueue_task(Queue, Kernel, WaitList, true).\n\n-spec nowait_enqueue_task(Queue::cl_queue(), Kernel::cl_kernel(),\n\t\t   WaitList::[cl_event()]) ->\n    'ok' | {'error', cl_error()}.\n\nnowait_enqueue_task(Queue, Kernel, WaitList) ->\n    enqueue_task(Queue, Kernel, WaitList, false).\n\nenqueue_task(_Queue, _Kernel, _WaitList, _WantEvent) ->\n    ?nif_stub.\n\n%%\n%% @spec enqueue_nd_range_kernel(Queue::cl_queue(), Kernel::cl_kernel(),\n%%                               Global::[non_neg_integer()],\n%%                               Local::[non_neg_integer()],\n%%                               WaitList::[cl_event()]) ->\n%%    {'ok', cl_event()} | {'error', cl_error()}\n%%\n%% @doc Enqueues a command to execute a kernel on a device. \n%% \n%% Work-group instances are executed in parallel across multiple\n%% compute units or concurrently on the same compute unit.\n%%\n%% Each work-item is uniquely identified by a global identifier. The\n%% global ID, which can be read inside the kernel, is computed using\n%% the value given by global_work_size and global_work_offset. In\n%% OpenCL 1.0, the starting global ID is always (0, 0, ... 0). In\n%% addition, a work-item is also identified within a work-group by a\n%% unique local ID. The local ID, which can also be read by the\n%% kernel, is computed using the value given by local_work_size. The\n%% starting local ID is always (0, 0, ... 
0).\n-spec enqueue_nd_range_kernel(Queue::cl_queue(), Kernel::cl_kernel(),\n\t\t\t      Global::[non_neg_integer()],\n\t\t\t      Local::[non_neg_integer()],\n\t\t\t      WaitList::[cl_event()]) ->\n    {'ok', cl_event()} | {'error', cl_error()}.\n\n\nenqueue_nd_range_kernel(Queue, Kernel, Global, Local, WaitList) ->\n    enqueue_nd_range_kernel(Queue, Kernel, Global, Local, WaitList, true).\n\n\n-spec nowait_enqueue_nd_range_kernel(Queue::cl_queue(), Kernel::cl_kernel(),\n\t\t\t\t     Global::[non_neg_integer()],\n\t\t\t\t     Local::[non_neg_integer()],\n\t\t\t\t     WaitList::[cl_event()]) ->\n    'ok' | {'error', cl_error()}.\n\nnowait_enqueue_nd_range_kernel(Queue, Kernel, Global, Local, WaitList) ->\n    enqueue_nd_range_kernel(Queue, Kernel, Global, Local, WaitList, false).\n\nenqueue_nd_range_kernel(_Queue, _Kernel, _Global, _Local, _WaitList, \n\t\t\t_WantEvent) ->\n    ?nif_stub.\n\n%% @spec enqueue_marker(Queue::cl_queue()) ->\n%%    {'ok', cl_event()} | {'error', cl_error()}\n%%\n%% @doc  Enqueues a marker command. \n%%\n%%  Enqueues a marker command to command_queue. The marker command\n%%  returns an event which can be used to queue a wait on this marker\n%%  event i.e. wait for all commands queued before the marker command\n%%  to complete.\n-spec enqueue_marker(Queue::cl_queue()) ->\n    {'ok', cl_event()} | {'error', cl_error()}.\n\nenqueue_marker(_Queue) ->\n    ?nif_stub.\n\n%%\n%% @spec enqueue_wait_for_events(Queue::cl_queue(), WaitList::[cl_event()]) ->\n%%    'ok' | {'error', cl_error()}\n%%\n%% @doc Enqueues a wait for a specific event or a list of events \n%% to complete before any future commands queued in the command-queue are\n%% executed.\n%%\n%% The context associated with events in WaitList and Queue must be the same. 
\n-spec enqueue_wait_for_events(Queue::cl_queue(),  WaitList::[cl_event()]) ->\n    'ok' | {'error', cl_error()}.\n\nenqueue_wait_for_events(_Queue, _WaitList) ->\n    ?nif_stub.\n\n\n%%\n%% @doc Enqueue commands to read from a buffer object to host memory. \n%% \n%% Calling <code>enqueue_read_buffer</code> to read a region of the\n%% buffer object with the <code>Buffer</code> argument value set to\n%% <code>host_ptr</code> + <code >offset</code>, where\n%% <code>host_ptr</code> is a pointer to the memory region specified\n%% when the buffer object being read is created with\n%% <code>CL_MEM_USE_HOST_PTR</code>, must meet the following\n%% requirements in order to avoid undefined behavior:\n%%\n%% <ul> <li>All commands that use this buffer object have finished\n%% execution before the read command begins execution</li>\n%% <li>The buffer object is not mapped</li>\n%% <li>The buffer object is not used by any command-queue until the\n%% read command has finished execution</li>\n%% </ul>\n\n-spec enqueue_read_buffer(Queue::cl_queue(), Buffer::cl_mem(),\n\t\t\t  Offset::non_neg_integer(), \n\t\t\t  Size::non_neg_integer(), \n\t\t\t  WaitList::[cl_event()]) ->\n    {'ok', cl_event()} | {'error', cl_error()}.\n\n\nenqueue_read_buffer(_Queue, _Buffer, _Offset, _Size, _WaitList) ->\n    ?nif_stub.\n\n%%\n%% Read rectangular section from buffer memory into host memory\n%%\n-spec enqueue_read_buffer_rect(Queue::cl_queue(), Buffer::cl_mem(),\n\t\t\t       BufferOrigin::[non_neg_integer()],\n\t\t\t       HostOrigin::[non_neg_integer()],\n\t\t\t       Region::[non_neg_integer()],\n\t\t\t       BufferRowPicth::non_neg_integer(),\n\t\t\t       BufferSlicePicth::non_neg_integer(),\n\t\t\t       HostRowPicth::non_neg_integer(),\n\t\t\t       HostSlicePicth::non_neg_integer(),\n\t\t\t       WaitList::[cl_event()]) ->\n    {'ok', cl_event()} | {'error', cl_error()}.\n\nenqueue_read_buffer_rect(_Queue, _Buffer, _BufferOrigin, _HostOrigin,\n\t\t\t _Region, _BufferRowPitch, 
_BufferSlicePitch,\n\t\t\t _HostRowPitch, _HostSlicePitch,\n\t\t\t _WaitList) ->\n    ?nif_stub.\n\n%%\n%% @spec enqueue_write_buffer(Queue::cl_queue(), Buffer::cl_mem(),\n%%                            Offset::non_neg_integer(), \n%%                            Size::non_neg_integer(), \n%%                            Data::binary(),\n%%                            WaitList::[cl_event()]) ->\n%%    {'ok', cl_event()} | {'error', cl_error()}\n%%\n%% @doc Enqueue commands to write to a buffer object from host memory.\n%%\n%% Calling <code>enqueue_write_buffer</code> to update the latest bits\n%% in a region of the buffer object with the <code>Buffer</code>\n%% argument value set to <code>host_ptr</code> + <code >offset</code>,\n%% where <code>host_ptr</code> is a pointer to the memory region\n%% specified when the buffer object being read is created with\n%% <code>CL_MEM_USE_HOST_PTR</code>, must meet the following\n%% requirements in order to avoid undefined behavior:\n%%\n%% <ul> <li>The host memory region given by <code>(host_ptr + offset, cb)</code>\n%% contains the latest bits when the enqueued write command begins\n%% execution. 
</li> \n%% <li>The buffer object is not mapped</li> \n%% <li>The buffer object is not used by any command-queue until the read\n%% command has finished execution</li> </ul>\n-spec enqueue_write_buffer(Queue::cl_queue(), Buffer::cl_mem(),\n\t\t\t   Offset::non_neg_integer(), \n\t\t\t   Size::non_neg_integer(), \n\t\t\t   Data::binary(),\n\t\t\t   WaitList::[cl_event()]) ->\n    {'ok', cl_event()} | {'error', cl_error()}.\n\n\nenqueue_write_buffer(Queue, Buffer, Offset, Size, Data, WaitList) ->\n    enqueue_write_buffer(Queue, Buffer, Offset, Size, Data, WaitList, true).\n\n-spec nowait_enqueue_write_buffer(Queue::cl_queue(), Buffer::cl_mem(),\n\t\t\t   Offset::non_neg_integer(), \n\t\t\t   Size::non_neg_integer(), \n\t\t\t   Data::binary(),\n\t\t\t   WaitList::[cl_event()]) ->\n    'ok' | {'error', cl_error()}.\n\nnowait_enqueue_write_buffer(Queue, Buffer, Offset, Size, Data, WaitList) ->\n    enqueue_write_buffer(Queue, Buffer, Offset, Size, Data, WaitList, false).\n\nenqueue_write_buffer(_Queue, _Buffer, _Offset, _Size, _Data, _WaitList,\n\t\t     _WantEvent) ->\n    ?nif_stub.\n\n\n\n%%\n%% Write rectangular section from  host memory into buffer memory\n%%\n-spec enqueue_write_buffer_rect(Queue::cl_queue(), Buffer::cl_mem(),\n\t\t\t\tBufferOrigin::[non_neg_integer()],\n\t\t\t\tHostOrigin::[non_neg_integer()],\n\t\t\t\tRegion::[non_neg_integer()],\n\t\t\t\tBufferRowPicth::non_neg_integer(),\n\t\t\t\tBufferSlicePicth::non_neg_integer(),\n\t\t\t\tHostRowPicth::non_neg_integer(),\n\t\t\t\tHostSlicePicth::non_neg_integer(),\n\t\t\t\tData::binary(),\n\t\t\t\tWaitList::[cl_event()]) ->\n    {'ok', cl_event()} | {'error', cl_error()}.\n\nenqueue_write_buffer_rect(_Queue, _Buffer, _BufferOrigin, _HostOrigin,\n\t\t\t  _Region, _BufferRowPitch, _BufferSlicePitch,\n\t\t\t  _HostRowPitch, _HostSlicePitch,\n\t\t\t  _Data,\n\t\t\t  _WaitList) ->\n    ?nif_stub.\n\n\n%%\n%% Fill buffer memory from pattern,\n%% Size and Offset must be multiple of Pattern size\n%% Pattern size 
must be one of 1,2,4,8,16,32,64 or 128\n%%\n-spec enqueue_fill_buffer(Queue::cl_queue(), Buffer::cl_mem(),\n\t\t\t  Pattern::binary(),\n\t\t\t  Offset::non_neg_integer(),\n\t\t\t  Size::non_neg_integer(),\n\t\t\t  WaitList::[cl_event()]) ->\n    {'ok', cl_event()} | {'error', cl_error()}.\n\nenqueue_fill_buffer(_Queue, _Buffer, _Pattern, _Offset, _Size, _WaitList) ->\n    ?nif_stub.\n\n%% \n%% @spec enqueue_barrier(Queue::cl_queue()) ->\n%%    'ok' | {'error', cl_error()}\n%%\n%% @doc A synchronization point that enqueues a barrier operation. \n%%\n%%  enqueue_barrier/1 is a synchronization point that ensures that all\n%%  queued commands in command_queue have finished execution before\n%%  the next batch of commands can begin execution.\n-spec enqueue_barrier(Queue::cl_queue()) ->\n    'ok' | {'error', cl_error()}.\n\nenqueue_barrier(_Queue) ->\n    ?nif_stub.\n\n%% @spec enqueue_marker_with_wait_list(Queue::cl_queue(),\n%%                    WaitList::[cl_event()]) ->\n%%    {'ok', cl_event()} | {'error', cl_error()}\n\n-spec enqueue_marker_with_wait_list(Queue::cl_queue(),\n\t\t\t\t    WaitList::[cl_event()]) ->\n    {'ok', cl_event()} | {'error', cl_error()}.\n\nenqueue_marker_with_wait_list(_Queue, _WaitList) ->\n    ?nif_stub.    
\n\n%% @spec enqueue_barrier_with_wait_list(Queue::cl_queue(),\n%%                    WaitList::[cl_event()]) ->\n%%    {'ok', cl_event()} | {'error', cl_error()}\n-spec enqueue_barrier_with_wait_list(Queue::cl_queue(),\n\t\t\t\t     WaitList::[cl_event()]) ->\n    {'ok', cl_event()} | {'error', cl_error()}.\nenqueue_barrier_with_wait_list(_Queue, _WaitList) ->\n    ?nif_stub.\n    \n\n\nenqueue_read_image(_Queue, _Image, _Origin, _Region, _RowPitch, _SlicePitch,\n\t\t   _WaitList) ->\n    ?nif_stub.\n\nenqueue_write_image(Queue, Image, Origin, Region, RowPitch, SlicePitch,\n\t\t    Data, WaitList) ->\n    enqueue_write_image(Queue, Image, Origin, Region, RowPitch, SlicePitch,\n\t\t\tData, WaitList, true).\n\n\nnowait_enqueue_write_image(Queue, Image, Origin, Region, RowPitch, SlicePitch,\n\t\t\t   Data, WaitList) ->\n    enqueue_write_image(Queue, Image, Origin, Region, RowPitch, SlicePitch,\n\t\t\tData, WaitList, false).\n\nenqueue_write_image(_Queue, _Image, _Origin, _Region, _RowPitch, _SlicePitch,\n\t\t    _Data, _WaitList, _WantEvent) ->\n    ?nif_stub.\n\nenqueue_copy_buffer(_Queue, _SrcBuffer, _DstBuffer, _SrcOffset, _DstOffset,\n\t\t\t     _Cb, _WaitList) ->\n    ?nif_stub.\n\nenqueue_copy_buffer_rect(_Queue, _SrcBuffer, _DstBuffer,\n\t\t\t _SrcOrigin, _DstOrigin, _Region,\n\t\t\t _SrcRowPitch, _SrcSlicePitch,\n\t\t\t _DstRowPitch, _DstSlicePitch,\n\t\t\t _WaitList) ->\n    ?nif_stub.\n\nenqueue_copy_image(_QUeue, _SrcImage, _DstImage, _SourceOrigin, _DestOrigin, _Region, _WaitList) ->\n    ?nif_stub.\n\n%%  FillColor = <<R:32/unsigned,G:32/unsigned,B:32/unsigned,A:32/unsigned>>\n%%            | <<R:32/signed,G:32/signed,B:32/signed,A:32/signed>>\n%%            | <<R:32/float,G:32/float,B:32/float,A:32/float>>\n%%            Use device endian! 
check device_info(D, endian_little)\n-spec enqueue_fill_image(Queue::cl_queue(),\n\t\t\t Image::cl_mem(),\n\t\t\t FillColor::binary(),\n\t\t\t Origin::[non_neg_integer()],\n\t\t\t Region::[non_neg_integer()],\n\t\t\t WaitList::[cl_event()]) ->\n\t\t\t\t{'ok', cl_event()} | {'error', cl_error()}.\n\nenqueue_fill_image(_Queue, _Image, _FillColor, _Origin, _Region, _WaitList) ->\n    ?nif_stub.\n\nenqueue_copy_image_to_buffer(_Queue, _SrcImage, _DstBuffer, _Origin, _Region,\n\t\t\t     _DstOffset, _WaitList) ->\n    ?nif_stub.\n\nenqueue_copy_buffer_to_image(_Queue, _SrcBuffer, _DstImage, _SrcOffset,\n\t\t\t     _DstOrigin, _Region, _WaitList) ->\n    ?nif_stub.\n\nenqueue_map_buffer(_Queue, _Buffer, _MapFlags, _Offset, _Size, _WaitList) ->    \n    ?nif_stub.\n\nenqueue_map_image(_Queue, _Image, _MapFlags, _Origin, _Region, _WaitList) ->\n    ?nif_stub.\n\nenqueue_unmap_mem_object(_Queue, _Mem, _WaitList) ->    \n    ?nif_stub.\n\n-spec enqueue_migrate_mem_objects(Queue::cl_queue(),\n\t\t\t\t  MemObjects::[cl_mem()],\n\t\t\t\t  Flags::[host|content_undefined],\n\t\t\t\t  WaitList::[cl_event()]) ->\n    {'ok', cl_event()} | {'error', cl_error()}.\n\nenqueue_migrate_mem_objects(_Queue, _MemObjects, _Flags, _WaitList) ->\n    ?nif_stub.\n\n%%\n%% @spec flush(Queue::cl_queue()) ->\n%%    'ok' | {'error', cl_error()}\n%%\n%% @doc Issues all previously queued OpenCL commands \n%% in a command-queue to the device associated with the command-queue.\n%%\n%% flush only guarantees that all queued commands to command_queue get\n%% issued to the appropriate device. 
There is no guarantee that they\n%% will be complete after clFlush returns.\n-spec flush(Queue::cl_queue()) ->\n    'ok' | {'error', cl_error()}.\n\nflush(Queue) ->\n    case async_flush(Queue) of\n\t{ok,Ref} ->\n\t    receive\n\t\t{cl_async,Ref,Reply} ->\n\t\t    Reply\n\t    end;\n\tError -> Error\n    end.\n\nasync_flush(_Queue) ->\n    ?nif_stub.\n\n%%\n%% @spec finish(Queue::cl_queue()) ->\n%%    'ok' | {'error', cl_error()}\n%%\n%% @doc Blocks until all previously queued OpenCL commands \n%% in a command-queue are issued to the associated device and have\n%% completed.\n%%\n%% finish does not return until all queued commands in command_queue\n%% have been processed and completed. clFinish is also a\n%% synchronization point.\n-spec finish(Queue::cl_queue()) ->\n    'ok' | {'error', cl_error()}.\n\nfinish(Queue) ->\n    case async_finish(Queue) of\n\t{ok,Ref} ->\n\t    receive\n\t\t{cl_async,Ref,Reply} ->\n\t\t    Reply\n\t    end;\n\tError -> Error\n    end.\n\nasync_finish(_Queue) ->\n    ?nif_stub.\n\n%%\n%% @spec retain_event(Event::cl_event()) ->\n%%    'ok' | {'error', cl_error()}\n%% @doc  Increments the event reference count. \n%% NOTE: The OpenCL commands that return an event perform an implicit retain. \nretain_event(Event) when ?is_event(Event) ->\n    ok.\n\n%%\n%% @spec release_event(Event::cl_event()) ->\n%%    'ok' | {'error', cl_error()}\n%% @doc Decrements the event reference count. \n%%\n%%  Decrements the event reference count. 
The event object is deleted\n%%  once the reference count becomes zero, the specific command\n%%  identified by this event has completed (or terminated) and there\n%%  are no commands in the command-queues of a context that require a\n%%  wait for this event to complete.\nrelease_event(Event) when ?is_event(Event) ->\n    ok.\n\n%% @doc Returns all possible event_info items.\nevent_info() ->\n    [\n     command_queue,\n     command_type,\n     reference_count,\n     execution_status\n    ].\n\n%% @doc Returns specific information about the event object. \nget_event_info(_Event, _Info) ->\n    ?nif_stub.\n\n\n%% @doc Returns all specific information about the event object. \nget_event_info(Event) when ?is_event(Event) ->\n    get_info_list(Event, event_info(), fun get_event_info/2).\n\n\nevent_profiling_info() ->\n    case lists:member({2,0}, cl:versions()) of\n\ttrue ->\n\t    [\n\t     command_queued,\n\t     command_submit,\n\t     command_start,\n\t     command_end,\n\t     command_complete\n\t    ];\n\tfalse ->\n\t    [\n\t     command_queued,\n\t     command_submit,\n\t     command_start,\n\t     command_end\n\t    ]\n    end.\n\nget_event_profiling_info(_Event, _Info) ->\n    ?nif_stub.\n\nget_event_profiling_info(Event) ->\nget_info_list(Event, event_profiling_info(),\n\t      fun get_event_profiling_info/2).    
\n\n%% IMAGES\n%% @doc return a list of image formats [{Order,Type}]\n\nget_supported_image_formats(_Context, _Flags, _ImageType) ->\n    ?nif_stub.\n\n-spec create_image2d(Context::cl_context(), Flags::[cl_mem_flag()],\n\t\t     ImageFormat::#cl_image_format{},\n\t\t     Width::non_neg_integer(),\n\t\t     Height::non_neg_integer(),\n\t\t     Pitch::non_neg_integer(),\n\t\t     Data::binary()) ->\n    {'ok', cl_mem()} | {'error', cl_error()}.\n\ncreate_image2d(_Context, _MemFlags, _ImageFormat, _Width, _Height, _Pitch,\n\t\t_Data) ->\n    ?nif_stub.\n\n-spec create_image3d(Context::cl_context(), Flags::[cl_mem_flag()],\n\t\t     ImageFormat::#cl_image_format{},\n\t\t     Width::non_neg_integer(),\n\t\t     Height::non_neg_integer(),\n\t\t     Depth::non_neg_integer(),\n\t\t     RowPitch::non_neg_integer(),\n\t\t     SlicePitch::non_neg_integer(),\n\t\t     Data::binary()) ->\n    {'ok', cl_mem()} | {'error', cl_error()}.\n\ncreate_image3d(_Context, _MemFlags, _ImageFormat, _Width, _Height, _Depth,\n\t       _RowPicth, _SlicePitch, _Data) ->\n    ?nif_stub.\n\n-spec create_image(Conext::cl_context(), Flags::[cl_mem_flag()],\n\t\t   ImageFormat::#cl_image_format{},\n\t\t   ImageDesc::#cl_image_desc{},\n\t\t   Data::binary()) ->\n    {'ok', cl_mem()} | {'error', cl_error()}.\n\ncreate_image(_Context, _MemFlags, _ImageFormat, _ImageDesc, _Data) ->\n    ?nif_stub.\n\n-spec create_pipe(Context::cl_context(), Flags::[cl_mem_flag()],\n\t\t  PipePacketSize::non_neg_integer(),\n\t\t  PipeMaxPackets::non_neg_integer()) ->\n\t\t\t {'ok', cl_mem()} | {'error', cl_error()}.\n\ncreate_pipe(_Context, _MemFlags, _PipePacketSize, _PipeMaxPackets) ->\n    ?nif_stub.\n\n%% Wait for all events in EventList to complete\n-spec wait_for_events(EventList::[cl_event]) ->\n\t\t\t     [{'ok','completed'} |\n\t\t\t      {'ok',binary()} |\n\t\t\t      {'error',cl_error()}].\n\nwait_for_events([Event|Es]) ->\n    [wait(Event) | wait_for_events(Es)];\nwait_for_events([]) ->\n    [].\n\n%%\n%% 
@spec wait(Event::cl_event()) -> \n%%    {'ok','completed'} | {'ok',Binary} | {'error',cl_error()}\n%%\nwait(Event) ->\n    wait(Event, infinity).\n\n%% @spec wait_for_event(Event::cl_event()) -> \n%%    {'ok','completed'} | {'ok',Binary} | {'error',cl_error()}\n%% @equiv wait(Event, infinity)\n%%\n\nwait_for_event(Event) ->\n    wait(Event, infinity).\n\n%%  \n%% @spec wait(Event::cl_event(), Timeout::timeout()) -> \n%%    {'ok','completed'} | {'ok',Binary} | \n%%    {'error',cl_error()} | {'error',timeout}\n%% \n%%\n%% @doc  Waits for commands identified by event objects to complete. \n%%\n%%  Waits for commands identified by event objects\n%%  in event_list to complete. A command is considered complete if its\n%%  execution status is CL_COMPLETE or a negative value.\n\n\nwait(Event, Timeout) when ?is_event(Event) ->\n    case async_wait_for_event(Event) of\n\t{ok,Ref} ->\n\t    wait1(Ref,Event,Timeout);\n\tError ->\n\t    Error\n    end.\n\nwait1(Ref, Event, Timeout) when ?is_event(Event) ->\n    receive\n\t{cl_event, Ref, Binary} when is_binary(Binary) ->\n\t    release_event(Event),\n\t    {ok,Binary};\n\t{cl_event, Ref, complete} ->\n\t    release_event(Event),\n\t    {ok,completed};\n\t{cl_event, Ref, Err} ->\n\t    release_event(Event),\n\t    Err\n    after Timeout ->\n\t    {error, timeout}\n    end.\n\n%%  \n%% @spec async_wait_for_event(Event::cl_event()) -> \n%%    {'ok',reference()} | {'error',cl_error()}\n%%\n%% @doc  Initiate an asynchronous wait operation.\n%%\n%%  Generate a wait operation that will run without blocking.\n%%  A reference is returned that can be used to match the message\n%%  that is sent when the event has completed or resulted in an error.\n%%  The message sent has the form <code>{cl_event, Ref, Result}</code>\n%%  where Ref is the reference that was returned from the call and\n%%  Result may be one of binary() | 'complete' or {error,cl_error()}.\n%%\n-spec async_wait_for_event(Event::cl_event()) ->\n    {'ok',reference()} | 
{'error',cl_error()}.\n\nasync_wait_for_event(_Event) ->\n    ?nif_stub.\n\n%% @hidden\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n%% Utilities\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n\nget_info_list(Object, InfoList, Fun) ->\n    get_info_list(Object, InfoList, Fun, undefined, []).\n\nget_info_list(Object, [I|Is], Fun, Err, Acc) ->\n    case Fun(Object, I) of\n\t{error,Reason} ->\n\t    io:format(\"InfoError: ~s [~p]\\n\", [I,Reason]),\n\t    get_info_list(Object, Is, Fun, Reason, Acc);\n\t{ok,Value} ->\n\t    get_info_list(Object, Is, Fun, Err, [{I,Value}|Acc])\n    end;\nget_info_list(_Object,[], _Fun, undefined, []) ->\n    {ok, []};\nget_info_list(_Object,[], _Fun, Err, []) ->\n    {error, Err};\nget_info_list(_Object,[], _Fun, _Err, Acc) ->\n    {ok, reverse(Acc)}.\n"
  },
  {
    "path": "src/cl10.erl",
    "content": "%%%---- BEGIN COPYRIGHT -------------------------------------------------------\n%%%\n%%% Copyright (C) 2007 - 2012, Rogvall Invest AB, <tony@rogvall.se>\n%%%\n%%% This software is licensed as described in the file COPYRIGHT, which\n%%% you should have received as part of this distribution. The terms\n%%% are also available at http://www.rogvall.se/docs/copyright.txt.\n%%%\n%%% You may opt to use, copy, modify, merge, publish, distribute and/or sell\n%%% copies of the Software, and permit persons to whom the Software is\n%%% furnished to do so, under the terms of the COPYRIGHT file.\n%%%\n%%% This software is distributed on an \"AS IS\" basis, WITHOUT WARRANTY OF ANY\n%%% KIND, either express or implied.\n%%%\n%%%---- END COPYRIGHT ---------------------------------------------------------\n%%% @author Tony Rogvall <tony@rogvall.se>\n%%% @copyright (C) 2013, Tony Rogvall\n%%% @doc\n%%%    OpenCL 1.0 API\n%%% @end\n%%% Created : 13 Jan 2013 by Tony Rogvall <tony@rogvall.se>\n\n-module(cl10).\n\n-on_load(init/0).\n\n-export([start/0, start/1, stop/0]).\n%% Platform\n-export([get_platform_ids/0]).\n-export([platform_info/0]).\n-export([get_platform_info/1,get_platform_info/2]).\n%% Devices\n-export([get_device_ids/0, get_device_ids/2]).\n-export([device_info/0]).\n-export([get_device_info/1,get_device_info/2]).\n%% Context\n-export([create_context/1]).\n-export([create_context_from_type/1]).\n-export([release_context/1]).\n-export([retain_context/1]).\n-export([context_info/0]).\n-export([get_context_info/1,get_context_info/2]).\n%% Command queue\n-export([create_queue/3]).\n-export([set_queue_property/3]).\n-export([release_queue/1]).\n-export([retain_queue/1]).\n-export([queue_info/0]).\n-export([get_queue_info/1,get_queue_info/2]).\n%% Memory object\n-export([create_buffer/3, 
create_buffer/4]).\n-export([release_mem_object/1]).\n-export([retain_mem_object/1]).\n-export([mem_object_info/0]).\n-export([get_mem_object_info/1,get_mem_object_info/2]).\n-export([image_info/0]).\n-export([get_image_info/1,get_image_info/2]).\n-export([get_supported_image_formats/3]).\n-export([create_image2d/7]).\n-export([create_image3d/9]).\n\n%% Sampler \n-export([create_sampler/4]).\n-export([release_sampler/1]).\n-export([retain_sampler/1]).\n-export([sampler_info/0]).\n-export([get_sampler_info/1,get_sampler_info/2]).\n%% Program\n-export([create_program_with_source/2]).\n-export([create_program_with_binary/3]).\n-export([release_program/1]).\n-export([retain_program/1]).\n-export([build_program/3, async_build_program/3]).\n-export([unload_compiler/0]).\n-export([program_info/0]).\n-export([get_program_info/1,get_program_info/2]).\n-export([program_build_info/0]).\n-export([get_program_build_info/2,get_program_build_info/3]).\n%% Kernel\n-export([create_kernel/2]).\n-export([create_kernels_in_program/1]).\n-export([set_kernel_arg/3]).\n-export([set_kernel_arg_size/3]).\n-export([release_kernel/1]).\n-export([retain_kernel/1]).\n-export([kernel_info/0]).\n-export([get_kernel_info/1,get_kernel_info/2]).\n-export([kernel_workgroup_info/0]).\n-export([get_kernel_workgroup_info/2,get_kernel_workgroup_info/3]).\n%% Events\n-export([enqueue_task/3, 
enqueue_task/4]).\n-export([nowait_enqueue_task/3]).\n-export([enqueue_nd_range_kernel/5]).\n-export([enqueue_nd_range_kernel/6]).\n-export([nowait_enqueue_nd_range_kernel/5]).\n-export([enqueue_marker/1]).\n-export([enqueue_barrier/1]).\n-export([enqueue_wait_for_events/2]).\n-export([enqueue_read_buffer/5]).\n-export([enqueue_write_buffer/6]).\n-export([enqueue_write_buffer/7]).\n-export([nowait_enqueue_write_buffer/6]).\n-export([enqueue_read_image/7]).\n-export([enqueue_write_image/8]).\n-export([enqueue_write_image/9]).\n-export([nowait_enqueue_write_image/8]).\n-export([enqueue_copy_image/7]).\n-export([enqueue_copy_image_to_buffer/7]).\n-export([enqueue_copy_buffer_to_image/7]).\n-export([enqueue_map_buffer/6]).\n-export([enqueue_map_image/6]).\n-export([enqueue_unmap_mem_object/3]).\n-export([release_event/1]).\n-export([retain_event/1]).\n-export([event_info/0]).\n-export([get_event_info/1, get_event_info/2]).\n-export([wait/1, wait/2]).\n\n-export([async_flush/1, flush/1]).\n-export([async_finish/1, finish/1]).\n-export([async_wait_for_event/1, wait_for_event/1]).\n\ninit() ->\n    case lists:member({1,0}, cl:versions()) of\n\tfalse -> erlang:error(cl_1_0_not_supported);\n\ttrue -> ok\n    end.\n\nstart(Args) ->  cl:start(Args).\nstart() -> cl:start().\nstop() -> cl:stop().\nget_platform_ids() -> cl:get_platform_ids().\nplatform_info() -> cl:platform_info().\nget_platform_info(A1) -> cl:get_platform_info(A1).\nget_platform_info(A1,A2) -> cl:get_platform_info(A1,A2).\nget_device_ids() -> cl:get_device_ids().\nget_device_ids(A1,A2) -> cl:get_device_ids(A1,A2).\ndevice_info() -> cl:device_info_10([]).\nget_device_info(A1) -> cl:get_device_info(A1).\nget_device_info(A1,A2) -> cl:get_device_info(A1,A2).\ncreate_context(A1) -> cl:create_context(A1).\ncreate_context_from_type(A1) -> cl:create_context_from_type(A1).\nrelease_context(A1) -> cl:release_context(A1).\nretain_context(A1) -> cl:retain_context(A1).\ncontext_info() -> 
cl:context_info().\nget_context_info(A1) -> cl:get_context_info(A1).\nget_context_info(A1,A2) -> cl:get_context_info(A1,A2).\ncreate_queue(A1,A2,A3) -> cl:create_queue(A1,A2,A3).\n-spec set_queue_property(_, _, _) -> no_return().\nset_queue_property(A1,A2,A3) -> cl:set_queue_property(A1,A2,A3).\nrelease_queue(A1) -> cl:release_queue(A1).\nretain_queue(A1) -> cl:retain_queue(A1).\nqueue_info() -> cl:queue_info().\nget_queue_info(A1) -> cl:get_queue_info(A1).\nget_queue_info(A1,A2) -> cl:get_queue_info(A1,A2).\ncreate_buffer(A1,A2,A3) -> cl:create_buffer(A1,A2,A3).\ncreate_buffer(A1,A2,A3,A4) -> cl:create_buffer(A1,A2,A3,A4).\nrelease_mem_object(A1) -> cl:release_mem_object(A1).\nretain_mem_object(A1) -> cl:retain_mem_object(A1).\nmem_object_info() -> cl:mem_object_info().\nget_mem_object_info(A1) -> cl:get_mem_object_info(A1).\nget_mem_object_info(A1,A2) -> cl:get_mem_object_info(A1,A2).\nimage_info() -> cl:image_info().\nget_image_info(A1) -> cl:get_image_info(A1).\nget_image_info(A1,A2) -> cl:get_image_info(A1,A2).\nget_supported_image_formats(A1,A2,A3) -> \n    cl:get_supported_image_formats(A1,A2,A3).\ncreate_image2d(A1,A2,A3,A4,A5,A6,A7) -> \n    cl:create_image2d(A1,A2,A3,A4,A5,A6,A7).\ncreate_image3d(A1,A2,A3,A4,A5,A6,A7,A8,A9) -> \n    cl:create_image3d(A1,A2,A3,A4,A5,A6,A7,A8,A9).\ncreate_sampler(A1,A2,A3,A4) -> cl:create_sampler(A1,A2,A3,A4).\nrelease_sampler(A1) -> cl:release_sampler(A1).\nretain_sampler(A1) -> cl:retain_sampler(A1).\nsampler_info() -> cl:sampler_info().\nget_sampler_info(A1) -> cl:get_sampler_info(A1).\nget_sampler_info(A1,A2) -> cl:get_sampler_info(A1,A2).\ncreate_program_with_source(A1,A2) -> cl:create_program_with_source(A1,A2).\ncreate_program_with_binary(A1,A2,A3) -> cl:create_program_with_binary(A1,A2,A3).\nrelease_program(A1) -> cl:release_program(A1).\nretain_program(A1) -> cl:retain_program(A1).\nbuild_program(A1,A2,A3) -> cl:build_program(A1,A2,A3).\nasync_build_program(A1,A2,A3) -> 
cl:async_build_program(A1,A2,A3).\nunload_compiler() -> cl:unload_compiler().\nprogram_info() -> cl:program_info().\nget_program_info(A1) -> cl:get_program_info(A1).\nget_program_info(A1,A2) -> cl:get_program_info(A1,A2).\nprogram_build_info() -> cl:program_build_info().\nget_program_build_info(A1,A2) -> cl:get_program_build_info(A1,A2).\nget_program_build_info(A1,A2,A3) -> cl:get_program_build_info(A1,A2,A3).\ncreate_kernel(A1,A2) -> cl:create_kernel(A1,A2).\ncreate_kernels_in_program(A1) -> cl:create_kernels_in_program(A1).\nset_kernel_arg(A1,A2,A3) -> cl:set_kernel_arg(A1,A2,A3).\nset_kernel_arg_size(A1,A2,A3) -> cl:set_kernel_arg_size(A1,A2,A3).\nrelease_kernel(A1) -> cl:release_kernel(A1).\nretain_kernel(A1) -> cl:retain_kernel(A1).\nkernel_info() -> cl:kernel_info().\nget_kernel_info(A1) -> cl:get_kernel_info(A1).\nget_kernel_info(A1,A2) -> cl:get_kernel_info(A1,A2).\nkernel_workgroup_info() -> cl:kernel_workgroup_info().\nget_kernel_workgroup_info(A1,A2) -> cl:get_kernel_workgroup_info(A1,A2).\nget_kernel_workgroup_info(A1,A2,A3) -> cl:get_kernel_workgroup_info(A1,A2,A3).\nenqueue_task(A1,A2,A3) -> cl:enqueue_task(A1,A2,A3). 
\nenqueue_task(A1,A2,A3,A4) -> cl:enqueue_task(A1,A2,A3,A4).\nnowait_enqueue_task(A1,A2,A3) -> cl:nowait_enqueue_task(A1,A2,A3).\nenqueue_nd_range_kernel(A1,A2,A3,A4,A5) -> \n    cl:enqueue_nd_range_kernel(A1,A2,A3,A4,A5).\nenqueue_nd_range_kernel(A1,A2,A3,A4,A5,A6) -> \n    cl:enqueue_nd_range_kernel(A1,A2,A3,A4,A5,A6).\nnowait_enqueue_nd_range_kernel(A1,A2,A3,A4,A5) -> \n    cl:nowait_enqueue_nd_range_kernel(A1,A2,A3,A4,A5).\nenqueue_marker(A1) -> cl:enqueue_marker(A1).\nenqueue_barrier(A1) -> cl:enqueue_barrier(A1).\nenqueue_wait_for_events(A1,A2) -> \n    cl:enqueue_wait_for_events(A1,A2).\nenqueue_read_buffer(A1,A2,A3,A4,A5) ->\n    cl:enqueue_read_buffer(A1,A2,A3,A4,A5).\nenqueue_write_buffer(A1,A2,A3,A4,A5,A6) -> \n    cl:enqueue_write_buffer(A1,A2,A3,A4,A5,A6).\nenqueue_write_buffer(A1,A2,A3,A4,A5,A6,A7) ->\n    cl:enqueue_write_buffer(A1,A2,A3,A4,A5,A6,A7).\nnowait_enqueue_write_buffer(A1,A2,A3,A4,A5,A6) -> \n    cl:nowait_enqueue_write_buffer(A1,A2,A3,A4,A5,A6).\nenqueue_read_image(A1,A2,A3,A4,A5,A6,A7) -> \n    cl:enqueue_read_image(A1,A2,A3,A4,A5,A6,A7).\nenqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8) -> \n    cl:enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8).\nenqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8,A9) ->\n    cl:enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8,A9).\nnowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8) -> \n    cl:nowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8).\nenqueue_copy_image(A1,A2,A3,A4,A5,A6,A7) ->\n    cl:enqueue_copy_image(A1,A2,A3,A4,A5,A6,A7).\nenqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7) ->\n    cl:enqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7).\nenqueue_copy_buffer_to_image(A1,A2,A3,A4,A5,A6,A7) ->\n    cl:enqueue_copy_buffer_to_image(A1,A2,A3,A4,A5,A6,A7).\nenqueue_map_buffer(A1,A2,A3,A4,A5,A6) -> \n    cl:enqueue_map_buffer(A1,A2,A3,A4,A5,A6).\nenqueue_map_image(A1,A2,A3,A4,A5,A6) -> \n    cl:enqueue_map_image(A1,A2,A3,A4,A5,A6).\nenqueue_unmap_mem_object(A1,A2,A3) -> \n    
cl:enqueue_unmap_mem_object(A1,A2,A3).\nrelease_event(A1) -> cl:release_event(A1).\nretain_event(A1) -> cl:retain_event(A1).\nevent_info() -> cl:event_info().\nget_event_info(A1) -> cl:get_event_info(A1).\nget_event_info(A1,A2) -> cl:get_event_info(A1,A2).\nwait(A1) -> cl:wait(A1). \nwait(A1,A2) -> cl:wait(A1,A2).\nasync_flush(A1) -> cl:async_flush(A1). \nflush(A1) -> cl:flush(A1).\nasync_finish(A1) -> cl:async_finish(A1).\nfinish(A1) -> cl:finish(A1).\nasync_wait_for_event(A1) -> cl:async_wait_for_event(A1).\nwait_for_event(A1) -> cl:wait_for_event(A1).\n"
  },
  {
    "path": "src/cl11.erl",
    "content": "%%%---- BEGIN COPYRIGHT -------------------------------------------------------\n%%%\n%%% Copyright (C) 2007 - 2012, Rogvall Invest AB, <tony@rogvall.se>\n%%%\n%%% This software is licensed as described in the file COPYRIGHT, which\n%%% you should have received as part of this distribution. The terms\n%%% are also available at http://www.rogvall.se/docs/copyright.txt.\n%%%\n%%% You may opt to use, copy, modify, merge, publish, distribute and/or sell\n%%% copies of the Software, and permit persons to whom the Software is\n%%% furnished to do so, under the terms of the COPYRIGHT file.\n%%%\n%%% This software is distributed on an \"AS IS\" basis, WITHOUT WARRANTY OF ANY\n%%% KIND, either express or implied.\n%%%\n%%%---- END COPYRIGHT ---------------------------------------------------------\n%%% @author Tony Rogvall <tony@rogvall.se>\n%%% @copyright (C) 2013, Tony Rogvall\n%%% @doc\n%%%    OpenCL 1.1 API\n%%% @end\n%%% Created : 13 Jan 2013 by Tony Rogvall <tony@rogvall.se>\n\n-module(cl11).\n\n-on_load(init/0).\n\n-export([start/0, start/1, stop/0]).\n%% Platform\n-export([get_platform_ids/0]).\n-export([platform_info/0]).\n-export([get_platform_info/1,get_platform_info/2]).\n%% Devices\n-export([get_device_ids/0, get_device_ids/2]).\n-export([device_info/0]).\n-export([get_device_info/1,get_device_info/2]).\n%% Context\n-export([create_context/1]).\n-export([create_context_from_type/1]).\n-export([release_context/1]).\n-export([retain_context/1]).\n-export([context_info/0]).\n-export([get_context_info/1,get_context_info/2]).\n%% Command queue\n-export([create_queue/3]).\n-export([set_queue_property/3]).\n-export([release_queue/1]).\n-export([retain_queue/1]).\n-export([queue_info/0]).\n-export([get_queue_info/1,get_queue_info/2]).\n%% Memory object\n-export([create_buffer/3, 
create_buffer/4]).\n-export([release_mem_object/1]).\n-export([retain_mem_object/1]).\n-export([mem_object_info/0]).\n-export([get_mem_object_info/1,get_mem_object_info/2]).\n-export([image_info/0]).\n-export([get_image_info/1,get_image_info/2]).\n-export([get_supported_image_formats/3]).\n-export([create_image2d/7]).\n-export([create_image3d/9]).\n\n%% Sampler \n-export([create_sampler/4]).\n-export([release_sampler/1]).\n-export([retain_sampler/1]).\n-export([sampler_info/0]).\n-export([get_sampler_info/1,get_sampler_info/2]).\n%% Program\n-export([create_program_with_source/2]).\n-export([create_program_with_binary/3]).\n-export([release_program/1]).\n-export([retain_program/1]).\n-export([build_program/3, async_build_program/3]).\n-export([unload_compiler/0]).\n-export([program_info/0]).\n-export([get_program_info/1,get_program_info/2]).\n-export([program_build_info/0]).\n-export([get_program_build_info/2,get_program_build_info/3]).\n%% Kernel\n-export([create_kernel/2]).\n-export([create_kernels_in_program/1]).\n-export([set_kernel_arg/3]).\n-export([set_kernel_arg_size/3]).\n-export([release_kernel/1]).\n-export([retain_kernel/1]).\n-export([kernel_info/0]).\n-export([get_kernel_info/1,get_kernel_info/2]).\n-export([kernel_workgroup_info/0]).\n-export([get_kernel_workgroup_info/2,get_kernel_workgroup_info/3]).\n%% Events\n-export([enqueue_task/3, 
enqueue_task/4]).\n-export([nowait_enqueue_task/3]).\n-export([enqueue_nd_range_kernel/5]).\n-export([enqueue_nd_range_kernel/6]).\n-export([nowait_enqueue_nd_range_kernel/5]).\n-export([enqueue_marker/1]).\n-export([enqueue_barrier/1]).\n-export([enqueue_wait_for_events/2]).\n-export([enqueue_read_buffer/5]).\n-export([enqueue_write_buffer/6]).\n-export([enqueue_write_buffer/7]).\n-export([nowait_enqueue_write_buffer/6]).\n-export([enqueue_read_image/7]).\n-export([enqueue_write_image/8]).\n-export([enqueue_write_image/9]).\n-export([nowait_enqueue_write_image/8]).\n-export([enqueue_copy_image/7]).\n-export([enqueue_copy_image_to_buffer/7]).\n-export([enqueue_copy_buffer_to_image/7]).\n-export([enqueue_map_buffer/6]).\n-export([enqueue_map_image/6]).\n-export([enqueue_unmap_mem_object/3]).\n-export([release_event/1]).\n-export([retain_event/1]).\n-export([event_info/0]).\n-export([get_event_info/1, get_event_info/2]).\n-export([wait/1, wait/2]).\n\n-export([async_flush/1, flush/1]).\n-export([async_finish/1, finish/1]).\n-export([async_wait_for_event/1, wait_for_event/1]).\n\ninit() ->\n    case lists:member({1,1}, cl:versions()) of\n\tfalse -> erlang:error(cl_1_1_not_supported);\n\ttrue -> ok\n    end.\n\nstart(Args) ->  cl:start(Args).\nstart() -> cl:start().\nstop() -> cl:stop().\nget_platform_ids() -> cl:get_platform_ids().\nplatform_info() -> cl:platform_info().\nget_platform_info(A1) -> cl:get_platform_info(A1).\nget_platform_info(A1,A2) -> cl:get_platform_info(A1,A2).\nget_device_ids() -> cl:get_device_ids().\nget_device_ids(A1,A2) -> cl:get_device_ids(A1,A2).\ndevice_info() -> cl:device_info_10(cl:device_info_11([])).\nget_device_info(A1) -> cl:get_device_info(A1).\nget_device_info(A1,A2) -> cl:get_device_info(A1,A2).\ncreate_context(A1) -> cl:create_context(A1).\ncreate_context_from_type(A1) -> cl:create_context_from_type(A1).\nrelease_context(A1) -> cl:release_context(A1).\nretain_context(A1) -> cl:retain_context(A1).\ncontext_info() -> 
cl:context_info().\nget_context_info(A1) -> cl:get_context_info(A1).\nget_context_info(A1,A2) -> cl:get_context_info(A1,A2).\ncreate_queue(A1,A2,A3) -> cl:create_queue(A1,A2,A3).\n-spec set_queue_property(_, _, _) -> no_return().\nset_queue_property(A1,A2,A3) -> cl:set_queue_property(A1,A2,A3).\nrelease_queue(A1) -> cl:release_queue(A1).\nretain_queue(A1) -> cl:retain_queue(A1).\nqueue_info() -> cl:queue_info().\nget_queue_info(A1) -> cl:get_queue_info(A1).\nget_queue_info(A1,A2) -> cl:get_queue_info(A1,A2).\ncreate_buffer(A1,A2,A3) -> cl:create_buffer(A1,A2,A3).\ncreate_buffer(A1,A2,A3,A4) -> cl:create_buffer(A1,A2,A3,A4).\nrelease_mem_object(A1) -> cl:release_mem_object(A1).\nretain_mem_object(A1) -> cl:retain_mem_object(A1).\nmem_object_info() -> cl:mem_object_info().\nget_mem_object_info(A1) -> cl:get_mem_object_info(A1).\nget_mem_object_info(A1,A2) -> cl:get_mem_object_info(A1,A2).\nimage_info() -> cl:image_info().\nget_image_info(A1) -> cl:get_image_info(A1).\nget_image_info(A1,A2) -> cl:get_image_info(A1,A2).\nget_supported_image_formats(A1,A2,A3) -> \n    cl:get_supported_image_formats(A1,A2,A3).\ncreate_image2d(A1,A2,A3,A4,A5,A6,A7) -> \n    cl:create_image2d(A1,A2,A3,A4,A5,A6,A7).\ncreate_image3d(A1,A2,A3,A4,A5,A6,A7,A8,A9) -> \n    cl:create_image3d(A1,A2,A3,A4,A5,A6,A7,A8,A9).\ncreate_sampler(A1,A2,A3,A4) -> cl:create_sampler(A1,A2,A3,A4).\nrelease_sampler(A1) -> cl:release_sampler(A1).\nretain_sampler(A1) -> cl:retain_sampler(A1).\nsampler_info() -> cl:sampler_info().\nget_sampler_info(A1) -> cl:get_sampler_info(A1).\nget_sampler_info(A1,A2) -> cl:get_sampler_info(A1,A2).\ncreate_program_with_source(A1,A2) -> cl:create_program_with_source(A1,A2).\ncreate_program_with_binary(A1,A2,A3) -> cl:create_program_with_binary(A1,A2,A3).\nrelease_program(A1) -> cl:release_program(A1).\nretain_program(A1) -> cl:retain_program(A1).\nbuild_program(A1,A2,A3) -> cl:build_program(A1,A2,A3).\nasync_build_program(A1,A2,A3) -> 
cl:async_build_program(A1,A2,A3).\nunload_compiler() -> cl:unload_compiler().\nprogram_info() -> cl:program_info().\nget_program_info(A1) -> cl:get_program_info(A1).\nget_program_info(A1,A2) -> cl:get_program_info(A1,A2).\nprogram_build_info() -> cl:program_build_info().\nget_program_build_info(A1,A2) -> cl:get_program_build_info(A1,A2).\nget_program_build_info(A1,A2,A3) -> cl:get_program_build_info(A1,A2,A3).\ncreate_kernel(A1,A2) -> cl:create_kernel(A1,A2).\ncreate_kernels_in_program(A1) -> cl:create_kernels_in_program(A1).\nset_kernel_arg(A1,A2,A3) -> cl:set_kernel_arg(A1,A2,A3).\nset_kernel_arg_size(A1,A2,A3) -> cl:set_kernel_arg_size(A1,A2,A3).\nrelease_kernel(A1) -> cl:release_kernel(A1).\nretain_kernel(A1) -> cl:retain_kernel(A1).\nkernel_info() -> cl:kernel_info().\nget_kernel_info(A1) -> cl:get_kernel_info(A1).\nget_kernel_info(A1,A2) -> cl:get_kernel_info(A1,A2).\nkernel_workgroup_info() -> cl:kernel_workgroup_info().\nget_kernel_workgroup_info(A1,A2) -> cl:get_kernel_workgroup_info(A1,A2).\nget_kernel_workgroup_info(A1,A2,A3) -> cl:get_kernel_workgroup_info(A1,A2,A3).\nenqueue_task(A1,A2,A3) -> cl:enqueue_task(A1,A2,A3). 
\nenqueue_task(A1,A2,A3,A4) -> cl:enqueue_task(A1,A2,A3,A4).\nnowait_enqueue_task(A1,A2,A3) -> cl:nowait_enqueue_task(A1,A2,A3).\nenqueue_nd_range_kernel(A1,A2,A3,A4,A5) -> \n    cl:enqueue_nd_range_kernel(A1,A2,A3,A4,A5).\nenqueue_nd_range_kernel(A1,A2,A3,A4,A5,A6) -> \n    cl:enqueue_nd_range_kernel(A1,A2,A3,A4,A5,A6).\nnowait_enqueue_nd_range_kernel(A1,A2,A3,A4,A5) -> \n    cl:nowait_enqueue_nd_range_kernel(A1,A2,A3,A4,A5).\nenqueue_marker(A1) -> cl:enqueue_marker(A1).\nenqueue_barrier(A1) -> cl:enqueue_barrier(A1).\nenqueue_wait_for_events(A1,A2) -> \n    cl:enqueue_wait_for_events(A1,A2).\nenqueue_read_buffer(A1,A2,A3,A4,A5) ->\n    cl:enqueue_read_buffer(A1,A2,A3,A4,A5).\nenqueue_write_buffer(A1,A2,A3,A4,A5,A6) -> \n    cl:enqueue_write_buffer(A1,A2,A3,A4,A5,A6).\nenqueue_write_buffer(A1,A2,A3,A4,A5,A6,A7) ->\n    cl:enqueue_write_buffer(A1,A2,A3,A4,A5,A6,A7).\nnowait_enqueue_write_buffer(A1,A2,A3,A4,A5,A6) -> \n    cl:nowait_enqueue_write_buffer(A1,A2,A3,A4,A5,A6).\nenqueue_read_image(A1,A2,A3,A4,A5,A6,A7) -> \n    cl:enqueue_read_image(A1,A2,A3,A4,A5,A6,A7).\nenqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8) -> \n    cl:enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8).\nenqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8,A9) ->\n    cl:enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8,A9).\nnowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8) -> \n    cl:nowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8).\nenqueue_copy_image(A1,A2,A3,A4,A5,A6,A7) ->\n    cl:enqueue_copy_image(A1,A2,A3,A4,A5,A6,A7).\nenqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7) ->\n    cl:enqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7).\nenqueue_copy_buffer_to_image(A1,A2,A3,A4,A5,A6,A7) ->\n    cl:enqueue_copy_buffer_to_image(A1,A2,A3,A4,A5,A6,A7).\nenqueue_map_buffer(A1,A2,A3,A4,A5,A6) -> \n    cl:enqueue_map_buffer(A1,A2,A3,A4,A5,A6).\nenqueue_map_image(A1,A2,A3,A4,A5,A6) -> \n    cl:enqueue_map_image(A1,A2,A3,A4,A5,A6).\nenqueue_unmap_mem_object(A1,A2,A3) -> \n    
cl:enqueue_unmap_mem_object(A1,A2,A3).\nrelease_event(A1) -> cl:release_event(A1).\nretain_event(A1) -> cl:retain_event(A1).\nevent_info() -> cl:event_info().\nget_event_info(A1) -> cl:get_event_info(A1).\nget_event_info(A1,A2) -> cl:get_event_info(A1,A2).\nwait(A1) -> cl:wait(A1). \nwait(A1,A2) -> cl:wait(A1,A2).\nasync_flush(A1) -> cl:async_flush(A1). \nflush(A1) -> cl:flush(A1).\nasync_finish(A1) -> cl:async_finish(A1).\nfinish(A1) -> cl:finish(A1).\nasync_wait_for_event(A1) -> cl:async_wait_for_event(A1).\nwait_for_event(A1) -> cl:wait_for_event(A1).\n"
  },
  {
    "path": "src/cl12.erl",
    "content": "%%%---- BEGIN COPYRIGHT -------------------------------------------------------\n%%%\n%%% Copyright (C) 2007 - 2012, Rogvall Invest AB, <tony@rogvall.se>\n%%%\n%%% This software is licensed as described in the file COPYRIGHT, which\n%%% you should have received as part of this distribution. The terms\n%%% are also available at http://www.rogvall.se/docs/copyright.txt.\n%%%\n%%% You may opt to use, copy, modify, merge, publish, distribute and/or sell\n%%% copies of the Software, and permit persons to whom the Software is\n%%% furnished to do so, under the terms of the COPYRIGHT file.\n%%%\n%%% This software is distributed on an \"AS IS\" basis, WITHOUT WARRANTY OF ANY\n%%% KIND, either express or implied.\n%%%\n%%%---- END COPYRIGHT ---------------------------------------------------------\n%%% @author Tony Rogvall <tony@rogvall.se>\n%%% @copyright (C) 2013, Tony Rogvall\n%%% @doc\n%%%    OpenCL 1.2 API\n%%% @end\n%%% Created : 13 Jan 2013 by Tony Rogvall <tony@rogvall.se>\n\n-module(cl12).\n\n-on_load(init/0).\n\n-export([start/0, start/1, stop/0]).\n-export([get_platform_ids/0]).\n-export([platform_info/0]).\n-export([get_platform_info/1,get_platform_info/2]).\n-export([get_device_ids/0, get_device_ids/2]).\n-export([device_info/0]).\n-export([get_device_info/1,get_device_info/2]).\n-export([create_context/1]).\n-export([create_context_from_type/1]).\n-export([release_context/1]).\n-export([retain_context/1]).\n-export([context_info/0]).\n-export([get_context_info/1,get_context_info/2]).\n-export([create_queue/3]).\n-export([set_queue_property/3]).\n-export([release_queue/1]).\n-export([retain_queue/1]).\n-export([queue_info/0]).\n-export([get_queue_info/1,get_queue_info/2]).\n-export([create_buffer/3, 
create_buffer/4]).\n-export([release_mem_object/1]).\n-export([retain_mem_object/1]).\n-export([mem_object_info/0]).\n-export([get_mem_object_info/1,get_mem_object_info/2]).\n-export([image_info/0]).\n-export([get_image_info/1,get_image_info/2]).\n-export([get_supported_image_formats/3]).\n-export([create_image/5]).\n-export([create_sampler/4]).\n-export([release_sampler/1]).\n-export([retain_sampler/1]).\n-export([sampler_info/0]).\n-export([get_sampler_info/1,get_sampler_info/2]).\n-export([create_program_with_source/2]).\n-export([create_program_with_binary/3]).\n-export([release_program/1]).\n-export([retain_program/1]).\n-export([build_program/3, async_build_program/3]).\n-export([unload_platform_compiler/1]).\n-export([program_info/0]).\n-export([get_program_info/1,get_program_info/2]).\n-export([program_build_info/0]).\n-export([get_program_build_info/2,get_program_build_info/3]).\n-export([create_kernel/2]).\n-export([create_kernels_in_program/1]).\n-export([set_kernel_arg/3]).\n-export([set_kernel_arg_size/3]).\n-export([release_kernel/1]).\n-export([retain_kernel/1]).\n-export([kernel_info/0]).\n-export([get_kernel_info/1,get_kernel_info/2]).\n-export([kernel_workgroup_info/0]).\n-export([get_kernel_workgroup_info/2,get_kernel_workgroup_info/3]).\n-export([enqueue_task/3, 
enqueue_task/4]).\n-export([nowait_enqueue_task/3]).\n-export([enqueue_nd_range_kernel/5]).\n-export([enqueue_nd_range_kernel/6]).\n-export([nowait_enqueue_nd_range_kernel/5]).\n-export([enqueue_marker_with_wait_list/2]).\n-export([enqueue_barrier_with_wait_list/2]).\n-export([enqueue_wait_for_events/2]).\n-export([enqueue_read_buffer/5]).\n-export([enqueue_write_buffer/6]).\n-export([enqueue_write_buffer/7]).\n-export([nowait_enqueue_write_buffer/6]).\n-export([enqueue_read_image/7]).\n-export([enqueue_write_image/8]).\n-export([enqueue_write_image/9]).\n-export([nowait_enqueue_write_image/8]).\n-export([enqueue_copy_image/7]).\n-export([enqueue_copy_image_to_buffer/7]).\n-export([enqueue_copy_buffer_to_image/7]).\n-export([enqueue_map_buffer/6]).\n-export([enqueue_map_image/6]).\n-export([enqueue_unmap_mem_object/3]).\n-export([release_event/1]).\n-export([retain_event/1]).\n-export([event_info/0]).\n-export([get_event_info/1, get_event_info/2]).\n-export([wait/1, wait/2]).\n-export([async_flush/1, flush/1]).\n-export([async_finish/1, finish/1]).\n-export([async_wait_for_event/1, wait_for_event/1]).\n\ninit() ->\n    case lists:member({1,2}, cl:versions()) of\n\tfalse -> erlang:error(cl_1_2_not_supported);\n\ttrue -> ok\n    end.\n\nstart(Args) ->  cl:start(Args).\nstart() -> cl:start().\nstop() -> cl:stop().\nget_platform_ids() -> cl:get_platform_ids().\nplatform_info() -> cl:platform_info().\nget_platform_info(A1) -> cl:get_platform_info(A1).\nget_platform_info(A1,A2) -> cl:get_platform_info(A1,A2).\nget_device_ids() -> cl:get_device_ids().\nget_device_ids(A1,A2) -> cl:get_device_ids(A1,A2).\ndevice_info() ->\n    cl:device_info_10(cl:device_info_11(cl:device_info_12([]))).\nget_device_info(A1) -> cl:get_device_info(A1).\nget_device_info(A1,A2) -> cl:get_device_info(A1,A2).\ncreate_context(A1) -> cl:create_context(A1).\ncreate_context_from_type(A1) -> cl:create_context_from_type(A1).\nrelease_context(A1) -> cl:release_context(A1).\nretain_context(A1) -> 
cl:retain_context(A1).\ncontext_info() -> cl:context_info().\nget_context_info(A1) -> cl:get_context_info(A1).\nget_context_info(A1,A2) -> cl:get_context_info(A1,A2).\ncreate_queue(A1,A2,A3) -> cl:create_queue(A1,A2,A3).\n-spec set_queue_property(_, _, _) -> no_return().\nset_queue_property(A1,A2,A3) -> cl:set_queue_property(A1,A2,A3).\nrelease_queue(A1) -> cl:release_queue(A1).\nretain_queue(A1) -> cl:retain_queue(A1).\nqueue_info() -> cl:queue_info().\nget_queue_info(A1) -> cl:get_queue_info(A1).\nget_queue_info(A1,A2) -> cl:get_queue_info(A1,A2).\ncreate_buffer(A1,A2,A3) -> cl:create_buffer(A1,A2,A3).\ncreate_buffer(A1,A2,A3,A4) -> cl:create_buffer(A1,A2,A3,A4).\nrelease_mem_object(A1) -> cl:release_mem_object(A1).\nretain_mem_object(A1) -> cl:retain_mem_object(A1).\nmem_object_info() -> cl:mem_object_info().\nget_mem_object_info(A1) -> cl:get_mem_object_info(A1).\nget_mem_object_info(A1,A2) -> cl:get_mem_object_info(A1,A2).\nimage_info() -> cl:image_info().\nget_image_info(A1) -> cl:get_image_info(A1).\nget_image_info(A1,A2) -> cl:get_image_info(A1,A2).\nget_supported_image_formats(A1,A2,A3) -> cl:get_supported_image_formats(A1,A2,A3).\ncreate_image(A1,A2,A3,A4,A5) -> cl:create_image(A1,A2,A3,A4,A5).\ncreate_sampler(A1,A2,A3,A4) -> cl:create_sampler(A1,A2,A3,A4).\nrelease_sampler(A1) -> cl:release_sampler(A1).\nretain_sampler(A1) -> cl:retain_sampler(A1).\nsampler_info() -> cl:sampler_info().\nget_sampler_info(A1) -> cl:get_sampler_info(A1).\nget_sampler_info(A1,A2) -> cl:get_sampler_info(A1,A2).\ncreate_program_with_source(A1,A2) -> cl:create_program_with_source(A1,A2).\ncreate_program_with_binary(A1,A2,A3) -> cl:create_program_with_binary(A1,A2,A3).\nrelease_program(A1) -> cl:release_program(A1).\nretain_program(A1) -> cl:retain_program(A1).\nbuild_program(A1,A2,A3) -> cl:build_program(A1,A2,A3).\nasync_build_program(A1,A2,A3) -> cl:async_build_program(A1,A2,A3).\nunload_platform_compiler(A1) -> cl:unload_platform_compiler(A1).\nprogram_info() -> 
cl:program_info().\nget_program_info(A1) -> cl:get_program_info(A1).\nget_program_info(A1,A2) -> cl:get_program_info(A1,A2).\nprogram_build_info() -> cl:program_build_info().\nget_program_build_info(A1,A2) -> cl:get_program_build_info(A1,A2).\nget_program_build_info(A1,A2,A3) -> cl:get_program_build_info(A1,A2,A3).\ncreate_kernel(A1,A2) -> cl:create_kernel(A1,A2).\ncreate_kernels_in_program(A1) -> cl:create_kernels_in_program(A1).\nset_kernel_arg(A1,A2,A3) -> cl:set_kernel_arg(A1,A2,A3).\nset_kernel_arg_size(A1,A2,A3) -> cl:set_kernel_arg_size(A1,A2,A3).\nrelease_kernel(A1) -> cl:release_kernel(A1).\nretain_kernel(A1) -> cl:retain_kernel(A1).\nkernel_info() -> cl:kernel_info().\nget_kernel_info(A1) -> cl:get_kernel_info(A1).\nget_kernel_info(A1,A2) -> cl:get_kernel_info(A1,A2).\nkernel_workgroup_info() -> cl:kernel_workgroup_info().\nget_kernel_workgroup_info(A1,A2) -> cl:get_kernel_workgroup_info(A1,A2).\nget_kernel_workgroup_info(A1,A2,A3) -> cl:get_kernel_workgroup_info(A1,A2,A3).\nenqueue_task(A1,A2,A3) -> cl:enqueue_task(A1,A2,A3). 
\nenqueue_task(A1,A2,A3,A4) -> cl:enqueue_task(A1,A2,A3,A4).\nnowait_enqueue_task(A1,A2,A3) -> cl:nowait_enqueue_task(A1,A2,A3).\nenqueue_nd_range_kernel(A1,A2,A3,A4,A5) -> \n    cl:enqueue_nd_range_kernel(A1,A2,A3,A4,A5).\nenqueue_nd_range_kernel(A1,A2,A3,A4,A5,A6) ->\n    cl:enqueue_nd_range_kernel(A1,A2,A3,A4,A5,A6).\nnowait_enqueue_nd_range_kernel(A1,A2,A3,A4,A5) ->\n    cl:nowait_enqueue_nd_range_kernel(A1,A2,A3,A4,A5).\nenqueue_marker_with_wait_list(A1,A2) -> \n    cl:enqueue_marker_with_wait_list(A1,A2).\nenqueue_barrier_with_wait_list(A1,A2) ->\n    cl:enqueue_barrier_with_wait_list(A1,A2).\nenqueue_wait_for_events(A1,A2) -> \n    cl:enqueue_wait_for_events(A1,A2).\nenqueue_read_buffer(A1,A2,A3,A4,A5) ->\n    cl:enqueue_read_buffer(A1,A2,A3,A4,A5).\nenqueue_write_buffer(A1,A2,A3,A4,A5,A6) ->\n    cl:enqueue_write_buffer(A1,A2,A3,A4,A5,A6).\nenqueue_write_buffer(A1,A2,A3,A4,A5,A6,A7) ->\n    cl:enqueue_write_buffer(A1,A2,A3,A4,A5,A6,A7).\nnowait_enqueue_write_buffer(A1,A2,A3,A4,A5,A6) ->\n    cl:nowait_enqueue_write_buffer(A1,A2,A3,A4,A5,A6).\nenqueue_read_image(A1,A2,A3,A4,A5,A6,A7) ->\n    cl:enqueue_read_image(A1,A2,A3,A4,A5,A6,A7).\nenqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8) ->\n    cl:enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8).\nenqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8,A9) ->\n    cl:enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8,A9).\nnowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8) ->\n    cl:nowait_enqueue_write_image(A1,A2,A3,A4,A5,A6,A7,A8).\nenqueue_copy_image(A1,A2,A3,A4,A5,A6,A7) ->\n    cl:enqueue_copy_image(A1,A2,A3,A4,A5,A6,A7).\nenqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7) ->\n    cl:enqueue_copy_image_to_buffer(A1,A2,A3,A4,A5,A6,A7).\nenqueue_copy_buffer_to_image(A1,A2,A3,A4,A5,A6,A7) ->\n    cl:enqueue_copy_buffer_to_image(A1,A2,A3,A4,A5,A6,A7).\nenqueue_map_buffer(A1,A2,A3,A4,A5,A6) ->\n    cl:enqueue_map_buffer(A1,A2,A3,A4,A5,A6).\nenqueue_map_image(A1,A2,A3,A4,A5,A6) ->\n    
cl:enqueue_map_image(A1,A2,A3,A4,A5,A6).\nenqueue_unmap_mem_object(A1,A2,A3) ->\n    cl:enqueue_unmap_mem_object(A1,A2,A3).\nrelease_event(A1) -> cl:release_event(A1).\nretain_event(A1) -> cl:retain_event(A1).\nevent_info() -> cl:event_info().\nget_event_info(A1) -> cl:get_event_info(A1).\nget_event_info(A1,A2) -> cl:get_event_info(A1,A2).\nwait(A1) -> cl:wait(A1). \nwait(A1,A2) -> cl:wait(A1,A2).\nasync_flush(A1) -> cl:async_flush(A1). \nflush(A1) -> cl:flush(A1).\nasync_finish(A1) -> cl:async_finish(A1).\nfinish(A1) -> cl:finish(A1).\nasync_wait_for_event(A1) -> cl:async_wait_for_event(A1).\nwait_for_event(A1) -> cl:wait_for_event(A1).\n"
  },
  {
    "path": "src/clu.erl",
    "content": "%%%---- BEGIN COPYRIGHT -------------------------------------------------------\n%%%\n%%% Copyright (C) 2007 - 2012, Rogvall Invest AB, <tony@rogvall.se>\n%%%\n%%% This software is licensed as described in the file COPYRIGHT, which\n%%% you should have received as part of this distribution. The terms\n%%% are also available at http://www.rogvall.se/docs/copyright.txt.\n%%%\n%%% You may opt to use, copy, modify, merge, publish, distribute and/or sell\n%%% copies of the Software, and permit persons to whom the Software is\n%%% furnished to do so, under the terms of the COPYRIGHT file.\n%%%\n%%% This software is distributed on an \"AS IS\" basis, WITHOUT WARRANTY OF ANY\n%%% KIND, either express or implied.\n%%%\n%%%---- END COPYRIGHT ---------------------------------------------------------\n%%% File    : clu.erl\n%%% Author  : Tony Rogvall <tony@rogvall.se>\n%%% Description : Utilities\n%%% Created : 30 Oct 2009 by Tony Rogvall <tony@rogvall.se>\n\n-module(clu).\n\n-export([setup/0, setup/1, teardown/1]).\n-export([context/1, device_list/1, device/1]).\n-export([build_source/2,build_source/3]).\n-export([build_binary/2,build_binary/3]).\n-export([build_source_file/2, build_source_file/3]).\n-export([compile_file/1,compile_file/2]).\n-export([get_program_binaries/1]).\n-export([apply_kernel_args/2]).\n-export([wait_complete/1]).\n-export([device_has_extension/2]).\n-export([devices_has_extension/2]).\n\n-include(\"../include/cl.hrl\").\n-import(lists, [map/2]).\n\n-type clu_state()   ::  #cl{} | undefined.\n\n%%\n%% @type clu_state() = any()\n%%\n%%\n%% @doc setup the platform and an initial context using\n%%  devices of type DevType. 
Setup currently uses the\n%%  first platform that yields devices and a context.\n%% @spec setup(DevType::cl_device_type()) ->  clu_state()\n%%\n\nsetup(DevType) ->\n    cl:start(),\n    {ok,Ps} = cl:get_platform_ids(),\n    setup(DevType, Ps).\n\nsetup(DevType, [Platform|Ps]) ->\n    case cl:get_device_ids(Platform,DevType) of\n\t{ok, []} ->\n\t    setup(DevType, Ps);\n\t{ok,DeviceList} ->\n\t    case cl:create_context(DeviceList) of\n\t\t{ok,Context} ->\n\t\t    #cl { platform = Platform,\n\t\t\t  devices  = DeviceList,\n\t\t\t  context  = Context };\n\t\t{error, _} when Ps /= [] ->\n\t\t    setup(DevType, Ps);\n\t\tOther ->\n\t\t    Other\n\t    end;\n\t{error, device_not_found} ->\n\t    setup(DevType, Ps);\n\t{error, _} when Ps /= [] ->\n\t    setup(DevType, Ps);\n\tOther ->\n\t    Other\n    end;\nsetup(DevType, []) ->\n    {error, {device_not_found, DevType}}.\n\n\n%%\n%% @doc setup a clu context with all devices.\n%%\n%% @spec setup() -> clu_state()\n%%\nsetup() ->\n    setup(all).\n\n%%\n%% @doc Release the context set up by clu:setup().\n%%\n%% @spec teardown(E::clu_state()) -> 'ok' | {'error',cl_error()}\n%%\nteardown(E) ->\n    cl:release_context(E#cl.context).\n%%\n%% Fetch context\n%%\ncontext(E) ->\n    E#cl.context.\n%%\n%% Fetch device list\n%%\ndevice_list(E) ->\n    E#cl.devices.\n\n%%\n%% Fetch first device\n%%\ndevice(E) ->\n    hd(E#cl.devices).\n\n%%\n%% @doc Create and build an OpenCL program from a string.\n%%\n%% @spec build_source(E::clu_state(), Source::iodata()) ->\n%%   {'ok',cl_program()} | {'error',{cl_error(), Logs}}\n%%\n-spec build_source(E::clu_state(), Source::iodata()) ->\n\t\t\t  {'ok',cl_program()} | \n\t\t\t  {'error',{cl_error(),Logs::term}}.\n\nbuild_source(E, Source) ->\n    build_source(E, Source, \"\").\n\n-spec build_source(E::clu_state(), Source::iodata(), Options::string()) ->\n\t\t\t  {'ok',cl_program()} | \n\t\t\t  {'error',{cl_error(),Logs::term}}.\nbuild_source(E, Source, Options) ->\n    {ok,Program} = 
cl:create_program_with_source(E#cl.context,Source),\n    case cl:build_program(Program, E#cl.devices, Options) of\n\tok ->\n\t    Status = [cl:get_program_build_info(Program, Dev, status)\n\t\t      || Dev <- E#cl.devices],\n\t    case lists:any(fun({ok, success}) -> true; \n\t\t\t      (_) -> false end, Status) \n\t    of\n\t\ttrue -> \n\t\t    {ok,Program};\n\t\tfalse ->\n\t\t    Logs = get_program_logs(Program),\n\t\t    io:format(\"Logs: ~s\\n\", [Logs]),\n\t\t    {error,{Status,Logs}}\n\t    end;\n\tError ->\n\t    Logs = get_program_logs(Program),\n\t    io:format(\"Logs: ~s\\n\", [Logs]),\n\t    cl:release_program(Program),\n\t    {error,{Error,Logs}}\n    end.\n\n-spec build_source_file(E::clu_state(), File::string()) ->\n\t\t\t       {'ok',cl_program()} | \n\t\t\t       {'error',{cl_error(),Logs::term}}.\nbuild_source_file(E,File) ->\n    build_source_file(E, File,\"\").\n\n-spec build_source_file(E::clu_state(), File::string(), Options::string()) ->\n\t\t\t       {'ok',cl_program()} | \n\t\t\t       {'error',{cl_error(),Logs::term}}.\nbuild_source_file(E, File,Options) ->\n    case file:read_file(File) of\n\t{ok,Binary} ->\n\t    build_source(E,Binary,Options);\n\tError ->\n\t    Error\n    end.\n\n-spec compile_file(File::string()) ->\n\t\t\t  {'ok',{[cl_device_id()],[binary()]}} |\n\t\t\t  {'error',{cl_error(),Logs::term}}.\ncompile_file(File) ->\n    compile_file(File,\"\").\n\n-spec compile_file(File::string(), Options::string()) ->\n\t\t\t  {'ok',{[cl_device_id()],[binary()]}} |\n\t\t\t  {'error',{cl_error(),Logs::term}}.\n\ncompile_file(File,Options) ->\n    E = setup(all),\n    Result = build_source_file(E,File,Options),\n    Res =\n\tcase Result of\n\t    {error,{_,_Logs}} ->\n\t\t%% Listed in build_source, should it be?\n\t\t%% lists:foreach(\n\t\t%%   fun(Log) -> io:format(\"~s\\n\", [Log]) end, \n\t\t%%   Logs),\n\t\tResult;\n\t    {ok,Program} ->\n\t\tBRes = get_program_binaries(Program),\n\t\tcl:release_program(Program),\n\t\tBRes;\n\t    
Error ->\n\t\tError\n\tend,\n    teardown(E),\n    Res.\n\n%% @doc Retrieve the binaries associated with a program build.\n%%  the binaries may be cached for later use with build_binary/2.\n%%\n%% @spec get_program_binaries(Program::cl_program()) ->\n%%  {ok,{[cl_device_id()],[binary()]}}\n%%\n\nget_program_binaries(Program) ->\n    {ok,DeviceList} = cl:get_program_info(Program, devices),\n    {ok,BinaryList} = cl:get_program_info(Program, binaries),\n    {ok,{DeviceList, BinaryList}}.\n\nget_program_logs(Program) ->\n    {ok,DeviceList} = cl:get_program_info(Program, devices),\n    map(fun(Device) ->\n\t\t{ok,Log} = cl:get_program_build_info(Program,Device,log),\n\t\tLog\n\tend, DeviceList).\n\nbuild_binary(E, {DeviceList,BinaryList}) ->\n    build_binary(E, {DeviceList,BinaryList},\"\").\n\nbuild_binary(E, {DeviceList,BinaryList},Options) ->\n    {ok,Program} = cl:create_program_with_binary(E#cl.context, DeviceList, BinaryList),\n    case cl:build_program(Program, DeviceList, Options) of\n\tok ->\n\t    {ok,Program};\n\tError ->\n\t    Logs = \n\t\tmap(fun(Device) ->\n\t\t\t    {ok,Log} = cl:get_program_build_info(Program,\n\t\t\t\t\t\t\t\t  Device,log),\n\t\t\t    Log\n\t\t    end, E#cl.devices),\n\t    {error,{Error,Logs}}\n    end.\n\n%%\n%% Utility to set all kernel arguments (and do arity check)\n%%\napply_kernel_args(Kernel, Args) ->\n    {ok,N} = cl:get_kernel_info(Kernel, num_args),\n    Arity = length(Args),\n    if N /= Arity -> \n\t    {ok,Name} = cl:get_kernel_info(Kernel, function_name),\n\t    erlang:error({bad_arity,Name,N});\n       true ->\n\t    try \n\t\tapply_args(Kernel, 0, Args)\n\t    catch \n\t\terror:{badmatch,Error} -> \n\t\t    erlang:error(Error)\n\t    end\n    end.\n\napply_args(Kernel, I, [{local,Size}|As]) ->\n    %%io:format(\"kernel set arg ~w size to ~p\\n\", [I,Size]),\n    ok = cl:set_kernel_arg_size(Kernel,I,Size),\n    apply_args(Kernel,I+1,As);\napply_args(Kernel,I,[A|As]) ->\n    %%io:format(\"kernel set arg ~w to ~p\\n\", 
[I,A]),\n    ok = cl:set_kernel_arg(Kernel,I,A),\n    apply_args(Kernel,I+1,As);\napply_args(_Kernel, _I, []) -> \n    ok.\n\n%% manual wait for event to complete (crash on failure)\n%% should test for error status\nwait_complete(Event) ->\n    case cl:get_event_info(Event, execution_status) of\n\t{ok,complete} ->\n\t    ok;\n\t{ok,Other} ->\n\t    io:format(\"Status: ~p\\n\", [Other]),\n\t    timer:sleep(100),\n\t    wait_complete(Event)\n    end.\n%%\n%% utility function to test if an extension is present in a device\n%%\ndevice_has_extension(Device, Extension) when is_atom(Extension) ->\n    device_has_extension(Device, atom_to_list(Extension));\ndevice_has_extension(Device, Extension) when is_list(Extension) ->\n    {ok,Extensions} = cl:get_device_info(Device,extensions),\n    lists:member(Extension, string:tokens(Extensions, \" \")).\n\ndevices_has_extension(Clu, Extension) ->\n    lists:all(\n      fun(D) -> device_has_extension(D, Extension) end,\n      device_list(Clu)).\n\n\t      \n    \n    \n"
  },
  {
    "path": "test/cl_SUITE.erl",
    "content": "%%% File    : cl_SUITE\n%%% Author  : Dan Gudmundsson\n%%% Description : test cl\n\n-module(cl_SUITE).\n-export([all/0, init_per_suite/1, end_per_suite/1]).\n\n-include(\"cl.hrl\").\n\nall() ->\n    [{cl_test, all},\n     {cl_basic, ct_test},\n     {cl_binary_test, ct_test},\n     {cl_buffer, all},\n     {cl_image, all}\n    ].\n\ninit_per_suite(Config) ->\n    try\n\tio:format(\"Running init per SUITE: ~p~n\", [Config]),\n\tCLU = clu:setup(),\n\t{ok, [Type|_]} = cl:get_device_info(clu:device(CLU), type),\n\tclu:teardown(CLU),\n\t[{type, Type}|Config]\n    catch _:Reason ->\n\t    io:format(\"Skipping test case failed to figure out cl device~n\"),\n\t    io:format(\"~p: ~p~n\",[Reason, erlang:get_stacktrace()]),\n\t    {skip, \"Can not find cl type\"}\n    end.\n\nend_per_suite(_) ->\n    ok.\n\n\n"
  },
  {
    "path": "test/cl_basic.erl",
    "content": "%% Basic tests\n-module(cl_basic).\n\n-export([init_per_suite/1, end_per_suite/1]).\n-export([test/0, ct_test/1, test/1]).\n\n\n-import(lists, [foreach/2]).\n\n-include(\"../include/cl.hrl\").\n\n-spec init_per_suite(Config0::list(tuple())) ->\n                            (Config1::list(tuple())) | \n                            {skip,Reason::term()} | \n                            {skip_and_save,Reason::term(),\n\t\t\t     Config1::list(tuple())}.\n\ninit_per_suite(Config) -> cl_SUITE:init_per_suite(Config).\n\n-spec end_per_suite(Config::list(tuple())) -> ok.\n\nend_per_suite(_Config) ->\n    ok.\n\n\ntest() ->\n    test(all).\n\nct_test(Config) when is_list(Config) ->\n    test(all).\n\ntest(DevType) ->\n    E = clu:setup(DevType),\n    {ok,PlatformInfo} = cl:get_platform_info(E#cl.platform),\n    io:format(\"PlatformInfo: ~p\\n\", [PlatformInfo]),\n\n    foreach(\n      fun(Device) ->\n\t      io:format(\"Device: ~p\\n\", [Device]),\n\t      io:format(\"DeviceInfo:\\n\", []),\n\t      {ok,DeviceInfo} = cl:get_device_info(Device),\n\t      lists:foreach(\n\t\tfun({Attr,Value}) ->\n\t\t\tio:format(\"  ~s: ~p\\n\", [Attr,Value]),\n\t\t\tcase (Attr =:= extensions) andalso\n\t\t\t    lists:member(\"cl_nv_device_attribute_query\", \n\t\t\t\t\t string:tokens(Value,\" \")) of\n\t\t\t    true ->\n\t\t\t\tnv_device_info(Device);\n\t\t\t    false ->\n\t\t\t\tok\n\t\t\tend\n\t\tend, DeviceInfo)\n      end, E#cl.devices),\n\n    {ok,ContextInfo} = cl:get_context_info(E#cl.context),\n    io:format(\"ContextInfo: ~p\\n\", [ContextInfo]),\n    cl:retain_context(E#cl.context),\n    {ok,ContextInfo2} = cl:get_context_info(E#cl.context),\n    io:format(\"Context2: ~p\\n\", [ContextInfo2]),\n\n    foreach(fun(Device) ->\n\t\t    test_queue(E, Device)  end, \n\t    E#cl.devices),\n\n    foreach(fun(Device) ->\n\t\t    test_sampler(E, Device)  end, \n\t    E#cl.devices),\n\n    test_buffer(E),\n\n    test_program(E#cl.context, E#cl.devices),\n\n    
clu:teardown(E).\n\n\nnv_device_info(Device) ->\n    io:format(\"  cl_nv_device_attribute_query:\\n\", []),\n    lists:foreach(\n      fun(NvAttr) ->\n\t      case cl:get_device_info(Device, NvAttr) of\n\t\t  {ok,NvValue} ->\n\t\t      io:format(\"    ~s: ~p\\n\", [NvAttr,NvValue]);\n\t\t  {error,Reason} ->\n\t\t      io:format(\"InfoError: ~s [~p]\\n\", [NvAttr,Reason])\n\t      end\n      end, [\n\t    compute_capability_major_nv,\n\t    compute_capability_minor_nv,\n\t    registers_per_block_nv,\n\t    warp_size_nv,\n\t    gpu_overlap_nv,\n\t    kernel_exec_timeout_nv,\n\t    device_integrated_memory_nv]),\n    case {cl:get_device_info(Device, compute_capability_major_nv),\n\t  cl:get_device_info(Device, compute_capability_minor_nv) } of\n\t{{ok,Major},{ok,Minor}} ->\n\t    io:format(\"    ~s: ~p\\n\", [compute_capability_major_nv,Major]),\n\t    io:format(\"    ~s: ~p\\n\", [compute_capability_mainor_nv,Minor]),\n\t    Cores = case {Major,Minor} of\n\t\t\t{1,1} -> 8;\n\t\t\t{1,2} -> 8;\n\t\t\t{1,3} -> 8;\n\t\t\t{2,0} -> 32;\n\t\t\t{2,1} -> 48;\n\t\t\t{3,0} -> 192;\n\t\t\t{3,5} -> 192;\n\t\t\t{5,0} -> 128;\n\t\t\t_ -> 0  %% unknown (to me)\n\t\t    end,\n\t    \n\t    ComputeUnits = case cl:get_device_info(Device, max_compute_units) of\n\t\t\t       {ok,U} -> U;\n\t\t\t       {error,_} -> 0\n\t\t\t   end,\n\t    io:format(\"    number_of_cores: ~w\\n\", [Cores]),\n\t    io:format(\"    total_number_of_cores: ~w\\n\", [ComputeUnits*Cores]);\n\t_ ->\n\t    ok\n    end.\n\t    \n\ntest_program(Context, DeviceList) ->\n    %% Program1\n    Source1 = \"\n__kernel void program1(int n, int m) {\n    int result = n + m;\n}\n\",\n    {ok,Program} = cl:create_program_with_source(Context,Source1),\n    foreach(\n      fun(Device) ->\n\t      {ok,Status} = cl:get_program_build_info(Program,Device,status),\n\t      io:format(\"Status @ ~w: ~p\\n\", [Device,Status])\n      end, DeviceList),\n\n    io:format(\"Program: ~p\\n\", [Program]),\n    program_info(Program),\n\n    
foreach(\n      fun(Device) ->\n\t      build_info(Program, Device)\n      end, DeviceList),\n\n    case cl:build_program(Program, DeviceList, \"-Dhello=1 -Dtest -cl-kernel-arg-info\") of\n\tok ->\n\t    foreach(\n\t      fun(Device) ->\n\t\t      build_info(Program, Device)\n\t      end, DeviceList),\n\t    program_info(Program),\n\t    {ok,Kernels} = cl:create_kernels_in_program(Program),\n\t    foreach(\n\t      fun(Kernel) ->\n\t\t      {ok,KernelInfo} = cl:get_kernel_info(Kernel),\n\t\t      io:format(\"KernelInfo: ~p\\n\", [KernelInfo]),\n\t\t      foreach(\n\t\t\tfun(Device) ->\n\t\t\t\t{ok,I}=cl:get_kernel_workgroup_info(Kernel,Device),\n\t\t\t\tio:format(\"KernelWorkGroupInfo: ~p\\n\", [I])\n\t\t\tend, DeviceList),\n\t\t      case lists:member({1,2}, cl:versions()) of\n\t\t\t  true ->\n\t\t\t      ArgInfo = (catch cl:get_kernel_arg_info(Kernel)),\n\t\t\t      io:format(\"arg_info: ~p\\n\", [ArgInfo]);\n\t\t\t  false ->\n\t\t\t      ok\n\t\t      end\n\t      end, Kernels),\n\t    foreach(\n\t      fun(Device) ->\n\t\t      {ok,Queue} = cl:create_queue(Context,Device,[profiling_enable]),\n\t\t      foreach(\n\t\t\tfun(Kernel) ->\n\t\t\t\tcl:set_kernel_arg(Kernel, 0, 12),\n\t\t\t\tcl:set_kernel_arg(Kernel, 1, 13),\n\t\t\t\t{ok,Event} = cl:enqueue_task(Queue, Kernel, []),\n\t\t\t\t{ok,EventInfo} = cl:get_event_info(Event),\n\t\t\t\tio:format(\"EventInfo: ~p\\n\", [EventInfo]),\n\t\t\t\tcl:flush(Queue),\n\t\t\t\tio:format(\"Event Status:=~p\\n\", \n\t\t\t\t\t  [cl:wait(Event,1000)]),\n\t\t\t\t{ok,ProfileInfo} = cl:get_event_profiling_info(Event),\n\t\t\t\tio:format(\"EventProfilingInfo: ~p\\n\", [ProfileInfo])\n\n\t\t\tend, Kernels)\n\t      end, DeviceList),\n\t    ok;\n\tError ->\n\t    io:format(\"\\n\\nBuild Error: ~p\\n\\n\", [Error]),\n\t    foreach(\n\t      fun(Device) ->\n\t\t      {ok,BuildInfo} = cl:get_program_build_info(Program,Device),\n\t\t      io:format(\"BuildInfo @ ~w: ~p\\n\", [Device,BuildInfo])\n\t      end, DeviceList)\n    end,\n    
cl:release_program(Program),\n    ok.\n\nprogram_info(Program) ->\n    io:format(\"ProgramInfo:\\n\", []),\n    foreach(\n      fun(Attr) ->\n\t      case cl:get_program_info(Program,Attr) of\n\t\t  {ok,Value} ->\n\t\t      io:format(\"  ~s: ~p\\n\", [Attr,Value]);\n\t\t  {error,Reason} ->\n\t\t      io:format(\"InfoError: ~s [~p]\\n\", \n\t\t\t\t[Attr,Reason])\n\t      end\n      end, cl:program_info()).\n\nbuild_info(Program, Device) ->\n    io:format(\"BuildInfo @ ~w\\n\", [Device]),\n    {ok,BuildInfo} = cl:get_program_build_info(Program,Device),\n    lists:foreach(\n      fun({Attr,Value}) ->\n\t      io:format(\"  ~s: ~p\\n\", [Attr,Value])\n      end, BuildInfo),\n    case lists:member({1,2}, cl:versions()) of\n\ttrue ->\n\t    %% fixme: version handle program_build_info \n\t    case cl:get_program_build_info(Program,Device,binary_type) of\n\t\t{ok,BinaryInfo} ->\n\t\t    io:format(\"  ~s: ~p\\n\", [binary_type,BinaryInfo]);\n\t\t{error,Reason} ->\n\t\t    io:format(\"InfoError: ~s [~p]\\n\", \n\t\t\t      [binary_type,Reason])\n\t    end;\n\tfalse ->\n\t    ok\n    end.\n\ntest_queue(E, Device) ->\n    {ok,Queue} = cl:create_queue(E#cl.context,Device,[]),\n    io:format(\"Queue: ~p\\n\", [Queue]),\n    {ok,QueueInfo} = cl:get_queue_info(Queue),\n    io:format(\"QueueInfo: ~p\\n\", [QueueInfo]),\n    cl:release_queue(Queue),\n    ok.\n    \n\ntest_buffer(E) ->\n    %% Read/Write buffer\n    {ok,Buffer} = cl:create_buffer(E#cl.context,[read_write],1024),\n    io:format(\"Buffer: ~p\\n\", [Buffer]),\n    {ok,BufferInfo} = cl:get_mem_object_info(Buffer),\n    io:format(\"BufferInfo: ~p\\n\", [BufferInfo]),    \n    cl:release_mem_object(Buffer),\n\n    %% Read only buffer\n    {ok,Buffer2} = cl:create_buffer(E#cl.context,[read_only],0,\n\t\t\t\t     <<\"Hello brave new world\">>),\n    io:format(\"Buffer2: ~p\\n\", [Buffer2]),\n    {ok,Buffer2Info} = cl:get_mem_object_info(Buffer2),\n    io:format(\"Buffer2Info: ~p\\n\", [Buffer2Info]),\n    
cl:release_mem_object(Buffer2),\n    ok.\n\n    \n\ntest_sampler(E, Device) ->\n    {ok,DeviceInfo} = cl:get_device_info(Device),\n    Name = proplists:get_value(name, DeviceInfo),\n    case proplists:get_value(image_support, DeviceInfo) of\n\ttrue ->\n\t    %% Sampler1\n\t    {ok,Sampler1} = cl:create_sampler(E#cl.context,true,clamp,nearest),\n\t    io:format(\"Sampler1: ~p\\n\", [Sampler1]),\n\t    {ok,Sampler1Info} = cl:get_sampler_info(Sampler1),\n\t    io:format(\"Sampler1Info: ~p\\n\", [Sampler1Info]),\n\t    cl:release_sampler(Sampler1),\n\t    \n\t    %% Sampler2\n\t    {ok,Sampler2} = cl:create_sampler(E#cl.context,false,repeat,linear),\n\t    io:format(\"Sampler2: ~p\\n\", [Sampler2]),\n\t    {ok,Sampler2Info} = cl:get_sampler_info(Sampler2),\n\t    io:format(\"Sampler2Info: ~p\\n\", [Sampler2Info]),\n\t    cl:release_sampler(Sampler2),\n\t    ok;\n\tfalse ->\n\t    io:format(\"No image support for device ~s ~n\",[Name])\t    \n    end.\n\n\n\n\t      \n    \n    \n    \n"
  },
  {
    "path": "test/cl_binary_test.erl",
    "content": "%%% File    : cl_binary_test.erl\n%%% Author  : Tony Rogvall <tony@rogvall.se>\n%%% Description : test build of binary programs\n%%% Created :  7 Nov 2009 by Tony Rogvall <tony@rogvall.se>\n\n-module(cl_binary_test).\n\n-export([test/0, ct_test/1, init_per_suite/1, end_per_suite/1]).\n\n-spec init_per_suite(Config0::list(tuple())) ->\n                            (Config1::list(tuple())) | \n                            {skip,Reason::term()} | \n                            {skip_and_save,Reason::term(),\n\t\t\t     Config1::list(tuple())}.\n\ninit_per_suite(Config) -> cl_SUITE:init_per_suite(Config).\n\n-spec end_per_suite(Config::list(tuple())) -> ok.\n\nend_per_suite(_Config) ->\n    ok.\n\n\nct_test(_) ->\n    test().\n\ntest() ->\n    E = clu:setup(),\n    {ok,P1} = clu:build_source(E, \"__kernel void foo(int n) { int x; x = n; }\"),\n    {ok,B} = clu:get_program_binaries(P1),\n    ok = cl:release_program(P1),\n    {ok,P2} = clu:build_binary(E, B),\n    ok = cl:release_program(P2),\n    ok.\n\n    \n    \n\n"
  },
  {
    "path": "test/cl_buffer.erl",
    "content": "%%% @author Tony Rogvall <tony@rogvall.se>\n%%% @copyright (C) 2014, Tony Rogvall\n%%% @doc\n%%%    Buffer test/example\n%%% @end\n%%% Created :  8 May 2014 by Tony Rogvall <tony@rogvall.se>\n\n-module(cl_buffer).\n\n-export([init_per_suite/1, end_per_suite/1]).\n-export([all/0,\n\t copy/1,\n\t read_rect/1,\n\t write_rect/1,\n\t sub/1,\n\t fill/1,\n\t migrate/1]).\n\n-spec init_per_suite(Config0::list(tuple())) ->\n                            (Config1::list(tuple())) | \n                            {skip,Reason::term()} | \n                            {skip_and_save,Reason::term(),\n\t\t\t     Config1::list(tuple())}.\n\ninit_per_suite(Config) -> cl_SUITE:init_per_suite(Config).\n\n-spec end_per_suite(Config::list(tuple())) -> ok.\n\nend_per_suite(_Config) ->\n    ok.\n\n\nall() ->\n    [copy, read_rect, write_rect, sub, fill, migrate].\n\n%% test write/copy/read\n\n%% test of copy buffer, require version 1.0\ncopy(Config) ->\n    C = clu:setup(proplists:get_value(type, Config, gpu)),\n    {ok,Q} = cl:create_queue(clu:context(C),clu:device(C),[]),\n    {ok,Buf1} = cl:create_buffer(clu:context(C),[read_write], 1024),\n    {ok,Buf2} = cl:create_buffer(clu:context(C),[read_write], 1024),\n    Data1 = erlang:iolist_to_binary(lists:duplicate(4,lists:seq(0,255))),\n    {ok,E1} = cl:enqueue_write_buffer(Q, Buf1, 0, 1024, Data1, []),\n    {ok,E2} = cl:enqueue_copy_buffer(Q, Buf1, Buf2, 0, 0, 1024, [E1]),\n    {ok,E3} = cl:enqueue_read_buffer(Q, Buf2, 0, 1024, [E2]),\n    cl:flush(Q),\n    cl:wait_for_events([E1,E2]),\n    {ok,Data2} = cl:wait(E3),\n    clu:teardown(C),\n    Data1 =:= Data2.\n\n%% read rectangluar area, require version 1.1\nread_rect(Config) ->\n    C = clu:setup(proplists:get_value(type, Config, gpu)),\n    true = lists:member({1,1},cl:versions()),\n    {ok,Q} = cl:create_queue(clu:context(C),clu:device(C),[]),\n    {ok,Buf1} = cl:create_buffer(clu:context(C),[read_write], 8*8),\n    Data1 = <<0,0,0,0,0,0,0,0,\n\t      0,0,0,0,0,0,0,0,\n\t  
    0,0,1,2,3,4,0,0,\n\t      0,0,5,6,7,8,0,0,\n\t      0,0,0,0,0,0,0,0,\n\t      0,0,0,0,0,0,0,0,\n\t      0,0,0,0,0,0,0,0,\n\t      0, 0,0,0,0,0,0,0>>,\n    {ok,E1} = cl:enqueue_write_buffer(Q, Buf1, 0, 64, Data1, []),\n    {ok,E2} = cl:enqueue_read_buffer_rect(Q, Buf1,\n\t\t\t\t\t  [2,2,0],\n\t\t\t\t\t  [0,0,0],\n\t\t\t\t\t  [4,2,1],\n\t\t\t\t\t  8, 0,\n\t\t\t\t\t  4, 0,\n\t\t\t\t\t  [E1]),\n    cl:wait_for_events([E1]),\n    {ok,Data2} = cl:wait(E2),\n    clu:teardown(C),\n    Data2 =:= <<1,2,3,4,5,6,7,8>>.\n\n%% write rectangular area, requires version 1.1\nwrite_rect(Config) ->\n    C = clu:setup(proplists:get_value(type, Config, gpu)),\n    true = lists:member({1,1},cl:versions()),\n    {ok,Q} = cl:create_queue(clu:context(C),clu:device(C),[]),\n    {ok,Buf1} = cl:create_buffer(clu:context(C),[read_write], 8*8),\n    Data0 = <<9,9,9,9,9,9,9,9,\n\t      9,9,9,9,9,9,9,9,\n\t      9,9,9,9,9,9,9,9,\n\t      9,9,9,9,9,9,9,9,\n\t      9,9,9,9,9,9,9,9,\n\t      9,9,9,9,9,9,9,9,\n\t      9,9,9,9,9,9,9,9,\n\t      9,9,9,9,9,9,9,9>>,\n    {ok,E1} = cl:enqueue_write_buffer(Q, Buf1, 0, 64, Data0, []),\n    Data1 = <<1,2,3,4,\n\t      5,6,7,8>>,\n    {ok,E2} = cl:enqueue_write_buffer_rect(Q, Buf1,\n\t\t\t\t\t   [2,2,0],\n\t\t\t\t\t   [0,0,0],\n\t\t\t\t\t   [4,2,1],\n\t\t\t\t\t   8, 0,\n\t\t\t\t\t   4, 0,\n\t\t\t\t\t   Data1,\n\t\t\t\t\t   [E1]),\n    {ok,E3} = cl:enqueue_read_buffer(Q, Buf1, 0, 64, [E2]),\n    cl:flush(Q),\n    cl:wait_for_events([E1,E2]),\n    {ok,Data3} = cl:wait(E3),\n    clu:teardown(C),\n    Data3 =:= <<9,9,9,9,9,9,9,9,\n\t\t9,9,9,9,9,9,9,9,\n\t\t9,9,1,2,3,4,9,9,\n\t\t9,9,5,6,7,8,9,9,\n\t\t9,9,9,9,9,9,9,9,\n\t\t9,9,9,9,9,9,9,9,\n\t\t9,9,9,9,9,9,9,9,\n\t\t9,9,9,9,9,9,9,9>>.\n\n%% create sub buffer, requires version 1.1\nsub(Config) ->\n    C = clu:setup(proplists:get_value(type, Config, gpu)),\n    true = lists:member({1,1},cl:versions()),\n    {ok,Q} = cl:create_queue(clu:context(C),clu:device(C),[]),\n    {ok,Buf1} = 
cl:create_buffer(clu:context(C),[read_write], 8*8),\n    Data1 = <<0,0,0,0,0,0,0,0,\n\t      0,0,0,0,0,0,0,0,\n\t      0,0,1,2,3,4,0,0,\n\t      0,0,5,6,7,8,0,0,\n\t      0,0,0,0,0,0,0,0,\n\t      0,0,0,0,0,0,0,0,\n\t      0,0,0,0,0,0,0,0,\n\t      0, 0,0,0,0,0,0,0>>,\n    {ok,E1} = cl:enqueue_write_buffer(Q, Buf1, 0, 64, Data1, []),\n    {ok,Buf2} = cl:create_sub_buffer(Buf1,[read_write],region,[18,14]),\n    {ok,E2} = cl:enqueue_read_buffer(Q, Buf2, 0, 12, [E1]),\n    cl:flush(Q),\n    cl:wait_for_events([E1]),\n    {ok,Data2} = cl:wait(E2),\n    clu:teardown(C),\n    Data2 =:= <<1,2,3,4,0,0,0,0,5,6,7,8>>.\n\n\n%% fill buffer, require version 1.2\nfill(Config) ->\n    C = clu:setup(proplists:get_value(type, Config, gpu)),\n    true = lists:member({1,2},cl:versions()),\n    {ok,Q} = cl:create_queue(clu:context(C),clu:device(C),[]),\n    {ok,Buf1} = cl:create_buffer(clu:context(C),[read_write], 8*8),\n    {ok,E1} = cl:enqueue_fill_buffer(Q, Buf1, <<9>>, 0, 64, []),\n    {ok,E2} = cl:enqueue_fill_buffer(Q, Buf1, <<1,2,3,4>>, 12, 4, [E1]),\n    {ok,E3} = cl:enqueue_fill_buffer(Q, Buf1, <<5,6,7,8>>, 20, 4, [E2]),\n    {ok,E4} = cl:enqueue_read_buffer(Q, Buf1, 0, 64, [E3]),\n    cl:flush(Q),\n    cl:wait_for_events([E1,E2,E3]),\n    {ok,Data1} = cl:wait(E4),\n    clu:teardown(C),\n    Data1 =:= <<9,9,9,9,9,9,9,9,\n\t\t9,9,9,9,1,2,3,4,\n\t\t9,9,9,9,5,6,7,8,\n\t\t9,9,9,9,9,9,9,9,\n\t\t9,9,9,9,9,9,9,9,\n\t\t9,9,9,9,9,9,9,9,\n\t\t9,9,9,9,9,9,9,9,\n\t\t9,9,9,9,9,9,9,9>>.\n\nmigrate(_) ->\n    C = clu:setup(all),\n    true = lists:member({1,2},cl:versions()),\n    case clu:device_list(C) of\n\t[D1,D2|_] ->\n\t    {ok,Q1} = cl:create_queue(clu:context(C),D1,[]),\n\t    {ok,Q2} = cl:create_queue(clu:context(C),D2,[]),\n\t    {ok,B1} = cl:create_buffer(clu:context(C),[read_write], 8*8),\n\t    {ok,E1} = cl:enqueue_fill_buffer(Q1, B1, <<9>>, 0, 64, []),\n\t    cl:flush(Q1),\n\t    {ok,completed} = cl:wait(E1),\n\t    {ok,E2} = cl:enqueue_migrate_mem_objects(Q2, [B1], [], 
[]),\n\t    cl:flush(Q2),\n\t    %% fixme: add a kernel to check that the data was migrated\n\t    cl:wait(E2);\n\t_ -> ignore\n    end.\n"
  },
  {
    "path": "test/cl_image.erl",
    "content": "%%% @author Tony Rogvall <tony@rogvall.se>\n%%% @copyright (C) 2014, Tony Rogvall\n%%% @doc\n%%%     cl_image test\n%%% @end\n%%% Created :  9 May 2014 by Tony Rogvall <tony@rogvall.se>\n\n-module(cl_image).\n\n-export([init_per_suite/1, end_per_suite/1]).\n-export([all/0,\n\t create_image2d_a/1, \n\t create_image2d_b/1,\n\t create_image2d_c/1,\n\t create_image2d_d/1,\n\t create_image3d_a/1,\n\t create_image3d_b/1,\n\t create_image3d_c/1,\n\t create_image3d_d/1,\n\t pixop/1]).\n\n-include_lib(\"cl/include/cl.hrl\").\n\n-spec init_per_suite(Config0::list(tuple())) ->\n                            (Config1::list(tuple())) | \n                            {skip,Reason::term()} | \n                            {skip_and_save,Reason::term(),\n\t\t\t     Config1::list(tuple())}.\n\ninit_per_suite(Config) -> cl_SUITE:init_per_suite(Config).\n\n-spec end_per_suite(Config::list(tuple())) -> ok.\n\nend_per_suite(_Config) ->\n    ok.\n    \n\nall() ->\n    [create_image2d_a, create_image2d_b, create_image2d_c, create_image2d_d,\n     create_image3d_a, create_image3d_b, create_image3d_c, create_image3d_d,\n     pixop\n    ].\n\n\ncreate_image2d_a(Config) ->\n    C = clu:setup(proplists:get_value(type,Config,gpu)),\n    cl:create_image2d(clu:context(C),[read_write],\n\t\t      #cl_image_format { cl_channel_order = rgba,\n\t\t\t\t\t cl_channel_type  = unorm_int8 },\n\t\t      64,\n\t\t      64,\n\t\t      0,\n\t\t      <<>>).\n\ncreate_image2d_b(Config) ->\n    C = clu:setup(proplists:get_value(type,Config,gpu)),\n    ImageData = create_image2d_data(64, 64, 4),\n    cl:create_image2d(clu:context(C),[read_write],\n\t\t      #cl_image_format { cl_channel_order = rgba,\n\t\t\t\t\t cl_channel_type  = unorm_int8 },\n\t\t      64,\n\t\t      64,\n\t\t      64*4,\n\t\t      ImageData).\n\ncreate_image2d_c(Config) ->\n    true = lists:member({1,2},cl:versions()),\n    C = clu:setup(proplists:get_value(type,Config,gpu)),\n    
cl:create_image(clu:context(C),[read_write],\n\t\t    #cl_image_format { cl_channel_order = rgba,\n\t\t\t\t       cl_channel_type  = unorm_int8 },\n\t\t    #cl_image_desc {\n\t\t       image_type = image2d,\n\t\t       image_width = 64,\n\t\t       image_height = 64,\n\t\t       image_depth = 1,\n\t\t       image_array_size = 1,\n\t\t       image_row_pitch = 0 },\n\t\t    <<>>).\n\ncreate_image2d_d(Config) ->\n    true = lists:member({1,2},cl:versions()),\n    C = clu:setup(proplists:get_value(type,Config,gpu)),\n    ImageData = create_image2d_data(64, 64, 4),\n    cl:create_image(clu:context(C),[read_write],\n\t\t    #cl_image_format { cl_channel_order = rgba,\n\t\t\t\t       cl_channel_type  = unorm_int8 },\n\t\t    #cl_image_desc {\n\t\t       image_type = image2d,\n\t\t       image_width = 64,\n\t\t       image_height = 64,\n\t\t       image_depth = 1,\n\t\t       image_array_size = 1,\n\t\t       image_row_pitch = 64*4 },\n\t\t    ImageData).\n\ncreate_image2d_data(W,H,BytesPerPixel) ->\n    << <<1234:BytesPerPixel/unit:8>> ||\n\t_ <- lists:seq(1,W),\n\t_ <- lists:seq(1,H) >>.\n\n\ncreate_image3d_a(Config) ->\n    C = clu:setup(proplists:get_value(type,Config,gpu)),\n    cl:create_image3d(clu:context(C),[read_write],\n\t\t      #cl_image_format { cl_channel_order = rgba,\n\t\t\t\t\t cl_channel_type  = unorm_int8 },\n\t\t      64,\n\t\t      64,\n\t\t      64,\n\t\t      0,\n\t\t      0,\n\t\t      <<>>).\n\ncreate_image3d_b(Config) ->\n    C = clu:setup(proplists:get_value(type,Config,gpu)),\n    ImageData = create_image3d_data(64, 64, 64, 4),\n    cl:create_image3d(clu:context(C),[read_write],\n\t\t      #cl_image_format { cl_channel_order = rgba,\n\t\t\t\t\t cl_channel_type  = unorm_int8 },\n\t\t      64,\n\t\t      64,\n\t\t      64,\n\t\t      64*4,\n\t\t      64*64*4,\n\t\t      ImageData).\n\ncreate_image3d_c(Config) ->\n    true = lists:member({1,2},cl:versions()),\n    C = clu:setup(proplists:get_value(type,Config,gpu)),\n    
cl:create_image(clu:context(C),[read_write],\n\t\t    #cl_image_format { cl_channel_order = rgba,\n\t\t\t\t       cl_channel_type  = unorm_int8 },\n\t\t    #cl_image_desc {\n\t\t       image_type = image3d,\n\t\t       image_width = 64,\n\t\t       image_height = 64,\n\t\t       image_depth = 64,\n\t\t       image_array_size = 1,\n\t\t       image_row_pitch = 0,\n\t\t       image_slice_pitch = 0\n\t\t      },\n\t\t    <<>>).\n\ncreate_image3d_d(Config) ->\n    true = lists:member({1,2},cl:versions()),\n    C = clu:setup(proplists:get_value(type,Config,gpu)),\n    ImageData = create_image3d_data(64, 64, 64, 4),\n    cl:create_image(clu:context(C),[read_write],\n\t\t    #cl_image_format { cl_channel_order = rgba,\n\t\t\t\t       cl_channel_type  = unorm_int8 },\n\t\t    #cl_image_desc {\n\t\t       image_type = image3d,\n\t\t       image_width = 64,\n\t\t       image_height = 64,\n\t\t       image_depth = 64,\n\t\t       image_array_size = 1,\n\t\t       image_row_pitch = 64*4,\n\t\t       image_slice_pitch = 64*64*4\n\t\t      },\n\t\t    ImageData).\n\n\ncreate_image3d_data(W,H,D,BytesPerPixel) ->\n    << <<Di:BytesPerPixel/unit:8>> ||\n\tDi <- lists:seq(1,D),\n\t_ <- lists:seq(1,H),\n\t_ <- lists:seq(1,W)\n    >>.\n\n%% test image pixel operations\n\npixop(Config) ->\n    exit({skip, \"Fails on linux machine\"}),\n    Clu = clu:setup(proplists:get_value(type,Config,cpu)),\n    {ok,A} =\n\tcl:create_image2d(clu:context(Clu),[read_write],\n\t\t\t  #cl_image_format { cl_channel_order = rgba,\n\t\t\t\t\t     cl_channel_type  = unorm_int8 },\n\t\t\t  2,\n\t\t\t  2,\n\t\t\t  2*4,\n\t\t\t  <<100,200,50,127, 25,255,50,100,\n\t\t\t    30,64,10,20,    3,2,1,220>> ),\n    %% {ok,E1} = cl:enqueue_write_image(Q, A, [0,0], [2,2], 2*4, 0, Data, []),\n    {ok,B} =\n\tcl:create_image2d(clu:context(Clu),[read_write],\n\t\t\t  #cl_image_format { cl_channel_order = rgba,\n\t\t\t\t\t     cl_channel_type  = unorm_int8 },\n\t\t\t  2,\n\t\t\t  2,\n\t\t\t  2*4,\n\t\t\t  <<50,100,25,255,  
100,100,100,127,\n\t\t\t    100,200,50,127, 1,2,3,20>>),\n    {ok,C} =\n\tcl:create_image2d(clu:context(Clu),[read_write],\n\t\t\t  #cl_image_format { cl_channel_order = rgba,\n\t\t\t\t\t     cl_channel_type  = unorm_int8 },\n\t\t\t  2,\n\t\t\t  2,\n\t\t\t  0,\n\t\t\t  <<>>),\n\n    {ok,Q} = cl:create_queue(clu:context(Clu),clu:device(Clu),[]),\n    File =\n\tcase proplists:get_value(data_dir, Config) of\n\t    undefined -> \"pixop.cl\";\n\t    Dir -> filename:join(filename:dirname(filename:dirname(Dir)), \"pixop.cl\")\n\tend,\n    io:format(\"File: ~p~n\", [File]),\n    {ok,Program} = clu:build_source_file(Clu, File, \"\"),\n    {ok,Kernel} = cl:create_kernel(Program, \"pixmap_blend\"),\n    clu:apply_kernel_args(Kernel, [A,B,C,2,2]),\n    {ok,E1} = cl:enqueue_nd_range_kernel(Q, Kernel,\n\t\t\t\t\t [2,2], [],\n\t\t\t\t\t []),\n    cl:flush(Q),\n    {ok,completed} = cl:wait(E1),\n\n    {ok,E2}  = cl:enqueue_read_image(Q, C, [0,0], [2,2], 2*4, 0, []),\n    cl:flush(Q),\n    {ok,Data} = cl:wait(E2),\n    Data.\n"
  },
  {
    "path": "test/cl_info.erl",
    "content": "%%% @author Tony Rogvall <tony@rogvall.se>\n%%% @copyright (C) 2023, Tony Rogvall\n%%% @doc\n%%%    Simple test to display platform and device extensions\n%%% @end\n%%% Created : 10 Sep 2023 by Tony Rogvall <tony@rogvall.se>\n\n-module(cl_info).\n\n-export([start/0]).\n\nstart() ->\n    {ok,IDs} = cl:get_platform_ids(),\n    lists:foreach(\n      fun(ID) ->\n\t      io:format(\"~p\\n\", [cl:get_platform_info(ID)])\n      end, IDs),\n\n    CLU = clu:setup(),\n    Ds = clu:device_list(CLU),\n    lists:foreach(\n      fun(D) ->    \n\t      io:format(\"~p\\n\", [cl:get_device_info(D)])\n      end, Ds).\n\n    \n\n"
  },
  {
    "path": "test/cl_noop.erl",
    "content": "%%% @author Tony Rogvall <tony@rogvall.se>\n%%% @copyright (C) 2019, Tony Rogvall\n%%% @doc\n%%%    Test cl nif calling overhead\n%%% @end\n%%% Created : 11 Mar 2019 by Tony Rogvall <tony@rogvall.se>\n\n-module(cl_noop).\n\n-compile(export_all).\n\n\ntest() ->\n    T0 = erlang:monotonic_time(),\n    loop_noop(1000000),\n    T1 = erlang:monotonic_time(),\n    Time1 = erlang:convert_time_unit(T1 - T0, native, microsecond),\n    loop_noop_(1000000),\n    T2 = erlang:monotonic_time(),\n    Time2 = erlang:convert_time_unit(T2 - T1, native, microsecond),\n    loop_dirty_noop(1000000),\n    T3 = erlang:monotonic_time(),\n    Time3 = erlang:convert_time_unit(T3 - T2, native, microsecond),\n    {Time1/1000000, Time2/1000000, Time3/1000000}.\n\nloop_noop(0) -> ok;\nloop_noop(I) ->\n    cl:noop(),\n    loop_noop(I-1).\n\nloop_noop_(0) -> ok;\nloop_noop_(I) ->\n    cl:noop_(),\n    loop_noop_(I-1).\n\nloop_dirty_noop(0) -> ok;\nloop_dirty_noop(I) ->\n    cl:dirty_noop(),\n    loop_dirty_noop(I-1).\n\n    \n    \n"
  },
  {
    "path": "test/cl_test.erl",
    "content": "%%% @author Tony Rogvall <tony@rogvall.se>\n%%% @copyright (C) 2010, Tony Rogvall\n%%% @doc\n%%%\n%%% @end\n%%% Created : 25 Dec 2010 by Tony Rogvall <tony@rogvall.se>\n\n-module(cl_test).\n\n-export([init_per_suite/1, end_per_suite/1]).\n-export([all/0, ct_test0/1, ct_test1/1]).\n\n-export([test0/0, test0/2]).\n-export([test1/0, test1/2]).\n\n-import(lists, [foreach/2]).\n\n-define(BUFFER_SIZE, 1024*256).\n\n-spec init_per_suite(Config0::list(tuple())) ->\n                            (Config1::list(tuple())) | \n                            {skip,Reason::term()} | \n                            {skip_and_save,Reason::term(),\n\t\t\t     Config1::list(tuple())}.\n\ninit_per_suite(Config) -> cl_SUITE:init_per_suite(Config).\n\n-spec end_per_suite(Config::list(tuple())) -> ok.\n\nend_per_suite(_Config) ->\n    ok.\n\n\nall() -> [ct_test0, ct_test1].\n\ntest0() ->\n    test0(cpu, ?BUFFER_SIZE).\n\nct_test0(Config) ->\n    test0(proplists:get_value(type, Config, gpu), ?BUFFER_SIZE).\n\ntest0(Type, Size) ->\n    {ok,[PI|_]} = cl:get_platform_ids(),\n    {ok,[D]} = cl:get_device_ids(PI, Type),\n    {ok,C} = cl:create_context([D]),\n    {ok,Q} = cl:create_queue(C, D, []),\n    {ok,Buf} = cl:create_buffer(C, [read_only], Size),\n    N = Size div 2,\n    Data = make_buffer(N),\n    {ok,E1} = cl:enqueue_write_buffer(Q, Buf, 0, N, Data, []),\n    {ok,E2} = cl:enqueue_write_buffer(Q, Buf, N, N, Data, []),\n    Res1 = cl:wait(E1,3000),\n    io:format(\"Res1 = ~p\\n\", [Res1]),\n    Res2 = cl:wait(E2,3000),\n    io:format(\"Res2 = ~p\\n\", [Res2]),\n    {ok,E3} = cl:enqueue_read_buffer(Q, Buf, 0, N, []),\n    case cl:wait(E3,3000) of\n\t{ok, Data} -> \n\t    io:format(\"read_buffer: verified\\n\"),\n\t    ok;\n\tRes3 ->\n\t    io:format(\"Res3 = ~p\\n\", [Res3])\n    end.\n\nprogram(ok) -> \"\n__kernel void program1(int n, int m) {\n    int result = n + m;\n}\n\";\nprogram(error) -> \"\n__kernel void program1(int n, int m) {\n    int result = n + 
k;\n}\n\".\n\nct_test1(Config) ->\n    test1(proplists:get_value(type, Config, gpu), ok).\n\ntest1() ->\n    test1(cpu, ok).\n\ntest1(Type, Prog) ->\n    {ok,[PI|_]} = cl:get_platform_ids(),\n    {ok,DeviceList} = cl:get_device_ids(PI, Type),\n    {ok,C} = cl:create_context(DeviceList),\n    {ok,P} = cl:create_program_with_source(C, program(Prog)),\n    io:format(\"Program: ~p\\n\", [P]),\n    {ok,Info} = cl:get_program_info(P),\n    io:format(\"ProgramInfo: ~p\\n\", [Info]),\n    foreach(\n      fun(D) ->\n\t      {ok,BuildInfo} = cl:get_program_build_info(P,D),\n\t      io:format(\"BuildInfo @ ~w: ~p\\n\", [D,BuildInfo])\n      end, DeviceList),\n    case cl:build_program(P, DeviceList, \"-Dhello=1 -Dtest\") of\n\tok ->\n\t    foreach(\n\t      fun(D) ->\n\t\t      {ok,BuildInfo} = cl:get_program_build_info(P,D),\n\t\t      io:format(\"BuildInfo @ ~w: ~p\\n\", [D,BuildInfo])\n\t      end, DeviceList),\n\t    ok;\n\tError ->\n\t    io:format(\"\\n\\nBuild Error: ~p\\n\\n\", [Error]),\n\t    foreach(\n\t      fun(D) ->\n\t\t      {ok,BuildInfo} = cl:get_program_build_info(P,D),\n\t\t      io:format(\"BuildInfo @ ~w: ~p\\n\", [D,BuildInfo])\n\t      end, DeviceList)\n    end.\n\n\n    \n\nmake_buffer(0) ->  <<>>;\nmake_buffer(1) ->  <<1>>;\nmake_buffer(2) ->  <<1,2>>;\nmake_buffer(N) ->\n    Bin = make_buffer(N div 2),\n    if N band 1 =:= 1 ->\n\t    list_to_binary([1,Bin,Bin]);\n       true ->\n\t    list_to_binary([Bin,Bin])\n    end.\n    \n\n\n\n"
  },
  {
    "path": "test/pixop.cl",
    "content": "/* -*- c -*-\n *\n */\n\nfloat4 pixel_over(float4 a, float4 b);\nfloat4 pixel_blend(float4 a, float4 b);\n\nfloat4 pixel_blend(float4 a, float4 b)\n{\n    return a.w*a + (1-a.w)*b;\n}\n\nfloat4 pixel_over(float4 a, float4 b)\n{\n    return a.w*a + (1-a.w)*b.w*b;\n}\n\nkernel void pixmap_over(read_write image2d_t a,\n\t\t\tread_write image2d_t b,\n\t\t\tread_write image2d_t c,\n\t\t\tuint w, uint h)\n{\n    int x = get_global_id(0);\n    int y = get_global_id(1);\n    if ((x < (int)w) && (y < (int)h)) {\n\tint2 coord = {x,y};\n\tfloat4 ap = read_imagef(a, coord);\n\tfloat4 bp = read_imagef(b, coord);\n\tfloat4 cp = pixel_over(ap, bp);\n\twrite_imagef(c, coord, cp);\n    }\n}\n\nkernel void pixmap_blend(read_write image2d_t a,\n\t\t\t read_write image2d_t b,\n\t\t\t read_write image2d_t c,\n\t\t\t uint w, uint h)\n{\n    int x = get_global_id(0);\n    int y = get_global_id(1);\n    if ((x < (int)w) && (y < (int)h)) {\n\tint2 coord = {x,y};\n\tfloat4 ap = read_imagef(a, coord);\n\tfloat4 bp = read_imagef(b, coord);\n\tfloat4 cp = pixel_blend(ap, bp);\n\twrite_imagef(c, coord, cp);\n    }\n}\n"
  }
]