[
  {
    "path": ".gitignore",
    "content": "build/\n"
  },
  {
    "path": ".travis.yml",
    "content": "language: c\ncompiler:\n  - gcc\n  - clang\ncache:\n  directories:\n  - $HOME/OpenBlasInstall\nsudo: false\nenv:\n  - TORCH_LUA_VERSION=LUAJIT21\n  - TORCH_LUA_VERSION=LUA51\n  - TORCH_LUA_VERSION=LUA52\nos:\n  - linux\naddons:\n  apt:\n    packages:\n    - cmake\n    - gfortran\n    - gcc-multilib\n    - gfortran-multilib\n    - liblapack-dev\n    - build-essential\n    - gcc\n    - g++\n    - curl\n    - cmake\n    - libreadline-dev\n    - git-core\n    - libqt4-core\n    - libqt4-gui\n    - libqt4-dev\n    - libjpeg-dev\n    - libpng-dev\n    - ncurses-dev\n    - imagemagick\n    - libzmq3-dev\n    - gfortran\n    - unzip\n    - gnuplot\n    - gnuplot-x11\nbefore_script:\n- export ROOT_TRAVIS_DIR=$(pwd)\n- export INSTALL_PREFIX=~/torch/install\n-  ls $HOME/OpenBlasInstall/lib || (cd /tmp/ && git clone https://github.com/xianyi/OpenBLAS.git -b master && cd OpenBLAS && (make NO_AFFINITY=1 -j$(getconf _NPROCESSORS_ONLN) 2>/dev/null >/dev/null) && make PREFIX=$HOME/OpenBlasInstall install)\n- git clone https://github.com/torch/distro.git ~/torch --recursive\n- cd ~/torch && git submodule update --init --recursive\n- mkdir build && cd build\n- export CMAKE_LIBRARY_PATH=$HOME/OpenBlasInstall/include:$HOME/OpenBlasInstall/lib:$CMAKE_LIBRARY_PATH\n- cmake .. -DCMAKE_INSTALL_PREFIX=\"${INSTALL_PREFIX}\" -DCMAKE_BUILD_TYPE=Release -DWITH_${TORCH_LUA_VERSION}=ON\n- make && make install\n- cd $ROOT_TRAVIS_DIR\n- export LD_LIBRARY_PATH=${INSTALL_PREFIX}/lib:$LD_LIBRARY_PATH\nscript:\n- ${INSTALL_PREFIX}/bin/luarocks make rocks/torch-scm-1.rockspec\n- ${INSTALL_PREFIX}/bin/luarocks install luaffi\n- export PATH=${INSTALL_PREFIX}/bin:$PATH\n- export TESTLUA=$(which luajit lua | head -n 1)\n- ${TESTLUA} -ltorch -e \"t=torch.test(); if t.errors[1] then os.exit(1) end\"\n- cd test\n- ${TESTLUA} test_writeObject.lua\n- ${TESTLUA} test_Tester.lua\n"
  },
  {
    "path": "CMakeLists.txt",
    "content": "IF(APPLE)\n  CMAKE_MINIMUM_REQUIRED(VERSION 2.8.12 FATAL_ERROR)\n  CMAKE_POLICY(VERSION 2.8.12)\nELSE()\n  CMAKE_MINIMUM_REQUIRED(VERSION 2.8 FATAL_ERROR)\n  CMAKE_POLICY(VERSION 2.8)\nENDIF()\n\nSET(CMAKE_MODULE_PATH\n  \"${CMAKE_CURRENT_SOURCE_DIR}/cmake\"\n  \"${CMAKE_MODULE_PATH}\")\n\nIF (NOT MSVC)\n  IF (MINGW)\n    SET(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} -Werror=format\")\n  ELSE()\n    SET(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} -Werror=implicit-function-declaration -Werror=format\")\n  ENDIF(MINGW)\nENDIF(NOT MSVC)\n\n# Flags\n# When using MSVC\nIF(MSVC)\n  # we want to respect the standard, and we are bored of those **** .\n  ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1)\nENDIF(MSVC)\n\n# OpenMP support?\nSET(WITH_OPENMP ON CACHE BOOL \"OpenMP support if available?\")\nIF (APPLE AND CMAKE_COMPILER_IS_GNUCC)\n  EXEC_PROGRAM (uname ARGS -v  OUTPUT_VARIABLE DARWIN_VERSION)\n  STRING (REGEX MATCH \"[0-9]+\" DARWIN_VERSION ${DARWIN_VERSION})\n  MESSAGE (STATUS \"MAC OS Darwin Version: ${DARWIN_VERSION}\")\n  IF (DARWIN_VERSION GREATER 9)\n    SET(APPLE_OPENMP_SUCKS 1)\n  ENDIF (DARWIN_VERSION GREATER 9)\n  EXECUTE_PROCESS (COMMAND ${CMAKE_C_COMPILER} -dumpversion\n    OUTPUT_VARIABLE GCC_VERSION)\n  IF (APPLE_OPENMP_SUCKS AND GCC_VERSION VERSION_LESS 4.6.2)\n    MESSAGE(STATUS \"Warning: Disabling OpenMP (unstable with this version of GCC)\")\n    MESSAGE(STATUS \" Install GCC >= 4.6.2 or change your OS to enable OpenMP\")\n    SET(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} -Wno-unknown-pragmas\")\n    SET(WITH_OPENMP OFF CACHE BOOL \"OpenMP support if available?\" FORCE)\n  ENDIF ()\nENDIF ()\n\nIF (WITH_OPENMP)\n  FIND_PACKAGE(OpenMP)\n  IF(OPENMP_FOUND)\n    MESSAGE(STATUS \"Compiling with OpenMP support\")\n    SET(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}\")\n    SET(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}\")\n    SET(CMAKE_EXE_LINKER_FLAGS \"${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}\")\n  ENDIF(OPENMP_FOUND)\nENDIF 
(WITH_OPENMP)\n\n# Includes\nINCLUDE(TorchPaths)\nINCLUDE(TorchPathsInit)\nINCLUDE(TorchPackage)\nINCLUDE(TorchWrap)\nINCLUDE(TorchExports)\n\n# Torch libraries\nADD_SUBDIRECTORY(lib)\n\nCONFIGURE_FILE(paths.lua.in \"${CMAKE_CURRENT_BINARY_DIR}/paths.lua\")\n\nINCLUDE_DIRECTORIES(BEFORE \"${LUA_INCDIR}\")\nINCLUDE_DIRECTORIES(BEFORE \"${CMAKE_CURRENT_SOURCE_DIR}/lib/TH\")\nINCLUDE_DIRECTORIES(BEFORE \"${CMAKE_CURRENT_BINARY_DIR}/lib/TH\")\nINCLUDE_DIRECTORIES(BEFORE \"${CMAKE_CURRENT_SOURCE_DIR}/lib/luaT\")\nLINK_DIRECTORIES(\"${LUA_LIBDIR}\")\n\nSET(src DiskFile.c File.c MemoryFile.c PipeFile.c Storage.c Tensor.c Timer.c utils.c init.c TensorOperator.c TensorMath.c random.c Generator.c)\nSET(luasrc init.lua File.lua Tensor.lua CmdLine.lua FFInterface.lua Tester.lua TestSuite.lua ${CMAKE_CURRENT_BINARY_DIR}/paths.lua test/test.lua)\n\n# Necessary to generate wrappers\nADD_TORCH_WRAP(tensormathwrap TensorMath.lua)\nADD_TORCH_WRAP(randomwrap random.lua)\n\nADD_TORCH_PACKAGE(torch \"${src}\" \"${luasrc}\")\n\nTARGET_LINK_LIBRARIES(torch luaT TH)\n\nIF(LUALIB)\n  TARGET_LINK_LIBRARIES(torch ${LUALIB})\nENDIF()\n\nINSTALL(FILES \"README.md\" DESTINATION \"${Torch_INSTALL_LUA_PATH_SUBDIR}/torch\")\nINSTALL(DIRECTORY \"doc\" DESTINATION \"${Torch_INSTALL_LUA_PATH_SUBDIR}/torch\")\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# Contributing to Torch7 Core (torch7, nn, cutorch, cunn)\n\nThanks a lot! There are plenty of ways you can help!\n\nPlease take a moment to review this document in order to make the contribution\nprocess easy and effective for everyone involved.\n\nFollowing these guidelines helps to communicate that you respect the time of\nthe developers managing and developing this open source project. In return,\nthey should reciprocate that respect in addressing your issue or assessing\npatches and features.\n\n\n## Using the issue tracker\n\nThe [issue tracker](https://github.com/torch/torch7/issues) is\nthe preferred channel for [bug reports](#bugs), [features requests](#features)\nand [submitting pull requests](#pull-requests), but please respect the following\nrestrictions:\n\n* Please **do not** use the issue tracker for personal support requests (use\n  [mailing-list](https://groups.google.com/forum/#!forum/torch7)).\n\n* Please **do not** open issues regarding the code in a torch package \n  outside the core. For example don't open issues about the \n  REPL in the torch7 issue tracker, use the trepl issue tracker for that.\n\n<a name=\"bugs\"></a>\n## Bug reports\n\nA bug is a _demonstrable problem_ that is caused by the code in the repository.\nGood bug reports are extremely helpful - thank you!\n\nGuidelines for bug reports:\n\n1. **Use the GitHub issue search** &mdash; check if the issue has already been\n   reported.\n\n2. **Check if the issue has been fixed** &mdash; try to reproduce it using the\n   latest `master` or development branch in the repository.\n\n3. **Isolate the problem** &mdash; ideally create test case that is within reason,\n   preferably within 100 lines of code.\n\nA good bug report shouldn't leave others needing to chase you up for more\ninformation. Please try to be as detailed as possible in your report. What is\nyour environment? What steps will reproduce the issue? What OS do you\nexperience the problem? 
What would you expect to be the outcome? All these\ndetails will help people to fix any potential bugs.\n\n<a name=\"features\"></a>\n## Feature requests\n\nFeature requests are welcome to be filed. Torch is community-developed, \nthe maintainers are not exclusive torch developers, so keep that in mind.\nThe purpose of feature requests is to make others who are looking to implement\na feature aware of the interest in the feature.\n\n\n<a name=\"pull-requests\"></a>\n## Pull requests\n\nGood pull requests - patches, improvements, new features - are a fantastic\nhelp. They should remain focused in scope **and avoid containing unrelated\ncommits.**\n\n**Please ask first** before embarking on any significant pull request (e.g.\nimplementing features, refactoring code, porting to a different language),\notherwise you risk spending a lot of time working on something that the\nproject's developers might not want to merge into the project.\n\nPlease adhere to the coding conventions used throughout a project (indentation,\naccurate comments, etc.) and any other requirements (such as test coverage).\n\nAdhering to the following process is the best way to get your work\nincluded in the project:\n\n1. [Fork](https://help.github.com/articles/fork-a-repo) the project, clone your\n   fork, and configure the remotes:\n\n   ```bash\n   # Clone your fork of the repo into the current directory\n   git clone https://github.com/<your-username>/torch7.git\n   # Navigate to the newly cloned directory\n   cd torch7\n   # Assign the original repo to a remote called \"upstream\"\n   git remote add upstream https://github.com/torch/torch7.git\n   ```\n\n2. If you cloned a while ago, get the latest changes from upstream:\n\n   ```bash\n   git checkout master\n   git pull upstream master\n   ```\n\n3. Create a new topic branch (off the main project development branch) to\n   contain your feature, change, or fix:\n\n   ```bash\n   git checkout -b <topic-branch-name>\n   ```\n\n4. 
Commit your changes in logical chunks. Please try to adhere to these [git commit\n   message guidelines](http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html).\n   Use Git's [interactive rebase](https://help.github.com/articles/about-git-rebase)\n   feature to tidy up your commits before making them public. This helps us keep the \n   commit history in logical blocks and clean, as torch grows. \n   For example: \n     - If you are adding a new function or a module, keep the module + tests + doc \n       to a single commit unless logically warranted. \n     - If you are fixing a bug, keep the bugfix to a single commit unless logically warranted.\n\n5. Locally merge (or rebase) the upstream development branch into your topic branch:\n\n   ```bash\n   git pull [--rebase] upstream master\n   ```\n\n6. Push your topic branch up to your fork:\n\n   ```bash\n   git push origin <topic-branch-name>\n   ```\n\n7. [Open a Pull Request](https://help.github.com/articles/using-pull-requests/)\n    with a clear title and description.\n\n**IMPORTANT**: By submitting a patch, you agree to allow the project owners to\nlicense your work under the terms of the BSD License.\n"
  },
  {
    "path": "COPYRIGHT.txt",
    "content": "Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)\nCopyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)\nCopyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)\nCopyright (c) 2011-2013 NYU (Clement Farabet)\nCopyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)\nCopyright (c) 2006      Idiap Research Institute (Samy Bengio)\nCopyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)\n\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without\nmodification, are permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright\n   notice, this list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright\n   notice, this list of conditions and the following disclaimer in the\n   documentation and/or other materials provided with the distribution.\n\n3. Neither the names of Deepmind Technologies, NYU, NEC Laboratories America \n   and IDIAP Research Institute nor the names of its contributors may be \n   used to endorse or promote products derived from this software without \n   specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\nARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\nLIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\nCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\nSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\nINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\nCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\nARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\nPOSSIBILITY OF SUCH DAMAGE.\n"
  },
  {
    "path": "CmdLine.lua",
    "content": "local CmdLine = torch.class('torch.CmdLine')\n\nlocal function strip(str)\n   return string.match(str, '%-*(.*)')\nend\n\nlocal function pad(str, sz)\n   return str .. string.rep(' ', sz-#str)\nend\n\nfunction CmdLine:error(msg)\n   print('')\n   io.stderr:write(msg)\n   print('')\n   self:help()\n   os.exit(1)\nend\n\nfunction CmdLine:__readArgument__(params, arg, i, nArgument)\n   local argument = self.arguments[nArgument]\n   local value = arg[i]\n\n   if nArgument > #self.arguments then\n      self:error('invalid argument: ' .. value)\n   end\n   if argument.type and type(value) ~= argument.type then\n      self:error('invalid argument type for argument ' .. argument.key .. ' (should be ' .. argument.type .. ')')\n   end\n   params[strip(argument.key)] = value\n   return 1\nend\n\nfunction CmdLine:__readOption__(params, arg, i)\n   local key = arg[i]\n   local option = self.options[key]\n   if not option then\n      self:error('unknown option ' .. key)\n   end\n\n   if option.type and option.type == 'boolean' then\n      params[strip(key)] = not option.default\n      return 1\n   else\n      local value = arg[i+1]\n      if not value then\n         self:error('missing argument for option ' .. key)\n      end\n      if not option.type or option.type == 'string' then\n      elseif option.type == 'number' then\n         value = tonumber(value)\n      else\n         self:error('unknown required option type ' .. option.type)\n      end\n      if not value then\n         self:error('invalid type for option ' .. key .. ' (should be ' .. option.type .. 
')')\n      end\n      params[strip(key)] = value\n      return 2\n   end\nend\n\nfunction CmdLine:__init(argseparator_,keyseparator_)\n   self.argseparator = argseparator_ or ','\n   self.keyseparator = keyseparator_ or '='\n   self.options = {}\n   self.arguments = {}\n   self.helplines = {}\n   self.dateformat = nil\n   self.silentio = false\nend\n\nfunction CmdLine:silent()\n   self.silentio = true\nend\n\nfunction CmdLine:addTime(name, format)\n   format = format or '%Y-%m-%d %H:%M:%S'\n   if type(format) ~= 'string' then\n      error('Argument has to be string')\n   end\n   if name ~= nil then\n      name = '[' .. name .. ']: '\n   else\n      name = ''\n   end\n   self.dateformat = format .. name\nend\n\n\nfunction CmdLine:argument(key, help, _type_)\n   table.insert(self.arguments, {key=key, help=help, type=_type_})\n   table.insert(self.helplines, self.arguments[#self.arguments])\nend\n\nfunction CmdLine:option(key, default, help, _type_)\n   if default == nil then\n      error('option ' .. key .. ' has no default value')\n   end\n   _type_ = _type_ or type(default)\n   if type(default) ~= _type_ then\n      error('option ' .. key .. 
' has wrong default type value')\n   end\n   self.options[key] = {key=key, default=default, help=help, type=_type_}\n   table.insert(self.helplines, self.options[key])\nend\n\nfunction CmdLine:default()\n   local params = {}\n   for option,v in pairs(self.options) do\n      params[strip(option)] = v.default\n   end\n   return params\nend\n\nfunction CmdLine:parse(arg)\n   local i = 1\n   local params = self:default()\n\n   local nArgument = 0\n\n   while i <= #arg do\n      if arg[i] == '-help' or arg[i] == '-h' or arg[i] == '--help' then\n         self:help(arg)\n         os.exit(0)\n      end\n\n      if self.options[arg[i]] then\n         i = i + self:__readOption__(params, arg, i)\n      else\n         nArgument = nArgument + 1\n         i = i + self:__readArgument__(params, arg, i, nArgument)\n      end\n   end\n\n   if nArgument ~= #self.arguments then\n      self:error('not enough arguments')\n   end\n\n   return params\nend\n\nfunction CmdLine:string(prefix, params, ignore)\n   local arguments = {}\n   local options = {}\n   prefix = prefix or ''\n\n   for k,v in pairs(params) do\n      if ignore[k] then\n         print('-- ignore option ' .. k)\n      elseif self.options['-' .. k] then\n         if v ~= self.options['-' .. k].default or ignore[k] == false then\n            if type(v) == 'boolean' then\n               if v then\n                  v = 't'\n               else\n                  v = 'f'\n               end\n            end\n            table.insert(options, k .. self.keyseparator .. v)\n            print(k,v,self.options['-' .. k].default)\n        end\n       else\n         local narg\n         for i=1,#self.arguments do\n            if strip(self.arguments[i].key) == k then\n               narg = i\n            end\n         end\n         if narg then\n            arguments[narg] = k .. self.keyseparator .. v\n         else\n            print('WARNING: unknown option/argument: ' .. k .. 
' IGNORING for DIRECTORY NAME')\n         end\n      end\n   end\n   table.sort(options)\n   local str = table.concat(arguments, self.argseparator)\n   if str == '' then\n      str = table.concat(options, self.argseparator)\n   else\n      str = str .. self.argseparator .. table.concat(options, self.argseparator)\n   end\n   if str == '' then\n      return prefix\n   else\n      return prefix .. self.argseparator .. str\n   end\nend\n\nlocal oprint = nil\nfunction CmdLine:log(file, params)\n   local f = (io.type(file) == 'file' and file) or io.open(file, 'w')\n   oprint = oprint or print -- get the current print function lazily\n   function print(...)\n      local n = select(\"#\", ...)\n      local arg = {...}\n      if not self.silentio then\n\t oprint(...)\n      end\n      local str = {}\n      if self.dateformat then\n\t table.insert(str, os.date(self.dateformat))\n      end\n      for i=1,n do\n\t table.insert(str,tostring(arg[i]))\n      end\n      table.insert(str,'\\n')\n      f:write(table.concat(str,' '))\n      f:flush()\n   end\n   print('[program started on ' .. os.date() .. ']')\n   print('[command line arguments]')\n   if params then\n      for k,v in pairs(params) do\n         print(k,v)\n      end\n   end\n   print('[----------------------]')\nend\n\nfunction CmdLine:text(txt)\n   txt = txt or ''\n   assert(type(txt) == 'string')\n   table.insert(self.helplines, txt)\nend\n\nfunction CmdLine:help(arg)\n   io.write('Usage: ')\n   if arg then io.write(arg[0] .. ' ') end\n   io.write('[options]')\n   for i=1,#self.arguments do\n      io.write(' <' .. strip(self.arguments[i].key) .. 
'>')\n   end\n   io.write('\\n')\n\n   -- first pass to compute max length\n   local optsz = 0\n   for _,option in ipairs(self.helplines) do\n      if type(option) == 'table' then\n         if option.default ~= nil then -- it is an option\n            if #option.key > optsz then\n               optsz = #option.key\n            end\n         else -- it is an argument\n            if #strip(option.key)+2 > optsz then\n               optsz = #strip(option.key)+2\n            end\n         end\n      end\n   end\n\n   -- second pass to print\n   for _,option in ipairs(self.helplines) do\n      if type(option) == 'table' then\n         io.write('  ')\n         if option.default ~= nil then -- it is an option\n            io.write(pad(option.key, optsz))\n            if option.help then io.write(' ' .. option.help) end\n            io.write(' [' .. tostring(option.default) .. ']')\n         else -- it is an argument\n            io.write(pad('<' .. strip(option.key) .. '>', optsz))\n            if option.help then io.write(' ' .. option.help) end\n         end\n      else\n         io.write(option) -- just some additional help\n      end\n      io.write('\\n')\n   end\nend\n"
  },
  {
    "path": "DiskFile.c",
    "content": "#include \"general.h\"\n\nstatic int torch_DiskFile_new(lua_State *L)\n{\n  const char *name = luaL_checkstring(L, 1);\n  const char *mode = luaL_optstring(L, 2, \"r\");\n  int isQuiet = luaT_optboolean(L, 3, 0);\n  THFile *self = THDiskFile_new(name, mode, isQuiet);\n\n  luaT_pushudata(L, self, \"torch.DiskFile\");\n  return 1;\n}\n\nstatic int torch_DiskFile_free(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.DiskFile\");\n  THFile_free(self);\n  return 0;\n}\n\nstatic int torch_DiskFile_isLittleEndianCPU(lua_State *L)\n{\n  lua_pushboolean(L, THDiskFile_isLittleEndianCPU());\n  return 1;\n}\n\nstatic int torch_DiskFile_isBigEndianCPU(lua_State *L)\n{\n  lua_pushboolean(L, !THDiskFile_isLittleEndianCPU());\n  return 1;\n}\n\nstatic int torch_DiskFile_nativeEndianEncoding(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.DiskFile\");\n  THDiskFile_nativeEndianEncoding(self);\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_DiskFile_littleEndianEncoding(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.DiskFile\");\n  THDiskFile_littleEndianEncoding(self);\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_DiskFile_bigEndianEncoding(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.DiskFile\");\n  THDiskFile_bigEndianEncoding(self);\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_DiskFile_longSize(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.DiskFile\");\n  THDiskFile_longSize(self, lua_tointeger(L, 2));\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_DiskFile_noBuffer(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.DiskFile\");\n  THDiskFile_noBuffer(self);\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_DiskFile___tostring__(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.DiskFile\");\n  lua_pushfstring(L, \"torch.DiskFile on <%s> [status: %s -- mode %c%c]\",\n                  
THDiskFile_name(self),\n                  (THFile_isOpened(self) ? \"open\" : \"closed\"),\n                  (THFile_isReadable(self) ? 'r' : ' '),\n                  (THFile_isWritable(self) ? 'w' : ' '));\n\n  return 1;\n}\nstatic const struct luaL_Reg torch_DiskFile__ [] = {\n  {\"isLittleEndianCPU\", torch_DiskFile_isLittleEndianCPU},\n  {\"isBigEndianCPU\", torch_DiskFile_isBigEndianCPU},\n  {\"nativeEndianEncoding\", torch_DiskFile_nativeEndianEncoding},\n  {\"littleEndianEncoding\", torch_DiskFile_littleEndianEncoding},\n  {\"bigEndianEncoding\", torch_DiskFile_bigEndianEncoding},\n  {\"longSize\", torch_DiskFile_longSize},\n  {\"noBuffer\", torch_DiskFile_noBuffer},\n  {\"__tostring__\", torch_DiskFile___tostring__},\n  {NULL, NULL}\n};\n\nvoid torch_DiskFile_init(lua_State *L)\n{\n  luaT_newmetatable(L, \"torch.DiskFile\", \"torch.File\",\n                    torch_DiskFile_new, torch_DiskFile_free, NULL);\n\n  luaT_setfuncs(L, torch_DiskFile__, 0);\n  lua_pop(L, 1);\n}\n"
  },
  {
    "path": "FFInterface.lua",
    "content": "-- if this causes issues, you may need to:\n-- luarocks remove --force ffi\n-- and follow instructions to install\n-- https://github.com/facebook/luaffifb\nlocal ok, ffi = pcall(require, 'ffi')\n\nlocal function checkArgument(condition, fn, ud, msg, level)\n   local level = level or 3\n   if not condition then\n      error(\"bad argument #\" .. ud .. \" to '\" .. fn .. \"' (\" .. msg .. \")\", level)\n   end\nend\n\nlocal function checkArgumentType(expected, actual, fn, ud, level)\n   local level = level or 3\n   if expected ~= actual then\n      checkArgument(false, fn, ud, expected .. \" expected, got \" .. actual, level + 1)\n   end\nend\n\nif ok then\n\n   local Real2real = {\n      Byte='unsigned char',\n      Char='char',\n      Short='short',\n      Int='int',\n      Long='long',\n      Float='float',\n      Double='double',\n      Half='THHalf'\n   }\n\n   -- Allocator\n   ffi.cdef[[\ntypedef struct THAllocator {\n  void* (*malloc)(void*, ptrdiff_t);\n  void* (*realloc)(void*, void*, ptrdiff_t);\n  void (*free)(void*, void*);\n} THAllocator;\n]]\n\n   -- Half\n   ffi.cdef[[\ntypedef struct {\n  unsigned short x;\n} __THHalf;\ntypedef __THHalf THHalf;\n]]\n\n   -- Storage\n   for Real, real in pairs(Real2real) do\n\n      local cdefs = [[\ntypedef struct THRealStorage\n{\n    real *data;\n    ptrdiff_t size;\n    int refcount;\n    char flag;\n    THAllocator *allocator;\n    void *allocatorContext;\n} THRealStorage;\n]]\n      cdefs = cdefs:gsub('Real', Real):gsub('real', real)\n      ffi.cdef(cdefs)\n\n      local Storage = torch.getmetatable(string.format('torch.%sStorage', Real))\n      local Storage_tt = ffi.typeof('TH' .. Real .. 
'Storage**')\n\n      rawset(Storage,\n             \"cdata\",\n             function(self)\n                return Storage_tt(self)[0]\n             end)\n\n      rawset(Storage,\n             \"data\",\n             function(self)\n                return Storage_tt(self)[0].data\n             end)\n   end\n\n   -- Tensor\n   for Real, real in pairs(Real2real) do\n\n      local cdefs = [[\ntypedef struct THRealTensor\n{\n    long *size;\n    long *stride;\n    int nDimension;\n\n    THRealStorage *storage;\n    ptrdiff_t storageOffset;\n    int refcount;\n\n    char flag;\n\n} THRealTensor;\n]]\n      cdefs = cdefs:gsub('Real', Real):gsub('real', real)\n      ffi.cdef(cdefs)\n\n      local Tensor_type = string.format('torch.%sTensor', Real)\n      local Tensor = torch.getmetatable(Tensor_type)\n      local Tensor_tt = ffi.typeof('TH' .. Real .. 'Tensor**')\n\n      rawset(Tensor,\n             \"cdata\",\n             function(self)\n                if not self then return nil; end\n                return Tensor_tt(self)[0]\n             end)\n\n      rawset(Tensor,\n             \"data\",\n             function(self)\n                if not self then return nil; end\n                self = Tensor_tt(self)[0]\n                return self.storage ~= nil and self.storage.data + self.storageOffset or nil\n             end)\n\n      -- faster apply (contiguous case)\n      if Tensor_type ~= 'torch.HalfTensor' then\n         local apply = Tensor.apply\n         rawset(Tensor,\n                \"apply\",\n                function(self, func)\n                   if self:isContiguous() and self.data then\n                      local self_d = self:data()\n                      for i=0,self:nElement()-1 do\n                         local res = func(tonumber(self_d[i])) -- tonumber() required for long...\n                         if res then\n                            self_d[i] = res\n                         end\n                      end\n                      return 
self\n                   else\n                      return apply(self, func)\n                   end\n                end)\n\n         -- faster map (contiguous case)\n         local map = Tensor.map\n         rawset(Tensor,\n                \"map\",\n                function(self, src, func)\n                   checkArgument(torch.isTensor(src), \"map\", 1, \"tensor expected\")\n                   checkArgumentType(self:type(), src:type(), \"map\", 1)\n\n                   if self:isContiguous() and src:isContiguous() and self.data and src.data then\n                      local self_d = self:data()\n                      local src_d = src:data()\n                      assert(src:nElement() == self:nElement(), 'size mismatch')\n                      for i=0,self:nElement()-1 do\n                         local res = func(tonumber(self_d[i]), tonumber(src_d[i])) -- tonumber() required for long...\n                         if res then\n                            self_d[i] = res\n                         end\n                      end\n                      return self\n                   else\n                      return map(self, src, func)\n                   end\n                end)\n\n         -- faster map2 (contiguous case)\n         local map2 = Tensor.map2\n         rawset(Tensor,\n                \"map2\",\n                function(self, src1, src2, func)\n                   checkArgument(torch.isTensor(src1), \"map\", 1, \"tensor expected\")\n                   checkArgument(torch.isTensor(src2), \"map\", 2, \"tensor expected\")\n                   checkArgumentType(self:type(), src1:type(), \"map\", 1)\n                   checkArgumentType(self:type(), src2:type(), \"map\", 2)\n\n                   if self:isContiguous() and src1:isContiguous() and src2:isContiguous() and self.data and src1.data and src2.data then\n                      local self_d = self:data()\n                     local src1_d = src1:data()\n                      local src2_d = 
src2:data()\n                      assert(src1:nElement() == self:nElement(), 'size mismatch')\n                      assert(src2:nElement() == self:nElement(), 'size mismatch')\n                      for i=0,self:nElement()-1 do\n                         local res = func(tonumber(self_d[i]), tonumber(src1_d[i]), tonumber(src2_d[i])) -- tonumber() required for long...\n                         if res then\n                            self_d[i] = res\n                         end\n                      end\n                      return self\n                   else\n                      return map2(self, src1, src2, func)\n                   end\n                end)\n             end\n   end\n\n   -- torch.data\n   -- will fail if :data() is not defined\n   function torch.data(self, asnumber)\n      if not self then return nil; end\n      local data = self:data()\n      if asnumber then\n         return ffi.cast('intptr_t', data)\n      else\n         return data\n      end\n   end\n\n   -- torch.cdata\n   -- will fail if :cdata() is not defined\n   function torch.cdata(self, asnumber)\n      if not self then return nil; end\n      local cdata = self:cdata()\n      if asnumber then\n         return ffi.cast('intptr_t', cdata)\n      else\n         return cdata\n      end\n   end\n\nend\n"
  },
  {
    "path": "File.c",
    "content": "#include \"general.h\"\n#include \"THFile.h\"\n#include \"luaT.h\"\n\n#define IMPLEMENT_TORCH_FILE_FLAG(NAME)                   \\\n  static int torch_File_##NAME(lua_State *L)              \\\n  {                                                       \\\n    THFile *self = luaT_checkudata(L, 1, \"torch.File\");  \\\n    lua_pushboolean(L, THFile_##NAME(self));              \\\n    return 1;                                             \\\n  }\n\nIMPLEMENT_TORCH_FILE_FLAG(isQuiet)\nIMPLEMENT_TORCH_FILE_FLAG(isReadable)\nIMPLEMENT_TORCH_FILE_FLAG(isWritable)\nIMPLEMENT_TORCH_FILE_FLAG(isBinary)\nIMPLEMENT_TORCH_FILE_FLAG(isAutoSpacing)\nIMPLEMENT_TORCH_FILE_FLAG(hasError)\n\n#define IMPLEMENT_TORCH_FILE_FUNC(NAME)                   \\\n  static int torch_File_##NAME(lua_State *L)              \\\n  {                                                       \\\n    THFile *self = luaT_checkudata(L, 1, \"torch.File\");  \\\n    THFile_##NAME(self);                                  \\\n    lua_settop(L, 1);                                     \\\n    return 1;                                             \\\n  }\n\nIMPLEMENT_TORCH_FILE_FUNC(binary)\nIMPLEMENT_TORCH_FILE_FUNC(ascii)\nIMPLEMENT_TORCH_FILE_FUNC(autoSpacing)\nIMPLEMENT_TORCH_FILE_FUNC(noAutoSpacing)\nIMPLEMENT_TORCH_FILE_FUNC(quiet)\nIMPLEMENT_TORCH_FILE_FUNC(pedantic)\nIMPLEMENT_TORCH_FILE_FUNC(clearError)\n\nIMPLEMENT_TORCH_FILE_FUNC(synchronize)\n\nstatic int torch_File_seek(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.File\");\n  ptrdiff_t position = luaL_checkinteger(L, 2)-1;\n  // >= 0 because it has 1 already subtracted\n  THArgCheck(position >= 0, 2, \"position has to be greater than 0!\");\n  THFile_seek(self, (size_t)position);\n  lua_settop(L, 1);\n  return 1;\n}\n\nIMPLEMENT_TORCH_FILE_FUNC(seekEnd)\n\nstatic int torch_File_position(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.File\");\n  lua_pushnumber(L, THFile_position(self)+1);\n  return 
1;\n}\n\nIMPLEMENT_TORCH_FILE_FUNC(close)\n\n#define IMPLEMENT_TORCH_FILE_RW(TYPEC, TYPE)                            \\\n  static int torch_File_read##TYPEC(lua_State *L)                       \\\n  {                                                                     \\\n    THFile *self = luaT_checkudata(L, 1, \"torch.File\");                \\\n    int narg = lua_gettop(L);                                           \\\n                                                                        \\\n    if(narg == 1)                                                       \\\n    {                                                                   \\\n      lua_pushnumber(L, THFile_read##TYPEC##Scalar(self));              \\\n      return 1;                                                         \\\n    }                                                                   \\\n    else if(narg == 2)                                                  \\\n    {                                                                   \\\n      if(lua_isnumber(L, 2))                                            \\\n      {                                                                 \\\n        ptrdiff_t size = lua_tonumber(L, 2);                                 \\\n        ptrdiff_t nread;                                                     \\\n                                                                        \\\n        TH##TYPEC##Storage *storage = TH##TYPEC##Storage_newWithSize(size); \\\n        luaT_pushudata(L, storage, \"torch.\" #TYPEC \"Storage\");          \\\n        nread = THFile_read##TYPEC(self, storage);                      \\\n        if(nread != size)                                               \\\n          TH##TYPEC##Storage_resize(storage, nread);                    \\\n        return 1;                                                       \\\n      }                                                                 \\\n      else if(luaT_toudata(L, 2, 
\"torch.\" #TYPEC \"Storage\"))            \\\n      {                                                                 \\\n        TH##TYPEC##Storage *storage = luaT_toudata(L, 2, \"torch.\" #TYPEC \"Storage\"); \\\n        lua_pushnumber(L, THFile_read##TYPEC(self, storage));           \\\n        return 1;                                                       \\\n      }                                                                 \\\n    }                                                                   \\\n                                                                        \\\n    luaL_error(L, \"nothing, number, or \" #TYPEC \"Storage expected\");    \\\n    return 0;                                                           \\\n  }                                                                     \\\n                                                                        \\\n  static int torch_File_write##TYPEC(lua_State *L)                      \\\n  {                                                                     \\\n    THFile *self = luaT_checkudata(L, 1, \"torch.File\");                \\\n    int narg = lua_gettop(L);                                           \\\n                                                                        \\\n    if(narg == 2)                                                       \\\n    {                                                                   \\\n      if(lua_isnumber(L, 2))                                            \\\n      {                                                                 \\\n        TYPE value = lua_tonumber(L, 2);                                \\\n        THFile_write##TYPEC##Scalar(self, (TYPE)value);                 \\\n        return 0;                                                       \\\n      }                                                                 \\\n      else if(luaT_toudata(L, 2, \"torch.\" #TYPEC \"Storage\"))            \\\n      {                    
                                             \\\n        TH##TYPEC##Storage *storage = luaT_toudata(L, 2, \"torch.\" #TYPEC \"Storage\"); \\\n        lua_pushnumber(L, THFile_write##TYPEC(self, storage));          \\\n        return 1;                                                       \\\n      }                                                                 \\\n    }                                                                   \\\n                                                                        \\\n    luaL_error(L, \"number, or \" #TYPEC \"Storage expected\");             \\\n    return 0;                                                           \\\n  }\n\n\nIMPLEMENT_TORCH_FILE_RW(Byte, unsigned char)\nIMPLEMENT_TORCH_FILE_RW(Char, char)\nIMPLEMENT_TORCH_FILE_RW(Short, short)\nIMPLEMENT_TORCH_FILE_RW(Int, int)\nIMPLEMENT_TORCH_FILE_RW(Long, long)\nIMPLEMENT_TORCH_FILE_RW(Float, float)\nIMPLEMENT_TORCH_FILE_RW(Double, double)\n\nstatic int torch_File_readString(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.File\");\n  const char *format = luaL_checkstring(L, 2);\n  char *str;\n  ptrdiff_t size;\n\n  size = THFile_readStringRaw(self, format, &str);\n  lua_pushlstring(L, str, size);\n  THFree(str);\n\n  return 1;\n}\n\nstatic int torch_File_writeString(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.File\");\n  const char *str = NULL;\n  size_t size;\n\n  luaL_checktype(L, 2, LUA_TSTRING);\n  str = lua_tolstring(L, 2, &size);\n  lua_pushnumber(L, THFile_writeStringRaw(self, str, size));\n  return 1;\n}\n\nstatic const struct luaL_Reg torch_File__ [] = {\n  {\"isQuiet\", torch_File_isQuiet},\n  {\"isReadable\", torch_File_isReadable},\n  {\"isWritable\", torch_File_isWritable},\n  {\"isBinary\", torch_File_isBinary},\n  {\"isAutoSpacing\", torch_File_isAutoSpacing},\n  {\"hasError\", torch_File_hasError},\n  {\"binary\", torch_File_binary},\n  {\"ascii\", torch_File_ascii},\n  {\"autoSpacing\", 
torch_File_autoSpacing},\n  {\"noAutoSpacing\", torch_File_noAutoSpacing},\n  {\"quiet\", torch_File_quiet},\n  {\"pedantic\", torch_File_pedantic},\n  {\"clearError\", torch_File_clearError},\n\n  /* DEBUG: CHECK DISK FREE & READ/WRITE STRING*/\n\n  {\"readByte\", torch_File_readByte},\n  {\"readChar\", torch_File_readChar},\n  {\"readShort\", torch_File_readShort},\n  {\"readInt\", torch_File_readInt},\n  {\"readLong\", torch_File_readLong},\n  {\"readFloat\", torch_File_readFloat},\n  {\"readDouble\", torch_File_readDouble},\n  {\"readString\", torch_File_readString},\n\n  {\"writeByte\", torch_File_writeByte},\n  {\"writeChar\", torch_File_writeChar},\n  {\"writeShort\", torch_File_writeShort},\n  {\"writeInt\", torch_File_writeInt},\n  {\"writeLong\", torch_File_writeLong},\n  {\"writeFloat\", torch_File_writeFloat},\n  {\"writeDouble\", torch_File_writeDouble},\n  {\"writeString\", torch_File_writeString},\n\n  {\"synchronize\", torch_File_synchronize},\n  {\"seek\", torch_File_seek},\n  {\"seekEnd\", torch_File_seekEnd},\n  {\"position\", torch_File_position},\n  {\"close\", torch_File_close},\n\n  {NULL, NULL}\n};\n\nvoid torch_File_init(lua_State *L)\n{\n  luaT_newmetatable(L, \"torch.File\", NULL, NULL, NULL, NULL);\n  luaT_setfuncs(L, torch_File__, 0);\n  lua_pop(L, 1);\n}\n"
  },
  {
    "path": "File.lua",
    "content": "local File = torch.getmetatable('torch.File')\n\nfunction File:writeBool(value)\n   if value then\n      self:writeInt(1)\n   else\n      self:writeInt(0)\n   end\nend\n\nfunction File:readBool()\n   return (self:readInt() == 1)\nend\n\nlocal TYPE_NIL      = 0\nlocal TYPE_NUMBER   = 1\nlocal TYPE_STRING   = 2\nlocal TYPE_TABLE    = 3\nlocal TYPE_TORCH    = 4\nlocal TYPE_BOOLEAN  = 5\nlocal TYPE_FUNCTION = 6\nlocal TYPE_RECUR_FUNCTION = 8\nlocal LEGACY_TYPE_RECUR_FUNCTION = 7\n\n-- Lua 5.2 compatibility\nlocal loadstring = loadstring or load\n\nfunction File:isWritableObject(object)\n   local typename = type(object)\n   local typeidx\n   if type(object) ~= 'boolean' and not object then\n      typeidx = TYPE_NIL\n   elseif torch.typename(object) and torch.factory(torch.typename(object)) then\n      typeidx = TYPE_TORCH\n   elseif typename == 'table' then\n      typeidx = TYPE_TABLE\n   elseif typename == 'number' then\n      typeidx = TYPE_NUMBER\n   elseif typename == 'string' then\n      typeidx = TYPE_STRING\n   elseif typename == 'boolean' then\n      typeidx = TYPE_BOOLEAN\n   elseif typename == 'function' and pcall(string.dump, object) then\n      typeidx = TYPE_RECUR_FUNCTION\n   end\n   return typeidx\nend\n\nfunction File:referenced(ref)\n   -- we use an environment to keep a record of written objects\n   if not torch.getenv(self).writeObjects then\n      torch.setenv(self, {\n            writeObjects={}, writeObjectsRef={},\n            readObjects={},\n            objectNameStack={},\n            upvalueRefToId={}, upvalueIdToClosure={},\n         })\n   end\n   local env = torch.getenv(self)\n   env.force = not ref\n   torch.setenv(self,env)\n   return self\nend\n\nfunction File:isReferenced()\n   -- if no environment, then no forcing setup yet\n   if not torch.getenv(self).writeObjects then\n      return true\n   end\n   local env = torch.getenv(self)\n   return not env.force\nend\n\nlocal function getmetamethod(obj, name)\n   local 
func\n   local status\n\n   -- check getmetatable(obj).__name or\n   -- check getmetatable(obj).name\n   status, func = pcall(\n      function()\n         -- note that sometimes the metatable is hidden\n         -- we get it for sure through the torch type system\n         local mt = torch.getmetatable(torch.typename(obj))\n         if mt then\n            return mt['__' .. name] or mt[name]\n         end\n      end\n   )\n   if status and type(func) == 'function' then\n      return func\n   end\nend\n\nlocal UPVALUES_TOKEN = {} -- unique object\nlocal function formatStack(objectNameStack)\n   -- Format object name stack skipping UPVALUES_TOKEN and upvalue index\n   local parts = {}\n   for i, v in ipairs(objectNameStack) do\n      if v ~= UPVALUES_TOKEN and objectNameStack[i-1] ~= UPVALUES_TOKEN then\n         table.insert(parts, v)\n      end\n   end\n   return table.concat(parts, '.')\nend\n\nfunction File:writeObject(object, debugname, hook)\n   -- define a default hook function if not provided\n   hook = hook or function(object) return object end\n   -- we use an environment to keep a record of written objects\n   if not torch.getenv(self).writeObjects then\n      torch.setenv(self, {\n            writeObjects={}, writeObjectsRef={},\n            readObjects={},\n            objectNameStack={},\n            upvalueRefToId={}, upvalueIdToClosure={},\n         })\n   end\n   -- That guy is used for references' book-keeping\n   local sobject = object\n   -- That guy is the object that is actually persisted\n   -- hook(object) can be used to modify the object before writing it to the file.\n   -- Useful for serializing objects under a config\n   -- that we want to deserialize safely under another config.\n   -- (e.g. 
Cuda to Float tensors, cudnn to nn, ...)\n   object = hook(object)\n   local force = torch.getenv(self).force\n\n   -- if nil object, only write the type and return\n   if type(object) ~= 'boolean' and not object then\n      self:writeInt(TYPE_NIL)\n      return\n   end\n\n   local objectNameStack = torch.getenv(self).objectNameStack\n   table.insert(objectNameStack, debugname or '<?>')\n\n   -- check the type we are dealing with\n   local typeidx = self:isWritableObject(object)\n   if not typeidx then\n      error(string.format('Unwritable object <%s> at %s', type(object), formatStack(objectNameStack)))\n   end\n   self:writeInt(typeidx)\n\n   if typeidx == TYPE_NUMBER then\n      self:writeDouble(object)\n   elseif typeidx == TYPE_BOOLEAN then\n      self:writeBool(object)\n   elseif typeidx == TYPE_STRING then\n      local stringStorage = torch.CharStorage():string(object)\n      self:writeInt(#stringStorage)\n      self:writeChar(stringStorage)\n   elseif typeidx == TYPE_TORCH or typeidx == TYPE_TABLE or  typeidx == TYPE_RECUR_FUNCTION then\n      -- check it exists already (we look at the pointer!)\n      local objects = torch.getenv(self).writeObjects\n      local objectsRef = torch.getenv(self).writeObjectsRef\n      local index = objects[torch.pointer(sobject)]\n\n      if index and (not force) then\n         -- if already exists, write only its index\n         self:writeInt(index)\n      else\n         -- else write the object itself\n         index = objects.nWriteObject or 0\n         index = index + 1\n         if not force then\n            objects[torch.pointer(sobject)] = index\n            objectsRef[object] = index -- we make sure the object is not going to disappear\n         end\n         self:writeInt(index)\n         objects.nWriteObject = index\n         if typeidx == TYPE_RECUR_FUNCTION then\n            local upvalueRefToId = torch.getenv(self).upvalueRefToId\n            -- Unique ID for each ref since lightuserdata are not serializable\n   
         local nextId = 1\n            for _ in pairs(upvalueRefToId) do nextId=nextId+1 end\n            local upvalues = {}\n            local counter = 0\n            while true do\n               counter = counter + 1\n               local name,value = debug.getupvalue(object, counter)\n               if not name then break end\n               if name == '_ENV' then value = nil end\n               local id=nil\n               -- debug.upvalueid exists only for lua>=5.2 and luajit\n               if debug.upvalueid then\n                  local upvalueRef = debug.upvalueid(object, counter)\n                  if not upvalueRefToId[upvalueRef] then\n                     upvalueRefToId[upvalueRef] = nextId\n                     nextId = nextId + 1\n                  end\n                  id = upvalueRefToId[upvalueRef]\n               end\n               table.insert(upvalues, {name=name, id=id, value=value})\n            end\n            local dumped = string.dump(object)\n            local stringStorage = torch.CharStorage():string(dumped)\n            self:writeInt(#stringStorage)\n            self:writeChar(stringStorage)\n            self:writeObject(upvalues, UPVALUES_TOKEN, hook)\n         elseif typeidx == TYPE_TORCH then\n            local version   = torch.CharStorage():string('V ' .. 
torch.version(object))\n            local className = torch.CharStorage():string(torch.typename(object))\n            self:writeInt(#version)\n            self:writeChar(version)\n            self:writeInt(#className)\n            self:writeChar(className)\n            local write = getmetamethod(object, 'write')\n            if write then\n               write(object, self)\n            elseif type(object) == 'table' then\n               local var = {}\n               for k,v in pairs(object) do\n                  if self:isWritableObject(v) then\n                     var[k] = v\n                  else\n                     print(string.format('$ Warning: cannot write object field <%s> of <%s> %s', k, torch.typename(object), formatStack(objectNameStack)))\n                  end\n               end\n               self:writeObject(var, torch.typename(object), hook)\n            else\n               error(string.format('<%s> is a non-serializable Torch object %s', torch.typename(object), formatStack(objectNameStack)))\n            end\n         else -- it is a table\n            local size = 0; for k,v in pairs(object) do size = size + 1 end\n            self:writeInt(size)\n            for k,v in pairs(object) do\n               self:writeObject(k, nil, hook)\n               local name = (type(k) == 'string' or type(k) == 'number') and tostring(k) or nil\n               -- special case name for upvalues\n               if objectNameStack[#objectNameStack-1] == UPVALUES_TOKEN and\n                  name == 'value' and type(object.name) == 'string' then\n                  name = object.name\n               end\n               self:writeObject(v, name, hook)\n            end\n         end\n      end\n   else\n      error('Unwritable object')\n   end\n   table.remove(objectNameStack)\nend\n\nfunction File:readObject()\n   -- we use an environment to keep a record of read objects\n   if not torch.getenv(self).writeObjects then\n      torch.setenv(self, {\n            
writeObjects={}, writeObjectsRef={},\n            readObjects={},\n            objectNameStack={},\n            upvalueRefToId={}, upvalueIdToClosure={},\n         })\n   end\n\n   local force = torch.getenv(self).force\n\n   -- read the typeidx\n   local typeidx = self:readInt()\n\n   -- is it nil?\n   if typeidx == TYPE_NIL then\n      return nil\n   end\n\n   if typeidx == TYPE_NUMBER then\n      return self:readDouble()\n   elseif typeidx == TYPE_BOOLEAN then\n      return self:readBool()\n   elseif typeidx == TYPE_STRING then\n      local size = self:readInt()\n      return self:readChar(size):string()\n   elseif typeidx == TYPE_FUNCTION then\n       local size = self:readInt()\n       local dumped = self:readChar(size):string()\n       local func, err = loadstring(dumped)\n       if not func then\n          io.stderr:write(string.format('Warning: Failed to load function from bytecode: %s', err))\n       end\n       local upvalues = self:readObject()\n       for index,upvalue in ipairs(upvalues) do\n          debug.setupvalue(func, index, upvalue)\n       end\n       return func\n   elseif typeidx == TYPE_TABLE or typeidx == TYPE_TORCH or typeidx == TYPE_RECUR_FUNCTION or typeidx == LEGACY_TYPE_RECUR_FUNCTION then\n      -- read the index\n      local index = self:readInt()\n\n      -- check it is loaded already\n      local objects = torch.getenv(self).readObjects\n      if objects[index] and not force then\n         return objects[index]\n      end\n\n      -- otherwise read it\n      if typeidx == TYPE_RECUR_FUNCTION or typeidx == LEGACY_TYPE_RECUR_FUNCTION then\n         local size = self:readInt()\n         local dumped = self:readChar(size):string()\n         local func, err = loadstring(dumped)\n         if not func then\n\t    io.stderr:write(string.format('Warning: Failed to load function from bytecode: %s', err))\n         end\n         if not force then\n             objects[index] = func\n         end\n         local upvalueIdToClosure = 
torch.getenv(self).upvalueIdToClosure\n         local upvalues = self:readObject()\n         for index,upvalue in ipairs(upvalues) do\n            if typeidx == LEGACY_TYPE_RECUR_FUNCTION then\n               debug.setupvalue(func, index, upvalue)\n            elseif upvalue.name == '_ENV' then\n               debug.setupvalue(func, index, _ENV)\n            else\n               debug.setupvalue(func, index, upvalue.value)\n               -- debug.upvaluejoin exists only for lua>=5.2 and luajit\n               if debug.upvaluejoin and upvalue.id then\n                  if upvalueIdToClosure[upvalue.id] then\n                     -- This upvalue is linked to another one\n                     local otherClosure = upvalueIdToClosure[upvalue.id]\n                     debug.upvaluejoin(func, index, otherClosure.func, otherClosure.index)\n                  else\n                     -- Save this closure for next time\n                     upvalueIdToClosure[upvalue.id] = {\n                        func = func,\n                        index = index,\n                     }\n                  end\n               end\n            end\n         end\n         return func\n      elseif typeidx == TYPE_TORCH then\n         local version, className, versionNumber\n         version = self:readChar(self:readInt()):string()\n         versionNumber = tonumber(string.match(version, '^V (.*)$'))\n         if not versionNumber then\n            className = version\n            versionNumber = 0 -- file created before existence of versioning system\n         else\n            className = self:readChar(self:readInt()):string()\n         end\n         if not torch.factory(className) then\n            error(string.format('unknown Torch class <%s>', tostring(className)))\n         end\n         local object = torch.factory(className)(self)\n         if not force then\n             objects[index] = object\n         end\n         local read = getmetamethod(object, 'read')\n         if read 
then\n            read(object, self, versionNumber)\n         elseif type(object) == 'table' then\n            local var = self:readObject()\n            for k,v in pairs(var) do\n               object[k] = v\n            end\n         else\n            error(string.format('Cannot load object class <%s>', tostring(className)))\n         end\n         return object\n      else -- it is a table\n         local size = self:readInt()\n         local object = {}\n         if not force then\n             objects[index] = object\n         end\n         for i = 1,size do\n            local k = self:readObject()\n            local v = self:readObject()\n            object[k] = v\n         end\n         return object\n      end\n   else\n      error('unknown object')\n   end\nend\n\n-- simple helpers to save/load arbitrary objects/tables\nfunction torch.save(filename, object, mode, referenced)\n   assert(mode == nil or mode == 'binary' or mode == 'ascii', '\"binary\" or \"ascii\" (or nil) expected for mode')\n   assert(referenced == nil or referenced == true or referenced == false, 'true or false (or nil) expected for referenced')\n   mode = mode or 'binary'\n   referenced = referenced == nil and true or referenced\n   local file = torch.DiskFile(filename, 'w')\n   file[mode](file)\n   file:referenced(referenced)\n   file:writeObject(object)\n   file:close()\nend\n\nfunction torch.load(filename, mode, referenced)\n   assert(mode == 'binary' or mode == 'b32' or mode == 'b64' or\n          mode == nil or mode == 'ascii',\n          '\"binary\", \"b32\", \"b64\" or \"ascii\" (or nil) expected for mode')\n   assert(referenced == nil or referenced == true or referenced == false,\n          'true or false (or nil) expected for referenced')\n   local longSize\n   if mode == 'b32' or mode == 'b64' then\n      longSize = tonumber(mode:match('%d+')) / 8\n      mode = 'binary'\n   end\n   mode = mode or 'binary'\n   referenced = referenced == nil and true or referenced\n   local file = 
torch.DiskFile(filename, 'r')\n   file[mode](file)\n   file:referenced(referenced)\n   if longSize then file:longSize(longSize) end\n   local object = file:readObject()\n   file:close()\n   return object\nend\n\n-- simple helpers to serialize/deserialize arbitrary objects/tables\nfunction torch.serialize(object, mode)\n   local storage = torch.serializeToStorage(object, mode)\n   return storage:string()\nend\n\n-- Serialize to a CharStorage, not a lua string. This avoids\nfunction torch.serializeToStorage(object, mode)\n   mode = mode or 'binary'\n   local f = torch.MemoryFile()\n   f = f[mode](f)\n   f:writeObject(object)\n   local storage = f:storage()\n   -- the storage includes an extra NULL character: get rid of it\n   storage:resize(storage:size()-1)\n   f:close()\n   return storage\nend\n\nfunction torch.deserializeFromStorage(storage, mode)\n   mode = mode or 'binary'\n   local tx = torch.CharTensor(storage)\n   local xp = torch.CharStorage(tx:size(1)+1)\n   local txp = torch.CharTensor(xp)\n   txp:narrow(1,1,tx:size(1)):copy(tx)\n   txp[tx:size(1)+1] = 0\n   local f = torch.MemoryFile(xp)\n   f = f[mode](f)\n   local object = f:readObject()\n   f:close()\n   return object\nend\n\nfunction torch.deserialize(str, mode)\n   local storage = torch.CharStorage():string(str)\n   return torch.deserializeFromStorage(storage, mode)\nend\n\n-- public API (saveobj/loadobj are safe for global import)\ntorch.saveobj = torch.save\ntorch.loadobj = torch.load\n"
  },
  {
    "path": "Generator.c",
    "content": "#include <general.h>\n\nint torch_Generator_new(lua_State *L)\n{\n  THGenerator *gen = THGenerator_new();\n  luaT_pushudata(L, gen, torch_Generator);\n  return 1;\n}\n\nint torch_Generator_free(lua_State *L)\n{\n  THGenerator *gen= luaT_checkudata(L, 1, torch_Generator);\n  THGenerator_free(gen);\n  return 0;\n}\n\nstatic int torch_Generator_write(lua_State *L)\n{\n  THGenerator *gen = luaT_checkudata(L, 1, torch_Generator);\n  THFile *file = luaT_checkudata(L, 2, \"torch.File\");\n\n  THFile_writeByteRaw(file, (unsigned char *)gen, sizeof(THGenerator));\n  return 0;\n}\n\nstatic int torch_Generator_read(lua_State *L)\n{\n  THGenerator *gen = luaT_checkudata(L, 1, torch_Generator);\n  THFile *file = luaT_checkudata(L, 2, \"torch.File\");\n\n  THFile_readByteRaw(file, (unsigned char *)gen, sizeof(THGenerator));\n  return 0;\n}\n\n\nstatic const struct luaL_Reg torch_Generator_table_ [] = {\n  {\"write\", torch_Generator_write},\n  {\"read\", torch_Generator_read},\n  {NULL, NULL}\n};\n\n#define torch_Generator_factory torch_Generator_new\n\nvoid torch_Generator_init(lua_State *L)\n{\n  luaT_newmetatable(L, torch_Generator, NULL,\n                    torch_Generator_new, torch_Generator_free, torch_Generator_factory);\n  luaT_setfuncs(L, torch_Generator_table_, 0);\n  lua_pop(L, 1);\n}\n"
  },
  {
    "path": "MemoryFile.c",
    "content": "#include \"general.h\"\n\nstatic int torch_MemoryFile_new(lua_State *L)\n{\n  const char *mode;\n  THCharStorage *storage = luaT_toudata(L, 1, \"torch.CharStorage\");\n  THFile *self;\n\n  if(storage)\n  {\n    mode = luaL_optstring(L, 2, \"rw\");\n    self = THMemoryFile_newWithStorage(storage, mode);\n  }\n  else\n  {\n    mode = luaL_optstring(L, 1, \"rw\");\n    self = THMemoryFile_new(mode);\n  }\n\n  luaT_pushudata(L, self, \"torch.MemoryFile\");\n  return 1;\n}\n\nstatic int torch_MemoryFile_storage(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.MemoryFile\");\n  THCharStorage_retain(THMemoryFile_storage(self));\n  luaT_pushudata(L, THMemoryFile_storage(self), \"torch.CharStorage\");\n  return 1;\n}\n\nstatic int torch_longSize(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.MemoryFile\");\n  THMemoryFile_longSize(self, lua_tointeger(L, 2));\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_MemoryFile_free(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.MemoryFile\");\n  THFile_free(self);\n  return 0;\n}\n\nstatic int torch_MemoryFile___tostring__(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.MemoryFile\");\n  lua_pushfstring(L, \"torch.MemoryFile [status: %s -- mode: %c%c]\",\n                  (THFile_isOpened(self) ? \"open\" : \"closed\"),\n                  (THFile_isReadable(self) ? 'r' : ' '),\n                  (THFile_isWritable(self) ? 'w' : ' '));\n  return 1;\n}\n\nstatic const struct luaL_Reg torch_MemoryFile__ [] = {\n  {\"storage\", torch_MemoryFile_storage},\n  {\"longSize\", torch_longSize},\n  {\"__tostring__\", torch_MemoryFile___tostring__},\n  {NULL, NULL}\n};\n\nvoid torch_MemoryFile_init(lua_State *L)\n{\n  luaT_newmetatable(L, \"torch.MemoryFile\", \"torch.File\",\n                    torch_MemoryFile_new, torch_MemoryFile_free, NULL);\n  luaT_setfuncs(L, torch_MemoryFile__, 0);\n  lua_pop(L, 1);\n}\n"
  },
  {
    "path": "PipeFile.c",
    "content": "#include \"general.h\"\n\nstatic int torch_PipeFile_new(lua_State *L)\n{\n  const char *name = luaL_checkstring(L, 1);\n  const char *mode = luaL_optstring(L, 2, \"r\");\n  int isQuiet = luaT_optboolean(L, 3, 0);\n  THFile *self = THPipeFile_new(name, mode, isQuiet);\n\n  luaT_pushudata(L, self, \"torch.PipeFile\");\n  return 1;\n}\n\nstatic int torch_PipeFile_free(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.PipeFile\");\n  THFile_free(self);\n  return 0;\n}\n\nstatic int torch_PipeFile___tostring__(lua_State *L)\n{\n  THFile *self = luaT_checkudata(L, 1, \"torch.PipeFile\");\n  lua_pushfstring(L, \"torch.PipeFile on <%s> [status: %s -- mode: %c%c]\",\n                  THDiskFile_name(self),\n                  (THFile_isOpened(self) ? \"open\" : \"closed\"),\n                  (THFile_isReadable(self) ? 'r' : ' '),\n                  (THFile_isWritable(self) ? 'w' : ' '));\n  return 1;\n}\n\nstatic const struct luaL_Reg torch_PipeFile__ [] = {\n  {\"__tostring__\", torch_PipeFile___tostring__},\n  {NULL, NULL}\n};\n\nvoid torch_PipeFile_init(lua_State *L)\n{\n  luaT_newmetatable(L, \"torch.PipeFile\", \"torch.DiskFile\",\n                    torch_PipeFile_new, torch_PipeFile_free, NULL);\n  luaT_setfuncs(L, torch_PipeFile__, 0);\n  lua_pop(L, 1);\n}\n"
  },
  {
    "path": "README.md",
    "content": "[![Join the chat at https://gitter.im/torch/torch7](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/torch/torch7?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)\n[![Build Status](https://travis-ci.org/torch/torch7.svg)](https://travis-ci.org/torch/torch7)\n\n## Development Status\n\nTorch is not in active development. The functionality provided by the C backend of Torch (the TH, THNN, THC, and THCUNN libraries) is actively extended and re-written in the ATen C++11 library ([source](https://github.com/pytorch/pytorch/tree/master/aten), [mirror](https://github.com/zdevito/ATen/)).\nATen exposes all operators you would expect from torch7, nn, cutorch, and cunn directly in C++11 and includes additional support for sparse tensors and distributed operations. Note, however, that the API and semantics of the backend libraries in Torch-7 are different from the semantics provided by ATen. For example, ATen provides numpy-style broadcasting while the TH* libraries do not. For information on building the forked Torch-7 libraries in C, refer to [\"The C interface\" in pytorch/aten/src/README.md](https://github.com/pytorch/pytorch/tree/master/aten/src#the-c-interface).\n\n\n## Need help? ##\n\nTorch7 community support can be found at the following locations. 
As of 2019, the Torch-7 community is close to non-existent.\n\n* Questions, Support, Install issues: [Google groups](https://groups.google.com/forum/#!forum/torch7)\n* Reporting bugs: [torch7](https://github.com/torch/torch7/issues) [nn](https://github.com/torch/nn/issues) [cutorch](https://github.com/torch/cutorch/issues) [cunn](https://github.com/torch/cunn/issues) [optim](https://github.com/torch/optim/issues) [threads](https://github.com/torch/threads/issues)\n* Hanging out with other developers and users (strictly no install issues, no large blobs of text): [Gitter Chat](https://gitter.im/torch/torch7)\n\n<a name=\"torch.reference.dok\"></a>\n# Torch Package Reference Manual #\n\n__Torch__ is the main package in [Torch7](http://torch.ch) where data\nstructures for multi-dimensional tensors and mathematical operations\nover these are defined. Additionally, it provides many utilities for\naccessing files, serializing objects of arbitrary types and other\nuseful utilities.\n\n<a name=\"torch.overview.dok\"></a>\n## Torch Packages ##\n\n  * Tensor Library\n    * [Tensor](doc/tensor.md) defines the _all powerful_ tensor object that provides multi-dimensional numerical arrays with type templating.\n    * [Mathematical operations](doc/maths.md) that are defined for the tensor object types.\n    * [Storage](doc/storage.md) defines a simple storage interface that controls the underlying storage for any tensor object.\n  * File I/O Interface Library\n    * [File](doc/file.md) is an abstract interface for common file operations.\n    * [Disk File](doc/diskfile.md) defines operations on files stored on disk.\n    * [Memory File](doc/memoryfile.md) defines operations on files stored in RAM.\n    * [Pipe File](doc/pipefile.md) defines operations for using piped commands.\n    * [High-Level File operations](doc/serialization.md) defines higher-level serialization functions.\n  * Useful Utilities\n    * [Timer](doc/timer.md) provides functionality for _measuring time_.\n    * 
[Tester](doc/tester.md) is a generic tester framework.\n    * [CmdLine](doc/cmdline.md) is a command line argument parsing utility.\n    * [Random](doc/random.md) defines a random number generator package with various distributions.\n    * Finally useful [utility](doc/utility.md) functions are provided for easy handling of torch tensor types and class inheritance.\n\n<a name=\"torch.links.dok\"></a>\n## Useful Links ##\n\n  * [Community packages](https://github.com/torch/torch7/wiki/Cheatsheet)\n  * [Torch Blog](http://torch.ch/blog/)\n  * [Torch Slides](https://github.com/soumith/cvpr2015/blob/master/cvpr-torch.pdf)\n\n"
  },
  {
    "path": "ROADMAP.md",
    "content": "\n# Torch Roadmap (August 2015 - March 2016)\n\nThis roadmap document is intended to serve as a loose plan of our vision for Torch in the short term.  \nIt is open to community feedback and contribution and only intends to serve as an initial draft.  \nAfter community feedback, we shall freeze it and work on it.  \n\nThe roadmap focuses on five separate things\n\n- Core development: improving the core technically. Design changes, code refactors, performance, they go here.\n- Documentation and Accessibility: Outlining the changes in documentation, and improving general user and developer documentation in various ways.\n- Versioning and Packaging: Planned and much needed changes to the packaging of Torch are discussed here.\n- Continuous Build Infrastructure: Making our continuous builds more robust, introducing CUDA and OpenCL contbuilds etc.\n- Other improvements\n\n\n## Torch Core Project Development\n\n - New class system:\n   - **[definite]** with no global side-effects (i.e. the class constructor should be scoped into its parent package)\n     Get rid of every statement/system that has a global effect on the environment (torch.setdefaulttensortype => dangerous and not clean)\n   - **[needs discussion]** fully serializable (i.e. 
when deserializing/reloading a model, there shouldn't be a need to load libraries that defined the class originally, like nn; the class definition should be serialized as well: this would remove a lot of backward compatibility hacks that we have to add to class definitions currently\n       - **koray**: I like this, but wouldn't it break backward compatibility?\n\t\t            Currently, whatever we serialize, it is just the data and implementation is defined\n\t\t\t\t\tat load time, so if a bug is fixed (or introduced) you use that.\n\t\t\t\t\tAnd it starts being ambiguous, what if I load a layer from file and\n\t\t\t\t\tcreate a new one and their implementation is inconsistent...)\n - **[definite]** Get rid of non-tensor-related stuff (like serialization) in TH, and move it to lua side\n - **[needs discussion]** OpenMP: Should it stay or go? Is Threads sufficient?\n       - **Ronan**: I really wonder about this guy, especially now that I have been using threads intensively. I am not sure that fine-grine threading is necessary.\n\t   - **koray**: I guess you mean with threading, there is no need for OpenMP, but I disagree.\n\t          Our convolution layer will use multiple threads and then if we run a ReLu over a huge state space, it would become embarrassingly slow.\n\t\t\t  We shouldn't expect everyone to run their experiments in a threading framework. It is more work than necessary sometimes.)\n - **[needs discussion]** Templated C++ in TH Core?\n                    - **Ronan**: Should I cleanup TH core? 
In the end, I am scared to move to C++, but some iterators based taking a closure could be nice (I have some of those that I could add easily).\n\t\t\t\t\t         I could move to C++ if it was only template + keeping pointers (and not C++11/14/17, because that would limit the number of users that it can reach because of the latest compilers needed etc.).\n - **[definite]** Migrate to a single, better/modern testing support\n              - **koray**: like some aspects of Totem, but should be in core Tester\n - **[definite]** Benchmarking support in Tester\n - **[definite]** Consistent testing scripts across all core projects\n - **[definite]** 'nn' container unified interface between containers and graph\n - **[mostly definite]** Switch to batch only assumption in 'nn'. Right now, the code is unnecessarily complicated for stochastic/batch confusion, we needed extra functions like nInputDims and such.\n - **[needs discussion]** Support named arguments in the constructor for all 'nn' layers.\n - **[definite]** 'rnn' package.\n      - **Soumith**: Nicholas Leonard's seems to be a good one.\n - **[mostly definite]** argcheck for all core functions in torch. Get rid of cwrap's ugliness.\n - **[definite]** improve paths to support more file system operations\n       - **Clement**: could lfs and penlight be made more standard? 
penlight is a heavy package but provides so much utility\n\t   - **Soumith**: I think penlight is lightweight and provides strong utility, definitely consider dependence.\n - **[definite]** JIT/Lua/FFI/GC:\n   - **koray**: I think Torch should be agnostic to whatever is the backend;\n   - **clement**: yes!\n   - at this point, we need to have all core packages use the regular Lua api (almost the case)\n     - **Ronan**: agreed.\n\n- **[definite]** plan to have standalone FFI?\n  - Facebook releases their puc LUA based FFI package mostly improved by Sam Gross\n  - [needs discussion] **Ronan** improves it a bit more to use Leon's C99 parser\n                         - **Koray**: I am not opposed to Leon's C99 parser, but we should not have the QT like situation where\n\t\t\t\t\t\t       it relies mostly on Leon to maintain it.\n\t\t\t\t\t\t\t   And, still we need to have FFI since there are people and packages that rely on it now.\n- **[definite]** Lua 5.2 migration (I think it's already finished ;) ).\n- **[mostly definite]** Lua 5.3 migration\n- **[mostly definite]** Optionally replace GC by Ref-counting (existing version in luajit-rocks; but completely broken but will need to be fixed)\n- **[needs discussion]** Make OpenCL support more visible under torch/opencl (**Soumith**: Hugh Perkins will maintain it of course ;) ).\n- **[definite]** Split nn into THNN and nn. THNN would be NN package using TH as backend and nn would be the lua layer. THNN can be used as a standalone C library. Same for cunn\n- **[Definite]** CUDA typed tensor support - CudaHalfTensor CudaDoubleTensor etc.\n- **[Definite]** better plotting support\n- **[needs discussion]** UI package that doesn't suck?\n  - **Ronan**: something based on cairo?\n    - **clement**: not sure if this would have much adoption\n    - **Ronan**: yes, it is a worry. 
I started to do some fancy stuff there, it is not that hard.\n\t         However, I would need quite some time to polish it.\n\t\t\t I think having something fully customizable from lua really \n                         makes a difference (rather than something like Qt, for example). \n  - something based on a web client?\n      - **clement**: i like the idea of itorch but could never easily build it, build process is too big.\n      - **Ronan**: I cannot use something which forces me to use global variables.\n      - **koray**: I think at the end of the day, we need to have both a GUI client and a web based client.\n\t\t   My main problem with web based clients is that I can't easily create \n                   custom displays to play an animation or such.\n\t\t   It is an offline process that I need to generate a movie and then load it in.\n\t\t   This and similar things make it hard to use for me.\n\t\t   Also, I agree, I actually could not install iTorch on my laptop \n                   before cvpr tutorial somehow, it did not want to work :).\n  - **soumith**: I think we should propose a common display API that any interface can implement, \n                 that way the users don't need to change scripts across different UI backends.\n\t         Also, szym/display is a good candidate for the Web UI, ITorch is indeed a bit of a pain to install.\n\n  - Should we endorse iTorch for everyone to use? \n    - **Ronan**: I know **Soumith** likes it, but I am not a big fan. \n    -            Heavy+encourages the use of global variables. Excellent for tutorials, though.\n \t   - This ties to the first question in **Other Questions** section.\n \t   - Can we/community do pull requests on iTorch? ( **Soumith**: Yes )\n \t   - First step would be to leanify dependencies and/or install procedure (**Soumith**: agreed)\n- **[needs discussion]** How about Penlight? It has many crucial things that people use.\n   Should we endorse it, use some things from it? 
Replicate some things in penlight in torch?\n   - **clement**: upvoting this! we use it extensively.\n   - **Ronan**: I live better with less abstractions, but I can be convinced there.\n          However, I find penlight quite big.\n          There are things like the classes that I do not like as well (because of the way they chose for creating classes).\n- **[needs discussion]** how about Moses? New lean functional package that's pretty useful\n- **[definite]** A style guide\n  - Guidelines are super important:\n    - for Lua: at least impose strict camel case + 3 spaces (no tab)\n    - for C: camel case + use of underscore to represent namespace scoping + 2 spaces\n\n## Documentation + Accessibility\n\n - Tutorials: provide guidelines and basic framework/standard to write and publish tutorials?\n - Universal dataset API\n   - Dataset classes for several popular datasets\n   - high performance, thread support etc.\n   - support CPU and GPU\n - Model Zoo + Training scripts, with training scripts we can highlight Torch's strengths\n  - How do we build a super friendly model zoo? git repo of pre-trained models?\n    - Better documentation support, have a doc server\n \t- Documentation for TH/THC interface and design\n \t- Inline documentation parser\n - doc/shell integration (maybe this is still working but needs redoing?)\n\n## Versioning + Packaging\n - Package owners need to start releasing frequent versions (i.e. torch v7.0.1, 7.0.2, ...)\n - scm packages should become deprecated\n - Packages need to avoid global side effects, and return themselves as simple tables (Lua 5.2 started enforcing this on the C side)\n - Provide standard AMI instances that people can launch (already loosely done by the community). 
We can load it with many standard+optional packages and/or provide one line option to update to latest.\n\n## Build Infrastructure Requirements\n - Prepare core distro release\n - Professional Continuous build for distro and individual core projects\n - Continuous build for GPU\n \t- continuous build should include testing\n - The distro should be built and tested at every pull into any of the member projects\n - CI for Linux and OSX\n\n## Other Questions?\n - If there is a project that seems good from outside or consortium, how do we endorse/improve/modify that?\n \t- do we put some technical criteria to do that?\n \t- being able to do pull requests?\n\t- Licensing?\n \t- or maybe maintain a list of suggested packages?\n \t- when does existence of a package stop us from developing the same in core torch?\n\t- **Soumith**: I think this should largely be community driven and by popularity. Top starred or watched repos in the ecosystem would be a good start.\n \t\n"
  },
  {
    "path": "Storage.c",
    "content": "#include \"general.h\"\n\n#define torch_Storage_(NAME) TH_CONCAT_4(torch_,Real,Storage_,NAME)\n#define THFile_readRealRaw TH_CONCAT_3(THFile_read, Real, Raw)\n#define THFile_writeRealRaw TH_CONCAT_3(THFile_write, Real, Raw)\n#define torch_Storage TH_CONCAT_STRING_3(torch.,Real,Storage)\n\n#include \"generic/Storage.c\"\n#include \"THGenerateAllTypes.h\"\n\n#include \"generic/Storage.c\"\n#include \"THGenerateHalfType.h\"\n"
  },
  {
    "path": "Tensor.c",
    "content": "#include \"general.h\"\n\n#define torch_Storage_(NAME) TH_CONCAT_4(torch_,Real,Storage_,NAME)\n#define torch_Storage TH_CONCAT_STRING_3(torch.,Real,Storage)\n#define torch_Tensor_(NAME) TH_CONCAT_4(torch_,Real,Tensor_,NAME)\n#define torch_Tensor TH_CONCAT_STRING_3(torch.,Real,Tensor)\n\n#include \"generic/Tensor.c\"\n#include \"THGenerateAllTypes.h\"\n\n#include \"generic/Tensor.c\"\n#include \"THGenerateHalfType.h\"\n"
  },
  {
    "path": "Tensor.lua",
    "content": "-- additional methods for Storage\nlocal Storage = {}\n\n-- additional methods for Tensor\nlocal Tensor = {}\n\n-- types\nlocal types = {'Byte', 'Char', 'Short', 'Int', 'Long', 'Float', 'Half', 'Double'}\n\n-- Lua 5.2 compatibility\nlocal log10 = math.log10 or function(x) return math.log(x, 10) end\n\n-- tostring() functions for Tensor and Storage\nlocal function Storage__printformat(self)\n   if self:size() == 0 then\n     return \"\", nil, 0\n   end\n   local intMode = true\n   local type = torch.typename(self)\n--   if type == 'torch.FloatStorage' or type == 'torch.DoubleStorage' then\n      for i=1,self:size() do\n         if self[i] ~= math.ceil(self[i]) then\n            intMode = false\n            break\n         end\n      end\n--   end\n   local tensor = torch.DoubleTensor(torch.DoubleStorage(self:size()):copy(self), 1, self:size()):abs()\n   local expMin = tensor:min()\n   if expMin ~= 0 then\n      expMin = math.floor(log10(expMin)) + 1\n   else\n      expMin = 1\n   end\n   local expMax = tensor:max()\n   if expMax ~= 0 then\n      expMax = math.floor(log10(expMax)) + 1\n   else\n      expMax = 1\n   end\n\n   local format\n   local scale\n   local sz\n   if intMode then\n      if expMax > 9 then\n         format = \"%11.4e\"\n         sz = 11\n      else\n         format = \"%SZd\"\n         sz = expMax + 1\n      end\n   else\n      if expMax-expMin > 4 then\n         format = \"%SZ.4e\"\n         sz = 11\n         if math.abs(expMax) > 99 or math.abs(expMin) > 99 then\n            sz = sz + 1\n         end\n      else\n         if expMax > 5 or expMax < 0 then\n            format = \"%SZ.4f\"\n            sz = 7\n            scale = math.pow(10, expMax-1)\n         else\n            format = \"%SZ.4f\"\n            if expMax == 0 then\n               sz = 7\n            else\n               sz = expMax+6\n            end\n         end\n      end\n   end\n   format = string.gsub(format, 'SZ', sz)\n   if scale == 1 then\n      scale = 
nil\n   end\n   return format, scale, sz\nend\n\nfunction Storage.__tostring__(self)\n   local strt = {}\n   local format,scale = Storage__printformat(self)\n   if format:sub(2,4) == 'nan' then format = '%f' end\n   if scale then\n      table.insert(strt, string.format('%g', scale) .. ' *\\n')\n      for i = 1,self:size() do\n         table.insert(strt, string.format(format, self[i]/scale) .. '\\n')\n      end\n   else\n      for i = 1,self:size() do\n         table.insert(strt, string.format(format, self[i]) .. '\\n')\n      end\n   end\n   table.insert(strt, '[' .. torch.typename(self) .. ' of size ' .. self:size() .. ']\\n')\n   local str = table.concat(strt)\n   return str\nend\n\nfor _,type in ipairs(types) do\n   local metatable = torch.getmetatable('torch.' .. type .. 'Storage')\n   for funcname, func in pairs(Storage) do\n      rawset(metatable, funcname, func)\n   end\nend\n\nlocal function Tensor__printMatrix(self, indent)\n   local format,scale,sz = Storage__printformat(self:storage())\n   if format:sub(2,4) == 'nan' then format = '%f' end\n--   print('format = ' .. format)\n   scale = scale or 1\n   indent = indent or ''\n   local strt = {indent}\n   local nColumnPerLine = math.floor((80-#indent)/(sz+1))\n--   print('sz = ' .. sz .. ' and nColumnPerLine = ' .. nColumnPerLine)\n   local firstColumn = 1\n   local lastColumn = -1\n   while firstColumn <= self:size(2) do\n      if firstColumn + nColumnPerLine - 1 <= self:size(2) then\n         lastColumn = firstColumn + nColumnPerLine - 1\n      else\n         lastColumn = self:size(2)\n      end\n      if nColumnPerLine < self:size(2) then\n         if firstColumn ~= 1 then\n            table.insert(strt, '\\n')\n         end\n         table.insert(strt, 'Columns ' .. firstColumn .. ' to ' .. lastColumn .. '\\n' .. indent)\n      end\n      if scale ~= 1 then\n         table.insert(strt, string.format('%g', scale) .. ' *\\n ' .. 
indent)\n      end\n      for l=1,self:size(1) do\n         local row = self:select(1, l)\n         for c=firstColumn,lastColumn do\n            table.insert(strt, string.format(format, row[c]/scale))\n            if c == lastColumn then\n               table.insert(strt, '\\n')\n               if l~=self:size(1) then\n                  if scale ~= 1 then\n                     table.insert(strt, indent .. ' ')\n                  else\n                     table.insert(strt, indent)\n                  end\n               end\n            else\n               table.insert(strt, ' ')\n            end\n         end\n      end\n      firstColumn = lastColumn + 1\n   end\n   local str = table.concat(strt)\n   return str\nend\n\nlocal function Tensor__printTensor(self)\n   local counter = torch.LongStorage(self:nDimension()-2)\n   local strt = {''}\n   local finished\n   counter:fill(1)\n   counter[1] = 0\n   while true do\n      for i=1,self:nDimension()-2 do\n         counter[i] = counter[i] + 1\n         if counter[i] > self:size(i) then\n            if i == self:nDimension()-2 then\n               finished = true\n               break\n            end\n            counter[i] = 1\n         else\n            break\n         end\n      end\n      if finished then\n         break\n      end\n--      print(counter)\n      if #strt > 1 then\n         table.insert(strt, '\\n')\n      end\n      table.insert(strt, '(')\n      local tensor = self\n      for i=1,self:nDimension()-2 do\n         tensor = tensor:select(1, counter[i])\n         table.insert(strt, counter[i] .. ',')\n      end\n      table.insert(strt, '.,.) = \\n')\n      table.insert(strt, Tensor__printMatrix(tensor, ' '))\n   end\n   return table.concat(strt)\nend\n\nfunction Tensor.__tostring__(self)\n   local strt = {''}\n   if self:nDimension() == 0 then\n      table.insert(strt, '[' .. torch.typename(self) .. 
' with no dimension]\\n')\n   else\n      local tensor = torch.DoubleTensor():resize(self:size()):copy(self)\n      if tensor:nDimension() == 1 then\n         local format,scale,sz = Storage__printformat(tensor:storage())\n         if format:sub(2,4) == 'nan' then format = '%f' end\n         if scale then\n            table.insert(strt, string.format('%g', scale) .. ' *\\n')\n            for i = 1,tensor:size(1) do\n               table.insert(strt, string.format(format, tensor[i]/scale) .. '\\n')\n            end\n         else\n            for i = 1,tensor:size(1) do\n               table.insert(strt, string.format(format, tensor[i]) .. '\\n')\n            end\n         end\n         table.insert(strt, '[' .. torch.typename(self) .. ' of size ' .. tensor:size(1) .. ']\\n')\n      elseif tensor:nDimension() == 2 then\n         table.insert(strt, Tensor__printMatrix(tensor))\n         table.insert(strt, '[' .. torch.typename(self) .. ' of size ' .. tensor:size(1) .. 'x' .. tensor:size(2) .. ']\\n')\n      else\n         table.insert(strt, Tensor__printTensor(tensor))\n         table.insert(strt, '[' .. torch.typename(self) .. 
' of size ')\n         for i=1,tensor:nDimension() do\n            table.insert(strt, tensor:size(i))\n            if i ~= tensor:nDimension() then\n               table.insert(strt, 'x')\n            end\n         end\n         table.insert(strt, ']\\n')\n      end\n   end\n   return table.concat(strt)\nend\n\nfunction Tensor.type(self,type)\n   local current = torch.typename(self)\n   if not type then return current end\n   if type ~= current then\n      local new = torch.getmetatable(type).new()\n      if self:nElement() > 0 then\n         new:resize(self:size()):copy(self)\n      end\n      return new\n   else\n      return self\n   end\nend\n\nfunction Tensor.typeAs(self,tensor)\n   return self:type(tensor:type())\nend\n\nfunction Tensor.byte(self)\n   return self:type('torch.ByteTensor')\nend\n\nfunction Tensor.char(self)\n   return self:type('torch.CharTensor')\nend\n\nfunction Tensor.short(self)\n   return self:type('torch.ShortTensor')\nend\n\nfunction Tensor.int(self)\n   return self:type('torch.IntTensor')\nend\n\nfunction Tensor.long(self)\n   return self:type('torch.LongTensor')\nend\n\nfunction Tensor.float(self)\n   return self:type('torch.FloatTensor')\nend\n\nfunction Tensor.double(self)\n   return self:type('torch.DoubleTensor')\nend\n\nfunction Tensor.half(self)\n   return self:type('torch.HalfTensor')\nend\n\nfunction Tensor.real(self)\n   return self:type(torch.getdefaulttensortype())\nend\n\nfunction Tensor.expand(result,tensor,...)\n   -- get sizes\n   local sizes = {...}\n\n   local t = torch.type(tensor)\n   if (t == 'number' or t == 'torch.LongStorage') then\n      table.insert(sizes,1,tensor)\n      tensor = result\n      result = tensor.new()\n   end\n\n   -- check type\n   local size\n   if torch.type(sizes[1])=='torch.LongStorage' then\n      size = sizes[1]\n   else\n      size = torch.LongStorage(#sizes)\n      for i,s in ipairs(sizes) do\n         size[i] = s\n      end\n   end\n\n   -- get dimensions\n   local tensor_dim = 
tensor:dim()\n   local tensor_stride = tensor:stride()\n   local tensor_size = tensor:size()\n\n   -- check nb of dimensions\n   if #size ~= tensor:dim() then\n      error('the number of dimensions provided must equal tensor:dim()')\n   end\n\n   -- create a new geometry for tensor:\n   for i = 1,tensor_dim do\n      if tensor_size[i] == 1 then\n         tensor_size[i] = size[i]\n         tensor_stride[i] = 0\n      elseif tensor_size[i] ~= size[i] then\n         error('incorrect size: only supporting singleton expansion (size=1)')\n      end\n   end\n\n   -- create new view, with singleton expansion:\n   result:set(tensor:storage(), tensor:storageOffset(),\n                         tensor_size, tensor_stride)\n   return result\nend\ntorch.expand = Tensor.expand\n\nfunction Tensor.expandAs(result,tensor,template)\n   if template then\n      return result:expand(tensor,template:size())\n   end\n   return result:expand(tensor:size())\nend\ntorch.expandAs = Tensor.expandAs\n\nfunction Tensor.repeatTensor(result,tensor,...)\n   -- get sizes\n   local sizes = {...}\n\n   local t = torch.type(tensor)\n   if (t == 'number' or t == 'torch.LongStorage') then\n      table.insert(sizes,1,tensor)\n      tensor = result\n      result = tensor.new()\n   end\n   -- if not contiguous, then force the tensor to be contiguous\n   if not tensor:isContiguous() then tensor = tensor:clone() end\n\n   -- check type\n   local size\n   if torch.type(sizes[1])=='torch.LongStorage' then\n      size = sizes[1]\n   else\n      size = torch.LongStorage(#sizes)\n      for i,s in ipairs(sizes) do\n         size[i] = s\n      end\n   end\n   if size:size() < tensor:dim() then\n      error('Number of dimensions of repeat dims can not be smaller than number of dimensions of tensor')\n   end\n   local xtensor = tensor.new():set(tensor)\n   local xsize = xtensor:size():totable()\n   for i=1,size:size()-tensor:dim() do\n      table.insert(xsize,1,1)\n   end\n   size = 
torch.DoubleTensor(xsize):cmul(torch.DoubleTensor(size:totable())):long():storage()\n   xtensor:resize(torch.LongStorage(xsize))\n   result:resize(size)\n   local urtensor = result.new(result)\n   for i=1,xtensor:dim() do\n      urtensor = urtensor:unfold(i,xtensor:size(i),xtensor:size(i))\n   end\n   for i=1,urtensor:dim()-xtensor:dim() do\n      table.insert(xsize,1,1)\n   end\n   xtensor:resize(torch.LongStorage(xsize))\n   local xxtensor = xtensor:expandAs(urtensor)\n   urtensor:copy(xxtensor)\n   return result\nend\ntorch.repeatTensor = Tensor.repeatTensor\n\n--- One of the size elements can be -1,\n --- a new LongStorage is then returned.\n --- The length of the unspecified dimension\n --- is inferred from the number of remaining elements.\nlocal function specifyFully(size, nElements)\n    local nCoveredElements = 1\n    local remainingDim = nil\n    local sizes = size:totable()\n    for i = 1, #sizes do\n        local wantedDimSize = sizes[i]\n        if wantedDimSize == -1 then\n            if remainingDim then\n                error(\"Only one of torch.view dimensions can be -1.\")\n            end\n            remainingDim = i\n        else\n            nCoveredElements = nCoveredElements * wantedDimSize\n        end\n    end\n\n    if not remainingDim then\n        return size\n    end\n\n    assert(nElements % nCoveredElements == 0, \"The number of covered elements is not a multiple of all elements.\")\n    local copy = torch.LongStorage(sizes)\n    copy[remainingDim] = nElements / nCoveredElements\n    return copy\nend\n\n-- TODO : This should be implemented in TH and and wrapped.\nfunction Tensor.view(result, src, ...)\n   local size = ...\n   local view, tensor\n   local function istensor(tensor)\n      return torch.typename(tensor) and torch.typename(tensor):find('torch.*Tensor')\n   end\n   local function isstorage(storage)\n      return torch.typename(storage) and torch.typename(storage) == 'torch.LongStorage'\n   end\n   if istensor(result) and 
istensor(src) and type(size) == 'number' then\n      size = torch.LongStorage{...}\n      view = result\n      tensor = src\n   elseif istensor(result) and istensor(src) and isstorage(size) then\n      size = size\n      view = result\n      tensor = src\n   elseif istensor(result) and isstorage(src) and size == nil then\n      size = src\n      tensor = result\n      view = tensor.new()\n   elseif istensor(result) and type(src) == 'number' then\n      size = {...}\n      table.insert(size,1,src)\n      size = torch.LongStorage(size)\n      tensor = result\n      view = tensor.new()\n   else\n      local t1 = 'torch.Tensor, torch.Tensor, number [, number ]*'\n      local t2 = 'torch.Tensor, torch.Tensor, torch.LongStorage'\n      local t3 = 'torch.Tensor, torch.LongStorage'\n      local t4 = 'torch.Tensor, number [, number ]*'\n      error(string.format('torch.view, expected (%s) or\\n (%s) or\\n (%s)\\n or (%s)', t1, t2, t3, t4))\n   end\n   local origNElement = tensor:nElement()\n   size = specifyFully(size, origNElement)\n\n   assert(tensor:isContiguous(), \"expecting a contiguous tensor\")\n   view:set(tensor:storage(), tensor:storageOffset(), size)\n   if view:nElement() ~= origNElement then\n      local inputSize = table.concat(tensor:size():totable(), \"x\")\n      local outputSize = table.concat(size:totable(), \"x\")\n      error(string.format(\"Wrong size for view. Input size: %s. 
Output size: %s\",\n      inputSize, outputSize))\n   end\n   return view\nend\ntorch.view = Tensor.view\n\nfunction Tensor.viewAs(result, src, template)\n   if template and torch.typename(template) then\n      return result:view(src, template:size())\n   elseif template == nil then\n      template = src\n      src = result\n      result = src.new()\n      return result:view(src, template:size())\n   else\n      local t1 = 'torch.Tensor, torch.Tensor, torch.LongStorage'\n      local t2 = 'torch.Tensor, torch.LongStorage'\n      error(string.format('expecting (%s) or (%s)', t1, t2))\n   end\nend\ntorch.viewAs = Tensor.viewAs\n\nfunction Tensor.split(result, tensor, splitSize, dim)\n   if torch.type(result) ~= 'table' then\n      dim = splitSize\n      splitSize = tensor\n      tensor = result\n      result = {}\n   else\n      -- empty existing result table before using it\n      for k,v in pairs(result) do\n         result[k] = nil\n      end\n   end\n   dim = dim or 1\n   local start = 1\n   while start <= tensor:size(dim) do\n      local size = math.min(splitSize, tensor:size(dim) - start + 1)\n      local split = tensor:narrow(dim, start, size)\n      table.insert(result, split)\n      start = start + size\n   end\n   return result\nend\ntorch.split = Tensor.split\n\nfunction Tensor.chunk(result, tensor, nChunk, dim)\n   if torch.type(result) ~= 'table' then\n      dim = nChunk\n      nChunk = tensor\n      tensor = result\n      result = {}\n   end\n   dim = dim or 1\n   local splitSize = math.ceil(tensor:size(dim)/nChunk)\n   return torch.split(result, tensor, splitSize, dim)\nend\ntorch.chunk = Tensor.chunk\n\nfunction Tensor.totable(tensor)\n  local result = {}\n  local dim = tensor:dim()\n  if dim == 1 then\n    tensor:apply(function(i) table.insert(result, i) end)\n  elseif dim > 0 then\n    for i = 1, tensor:size(1) do\n      table.insert(result, tensor[i]:totable())\n    end\n  end\n  return result\nend\ntorch.totable = Tensor.totable\n\nfunction 
Tensor.permute(tensor, ...)\n  local perm = {...}\n  local nDims = tensor:dim()\n  assert(#perm == nDims, 'Invalid permutation')\n  local j\n  for i, p in ipairs(perm) do\n    if p ~= i and p ~= 0 then\n      j = i\n      repeat\n        assert(0 < perm[j] and perm[j] <= nDims, 'Invalid permutation')\n        tensor = tensor:transpose(j, perm[j])\n        j, perm[j] = perm[j], 0\n      until perm[j] == i\n      perm[j] = j\n    end\n  end\n  return tensor\nend\ntorch.permute = Tensor.permute\n\nfor _,type in ipairs(types) do\n   local metatable = torch.getmetatable('torch.' .. type .. 'Tensor')\n   for funcname, func in pairs(Tensor) do\n      if funcname ~= 'totable' or type ~='Half' then\n         rawset(metatable, funcname, func)\n      else\n         local function Tensor__totable(self)\n            local host_tensor = self:float()\n            return self:float():totable()\n         end\n         rawset(torch.getmetatable('torch.HalfTensor'), 'totable', Tensor__totable)\n      end\n   end\nend\n"
  },
  {
    "path": "TensorMath.lua",
    "content": "local wrap = require 'cwrap'\n\nrequire 'torchcwrap'\n\nlocal interface = wrap.CInterface.new()\nlocal method = wrap.CInterface.new()\nlocal argtypes = wrap.CInterface.argtypes\n\nargtypes['ptrdiff_t'] = wrap.types.ptrdiff_t\n\ninterface:print([[\n#include \"TH.h\"\n#include \"THMath.h\"\n#include \"luaT.h\"\n#include \"utils.h\"\n]])\n\n-- specific to torch: we generate a 'dispatch' function\n-- first we create a helper function\n-- note that it let the \"torch\" table on the stack\ninterface:print([[\nstatic const void* torch_istensortype(lua_State *L, const char *tname)\n{\n  if(!tname)\n    return NULL;\n\n  if(!luaT_pushmetatable(L, tname))\n    return NULL;\n\n  lua_pushstring(L, \"torch\");\n  lua_rawget(L, -2);\n  if(lua_istable(L, -1))\n    return tname;\n  else\n  {\n    lua_pop(L, 2);\n    return NULL;\n  }\n\n  return NULL;\n}\n]])\n\ninterface:print([[\nstatic int torch_isnonemptytable(lua_State *L, int idx)\n{\n  int empty;\n  if (!lua_istable(L, idx)) return 0;\n\n  lua_rawgeti(L, idx, 1);\n  empty = lua_isnil(L, -1);\n  lua_pop(L, 1);\n  return !empty;\n}\n]])\n\n\ninterface:print([[\nstatic const void* torch_istensorarray(lua_State *L, int idx)\n{\n  const char* tname;\n  int tensor_idx;\n  if (!torch_isnonemptytable(L, idx)) return 0;\n\n  lua_checkstack(L, 3);\n  lua_rawgeti(L, idx, 1);\n  tensor_idx = lua_gettop(L);\n  tname = (torch_istensortype(L, luaT_typename(L, -1)));\n  lua_remove(L, tensor_idx);\n  return tname;\n}\n]])\n\ninterface.dispatchregistry = {}\nfunction interface:wrap(name, ...)\n   -- usual stuff\n   wrap.CInterface.wrap(self, name, ...)\n\n   -- dispatch function\n   if not interface.dispatchregistry[name] then\n      interface.dispatchregistry[name] = true\n      table.insert(interface.dispatchregistry, {name=name, wrapname=string.format(\"torch_%s\", name)})\n\n      interface:print(string.gsub([[\nstatic int torch_NAME(lua_State *L)\n{\n  int narg = lua_gettop(L);\n  const void *tname;\n  if(narg >= 1 && 
(tname = torch_istensortype(L, luaT_typename(L, 1)))) /* first argument is tensor? */\n  {\n  }\n  else if(narg >= 2 && (tname = torch_istensortype(L, luaT_typename(L, 2)))) /* second? */\n  {\n  }\n  else if(narg >= 1 && (tname = torch_istensorarray(L, 1))) /* torch table argument? */\n  {\n  }\n  else if(narg >= 1 && lua_type(L, narg) == LUA_TSTRING\n\t  && (tname = torch_istensortype(L, lua_tostring(L, narg)))) /* do we have a valid tensor type string then? */\n  {\n    lua_remove(L, -2);\n  }\n  else if(!(tname = torch_istensortype(L, torch_getdefaulttensortype(L))))\n    luaL_error(L, \"internal error: the default tensor type does not seem to be an actual tensor\");\n\n  lua_pushstring(L, \"NAME\");\n  lua_rawget(L, -2);\n  if(lua_isfunction(L, -1))\n  {\n    lua_insert(L, 1);\n    lua_pop(L, 2); /* the two tables we put on the stack above */\n    lua_call(L, lua_gettop(L)-1, LUA_MULTRET);\n  }\n  else\n    return luaL_error(L, \"%s does not implement the torch.NAME() function\", tname);\n\n  return lua_gettop(L);\n}\n]], 'NAME', name))\n  end\nend\n\nfunction interface:dispatchregister(name)\n   local txt = self.txt\n   table.insert(txt, string.format('static const struct luaL_Reg %s [] = {', name))\n   for _,reg in ipairs(self.dispatchregistry) do\n      table.insert(txt, string.format('{\"%s\", %s},', reg.name, reg.wrapname))\n   end\n   table.insert(txt, '{NULL, NULL}')\n   table.insert(txt, '};')\n   table.insert(txt, '')\n   self.dispatchregistry = {}\nend\n\ninterface:print('/* WARNING: autogenerated file */')\ninterface:print('')\n\nlocal function wrap(...)\n   local args = {...}\n\n   -- interface\n   interface:wrap(...)\n\n   -- method: we override things possibly in method table field\n   for _,x in ipairs(args) do\n      if type(x) == 'table' then -- ok, now we have a list of args\n         for _, arg in ipairs(x) do\n            if arg.method then\n               for k,v in pairs(arg.method) do\n                  if v == 'nil' then -- special 
case, we erase the field\n                     arg[k] = nil\n                  else\n                     arg[k] = v\n                  end\n               end\n            end\n         end\n      end\n   end\n   local unpack = unpack or table.unpack\n    method:wrap(unpack(args))\nend\n\nlocal reals = {ByteTensor='unsigned char',\n               CharTensor='char',\n               ShortTensor='short',\n               IntTensor='int',\n               LongTensor='long',\n               FloatTensor='float',\n               HalfTensor='half',\n               DoubleTensor='double'}\n\nlocal accreals = {ByteTensor='long',\n               CharTensor='long',\n               ShortTensor='long',\n               IntTensor='long',\n               LongTensor='long',\n               FloatTensor='double',\n               HalfTensor='float',\n               DoubleTensor='double'}\n\nfor _,Tensor in ipairs({\"ByteTensor\", \"CharTensor\",\n                        \"ShortTensor\", \"IntTensor\", \"LongTensor\",\n                        \"FloatTensor\", \"HalfTensor\", \"DoubleTensor\"}) do\n\n   local real = reals[Tensor]\n   local accreal = accreals[Tensor]\n\n   function interface.luaname2wrapname(self, name)\n      return string.format('torch_%s_%s', Tensor, name)\n   end\n\n   function method.luaname2wrapname(self, name)\n      return string.format('m_torch_%s_%s', Tensor, name)\n   end\n\n   local function cname(name)\n      return string.format('TH%s_%s', Tensor, name)\n   end\n\n   local function lastdim(argn)\n      return function(arg)\n                return string.format(\"TH%s_nDimension(%s)\", Tensor, arg.args[argn]:carg())\n             end\n   end\n\n   local function lastdimarray(argn)\n      return function(arg)\n                return string.format(\"TH%s_nDimension(arg%d_data[0])\", Tensor, arg.args[argn].i)\n             end\n   end\n\n   if Tensor ~= 'HalfTensor' then\n   wrap(\"zero\",\n        cname(\"zero\"),\n        {{name=Tensor, returned=true}})\n\n   
wrap(\"fill\",\n        cname(\"fill\"),\n        {{name=Tensor, returned=true},\n         {name=real}})\n\n   wrap(\"zeros\",\n        cname(\"zeros\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=\"LongArg\"}})\n\n   wrap(\"ones\",\n        cname(\"ones\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=\"LongArg\"}})\n\n   wrap(\"reshape\",\n        cname(\"reshape\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=Tensor},\n         {name=\"LongArg\"}})\n\n   wrap(\"gather\",\n        cname(\"gather\"),\n        {{name=Tensor, default=true, returned=true,\n          init=function(arg)\n                  return table.concat(\n                     {\n                        arg.__metatable.init(arg),\n                        string.format(\"THLongStorage* %s_size = THLongTensor_newSizeOf(%s);\", arg:carg(), arg.args[4]:carg()),\n                        string.format(\"TH%s_resize(%s, %s_size, NULL);\", Tensor, arg:carg(), arg:carg()),\n                        string.format(\"THLongStorage_free(%s_size);\", arg:carg())\n                     }, '\\n')\n               end\n         },\n         {name=Tensor},\n         {name=\"index\"},\n         {name=\"IndexTensor\", noreadadd=true}})\n\n   wrap(\"scatter\",\n        cname(\"scatter\"),\n        {{name=Tensor, returned=true},\n         {name=\"index\"},\n         {name=\"IndexTensor\", noreadadd=true},\n         {name=Tensor}},\n        cname(\"scatterFill\"),\n        {{name=Tensor, returned=true},\n         {name=\"index\"},\n         {name=\"IndexTensor\", noreadadd=true},\n         {name=real}})\n\n   wrap(\"dot\",\n        cname(\"dot\"),\n        {{name=Tensor},\n         {name=Tensor},\n         {name=accreal, creturned=true}})\n\n   wrap(\"equal\",\n        cname(\"equal\"),\n        {{name=Tensor},\n         {name=Tensor},\n         {name=\"boolean\", creturned=true}})\n\n   
wrap(\"add\",\n        cname(\"add\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=real}},\n        cname(\"cadd\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=real, default=1},\n         {name=Tensor}})\n\n   wrap(\"csub\",\n     cname(\"sub\"),\n     {{name=Tensor, default=true, returned=true, method={default='nil'}},\n       {name=Tensor, method={default=1}},\n       {name=real}},\n     cname(\"csub\"),\n     {{name=Tensor, default=true, returned=true, method={default='nil'}},\n       {name=Tensor, method={default=1}},\n       {name=real, default=1},\n       {name=Tensor}})\n\n   wrap(\"mul\",\n        cname(\"mul\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=real}})\n\n   wrap(\"div\",\n        cname(\"div\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=real}})\n\n   wrap(\"lshift\",\n        cname(\"lshift\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=real}})\n\n   wrap(\"rshift\",\n        cname(\"rshift\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=real}})\n\n   wrap(\"fmod\",\n        cname(\"fmod\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=real}})\n\n   wrap(\"remainder\",\n        cname(\"remainder\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=real}})\n\n   wrap(\"bitand\",\n        
cname(\"bitand\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=real}})\n\n   wrap(\"bitor\",\n        cname(\"bitor\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=real}})\n\n   wrap(\"bitxor\",\n        cname(\"bitxor\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=real}})\n\n   -- mod alias\n   wrap(\"mod\",\n        cname(\"fmod\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=real}})\n\n   wrap(\"clamp\",\n        cname(\"clamp\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=real},\n         {name=real}})\n\n\n   wrap(\"match\",\n        cname(\"match\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor},\n         {name=Tensor},\n         {name=real, default=1}\n        })\n\n   wrap(\"cmul\",\n        cname(\"cmul\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=Tensor}})\n\n   wrap(\"cpow\",\n        cname(\"cpow\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=Tensor}})\n\n   wrap(\"cdiv\",\n        cname(\"cdiv\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=Tensor}})\n\n   wrap(\"clshift\",\n        cname(\"clshift\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         
{name=Tensor}})\n\n   wrap(\"crshift\",\n        cname(\"crshift\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=Tensor}})\n\n   wrap(\"cfmod\",\n        cname(\"cfmod\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=Tensor}})\n\n   wrap(\"cremainder\",\n        cname(\"cremainder\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=Tensor}})\n\n   wrap(\"cbitand\",\n        cname(\"cbitand\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=Tensor}})\n\n   wrap(\"cbitor\",\n        cname(\"cbitor\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=Tensor}})\n\n   wrap(\"cbitxor\",\n        cname(\"cbitxor\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=Tensor}})\n\n   -- cmod alias\n   wrap(\"cmod\",\n        cname(\"cfmod\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=Tensor}})\n\n   wrap(\"addcmul\",\n        cname(\"addcmul\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=real, default=1},\n         {name=Tensor},\n         {name=Tensor}})\n\n   wrap(\"addcdiv\",\n        cname(\"addcdiv\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}},\n         {name=real, default=1},\n         {name=Tensor},\n         {name=Tensor}})\n\n   wrap(\"mv\",\n        
cname(\"addmv\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'},\n          init=function(arg)\n                  return table.concat(\n                     {\n                        arg.__metatable.init(arg),\n                        string.format(\"TH%s_resize1d(%s, %s->size[0]);\", Tensor, arg:carg(), arg.args[5]:carg())\n                     }, '\\n')\n               end,\n          precall=function(arg)\n                  return table.concat(\n                     {\n                        string.format(\"TH%s_zero(%s);\", Tensor, arg:carg()),\n                        arg.__metatable.precall(arg)\n                     }, '\\n')\n               end,\n       },\n         {name=real, default=0, invisible=true},\n         {name=Tensor, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=Tensor, dim=2},\n         {name=Tensor, dim=1}}\n     )\n\n   wrap(\"mm\",\n        cname(\"addmm\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'},\n          init=function(arg)\n                  return table.concat(\n                     {\n                        arg.__metatable.init(arg),\n                        string.format(\"TH%s_resize2d(%s, %s->size[0], %s->size[1]);\", Tensor, arg:carg(), arg.args[5]:carg(), arg.args[6]:carg())\n                     }, '\\n')\n               end,\n          precall=function(arg)\n                  return table.concat(\n                     {\n                        string.format(\"TH%s_zero(%s);\", Tensor, arg:carg()),\n                        arg.__metatable.precall(arg)\n                     }, '\\n')\n               end,\n       },\n         {name=real, default=0, invisible=true},\n         {name=Tensor, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=Tensor, dim=2},\n         {name=Tensor, dim=2}}\n     )\n\n   wrap(\"bmm\",\n        cname(\"baddbmm\"),\n        {{name=Tensor, 
default=true, returned=true, method={default='nil'},\n          init=function(arg)\n                  return table.concat(\n                     {\n                        arg.__metatable.init(arg),\n                        string.format(\"TH%s_resize3d(%s, %s->size[0], %s->size[1], %s->size[2]);\",\n                                      Tensor, arg:carg(), arg.args[5]:carg(), arg.args[5]:carg(), arg.args[6]:carg())\n                     }, '\\n')\n               end,\n          precall=function(arg)\n                  return table.concat(\n                     {\n                        string.format(\"TH%s_zero(%s);\", Tensor, arg:carg()),\n                        arg.__metatable.precall(arg)\n                     }, '\\n')\n               end,\n       },\n         {name=real, default=0, invisible=true},\n         {name=Tensor, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=Tensor, dim=3},\n         {name=Tensor, dim=3}}\n     )\n\n   wrap(\"ger\",\n        cname(\"addr\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'},\n          init=function(arg)\n                  return table.concat(\n                     {\n                        arg.__metatable.init(arg),\n                        string.format(\"TH%s_resize2d(%s, %s->size[0], %s->size[0]);\", Tensor, arg:carg(), arg.args[5]:carg(), arg.args[6]:carg())\n                     }, '\\n')\n               end,\n          precall=function(arg)\n                     return table.concat(\n                        {\n                           string.format(\"TH%s_zero(%s);\", Tensor, arg:carg()),\n                           arg.__metatable.precall(arg)\n                        }, '\\n')\n                  end\n       },\n        {name=real, default=1, invisible=true},\n        {name=Tensor, default=1, invisible=true},\n        {name=real, default=1, invisible=true},\n        {name=Tensor, dim=1},\n        {name=Tensor, dim=1}}\n     )\n\n   
for _,f in ipairs({\n                        {name=\"addmv\",   dim1=1, dim2=2, dim3=1},\n                        {name=\"addmm\",   dim1=2, dim2=2, dim3=2},\n                        {name=\"addr\",    dim1=2, dim2=1, dim3=1},\n                        {name=\"addbmm\",  dim1=2, dim2=3, dim3=3},\n                        {name=\"baddbmm\", dim1=3, dim2=3, dim3=3},\n                     }\n                  ) do\n\n      interface:wrap(f.name,\n                     cname(f.name),\n                     {{name=Tensor, default=true, returned=true},\n                      {name=real, default=1},\n                      {name=Tensor, dim=f.dim1},\n                      {name=real, default=1},\n                      {name=Tensor, dim=f.dim2},\n                      {name=Tensor, dim=f.dim3}})\n\n      -- there is an ambiguity here, hence the more complicated setup\n      method:wrap(f.name,\n                  cname(f.name),\n                  {{name=Tensor, returned=true, dim=f.dim1},\n                   {name=real, default=1, invisible=true},\n                   {name=Tensor, default=1, dim=f.dim1},\n                   {name=real, default=1},\n                   {name=Tensor, dim=f.dim2},\n                   {name=Tensor, dim=f.dim3}},\n                  cname(f.name),\n                  {{name=Tensor, returned=true, dim=f.dim1},\n                   {name=real},\n                   {name=Tensor, default=1, dim=f.dim1},\n                   {name=real},\n                   {name=Tensor, dim=f.dim2},\n                   {name=Tensor, dim=f.dim3}})\n   end\n\n   wrap(\"numel\",\n        cname(\"numel\"),\n        {{name=Tensor},\n         {name=\"ptrdiff_t\", creturned=true}})\n\n   for _,name in ipairs({\"cumsum\", \"cumprod\"}) do\n      wrap(name,\n           cname(name),\n           {{name=Tensor, default=true, returned=true},\n            {name=Tensor},\n            {name=\"index\", default=1}})\n   end\n\n   wrap(\"sum\",\n        cname(\"sumall\"),\n        
{{name=Tensor},\n         {name=accreal, creturned=true}},\n        cname(\"sum\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=Tensor},\n         {name=\"index\"},\n         {name=\"boolean\", default=true, invisible=true}})\n\n   wrap(\"prod\",\n        cname(\"prodall\"),\n        {{name=Tensor},\n         {name=accreal, creturned=true}},\n        cname(\"prod\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=Tensor},\n         {name=\"index\"},\n         {name=\"boolean\", default=true, invisible=true}})\n\n   for _,name in ipairs({\"min\", \"max\"}) do\n      wrap(name,\n           cname(name .. \"all\"),\n           {{name=Tensor},\n            {name=real, creturned=true}},\n           cname(name),\n           {{name=Tensor, default=true, returned=true},\n            {name=\"IndexTensor\", default=true, returned=true, noreadadd=true},\n            {name=Tensor},\n            {name=\"index\"},\n            {name=\"boolean\", default=true, invisible=true}})\n   end\n\n   for _,name in ipairs({\"cmin\", \"cmax\"}) do\n      wrap(name,\n           cname(name),\n           {{name=Tensor, default=true, returned=true},\n            {name=Tensor, method={default=1}},\n            {name=Tensor}},\n           cname(name .. 
\"Value\"),\n           {{name=Tensor, default=true, returned=true},\n            {name=Tensor, method={default=1}},\n            {name=real}})\n   end\n\n   wrap(\"trace\",\n        cname(\"trace\"),\n        {{name=Tensor},\n         {name=accreal, creturned=true}})\n\n   wrap(\"cross\",\n        cname(\"cross\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=Tensor},\n         {name=Tensor},\n         {name=\"index\", default=0}})\n\n   wrap(\"diag\",\n        cname(\"diag\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=Tensor},\n         {name=\"long\", default=0}})\n\n   wrap(\"eye\",\n        cname(\"eye\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=\"long\"},\n         {name=\"long\", default=0}})\n\n   wrap(\"range\",\n        cname(\"range\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=accreal},\n         {name=accreal},\n         {name=accreal, default=1}})\n\n   wrap(\"randperm\",\n        cname(\"randperm\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'},\n          postcall=function(arg)\n                      return table.concat(\n                         {\n                            arg.__metatable.postcall(arg),\n                            string.format(\"TH%s_add(%s, %s, 1);\", Tensor, arg:carg(), arg:carg())\n                         }, '\\n')\n                   end},\n         {name=\"Generator\", default=true},\n         {name=\"long\"}})\n\n   wrap(\"sort\",\n        cname(\"sort\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=\"IndexTensor\", default=true, returned=true, noreadadd=true},\n         {name=Tensor},\n         {name=\"index\", default=lastdim(3)},\n         {name=\"boolean\", default=0}})\n\nwrap(\"topk\",\n     cname(\"topk\"),\n     {{name=Tensor, default=true, returned=true},\n        {name=\"IndexTensor\", 
default=true, returned=true, noreadadd=true},\n        {name=Tensor},\n        {name=\"long\", default=1},\n        {name=\"index\", default=lastdim(3)},\n        {name=\"boolean\", default=0},\n        {name=\"boolean\", default=0}})\n\n   wrap(\"kthvalue\",\n        cname(\"kthvalue\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=\"IndexTensor\", default=true, returned=true, noreadadd=true},\n         {name=Tensor},\n         {name=\"long\"},\n         {name=\"index\", default=lastdim(3)},\n         {name=\"boolean\", default=true, invisible=true}})\n\n   wrap(\"mode\",\n       cname(\"mode\"),\n       {{name=Tensor, default=true, returned=true},\n           {name=\"IndexTensor\", default=true, returned=true, noreadadd=true},\n           {name=Tensor},\n           {name=\"index\", default=lastdim(3)},\n           {name=\"boolean\", default=true, invisible=true}})\n\n   wrap(\"median\",\n        cname(\"median\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=\"IndexTensor\", default=true, returned=true, noreadadd=true},\n         {name=Tensor},\n         {name=\"index\", default=lastdim(3)},\n         {name=\"boolean\", default=true, invisible=true}})\n\n   wrap(\"tril\",\n        cname(\"tril\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=Tensor},\n         {name=\"int\", default=0}})\n\n   wrap(\"triu\",\n        cname(\"triu\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=Tensor},\n         {name=\"int\", default=0}})\n\n   wrap(\"cat\",\n        cname(\"cat\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=Tensor},\n         {name=Tensor},\n         {name=\"index\", default=-1}},\n        cname(\"catArray\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=Tensor .. 
\"Array\"},\n         {name=\"index\", default=-1}})\n\n   if Tensor == 'ByteTensor' then -- we declare this only once\n      interface:print(\n         [[\nstatic long THRandom_random2__(THGenerator *gen, long a, long b)\n{\n  THArgCheck(b >= a, 2, \"upper bound must be larger than lower bound\");\n  return((THRandom_random(gen) % (b+1-a)) + a);\n}\n\nstatic long THRandom_random1__(THGenerator *gen, long b)\n{\n  THArgCheck(b > 0, 1, \"upper bound must be strictly positive\");\n  return(THRandom_random(gen) % b + 1);\n}\n         ]])\n   end\n\n   interface:print(string.gsub(\n                      [[\nstatic void THTensor_random2__(THTensor *self, THGenerator *gen, long a, long b)\n{\n  THArgCheck(b >= a, 2, \"upper bound must be larger than lower bound\");\n  TH_TENSOR_APPLY(real, self, *self_data = ((THRandom_random(gen) % (b+1-a)) + a);)\n}\n\nstatic void THTensor_random1__(THTensor *self, THGenerator *gen, long b)\n{\n  THArgCheck(b > 0, 1, \"upper bound must be strictly positive\");\n  TH_TENSOR_APPLY(real, self, *self_data = (THRandom_random(gen) % b + 1);)\n}\n]], 'Tensor', Tensor):gsub('real', real))\n\n   wrap('random',\n        'THRandom_random2__',\n        {{name='Generator', default=true},\n         {name='long'},\n         {name='long'},\n         {name='long', creturned=true}},\n        'THRandom_random1__',\n        {{name='Generator', default=true},\n         {name='long'},\n         {name='long', creturned=true}},\n        'THRandom_random',\n        {{name='Generator', default=true},\n         {name='long', creturned=true}},\n        cname(\"random2__\"),\n        {{name=Tensor, returned=true},\n         {name='Generator', default=true},\n         {name='long'},\n         {name='long'}},\n        cname(\"random1__\"),\n        {{name=Tensor, returned=true},\n         {name='Generator', default=true},\n         {name='long'}},\n        cname(\"random\"),\n        {{name=Tensor, returned=true},\n         {name='Generator', default=true}})\n\n   
wrap(\"geometric\",\n     \"THRandom_geometric\",\n     {{name=\"Generator\", default=true},\n      {name=\"double\"},\n      {name=\"double\", creturned=true}},\n     cname(\"geometric\"),\n     {{name=Tensor, returned=true},\n      {name=\"Generator\", default=true},\n      {name=\"double\"}})\n\n   wrap(\"bernoulli\",\n      \"THRandom_bernoulli\",\n      {{name=\"Generator\", default=true},\n       {name=\"double\", default=0.5},\n       {name=\"double\", creturned=true}},\n      cname(\"bernoulli\"),\n      {{name=Tensor, returned=true},\n       {name=\"Generator\", default=true},\n       {name=\"double\", default=0.5}},\n      cname(\"bernoulli_FloatTensor\"),\n      {{name=Tensor, returned=true},\n       {name=\"Generator\", default=true},\n       {name=\"FloatTensor\"}},\n      cname(\"bernoulli_DoubleTensor\"),\n      {{name=Tensor, returned=true},\n       {name=\"Generator\", default=true},\n       {name=\"DoubleTensor\"}})\n\n   wrap(\"squeeze\",\n        cname(\"squeeze\"),\n        {{name=Tensor, default=true, returned=true, postcall=function(arg)\n                                                                local txt = {}\n                                                                if arg.returned then\n                                                                   table.insert(txt, string.format('if(arg%d->nDimension == 1 && arg%d->size[0] == 1)', arg.i, arg.i)) -- number\n                                                                   table.insert(txt, string.format('lua_pushnumber(L, (lua_Number)(*TH%s_data(arg%d)));', Tensor, arg.i))\n                                                                end\n                                                                return table.concat(txt, '\\n')\n                                                             end},\n         {name=Tensor}},\n        cname(\"squeeze1d\"),\n        {{name=Tensor, default=true, returned=true,\n\n          postcall=\n             function(arg)\n            
    local txt = {}\n                if arg.returned then\n                   table.insert(txt, string.format('if(!hasdims && arg%d->nDimension == 1 && arg%d->size[0] == 1)', arg.i, arg.i)) -- number\n                   table.insert(txt, string.format('lua_pushnumber(L, (lua_Number)(*TH%s_data(arg%d)));}', Tensor, arg.i))\n                end\n                return table.concat(txt, '\\n')\n             end},\n\n         {name=Tensor,\n\n          precall=\n             function(arg)\n                return string.format('{int hasdims = arg%d->nDimension > 1;', arg.i)\n             end},\n\n         {name=\"index\"}})\n\n   wrap(\"sign\",\n        cname(\"sign\"),\n        {{name=Tensor, default=true, returned=true, method={default='nil'}},\n         {name=Tensor, method={default=1}}})\n\n   wrap(\"conv2\",\n        cname(\"conv2Dmul\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=real, default=0, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=Tensor, dim=2},\n         {name=Tensor, dim=2},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name='charoption', values={'V', 'F'}, default='V'},\n         {name='charoption', default=\"C\", invisible=true}},\n        cname(\"conv2Dcmul\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=real, default=0, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=Tensor, dim=3},\n         {name=Tensor, dim=3},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name='charoption', values={'V', 'F'}, default='V'},\n         {name='charoption', default=\"C\", invisible=true}},\n        cname(\"conv2Dmv\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=real, default=0, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=Tensor, dim=3},\n         
{name=Tensor, dim=4},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name='charoption', values={'V', 'F'}, default='V'},\n         {name='charoption', default=\"C\", invisible=true}}\n     )\n\n   wrap(\"xcorr2\",\n        cname(\"conv2Dmul\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=real, default=0, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=Tensor, dim=2},\n         {name=Tensor, dim=2},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name='charoption', values={'V', 'F'}, default='V'},\n         {name='charoption', default=\"X\", invisible=true}},\n        cname(\"conv2Dcmul\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=real, default=0, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=Tensor, dim=3},\n         {name=Tensor, dim=3},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name='charoption', values={'V', 'F'}, default='V'},\n         {name='charoption', default=\"X\", invisible=true}},\n        cname(\"conv2Dmv\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=real, default=0, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=Tensor, dim=3},\n         {name=Tensor, dim=4},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name='charoption', values={'V', 'F'}, default='V'},\n         {name='charoption', default=\"X\", invisible=true}}\n     )\n\n   wrap(\"conv3\",\n        cname(\"conv3Dmul\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=real, default=0, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=Tensor, dim=3},\n         {name=Tensor, dim=3},\n         
{name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name='charoption', values={'V', 'F'}, default='V'},\n         {name='charoption', default=\"C\", invisible=true}},\n        cname(\"conv3Dcmul\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=real, default=0, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=Tensor, dim=4},\n         {name=Tensor, dim=4},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name='charoption', values={'V', 'F'}, default='V'},\n         {name='charoption', default=\"C\", invisible=true}},\n        cname(\"conv3Dmv\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=real, default=0, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=Tensor, dim=4},\n         {name=Tensor, dim=5},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name='charoption', values={'V', 'F'}, default='V'},\n         {name='charoption', default=\"C\", invisible=true}}\n     )\n\n   wrap(\"xcorr3\",\n        cname(\"conv3Dmul\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=real, default=0, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=Tensor, dim=3},\n         {name=Tensor, dim=3},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name='charoption', values={'V', 'F'}, default='V'},\n         {name='charoption', default=\"X\", invisible=true}},\n        cname(\"conv3Dcmul\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=real, default=0, 
invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=Tensor, dim=4},\n         {name=Tensor, dim=4},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name='charoption', values={'V', 'F'}, default='V'},\n         {name='charoption', default=\"X\", invisible=true}},\n        cname(\"conv3Dmv\"),\n        {{name=Tensor, default=true, returned=true},\n         {name=real, default=0, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=Tensor, dim=4},\n         {name=Tensor, dim=5},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name=real, default=1, invisible=true},\n         {name='charoption', values={'V', 'F'}, default='V'},\n         {name='charoption', default=\"X\", invisible=true}}\n     )\n\n   for _,name in pairs({'lt','gt','le','ge','eq','ne'}) do\n      wrap(name,\n           cname(name .. 'Value'),\n           {{name='ByteTensor',default=true, returned=true},\n            {name=Tensor},\n            {name=real}},\n           cname(name .. 'ValueT'),\n           {{name=Tensor, returned=true},\n            {name=Tensor},\n            {name=real}},\n           cname(name .. 'Tensor'),\n           {{name='ByteTensor',default=true, returned=true},\n            {name=Tensor},\n            {name=Tensor}},\n           cname(name .. 'TensorT'),\n           {{name=Tensor, returned=true},\n            {name=Tensor},\n            {name=Tensor}})\n   end\n\n   wrap(\"nonzero\",\n        cname(\"nonzero\"),\n        {{name=\"IndexTensor\", default=true, returned=true},\n         {name=Tensor}})\n  end  -- ~= HalfTensor\n\n   if Tensor == 'ByteTensor' then\n     -- Logical accumulators only apply to ByteTensor\n      for _,name in ipairs({'all', 'any'}) do\n        wrap(name,\n             cname('logical' .. 
name),\n             {{name=Tensor},\n\t\t{name=\"boolean\", creturned=true}})\n      end\n   end\n\n   if Tensor == 'IntTensor' then\n         wrap(\"abs\",\n              cname(\"abs\"),\n              {{name=Tensor, default=true, returned=true, method={default='nil'}},\n               {name=Tensor, method={default=1}}},\n              \"abs\",\n              {{name=real},\n               {name=real, creturned=true}})\n   elseif Tensor == 'LongTensor' then\n         wrap(\"abs\",\n              cname(\"abs\"),\n              {{name=Tensor, default=true, returned=true, method={default='nil'}},\n               {name=Tensor, method={default=1}}},\n              \"labs\",\n              {{name=real},\n               {name=real, creturned=true}})\n   end\n\n   if Tensor == 'FloatTensor' or Tensor == 'DoubleTensor' then\n\n      wrap(\"mean\",\n           cname(\"meanall\"),\n           {{name=Tensor},\n            {name=accreal, creturned=true}},\n           cname(\"mean\"),\n           {{name=Tensor, default=true, returned=true},\n            {name=Tensor},\n            {name=\"index\"},\n            {name=\"boolean\", default=true, invisible=true}})\n\n      for _,name in ipairs({\"var\", \"std\"}) do\n         wrap(name,\n              cname(name .. 
\"all\"),\n              {{name=Tensor},\n               {name=\"boolean\", default=false},\n               {name=accreal, creturned=true}\n              },\n              cname(name),\n              {{name=Tensor, default=true, returned=true},\n               {name=Tensor},\n               {name=\"index\"},\n               {name=\"boolean\", default=false},\n               {name=\"boolean\", default=true, invisible=true}})\n      end\n      wrap(\"histc\",\n           cname(\"histc\"),\n           {{name=Tensor, default=true, returned=true},\n            {name=Tensor},\n            {name=\"long\",default=100},\n            {name=\"double\",default=0},\n            {name=\"double\",default=0}})\n\n      wrap(\"bhistc\",\n           cname(\"bhistc\"),\n           {{name=Tensor, default=true, returned=true},\n            {name=Tensor},\n            {name=\"long\",default=100},\n            {name=\"double\",default=0},\n            {name=\"double\",default=0}})\n\n      wrap(\"norm\",\n           cname(\"normall\"),\n           {{name=Tensor},\n            {name=real, default=2},\n            {name=accreal, creturned=true}},\n           cname(\"norm\"),\n           {{name=Tensor, default=true, returned=true},\n            {name=Tensor},\n            {name=real},\n            {name=\"index\"},\n            {name=\"boolean\", default=true, invisible=true}})\n\n      wrap(\"renorm\",\n           cname(\"renorm\"),\n           {{name=Tensor, default=true, returned=true, method={default='nil'}},\n            {name=Tensor, method={default=1}},\n            {name=real},\n            {name=\"index\"},\n            {name=real}})\n\n      wrap(\"dist\",\n           cname(\"dist\"),\n           {{name=Tensor},\n            {name=Tensor},\n            {name=real, default=2},\n            {name=accreal, creturned=true}})\n\n      wrap(\"linspace\",\n           cname(\"linspace\"),\n           {{name=Tensor, default=true, returned=true, method={default='nil'}},\n            
{name=real},\n            {name=real},\n            {name=\"long\", default=100}})\n\n      wrap(\"logspace\",\n           cname(\"logspace\"),\n           {{name=Tensor, default=true, returned=true, method={default='nil'}},\n            {name=real},\n            {name=real},\n            {name=\"long\", default=100}})\n\n      for _,name in ipairs({\"log\", \"log1p\", \"exp\",\n                            \"cos\", \"acos\", \"cosh\",\n                            \"sin\", \"asin\", \"sinh\",\n                            \"tan\", \"atan\", \"tanh\",\n                            \"sqrt\", \"round\", \"ceil\",\n                            \"floor\", \"trunc\", }) do\n         wrap(name,\n              cname(name),\n              {{name=Tensor, default=true, returned=true, method={default='nil'}},\n               {name=Tensor, method={default=1}}},\n              name,\n              {{name=real},\n               {name=real, creturned=true}})\n      end\n\n      wrap(\"abs\",\n           cname(\"abs\"),\n           {{name=Tensor, default=true, returned=true, method={default='nil'}},\n            {name=Tensor, method={default=1}}},\n           \"fabs\",\n           {{name=real},\n            {name=real, creturned=true}})\n\n      wrap(\"frac\",\n           cname(\"frac\"),\n           {{name=Tensor, default=true, returned=true, method={default='nil'}},\n            {name=Tensor, method={default=1}}},\n           \"TH_frac\",\n           {{name=real},\n            {name=real, creturned=true}})\n\n      wrap(\"rsqrt\",\n           cname(\"rsqrt\"),\n           {{name=Tensor, default=true, returned=true, method={default='nil'}},\n            {name=Tensor, method={default=1}}},\n           \"TH_rsqrt\",\n           {{name=real},\n            {name=real, creturned=true}})\n\n      wrap(\"sigmoid\",\n           cname(\"sigmoid\"),\n           {{name=Tensor, default=true, returned=true, method={default='nil'}},\n            {name=Tensor, method={default=1}}},\n           
\"TH_sigmoid\",\n           {{name=real},\n            {name=real, creturned=true}})\n\n      wrap(\"neg\",\n           cname(\"neg\"),\n           {{name=Tensor, default=true, returned=true, method={default='nil'}},\n            {name=Tensor, method={default=1}}})\n\n      wrap(\"cinv\",\n           cname(\"cinv\"),\n           {{name=Tensor, default=true, returned=true, method={default='nil'}},\n            {name=Tensor, method={default=1}}})\n\n      wrap(\"lerp\",\n           cname(\"lerp\"),\n           {{name=Tensor, default=true, returned=true, method={default='nil'}},\n            {name=Tensor, method={default=1}},\n            {name=Tensor},\n            {name=real}},\n           \"TH_lerp\",\n           {{name=real},\n            {name=real},\n            {name=real},\n            {name=real, creturned=true}})\n\n      wrap(\"atan2\",\n           cname(\"atan2\"),\n           {{name=Tensor, default=true, returned=true, method={default='nil'}},\n            {name=Tensor, method={default=1}},\n            {name=Tensor}},\n           \"atan2\",\n           {{name=real},\n            {name=real},\n            {name=real, creturned=true}})\n\n      wrap(\"pow\",\n           cname(\"pow\"),\n           {{name=Tensor, default=true, returned=true, method={default='nil'}},\n            {name=Tensor, method={default=1}},\n            {name=real}},\n           cname(\"tpow\"),\n           {{name=Tensor, default=true, returned=true, method={default='nil'}},\n            {name=real},\n            {name=Tensor, method={default=1}}},\n           \"pow\",\n           {{name=real},\n            {name=real},\n            {name=real, creturned=true}})\n\n      wrap(\"rand\",\n           cname(\"rand\"),\n           {{name=Tensor, default=true, returned=true, method={default='nil'}},\n            {name='Generator', default=true},\n            {name=\"LongArg\"}})\n\n      wrap(\"randn\",\n           cname(\"randn\"),\n           {{name=Tensor, default=true, returned=true, 
method={default='nil'}},\n            {name='Generator', default=true},\n            {name=\"LongArg\"}})\n\n      wrap(\"multinomial\",\n           cname(\"multinomial\"),\n           {{name=\"IndexTensor\", default=true, returned=true, method={default='nil'}},\n              {name='Generator', default=true},\n              {name=Tensor},\n              {name=\"int\"},\n              {name=\"boolean\", default=false}})\n      \n      wrap(\"multinomialAliasSetup_\",\n           cname(\"multinomialAliasSetup\"),\n           {{name=Tensor},\n              {name=\"IndexTensor\", default=true, returned=true, method={default='nil'}},\n              {name=Tensor, default=true, returned=true, method={default='nil'}}})\n      \n      wrap(\"multinomialAlias_\",\n           cname(\"multinomialAliasDraw\"),\n           {{name=\"IndexTensor\", default=true, returned=true, method={default='nil'}},\n              {name='Generator', default=true},\n              {name=\"IndexTensor\"},\n              {name=Tensor}\n              })\n      \n      for _,f in ipairs({{name='uniform', a=0, b=1},\n            {name='normal', a=0, b=1},\n            {name='cauchy', a=0, b=1},\n            {name='logNormal', a=1, b=2}}) do\n         \n         wrap(f.name,\n              string.format(\"THRandom_%s\", f.name),\n              {{name='Generator', default=true},\n               {name=\"double\", default=f.a},\n               {name=\"double\", default=f.b},\n               {name=\"double\", creturned=true}},\n              cname(f.name),\n              {{name=Tensor, returned=true},\n               {name='Generator', default=true},\n               {name=real, default=f.a},\n               {name=real, default=f.b}})\n      end\n\n      for _,f in ipairs({{name='exponential'}}) do\n\n         wrap(f.name,\n              string.format(\"THRandom_%s\", f.name),\n              {{name='Generator', default=true},\n               {name=\"double\", default=f.a},\n               {name=\"double\", 
creturned=true}},\n              cname(f.name),\n              {{name=Tensor, returned=true},\n               {name='Generator', default=true},\n               {name=real, default=f.a}})\n      end\n\n      for _,name in ipairs({\"gesv\",\"gels\"}) do\n         interface:wrap(name,\n                        cname(name),\n                        {{name=Tensor, returned=true},\n                         {name=Tensor, returned=true},\n                         {name=Tensor},\n                         {name=Tensor}},\n                        cname(name),\n                        {{name=Tensor, default=true, returned=true, invisible=true},\n                         {name=Tensor, default=true, returned=true, invisible=true},\n                         {name=Tensor},\n                         {name=Tensor}}\n                     )\n      end\n      interface:wrap(\"trtrs\",\n                     cname(\"trtrs\"),\n                     {{name=Tensor, returned=true},\n                      {name=Tensor, returned=true},\n                      {name=Tensor},\n                      {name=Tensor},\n                      {name='charoption', values={'U', 'L'}, default='U'},  -- uplo\n                      {name='charoption', values={'N', 'T'}, default='N'},  -- trans\n                      {name='charoption', values={'N', 'U'}, default='N'}}, -- diag\n                     cname(\"trtrs\"),\n                     {{name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor},\n                      {name=Tensor},\n                      {name='charoption', values={'U', 'L'}, default='U'},  -- uplo\n                      {name='charoption', values={'N', 'T'}, default='N'},  -- trans\n                      {name='charoption', values={'N', 'U'}, default='N'}}  -- diag\n                  )\n\n      interface:wrap(\"symeig\",\n                     cname(\"syev\"),\n           
          {{name=Tensor, returned=true},\n                      {name=Tensor, returned=true},\n                      {name=Tensor},\n                      {name='charoption', values={'N', 'V'}, default='N'},\n                      {name='charoption', values={'U', 'L'}, default='U'}},\n                     cname(\"syev\"),\n                     {{name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor},\n                      {name='charoption', values={'N', 'V'}, default='N'},\n                      {name='charoption', values={'U', 'L'}, default='U'}}\n                  )\n      interface:wrap(\"eig\",\n                     cname(\"geev\"),\n                     {{name=Tensor, returned=true},\n                      {name=Tensor, returned=true},\n                      {name=Tensor},\n                      {name='charoption', values={'N', 'V'}, default='N'}},\n                     cname(\"geev\"),\n                     {{name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor},\n                      {name='charoption', values={'N', 'V'}, default='N'}}\n                  )\n\n      interface:wrap(\"svd\",\n                     cname(\"gesvd\"),\n                     {{name=Tensor, returned=true},\n                      {name=Tensor, returned=true},\n                      {name=Tensor, returned=true},\n                      {name=Tensor},\n                      {name='charoption', values={'A', 'S'}, default='S'}},\n                     cname(\"gesvd\"),\n                     {{name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor, default=true, returned=true, invisible=true},\n                      
{name=Tensor},\n                      {name='charoption', values={'A', 'S'}, default='S'}}\n                  )\n      interface:wrap(\"inverse\",\n                     cname(\"getri\"),\n                     {{name=Tensor, returned=true},\n                      {name=Tensor}},\n                     cname(\"getri\"),\n                     {{name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor}}\n                  )\n      interface:wrap(\"potrf\",\n                     cname(\"potrf\"),\n                     {{name=Tensor, returned=true},\n                      {name=Tensor},\n                      {name='charoption', values={'U', 'L'}, default='U'}}, -- uplo\n                     cname(\"potrf\"),\n                     {{name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor},\n                      {name='charoption', values={'U', 'L'}, default='U'}}\n                  )\n      interface:wrap(\"potrs\",\n                     cname(\"potrs\"),\n                     {{name=Tensor, returned=true},\n                      {name=Tensor},\n                      {name=Tensor},\n                      {name='charoption', values={'U', 'L'}, default='U'}}, -- uplo\n                     cname(\"potrs\"),\n                     {{name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor},\n                      {name=Tensor},\n                      {name='charoption', values={'U', 'L'}, default='U'}}\n                  )\n      interface:wrap(\"potri\",\n                     cname(\"potri\"),\n                     {{name=Tensor, returned=true},\n                      {name=Tensor},\n                      {name='charoption', values={'U', 'L'}, default='U'}}, -- uplo\n                     cname(\"potri\"),\n                     {{name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor},\n                      
{name='charoption', values={'U', 'L'}, default='U'}} -- uplo\n                    )\n      interface:wrap(\"pstrf\",\n                     cname(\"pstrf\"),\n                     {{name=Tensor, returned=true},\n                      {name='IntTensor', returned=true},\n                      {name=Tensor},\n                      {name='charoption', values={'U', 'L'}, default='U'},  -- uplo\n                      {name=real, default=-1}},\n                     cname(\"pstrf\"),\n                     {{name=Tensor, default=true, returned=true, invisible=true},\n                      {name='IntTensor', default=true, returned=true, invisible=true},\n                      {name=Tensor},\n                      {name='charoption', values={'U', 'L'}, default='U'},  -- uplo\n                      {name=real, default=-1}}\n                  )\n      interface:wrap(\"qr\",\n                     cname(\"qr\"),\n                     {{name=Tensor, returned=true},\n                      {name=Tensor, returned=true},\n                      {name=Tensor}},\n                     cname(\"qr\"),\n                     {{name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor}}\n                  )\n      interface:wrap(\"geqrf\",\n                     cname(\"geqrf\"),\n                     {{name=Tensor, returned=true},\n                      {name=Tensor, returned=true},\n                      {name=Tensor}},\n                     cname(\"geqrf\"),\n                     {{name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor}}\n                  )\n      interface:wrap(\"orgqr\",\n                     cname(\"orgqr\"),\n                     {{name=Tensor, returned=true},\n                      {name=Tensor},\n                      
{name=Tensor}},\n                     cname(\"orgqr\"),\n                     {{name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor},\n                      {name=Tensor}}\n                  )\n      interface:wrap(\"ormqr\",\n                     cname(\"ormqr\"),\n                     {{name=Tensor, returned=true},\n                      {name=Tensor},\n                      {name=Tensor},\n                      {name=Tensor},\n                      {name='charoption', values={'L', 'R'}, default='L'},\n                      {name='charoption', values={'N', 'T'}, default='N'}},\n                     cname(\"ormqr\"),\n                     {{name=Tensor, default=true, returned=true, invisible=true},\n                      {name=Tensor},\n                      {name=Tensor},\n                      {name=Tensor},\n                      {name='charoption', values={'L', 'R'}, default='L'},\n                      {name='charoption', values={'N', 'T'}, default='N'}}\n                  )\n   end\n\n   method:register(string.format(\"m_torch_%sMath__\", Tensor))\n   interface:print(method:tostring())\n   method:clearhistory()\n   interface:register(string.format(\"torch_%sMath__\", Tensor))\n\n   interface:print(string.gsub([[\nstatic void torch_TensorMath_init(lua_State *L)\n{\n  luaT_pushmetatable(L, \"torch.Tensor\");\n\n  /* register methods */\n  luaT_setfuncs(L, m_torch_TensorMath__, 0);\n\n  /* register functions into the \"torch\" field of the tensor metaclass */\n  lua_pushstring(L, \"torch\");\n  lua_newtable(L);\n  luaT_setfuncs(L, torch_TensorMath__, 0);\n  lua_rawset(L, -3);\n  lua_pop(L, 1);\n}\n]], 'Tensor', Tensor))\nend\n\ninterface:dispatchregister(\"torch_TensorMath__\")\n\ninterface:print([[\nvoid torch_TensorMath_init(lua_State *L)\n{\n  torch_ByteTensorMath_init(L);\n  torch_CharTensorMath_init(L);\n  torch_ShortTensorMath_init(L);\n  torch_IntTensorMath_init(L);\n  torch_LongTensorMath_init(L);\n  
torch_FloatTensorMath_init(L);\n  torch_DoubleTensorMath_init(L);\n  luaT_setfuncs(L, torch_TensorMath__, 0);\n}\n]])\n\nif arg[1] then\n   interface:tofile(arg[1])\nelse\n   print(interface:tostring())\nend\n"
  },
  {
    "path": "TensorOperator.c",
    "content": "#include \"general.h\"\n\n#define torch_TensorOperator_(NAME) TH_CONCAT_4(torch_,Real,TensorOperator_,NAME)\n#define torch_Tensor_id TH_CONCAT_3(torch_,Real,Tensor_id)\n#define torch_Tensor TH_CONCAT_STRING_3(torch.,Real,Tensor)\n\n#include \"generic/TensorOperator.c\"\n#include \"THGenerateAllTypes.h\"\n"
  },
  {
    "path": "TestSuite.lua",
    "content": "function torch.TestSuite()\n   local obj = {\n      __tests = {},\n      __isTestSuite = true\n   }\n\n   local metatable = {}\n\n   function metatable:__index(key)\n      return self.__tests[key]\n   end\n\n   function metatable:__newindex(key, value)\n      if self.__tests[key] ~= nil then\n         error(\"Test \" .. tostring(key) .. \" is already defined.\")\n      end\n      if type(value) ~= \"function\" then\n         if type(value) == \"table\" then\n            error(\"Nested tables of tests are not supported\")\n         else\n            error(\"Only functions are supported as members of a TestSuite\")\n         end\n      end\n      self.__tests[key] = value\n   end\n\n   setmetatable(obj, metatable)\n\n   return obj\nend\n"
  },
  {
    "path": "Tester.lua",
    "content": "\n-- Lua 5.2 compatibility\nlocal unpack = unpack or table.unpack\n\nlocal check = {} -- helper functions, defined at the bottom of the file\n\nlocal Tester = torch.class('torch.Tester')\n\nfunction Tester:__init()\n   self.errors = {}\n   self.tests = {}\n   self.warnings = {}\n   self._warningCount = {}\n   self.disabledTests = {}\n   self._currentTestName = ''\n\n   -- To maintain backwards compatibility (at least for a short while),\n   -- disable exact dimension checking of tensors when :assertTensorEq is\n   -- called. Thus {{1}} == {1} when this flag is true.\n   --\n   -- Note that other methods that support tensor checking (such as\n   -- :assertGeneralEq) ignore this flag, since previously they didn't\n   -- exist or support tensor equality checks at all, so there is no\n   -- old code that uses these functions and relies on the behaviour.\n   --\n   -- Note also that if the dimension check fails while this flag is true, then\n   -- a warning is shown.\n   self._assertTensorEqIgnoresDims = true\nend\n\nfunction Tester:setEarlyAbort(earlyAbort)\n   self.earlyAbort = earlyAbort\nend\n\nfunction Tester:setRethrowErrors(rethrow)\n   self.rethrow = rethrow\nend\n\nfunction Tester:setSummaryOnly(summaryOnly)\n   self.summaryOnly = summaryOnly\nend\n\n-- Add a success to the test.\nfunction Tester:_success()\n   local name = self._currentTestName\n   self.assertionPass[name] = self.assertionPass[name] + 1\n   return true\nend\n\nfunction Tester:_addDebugInfo(message)\n   local ss = debug.traceback('tester', 3) or ''\n   ss = ss:match('.-\\n([^\\n]+\\n[^\\n]+)\\n[^\\n]+xpcall') or ''\n   local name = self._currentTestName\n   return (name ~= '' and name .. '\\n' or '') .. message .. '\\n' .. 
ss\nend\n\n-- Add a failure to the test.\nfunction Tester:_failure(message)\n   if self.rethrow then error(message, 2) end\n   local name = self._currentTestName\n   self.assertionFail[name] = self.assertionFail[name] + 1\n   self.errors[#self.errors + 1] = self:_addDebugInfo(message)\n   return false\nend\n\n-- Add a warning to the test\nfunction Tester:_warning(message)\n   local name = self._currentTestName\n   self._warningCount[name] = (self._warningCount[name] or 0) + 1\n   self.warnings[#self.warnings + 1] = self:_addDebugInfo(message)\nend\n\n-- Call this during a test run with `condition = true` to log a success, or with\n-- `condition = false` to log a failure (using `message`).\nfunction Tester:_assert_sub(condition, message)\n   if condition then\n      return self:_success()\n   else\n      return self:_failure(message)\n   end\nend\n\nlocal function getMessage(message, ...)\n   assert(next{...} == nil, \"Unexpected arguments passed to test function\")\n   if message then\n      assert(type(message) == 'string', 'message parameter must be a string')\n      if message ~= '' then\n         return message .. '\\n'\n      end\n   end\n   return ''\nend\n\n--[[ Historically, some test functions have accepted both a message and a\ntolerance, and some just a message (e.g., assertTableEq). Now assertTableEq\naccepts both a tolerance and a message, so allow the two arguments to be passed\nin either order to maintain backwards compatibility (and more generally,\nfor convenience). (We still document the ordering as \"tolerance, message\" for\nclarity.) This function also sanitizes them (ensures they are non-nil, etc).\n]]\nlocal function getToleranceAndMessage(defaultTolerance, ...)\n   local args = {...}\n   local message = nil\n   local tolerance = nil\n   for _, a in ipairs(args) do\n      if type(a) == 'string' then\n         if message then\n            error(\"Unexpected string argument; already have message\", a)\n         end\n         message = a .. 
'\\n'\n      elseif type(a) == 'number' then\n         if tolerance then\n            error(\"Unexpected number argument; already have tolerance\", a)\n         end\n         tolerance = a\n         assert(tolerance >= 0, \"tolerance cannot be negative\")\n      else\n         error(\"Unrecognized argument; should be a tolerance or message\", a)\n      end\n   end\n   message = message or ''\n   tolerance = tolerance or defaultTolerance\n   return tolerance, message\nend\n\nfunction Tester:assert(condition, ...)\n   local message = getMessage(...)\n   if type(condition) ~= 'boolean' then\n      self:_warning(\" :assert should only be used for boolean conditions. \"\n                    .. \"To check for non-nil variables, do this explicitly: \"\n                    .. \"Tester:assert(var ~= nil).\")\n   end\n   return self:_assert_sub(condition,\n                           string.format('%sBOOL violation condition=%s',\n                                         message, tostring(condition)))\nend\n\nfunction Tester:assertGeneralEq(got, expected, ...)\n   return self:_eqOrNeq(got, expected, false, ...)\nend\n\nfunction Tester:eq(got, expected, ...)\n   return self:assertGeneralEq(got, expected, ...)\nend\n\nfunction Tester:assertGeneralNe(got, unexpected, ...)\n   return self:_eqOrNeq(got, unexpected, true, ...)\nend\n\nfunction Tester:ne(got, unexpected, ...)\n   return self:assertGeneralNe(got, unexpected, ...)\nend\n\nfunction Tester:_eqOrNeq(got, expected, negate, ...)\n   local tolerance, message = getToleranceAndMessage(0, ...)\n   local success, subMessage = check.areEq(got, expected, tolerance, negate)\n   subMessage = subMessage or ''\n   return self:_assert_sub(success, message .. 
subMessage)\nend\n\nfunction Tester:assertlt(a, b, ...)\n   local message = getMessage(...)\n   return self:_assert_sub(a < b,\n                           string.format('%sLT failed: %s >= %s',\n                                         message, tostring(a), tostring(b)))\nend\n\nfunction Tester:assertgt(a, b, ...)\n   local message = getMessage(...)\n   return self:_assert_sub(a > b,\n                           string.format('%sGT failed: %s <= %s',\n                                         message, tostring(a), tostring(b)))\nend\n\nfunction Tester:assertle(a, b, ...)\n   local message = getMessage(...)\n   return self:_assert_sub(a <= b,\n                           string.format('%sLE failed: %s > %s',\n                                         message, tostring(a), tostring(b)))\nend\n\nfunction Tester:assertge(a, b, ...)\n   local message = getMessage(...)\n   return self:_assert_sub(a >= b,\n                           string.format('%sGE failed: %s < %s',\n                                         message, tostring(a), tostring(b)))\nend\n\nfunction Tester:assertalmosteq(a, b, ...)\n   local tolerance, message = getToleranceAndMessage(1e-16, ...)\n   local diff = math.abs(a - b)\n   return self:_assert_sub(\n         diff <= tolerance,\n         string.format(\n               '%sALMOST_EQ failed: %s ~= %s with tolerance=%s',\n               message, tostring(a), tostring(b), tostring(tolerance)))\nend\n\nfunction Tester:asserteq(a, b, ...)\n   local message = getMessage(...)\n   return self:_assert_sub(a == b,\n                           string.format('%sEQ failed: %s ~= %s',\n                                         message, tostring(a), tostring(b)))\nend\n\nfunction Tester:assertne(a, b, ...)\n   local message = getMessage(...)\n   if type(a) == type(b) and type(a) == 'table' or type(a) == 'userdata' then\n      self:_warning(\" :assertne should only be used to compare basic lua \"\n                    .. \"objects (numbers, booleans, etc). 
Consider using \"\n                    .. \"either :assertGeneralNe or :assert(a ~= b).\")\n   end\n   return self:_assert_sub(a ~= b,\n                           string.format('%sNE failed: %s == %s',\n                                         message, tostring(a), tostring(b)))\nend\n\nfunction Tester:assertTensorEq(ta, tb, ...)\n  return self:_assertTensorEqOrNeq(ta, tb, false, ...)\nend\n\nfunction Tester:assertTensorNe(ta, tb, ...)\n  return self:_assertTensorEqOrNeq(ta, tb, true, ...)\nend\n\nfunction Tester:_assertTensorEqOrNeq(ta, tb, negate, ...)\n   assert(torch.isTensor(ta), \"First argument should be a Tensor\")\n   assert(torch.isTensor(tb), \"Second argument should be a Tensor\")\n\n   local tolerance, message = getToleranceAndMessage(0, ...)\n   local success, subMessage =\n         check.areTensorsEq(ta, tb, tolerance, negate,\n                            self._assertTensorEqIgnoresDims)\n   subMessage = subMessage or ''\n\n   if self._assertTensorEqIgnoresDims and (not negate) and success\n         and not ta:isSameSizeAs(tb) then\n     self:_warning(\"Tensors have the same content but different dimensions. \"\n                   .. \"For backwards compatibility, they are considered equal, \"\n                   .. \"but this may change in the future. Consider using :eq \"\n                   .. \"to check for equality instead.\")\n   end\n\n   return self:_assert_sub(success, message .. 
subMessage)\nend\n\nfunction Tester:assertTableEq(ta, tb, ...)\n   return self:_assertTableEqOrNeq(ta, tb, false, ...)\nend\n\nfunction Tester:assertTableNe(ta, tb, ...)\n   return self:_assertTableEqOrNeq(ta, tb, true, ...)\nend\n\nfunction Tester:_assertTableEqOrNeq(ta, tb, negate, ...)\n   assert(type(ta) == 'table', \"First argument should be a Table\")\n   assert(type(tb) == 'table', \"Second argument should be a Table\")\n   return self:_eqOrNeq(ta, tb, negate, ...)\nend\n\nfunction Tester:assertError(f, ...)\n   return self:assertErrorObj(f, function() return true end, ...)\nend\n\nfunction Tester:assertNoError(f, ...)\n   local message = getMessage(...)\n   local status, err = pcall(f)\n   return self:_assert_sub(status,\n                           string.format('%sERROR violation: err=%s', message,\n                                         tostring(err)))\nend\n\nfunction Tester:assertErrorMsg(f, errmsg, ...)\n   return self:assertErrorObj(f, function(err) return err == errmsg end, ...)\nend\n\nfunction Tester:assertErrorPattern(f, errPattern, ...)\n   local function errcomp(err)\n      return string.find(err, errPattern) ~= nil\n   end\n   return self:assertErrorObj(f, errcomp, ...)\nend\n\nfunction Tester:assertErrorObj(f, errcomp, ...)\n   local message = getMessage(...)\n   local status, err = pcall(f)\n   return self:_assert_sub((not status) and errcomp(err),\n                           string.format('%sERROR violation: err=%s', message,\n                                         tostring(err)))\nend\n\nfunction Tester:add(f, name)\n   if type(f) == \"table\" then\n      assert(name == nil, \"Name parameter is forbidden for a table of tests, \"\n                          .. 
\"since its use is ambiguous\")\n      if f.__isTestSuite then\n         f = f.__tests\n      else\n         self:_warning(\"Should use TestSuite rather than plain lua table\")\n      end\n      for i, v in pairs(f) do\n         -- We forbid nested tests because the \"expected\" behaviour when a named\n         -- test is run in the case that the named test is in fact a table of\n         -- tests is not supported. Similar issue with _setUp and _tearDown\n         -- functions inside nested tests.\n         assert(type(v) ~= 'table', \"Nested sets of tests are not supported\")\n         self:add(v, i)\n      end\n      return self\n   end\n\n   assert(type(f) == 'function',\n          \"Only tables of functions and functions supported\")\n\n   if name == '_setUp' then\n      assert(not self._setUp, \"Only one set-up function allowed\")\n      self._setUp = f\n   elseif name == '_tearDown' then\n      assert(not self._tearDown, \"Only one tear-down function allowed\")\n      self._tearDown = f\n   else\n      name = name or 'unknown'\n      if self.tests[name] ~= nil then\n         error('Test with name ' .. name .. ' already exists!')\n      end\n      self.tests[name] = f\n   end\n   return self\nend\n\nfunction Tester:disable(testNames)\n   if type(testNames) == 'string' then\n      testNames = {testNames}\n   end\n   assert(type(testNames) == 'table', \"Expecting name or list for disable\")\n   for _, name in ipairs(testNames) do\n      assert(self.tests[name], \"Unrecognized test '\" .. name .. 
\"'\")\n      self.disabledTests[name] = true\n   end\n   return self\nend\n\nfunction Tester:run(testNames)\n   local tests = self:_getTests(testNames)\n   self.assertionPass = {}\n   self.assertionFail = {}\n   self.haveWarning = {}\n   self.testError = {}\n   for name in pairs(tests) do\n      self.assertionPass[name] = 0\n      self.assertionFail[name] = 0\n      self.testError[name] = 0\n      self._warningCount[name] = 0\n   end\n   self:_run(tests)\n   self:_report(tests)\n\n   -- Throws an error on test failure/error, so that test script returns\n   -- with nonzero return value.\n   for name in pairs(tests) do\n      assert(self.assertionFail[name] == 0,\n             'An error was found while running tests!')\n      assert(self.testError[name] == 0,\n             'An error was found while running tests!')\n   end\n\n   return 0\nend\n\nlocal function pluralize(num, str)\n   local stem = num .. ' ' .. str\n   if num == 1 then\n      return stem\n   else\n      return stem .. 's'\n   end\nend\n\nlocal NCOLS = 80\nlocal coloured\nlocal enable_colors, c = pcall(require, 'sys.colors')\nif arg and enable_colors then  -- have we been invoked from the commandline?\n   coloured = function(str, colour)\n      return colour .. str .. c.none\n   end\nelse\n   c = {}\n   coloured = function(str)\n      return str\n   end\nend\n\nfunction Tester:_run(tests)\n   local ntests = 0\n   for _ in pairs(tests) do\n      ntests = ntests + 1\n   end\n\n   local ntestsAsString = string.format('%u', ntests)\n   local cfmt = string.format('%%%uu/%u ', ntestsAsString:len(), ntestsAsString)\n   local cfmtlen = ntestsAsString:len() * 2 + 2\n\n   local function bracket(str)\n      return '[' .. str .. ']'\n   end\n\n   io.write('Running ' .. pluralize(ntests, 'test') .. 
'\\n')\n   local i = 1\n   for name, fn in pairs(tests) do\n      self._currentTestName = name\n\n      -- TODO: compute max length of name and cut it down to size if needed\n      local strinit = coloured(string.format(cfmt, i), c.cyan)\n                      .. self._currentTestName .. ' '\n                      .. string.rep('.',\n                                    NCOLS - 6 - 2 -\n                                    cfmtlen - self._currentTestName:len())\n                      .. ' '\n      io.write(strinit .. bracket(coloured('WAIT', c.cyan)))\n      io.flush()\n\n      local status, message, pass, skip\n      if self.disabledTests[name] then\n         skip = true\n      else\n         skip = false\n         if self._setUp then\n            self._setUp(name)\n         end\n         if self.rethrow then\n            status = true\n            local nerr = #self.errors\n            message = fn()\n            pass = nerr == #self.errors\n         else\n            status, message, pass = self:_pcall(fn)\n         end\n         if self._tearDown then\n            self._tearDown(name)\n         end\n      end\n\n      io.write('\\r')\n      io.write(strinit)\n\n      if skip then\n         io.write(bracket(coloured('SKIP', c.yellow)))\n      elseif not status then\n         self.testError[name] = 1\n         io.write(bracket(coloured('ERROR', c.magenta)))\n      elseif not pass then\n         io.write(bracket(coloured('FAIL', c.red)))\n      else\n         io.write(bracket(coloured('PASS', c.green)))\n         if self._warningCount[name] > 0 then\n            io.write('\\n' .. 
string.rep(' ', NCOLS - 10))\n            io.write(bracket(coloured('+warning', c.yellow)))\n         end\n      end\n      io.write('\\n')\n      io.flush()\n\n      if self.earlyAbort and (i < ntests) and (not status or not pass)\n            and (not skip) then\n         io.write('Aborting on first error, not all tests have been executed\\n')\n         break\n      end\n\n      i = i + 1\n\n      collectgarbage()\n   end\nend\n\nfunction Tester:_pcall(f)\n   local nerr = #self.errors\n   local stat, result = xpcall(f, debug.traceback)\n   if not stat then\n      self.errors[#self.errors + 1] =\n         self._currentTestName .. '\\n Function call failed\\n' .. result .. '\\n'\n   end\n   return stat, result, stat and (nerr == #self.errors)\nend\n\nfunction Tester:_getTests(testNames)\n   if testNames == nil then\n      return self.tests\n   end\n   if type(testNames) == 'string' then\n      testNames = {testNames}\n   end\n   assert(type(testNames) == 'table',\n          \"Only accept a name or table of test names (or nil for all tests)\")\n\n   local function getMatchingNames(pattern)\n      local matchingNames = {}\n      for name in pairs(self.tests) do\n         if string.match(name, pattern) then\n            table.insert(matchingNames, name)\n         end\n      end\n      return matchingNames\n   end\n\n   local tests = {}\n   for _, pattern in ipairs(testNames) do\n      local matchingNames = getMatchingNames(pattern)\n      assert(#matchingNames > 0, \"Couldn't find test '\" .. pattern .. 
\"'\")\n      for _, name in ipairs(matchingNames) do\n         tests[name] = self.tests[name]\n      end\n   end\n   return tests\nend\n\nfunction Tester:_report(tests)\n   local ntests = 0\n   local nfailures = 0\n   local nerrors = 0\n   local nskipped = 0\n   local nwarnings = 0\n   self.countasserts = 0\n   for name in pairs(tests) do\n      ntests = ntests + 1\n      self.countasserts = self.countasserts + self.assertionFail[name]\n                          + self.assertionPass[name]\n      if self.assertionFail[name] > 0 then\n         nfailures = nfailures + 1\n      end\n      if self.testError[name] > 0 then\n         nerrors = nerrors + 1\n      end\n      if self._warningCount[name] > 0 then\n         nwarnings = nwarnings + 1\n      end\n      if self.disabledTests[name] then\n         nskipped = nskipped + 1\n      end\n   end\n   if self._warningCount[''] then\n      nwarnings = nwarnings + self._warningCount['']\n   end\n\n   io.write('Completed ' .. pluralize(self.countasserts, 'assert'))\n   io.write(' in ' .. pluralize(ntests, 'test') .. ' with ')\n   io.write(coloured(pluralize(nfailures, 'failure'),\n                     nfailures == 0 and c.green or c.red))\n   io.write(' and ')\n   io.write(coloured(pluralize(nerrors, 'error'),\n                     nerrors == 0 and c.green or c.magenta))\n   if nwarnings > 0 then\n      io.write(' and ')\n      io.write(coloured(pluralize(nwarnings, 'warning'), c.yellow))\n   end\n   if nskipped > 0 then\n      io.write(' and ')\n      io.write(coloured(nskipped .. ' disabled', c.yellow))\n   end\n   io.write('\\n')\n\n   -- Prints off a message separated by -----\n   local haveSection = false\n   local function addSection(text)\n      local function printDashes()\n         io.write(string.rep('-', NCOLS) .. '\\n')\n      end\n      if not haveSection then\n         printDashes()\n         haveSection = true\n      end\n      io.write(text .. 
'\\n')\n      printDashes()\n   end\n\n   if not self.summaryOnly then\n      for _, v in ipairs(self.errors) do\n         addSection(v)\n      end\n      for _, v in ipairs(self.warnings) do\n         addSection(v)\n      end\n   end\nend\n\n\n--[[ Tests for tensor equality between two tensors of matching sizes and types.\n\nTests whether the maximum element-wise difference between `ta` and `tb` is less\nthan or equal to `tolerance`.\n\nArguments:\n* `ta` (tensor)\n* `tb` (tensor)\n* `tolerance` (number) maximum elementwise difference between `ta` and `tb`.\n* `negate` (boolean) if true, we invert success and failure.\n* `storage` (boolean) if true, we print an error message referring to Storages\n    rather than Tensors.\n\nReturns:\n1. success, boolean that indicates success\n2. failure_message, string or nil\n]]\nfunction check.areSameFormatTensorsEq(ta, tb, tolerance, negate, storage)\n   local function ensureHasAbs(t)\n      -- Byte, Char and Short Tensors don't have abs\n      return t.abs and t or t:double()\n   end\n\n   ta = ensureHasAbs(ta)\n   tb = ensureHasAbs(tb)\n\n   local diff = ta:clone():add(-1, tb):abs()\n   local err = diff:max()\n   local success = err <= tolerance\n   if negate then\n      success = not success\n   end\n\n   local errMessage\n   if not success then\n      local prefix = storage and 'Storage' or 'Tensor'\n      local violation = negate and 'NE(==)' or 'EQ(==)'\n      errMessage = string.format('%s%s violation: max diff=%s, tolerance=%s',\n                                 prefix,\n                                 violation,\n                                 tostring(err),\n                                 tostring(tolerance))\n   end\n\n   return success, errMessage\nend\n\n--[[ Tests for tensor equality.\n\nTests whether the maximum element-wise difference between `ta` and `tb` is less\nthan or equal to `tolerance`.\n\nArguments:\n* `ta` (tensor)\n* `tb` (tensor)\n* `tolerance` (number) maximum elementwise difference between 
`ta` and `tb`.\n* `negate` (boolean) if negate is true, we invert success and failure.\n* `ignoreTensorDims` (boolean, default false) if true, then tensors of the same\n    size but different dimensions can still be considered equal, e.g.,\n    {{1}} == {1}. For backwards compatibility.\n\nReturns:\n1. success, boolean that indicates success\n2. failure_message, string or nil\n]]\nfunction check.areTensorsEq(ta, tb, tolerance, negate, ignoreTensorDims)\n   ignoreTensorDims = ignoreTensorDims or false\n\n   if not ignoreTensorDims and ta:dim() ~= tb:dim() then\n      return negate, 'The tensors have different dimensions'\n   end\n\n   if ta:type() ~= tb:type() then\n      return negate, 'The tensors have different types'\n   end\n\n   -- If we are comparing two empty tensors, return true.\n   -- This is needed because some functions below cannot be applied to tensors\n   -- of dimension 0.\n   if ta:dim() == 0 and tb:dim() == 0 then\n      return not negate, 'Both tensors are empty'\n   end\n\n   local sameSize\n   if ignoreTensorDims then\n      sameSize = ta:nElement() == tb:nElement()\n   else\n      sameSize = ta:isSameSizeAs(tb)\n   end\n   if not sameSize then\n      return negate, 'The tensors have different sizes'\n   end\n\n   return check.areSameFormatTensorsEq(ta, tb, tolerance, negate, false)\nend\n\nlocal typesMatching = {\n      ['torch.ByteStorage'] = torch.ByteTensor,\n      ['torch.CharStorage'] = torch.CharTensor,\n      ['torch.ShortStorage'] = torch.ShortTensor,\n      ['torch.IntStorage'] = torch.IntTensor,\n      ['torch.LongStorage'] = torch.LongTensor,\n      ['torch.FloatStorage'] = torch.FloatTensor,\n      ['torch.DoubleStorage'] = torch.DoubleTensor,\n      ['torch.HalfStorage'] = torch.HalfTensor,\n}\n\n--[[ Tests for storage equality.\n\nTests whether the maximum element-wise difference between `sa` and `sb` is less\nthan or equal to `tolerance`.\n\nArguments:\n* `sa` (storage)\n* `sb` (storage)\n* `tolerance` (number) maximum 
elementwise difference between `sa` and `sb`.\n* `negate` (boolean) if negate is true, we invert success and failure.\n\nReturns:\n1. success, boolean that indicates success\n2. failure_message, string or nil\n]]\nfunction check.areStoragesEq(sa, sb, tolerance, negate)\n   if sa:size() ~= sb:size() then\n      return negate, 'The storages have different sizes'\n   end\n\n   local typeOfsa = torch.type(sa)\n   local typeOfsb = torch.type(sb)\n\n   if typeOfsa ~= typeOfsb then\n      return negate, 'The storages have different types'\n   end\n\n   local ta = typesMatching[typeOfsa](sa)\n   local tb = typesMatching[typeOfsb](sb)\n\n   return check.areSameFormatTensorsEq(ta, tb, tolerance, negate, true)\nend\n\n--[[ Tests for general (deep) equality.\n\nThe types of `got` and `expected` must match.\nTables are compared recursively. Keys and types of the associated values must\nmatch, recursively. Numbers are compared with the given tolerance.\nTorch tensors and storages are compared with the given tolerance on their\nelementwise difference. Other types are compared for strict equality with the\nregular Lua == operator.\n\nArguments:\n* `got`\n* `expected`\n* `tolerance` (number) maximum elementwise difference between `got` and\n    `expected`.\n* `negate` (boolean) if negate is true, we invert success and failure.\n\nReturns:\n1. success, boolean that indicates success\n2. failure_message, string or nil\n]]\nfunction check.areEq(got, expected, tolerance, negate)\n   local errMessage\n   if type(got) ~= type(expected) then\n      if not negate then\n         errMessage = 'EQ failed: values have different types (first: '\n                      .. type(got) .. ', second: ' .. type(expected) .. 
')'\n      end\n      return negate, errMessage\n   elseif type(got) == 'number' then\n      local diff = math.abs(got - expected)\n      local ok = (diff <= tolerance)\n      if negate then\n         ok = not ok\n      end\n      if not ok then\n         if negate then\n            errMessage = string.format(\"NE failed: %s == %s\",\n                                       tostring(got), tostring(expected))\n         else\n            errMessage = string.format(\"EQ failed: %s ~= %s\",\n                                       tostring(got), tostring(expected))\n         end\n         if tolerance > 0 then\n            errMessage = errMessage .. \" with tolerance=\" .. tostring(tolerance)\n         end\n      end\n      return ok, errMessage\n   elseif type(expected) == \"table\" then\n     return check.areTablesEq(got, expected, tolerance, negate)\n   elseif torch.isTensor(got) then\n     return check.areTensorsEq(got, expected, tolerance, negate)\n   elseif torch.isStorage(got) then\n     return check.areStoragesEq(got, expected, tolerance, negate)\n   else\n     -- Below: we have the same type which is either userdata or a lua type\n     -- which is not a number.\n     local ok = (got == expected)\n     if negate then\n        ok = not ok\n     end\n     if not ok then\n        if negate then\n           errMessage = string.format(\"NE failed: %s (%s) == %s (%s)\",\n                                      tostring(got), type(got),\n                                      tostring(expected), type(expected))\n        else\n           errMessage = string.format(\"EQ failed: %s (%s) ~= %s (%s)\",\n                                      tostring(got), type(got),\n                                      tostring(expected), type(expected))\n        end\n     end\n     return ok, errMessage\n   end\nend\n\n--[[ Tests for (deep) table equality.\n\nTables are compared recursively. Keys and types of the associated values must\nmatch, recursively. 
Numbers are compared with the given tolerance.\nTorch tensors and storages are compared with the given tolerance on their\nelementwise difference. Other types are compared for strict equality with the\nregular Lua == operator.\n\nArguments:\n* `t1` (table)\n* `t2` (table)\n* `tolerance` (number) maximum elementwise difference between `t1` and `t2`.\n* `negate` (boolean) if negate is true, we invert success and failure.\n\nReturns:\n1. success, boolean that indicates success\n2. failure_message, string or nil\n]]\nfunction check.areTablesEq(t1, t2, tolerance, negate)\n   -- Implementation detail: Instead of doing a depth-first table comparison\n   -- check (for example, using recursion), let's do a breadth-first search\n   -- using a queue. Why? Because if we have two tables that are quite deep\n   -- (e.g., a gModule from nngraph), then if they are different then it's\n   -- more useful to the user to show how they differ at as-shallow-a-depth\n   -- as possible.\n   local queue = {}\n   queue._head = 1\n   queue._tail = 1\n   function queue.isEmpty()\n      return queue._tail == queue._head\n   end\n   function queue.pop()\n      queue._head = queue._head + 1\n      return queue[queue._head - 1]\n   end\n   function queue.push(value)\n      queue[queue._tail] = value\n      queue._tail = queue._tail + 1\n   end\n\n   queue.push({t1, t2})\n   while not queue.isEmpty() do\n      local location\n      t1, t2, location = unpack(queue.pop())\n\n      local function toSublocation(key)\n         local keyAsString = tostring(key)\n         return (location and location .. \".\" .. 
keyAsString) or keyAsString\n      end\n\n      for key, value1 in pairs(t1) do\n         local sublocation = toSublocation(key)\n         if t2[key] == nil then\n            return negate, string.format(\n                  \"Entry %s missing in second table (is %s in first)\",\n                  sublocation, tostring(value1))\n         end\n         local value2 = t2[key]\n         if type(value1) == 'table' and type(value2) == 'table' then\n            queue.push({value1, value2, sublocation})\n         else\n            local ok, message = check.areEq(value1, value2, tolerance, false)\n            if not ok then\n               message = 'At table location ' .. sublocation .. ': ' .. message\n               return negate, message\n            end\n         end\n      end\n\n      for key, value2 in pairs(t2) do\n         local sublocation = toSublocation(key)\n         if t1[key] == nil then\n             return negate, string.format(\n                   \"Entry %s missing in first table (is %s in second)\",\n                   sublocation, tostring(value2))\n         end\n      end\n   end\n   return not negate, 'The tables are equal'\nend\n"
  },
  {
    "path": "Timer.c",
    "content": "#include \"general.h\"\n\n#ifdef _WIN32\n\n#include <windows.h>\n#include <assert.h>\n#define TimeType __int64\nstatic __declspec( thread ) TimeType ticksPerSecond = 0;\n\n/*\n * There is an example of getrusage for windows in following link:\n * https://github.com/openvswitch/ovs/blob/master/lib/getrusage-windows.c\n */\n\n#else\n\n#include <sys/time.h>\n#include <sys/resource.h>\n#define TimeType double\n\n#endif\n\ntypedef struct _Timer\n{\n    int isRunning;\n\n    TimeType totalrealtime;\n    TimeType totalusertime;\n    TimeType totalsystime;\n\n    TimeType startrealtime;\n    TimeType startusertime;\n    TimeType startsystime;\n} Timer;\n\nstatic TimeType torch_Timer_realtime()\n{\n#ifdef _WIN32\n  TimeType current;\n  QueryPerformanceCounter(&current);\n  return current;\n#else\n  struct timeval current;\n  gettimeofday(&current, NULL);\n  return (current.tv_sec + current.tv_usec/1000000.0);\n#endif\n}\n\nstatic TimeType torch_Timer_usertime()\n{\n#ifdef _WIN32\n  return torch_Timer_realtime();\n#else\n  struct rusage current;\n  getrusage(RUSAGE_SELF, &current);\n  return (current.ru_utime.tv_sec + current.ru_utime.tv_usec/1000000.0);\n#endif\n}\n\nstatic TimeType torch_Timer_systime()\n{\n#ifdef _WIN32\n  return 0;\n#else\n  struct rusage current;\n  getrusage(RUSAGE_SELF, &current);\n  return (current.ru_stime.tv_sec + current.ru_stime.tv_usec/1000000.0);\n#endif\n}\n\nstatic int torch_Timer_new(lua_State *L)\n{\n#ifdef _WIN32\n  if (ticksPerSecond == 0)\n  {\n    assert(sizeof(LARGE_INTEGER) == sizeof(__int64));\n    QueryPerformanceFrequency(&ticksPerSecond);\n  }\n#endif\n  Timer *timer = luaT_alloc(L, sizeof(Timer));\n  timer->isRunning = 1;\n  timer->totalrealtime = 0;\n  timer->totalusertime = 0;\n  timer->totalsystime = 0;\n  timer->startrealtime = torch_Timer_realtime();\n  timer->startusertime = torch_Timer_usertime();\n  timer->startsystime = torch_Timer_systime();\n  luaT_pushudata(L, timer, \"torch.Timer\");\n  return 
1;\n}\n\nstatic int torch_Timer_reset(lua_State *L)\n{\n  Timer *timer = luaT_checkudata(L, 1, \"torch.Timer\");\n  timer->totalrealtime = 0;\n  timer->totalusertime = 0;\n  timer->totalsystime = 0;\n  timer->startrealtime = torch_Timer_realtime();\n  timer->startusertime = torch_Timer_usertime();\n  timer->startsystime = torch_Timer_systime();\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_Timer_free(lua_State *L)\n{\n  Timer *timer = luaT_checkudata(L, 1, \"torch.Timer\");\n  luaT_free(L, timer);\n  return 0;\n}\n\nstatic int torch_Timer_stop(lua_State *L)\n{\n  Timer *timer = luaT_checkudata(L, 1, \"torch.Timer\");\n  if(timer->isRunning)  \n  {\n    TimeType realtime = torch_Timer_realtime() - timer->startrealtime;\n    TimeType usertime = torch_Timer_usertime() - timer->startusertime;\n    TimeType systime = torch_Timer_systime() - timer->startsystime;\n    timer->totalrealtime += realtime;\n    timer->totalusertime += usertime;\n    timer->totalsystime += systime;\n    timer->isRunning = 0;\n  }\n  lua_settop(L, 1);\n  return 1;  \n}\n\nstatic int torch_Timer_resume(lua_State *L)\n{\n  Timer *timer = luaT_checkudata(L, 1, \"torch.Timer\");\n  if(!timer->isRunning)\n  {\n    timer->isRunning = 1;\n    timer->startrealtime = torch_Timer_realtime();\n    timer->startusertime = torch_Timer_usertime();\n    timer->startsystime = torch_Timer_systime();\n  }\n  lua_settop(L, 1);\n  return 1;  \n}\n\nstatic int torch_Timer_time(lua_State *L)\n{\n  Timer *timer = luaT_checkudata(L, 1, \"torch.Timer\");\n  double realtime = (timer->isRunning ? (timer->totalrealtime + torch_Timer_realtime() - timer->startrealtime) : timer->totalrealtime);\n  double usertime = (timer->isRunning ? (timer->totalusertime + torch_Timer_usertime() - timer->startusertime) : timer->totalusertime);\n  double systime = (timer->isRunning ? 
(timer->totalsystime + torch_Timer_systime() - timer->startsystime) : timer->totalsystime);\n#ifdef _WIN32\n  realtime /= ticksPerSecond;\n  usertime /= ticksPerSecond;\n  systime  /= ticksPerSecond;\n#endif\n  lua_createtable(L, 0, 3);\n  lua_pushnumber(L, realtime);\n  lua_setfield(L, -2, \"real\");\n  lua_pushnumber(L, usertime);\n  lua_setfield(L, -2, \"user\");\n  lua_pushnumber(L, systime);\n  lua_setfield(L, -2, \"sys\");\n  return 1;\n}\n\nstatic int torch_Timer___tostring__(lua_State *L)\n{\n  Timer *timer = luaT_checkudata(L, 1, \"torch.Timer\");\n  lua_pushfstring(L, \"torch.Timer [status: %s]\", (timer->isRunning ? \"running\" : \"stopped\"));\n  return 1;\n}\n\nstatic const struct luaL_Reg torch_Timer__ [] = {\n  {\"reset\", torch_Timer_reset},\n  {\"stop\", torch_Timer_stop},\n  {\"resume\", torch_Timer_resume},\n  {\"time\", torch_Timer_time},\n  {\"__tostring__\", torch_Timer___tostring__},\n  {NULL, NULL}\n};\n\nvoid torch_Timer_init(lua_State *L)\n{\n  luaT_newmetatable(L, \"torch.Timer\", NULL, torch_Timer_new, torch_Timer_free, NULL);\n  luaT_setfuncs(L, torch_Timer__, 0);\n  lua_pop(L, 1);\n}\n"
  },
  {
    "path": "cmake/TorchConfig.cmake.in",
    "content": "# This (ugly) setup assumes:\n#  CMAKE_PREFIX_PATH = LUA_BINDIR\n#  CMAKE_INSTALL_PREFIX = PREFIX\n\n# Define Torch basic subpaths\nSET(Torch_INSTALL_PREFIX \"@Torch_INSTALL_PREFIX@\")\n\nSET(Torch_INSTALL_BIN_SUBDIR \"@Torch_INSTALL_BIN_SUBDIR@\")\nSET(Torch_INSTALL_MAN_SUBDIR \"@Torch_INSTALL_MAN_SUBDIR@\")\nSET(Torch_INSTALL_LIB_SUBDIR \"@Torch_INSTALL_LIB_SUBDIR@\")\nSET(Torch_INSTALL_SHARE_SUBDIR \"@Torch_INSTALL_SHARE_SUBDIR@\")\nSET(Torch_INSTALL_INCLUDE_SUBDIR \"@Torch_INSTALL_INCLUDE_SUBDIR@\")\nSET(Torch_INSTALL_CMAKE_SUBDIR \"@Torch_INSTALL_CMAKE_SUBDIR@\")\nSET(Torch_INSTALL_LUA_PATH_SUBDIR \"@Torch_INSTALL_LUA_PATH_SUBDIR@\")\nSET(Torch_INSTALL_LUA_CPATH_SUBDIR \"@Torch_INSTALL_LUA_CPATH_SUBDIR@\")\nSET(Torch_INSTALL_CMAKE_RIDBUS \"@Torch_INSTALL_CMAKE_RIDBUS@\")\n\nFILE(RELATIVE_PATH Torch_INSTALL_LUA_PATH_SUBDIR \"${Torch_INSTALL_PREFIX}\" \"${CMAKE_INSTALL_PREFIX}/lua\")\nFILE(RELATIVE_PATH Torch_INSTALL_LUA_CPATH_SUBDIR \"${Torch_INSTALL_PREFIX}\" \"${CMAKE_INSTALL_PREFIX}/lib\")\n\nSET(CMAKE_MODULE_PATH \"${Torch_INSTALL_PREFIX}/${Torch_INSTALL_CMAKE_SUBDIR}\" \"${CMAKE_MODULE_PATH}\")\nSET(CMAKE_INSTALL_PREFIX \"${Torch_INSTALL_PREFIX}\") # override\n\nINCLUDE(TorchPathsInit)\nINCLUDE(TorchPackage)\nINCLUDE(TorchWrap)\n\n# Define Torch basic targets\nINCLUDE(TorchExports)\n\nINCLUDE_DIRECTORIES(\"${Torch_INSTALL_INCLUDE}\")\nINCLUDE_DIRECTORIES(\"${Torch_INSTALL_INCLUDE}/TH\")\nLINK_DIRECTORIES(\"${Torch_INSTALL_LIB}\")\n\nMESSAGE(STATUS \"Found Torch7 in ${Torch_INSTALL_PREFIX}\")\n"
  },
  {
    "path": "cmake/TorchExports.cmake",
    "content": "INSTALL(EXPORT TH-exports\n  DESTINATION \"${Torch_INSTALL_CMAKE_SUBDIR}\"\n  FILE \"TorchExports.cmake\")\n\nCONFIGURE_FILE(\"cmake/TorchConfig.cmake.in\" \"${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/TorchConfig.cmake\" @ONLY)\nCONFIGURE_FILE(\"cmake/TorchWrap.cmake.in\" \"${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/TorchWrap.cmake\" @ONLY)\n\nINSTALL(\n  FILES\n  \"${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/TorchConfig.cmake\"\n  \"${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/TorchWrap.cmake\"\n  \"cmake/TorchPathsInit.cmake\"\n  \"cmake/TorchPackage.cmake\"\n  DESTINATION \"${Torch_INSTALL_CMAKE_SUBDIR}\")\n"
  },
  {
    "path": "cmake/TorchPackage.cmake",
    "content": "# -*- cmake -*-\n\nMACRO(ADD_TORCH_LIBRARY package type src)\n  IF (\"${type}\" STREQUAL \"STATIC\")\n    if (\"${src}\" MATCHES \"cu$\" OR \"${src}\" MATCHES \"cu;\")\n      CUDA_ADD_LIBRARY(${package} STATIC ${src})\n    else()\n      ADD_LIBRARY(${package} STATIC ${src})\n    endif()\n  ELSE()\n    if (\"${src}\" MATCHES \"cu$\" OR \"${src}\" MATCHES \"cu;\")\n      CUDA_ADD_LIBRARY(${package} ${type} ${src})\n    else()\n      ADD_LIBRARY(${package} ${type} ${src})\n    endif()\n  ENDIF()\nENDMACRO()\n\nMACRO(ADD_TORCH_PACKAGE package src luasrc)\n  INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})\n  INCLUDE_DIRECTORIES(${Torch_LUA_INCLUDE_DIR})\n\n ### C/C++ sources\n # As per CMake doc, macro arguments are not variables, so simple test syntax not working\n  IF(NOT \"${src}\" STREQUAL \"\")\n\n    ADD_TORCH_LIBRARY(${package} MODULE \"${src}\")\n\n    ### Torch packages supposes libraries prefix is \"lib\"\n    SET_TARGET_PROPERTIES(${package} PROPERTIES\n      PREFIX \"lib\"\n      IMPORT_PREFIX \"lib\"\n      INSTALL_NAME_DIR \"@executable_path/${Torch_INSTALL_BIN2CPATH}\")\n\n    IF(APPLE)\n      SET_TARGET_PROPERTIES(${package} PROPERTIES\n        LINK_FLAGS \"-undefined dynamic_lookup\")\n    ENDIF()\n\n    IF (BUILD_STATIC OR \"$ENV{STATIC_TH}\" STREQUAL \"YES\")\n      ADD_TORCH_LIBRARY(${package}_static STATIC \"${src}\")\n      SET_TARGET_PROPERTIES(${package}_static PROPERTIES\n        COMPILE_FLAGS \"-fPIC\")\n      SET_TARGET_PROPERTIES(${package}_static PROPERTIES\n        PREFIX \"lib\" IMPORT_PREFIX \"lib\" OUTPUT_NAME \"${package}\")\n    ENDIF()\n\n    INSTALL(TARGETS ${package}\n      RUNTIME DESTINATION ${Torch_INSTALL_LUA_CPATH_SUBDIR}\n      LIBRARY DESTINATION ${Torch_INSTALL_LUA_CPATH_SUBDIR})\n\n  ENDIF(NOT \"${src}\" STREQUAL \"\")\n\n  ### lua sources\n  IF(NOT \"${luasrc}\" STREQUAL \"\")\n    INSTALL(FILES ${luasrc}\n      DESTINATION ${Torch_INSTALL_LUA_PATH_SUBDIR}/${package})\n  ENDIF(NOT \"${luasrc}\" STREQUAL 
\"\")\n\nENDMACRO(ADD_TORCH_PACKAGE)\n"
  },
  {
    "path": "cmake/TorchPaths.cmake",
    "content": "# workaround another annoying cmake bug\n# http://public.kitware.com/Bug/view.php?id=14462\n# https://awesome.naquadah.org/bugs/index.php?do=details&task_id=869\nMACRO(NORMALIZE_PATH _path_)\n  get_filename_component(${_path_}_abs \"${${_path_}}\" ABSOLUTE)\n  SET(${_path_} \"${${_path_}_abs}\")\nENDMACRO()\n\nNORMALIZE_PATH(LUA_BINDIR)\nNORMALIZE_PATH(LUA_LIBDIR)\nNORMALIZE_PATH(LUA_INCDIR)\nNORMALIZE_PATH(LUADIR)\nNORMALIZE_PATH(LIBDIR)\n\nGET_FILENAME_COMPONENT(CMAKE_INSTALL_PREFIX \"${LUA_BINDIR}\" PATH)\n\nSET(Torch_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})\nFILE(RELATIVE_PATH Torch_INSTALL_BIN_SUBDIR \"${CMAKE_INSTALL_PREFIX}\" \"${LUA_BINDIR}\")\nFILE(RELATIVE_PATH Torch_INSTALL_LIB_SUBDIR \"${CMAKE_INSTALL_PREFIX}\" \"${LUA_LIBDIR}\")\nFILE(RELATIVE_PATH Torch_INSTALL_INCLUDE_SUBDIR \"${CMAKE_INSTALL_PREFIX}\" \"${LUA_INCDIR}\")\n\nSET(Torch_INSTALL_MAN_SUBDIR \"share/man\" CACHE PATH\n  \"Install dir for man pages (relative to Torch_INSTALL_PREFIX)\")\n\nSET(Torch_INSTALL_SHARE_SUBDIR \"share\" CACHE PATH\n  \"Install dir for data (relative to Torch_INSTALL_PREFIX)\")\n\nSET(Torch_INSTALL_CMAKE_SUBDIR \"share/cmake/torch\" CACHE PATH\n  \"Install dir for .cmake files (relative to Torch_INSTALL_PREFIX)\")\n\nFILE(RELATIVE_PATH Torch_INSTALL_LUA_PATH_SUBDIR \"${CMAKE_INSTALL_PREFIX}\" \"${LUADIR}\")\nFILE(RELATIVE_PATH Torch_INSTALL_LUA_CPATH_SUBDIR \"${CMAKE_INSTALL_PREFIX}\" \"${LIBDIR}\")\n"
  },
  {
    "path": "cmake/TorchPathsInit.cmake",
    "content": "SET(Torch_INSTALL_BIN \"${Torch_INSTALL_PREFIX}/${Torch_INSTALL_BIN_SUBDIR}\")\nSET(Torch_INSTALL_MAN \"${Torch_INSTALL_PREFIX}/${Torch_INSTALL_MAN_SUBDIR}\")\nSET(Torch_INSTALL_LIB \"${Torch_INSTALL_PREFIX}/${Torch_INSTALL_LIB_SUBDIR}\")\nSET(Torch_INSTALL_SHARE \"${Torch_INSTALL_PREFIX}/${Torch_INSTALL_SHARE_SUBDIR}\")\nSET(Torch_INSTALL_INCLUDE \"${Torch_INSTALL_PREFIX}/${Torch_INSTALL_INCLUDE_SUBDIR}\")\n#SET(Torch_INSTALL_DOK \"${Torch_INSTALL_PREFIX}/${Torch_INSTALL_DOK_SUBDIR}\")\n#SET(Torch_INSTALL_HTML \"${Torch_INSTALL_PREFIX}/${Torch_INSTALL_HTML_SUBDIR}\")\nSET(Torch_INSTALL_CMAKE \"${Torch_INSTALL_PREFIX}/${Torch_INSTALL_CMAKE_SUBDIR}\")\nSET(Torch_INSTALL_LUA_PATH \"${Torch_INSTALL_PREFIX}/${Torch_INSTALL_LUA_PATH_SUBDIR}\")\n#SET(Torch_INSTALL_LUA_PKG_PATH \"${Torch_INSTALL_PREFIX}/${Torch_INSTALL_LUA_PKG_PATH_SUBDIR}\")\nSET(Torch_INSTALL_LUA_CPATH \"${Torch_INSTALL_PREFIX}/${Torch_INSTALL_LUA_CPATH_SUBDIR}\")\n#SET(Torch_INSTALL_LUAROCKS_SYSCONF \"${Torch_INSTALL_PREFIX}/${Torch_INSTALL_LUAROCKS_SYSCONF_SUBDIR}\")\n\n# reverse relative path to prefix (ridbus is the palindrom of subdir)\nFILE(RELATIVE_PATH Torch_INSTALL_BIN_RIDBUS \"${Torch_INSTALL_BIN}\" \"${Torch_INSTALL_PREFIX}/.\")\nFILE(RELATIVE_PATH Torch_INSTALL_CMAKE_RIDBUS \"${Torch_INSTALL_CMAKE}\" \"${Torch_INSTALL_PREFIX}/.\")\nGET_FILENAME_COMPONENT(Torch_INSTALL_BIN_RIDBUS \"${Torch_INSTALL_BIN_RIDBUS}\" PATH)\nGET_FILENAME_COMPONENT(Torch_INSTALL_CMAKE_RIDBUS \"${Torch_INSTALL_CMAKE_RIDBUS}\" PATH)\n\nIF(UNIX)\n  OPTION(WITH_RPATH \"Build libraries with executable rpaths\" ON)\n\n  IF(WITH_RPATH)\n    SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)\n    FILE(RELATIVE_PATH Torch_INSTALL_BIN2LIB\n      \"${Torch_INSTALL_BIN}\" \"${Torch_INSTALL_LIB}\")\n    IF(APPLE)\n      SET(CMAKE_MACOSX_RPATH TRUE) # @rpath in libs\n      SET(CMAKE_INSTALL_RPATH \"@executable_path/${Torch_INSTALL_BIN2LIB}\") # exec\n    ELSE()\n      SET(CMAKE_INSTALL_RPATH 
\"\\$ORIGIN/${Torch_INSTALL_BIN2LIB}\")\n    ENDIF()\n  ELSE()\n    SET(CMAKE_MACOSX_RPATH FALSE) # no @rpath in libs\n  ENDIF()\n\nENDIF(UNIX)\n\nIF (WIN32)\n  SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY \"${CMAKE_BINARY_DIR}\")\n  SET(CMAKE_LIBRARY_OUTPUT_DIRECTORY \"${CMAKE_BINARY_DIR}\")\nENDIF (WIN32)\n"
  },
  {
    "path": "cmake/TorchWrap.cmake",
    "content": "MACRO(ADD_TORCH_WRAP target luafile)\n  INCLUDE_DIRECTORIES(\"${CMAKE_CURRENT_BINARY_DIR}\")\n  GET_FILENAME_COMPONENT(_file_ \"${luafile}\" NAME_WE)\n  SET(cfile \"${_file_}.c\")\n  IF (DEFINED CWRAP_CUSTOM_LUA)\n    ADD_CUSTOM_COMMAND(\n\tOUTPUT \"${CMAKE_CURRENT_BINARY_DIR}/${cfile}\"\n\tCOMMAND ${CWRAP_CUSTOM_LUA} ARGS \"${CMAKE_CURRENT_SOURCE_DIR}/${luafile}\" \"${CMAKE_CURRENT_BINARY_DIR}/${cfile}\"\n    \tWORKING_DIRECTORY \"${CMAKE_CURRENT_SOURCE_DIR}\"\n    \tDEPENDS \"${luafile}\")\n  ELSE (DEFINED CWRAP_CUSTOM_LUA)\n    ADD_CUSTOM_COMMAND(\n\tOUTPUT \"${CMAKE_CURRENT_BINARY_DIR}/${cfile}\"\n      \tCOMMAND ${LUA} ARGS \"${CMAKE_CURRENT_SOURCE_DIR}/${luafile}\" \"${CMAKE_CURRENT_BINARY_DIR}/${cfile}\"\n      \tWORKING_DIRECTORY \"${CMAKE_CURRENT_SOURCE_DIR}\"\n      \tDEPENDS \"${luafile}\")\n  ENDIF (DEFINED CWRAP_CUSTOM_LUA)\nENDMACRO(ADD_TORCH_WRAP)\n"
  },
  {
    "path": "cmake/TorchWrap.cmake.in",
    "content": "MACRO(ADD_TORCH_WRAP target luafile)\n  INCLUDE_DIRECTORIES(\"${CMAKE_CURRENT_BINARY_DIR}\")\n  GET_FILENAME_COMPONENT(_file_ \"${luafile}\" NAME_WE)\n  SET(cfile \"${_file_}.c\")\n  IF (DEFINED CWRAP_CUSTOM_LUA)\n    ADD_CUSTOM_COMMAND(\n\tOUTPUT \"${CMAKE_CURRENT_BINARY_DIR}/${cfile}\"\n\tCOMMAND ${CWRAP_CUSTOM_LUA} ARGS \"${CMAKE_CURRENT_SOURCE_DIR}/${luafile}\" \"${CMAKE_CURRENT_BINARY_DIR}/${cfile}\"\n    \tWORKING_DIRECTORY \"${CMAKE_CURRENT_SOURCE_DIR}\"\n    \tDEPENDS \"${luafile}\")\n  ELSE (DEFINED CWRAP_CUSTOM_LUA)\n    ADD_CUSTOM_COMMAND(\n\tOUTPUT \"${CMAKE_CURRENT_BINARY_DIR}/${cfile}\"\n      \tCOMMAND @LUA@ ARGS \"${CMAKE_CURRENT_SOURCE_DIR}/${luafile}\" \"${CMAKE_CURRENT_BINARY_DIR}/${cfile}\"\n      \tWORKING_DIRECTORY \"${CMAKE_CURRENT_SOURCE_DIR}\"\n      \tDEPENDS \"${luafile}\")\n  ENDIF (DEFINED CWRAP_CUSTOM_LUA)\n  ADD_CUSTOM_TARGET(${target} DEPENDS \"${CMAKE_CURRENT_BINARY_DIR}/${cfile}\")\nENDMACRO(ADD_TORCH_WRAP)\n"
  },
  {
    "path": "doc/cmdline.md",
    "content": "<a name=\"torch.CmdLine.dok\"></a>\n# CmdLine #\n\nThis class provides a parameter parsing framework which is very\nuseful when one needs to run several experiments that rely on\ndifferent parameter settings that are passed in the command line.\nThis class will also override the default print function to direct\nall the output to a log file as well as screen at the same time.\n\nA sample `lua` file is given below that makes use of `CmdLine`\nclass.\n\n```lua\n\ncmd = torch.CmdLine()\ncmd:text()\ncmd:text()\ncmd:text('Training a simple network')\ncmd:text()\ncmd:text('Options')\ncmd:option('-seed',123,'initial random seed')\ncmd:option('-booloption',false,'boolean option')\ncmd:option('-stroption','mystring','string option')\ncmd:text()\n\n-- parse input params\nparams = cmd:parse(arg)\n\nparams.rundir = cmd:string('experiment', params, {dir=true})\npaths.mkdir(params.rundir)\n\n-- create log file\ncmd:log(params.rundir .. '/log', params)\n\n```\n\nWhen this file is run on the th command line as follows\n```shell\n# th myscript.lua\n```\n\nIt will produce the following output:\n\n```\n[program started on Tue Jan 10 15:33:49 2012]\n[command line arguments]\nbooloption\tfalse\nseed\t123\nrundir\texperiment\nstroption\tmystring\n[----------------------]\nbooloption\tfalse\nseed\t123\nrundir\texperiment\nstroption\tmystring\n```\n\nThe same output will also be written to file\n`experiment/log`. 
Whenever one of the options is passed on the\ncommand line with a value different from the default, the `rundir`\nname is produced to reflect the parameter setting.\n\n```shell\n# th myscript.lua -seed 456 -stroption mycustomstring\n```\n\nThis will produce the following output:\n\n```\n[program started on Tue Jan 10 15:36:55 2012]\n[command line arguments]\nbooloption\tfalse\nseed\t456\nrundir\texperiment,seed=456,stroption=mycustomstring\nstroption\tmycustomstring\n[----------------------]\nbooloption\tfalse\nseed\t456\nrundir\texperiment,seed=456,stroption=mycustomstring\nstroption\tmycustomstring\n```\n\nand the output will be logged in\n`experiment,seed=456,stroption=mycustomstring/log`\n\n<a name=\"torch.CmdLine.addtime\"></a>\n### addTime([name] [,format]) ###\n\nAdds a prefix to every line in the log file with the date/time in the\ngiven format with an optional name argument. The date/time format is\nthe same as `os.date()`. Note that the prefix is only added to the\nlog file, not the screen output. The default value for name is empty\nand the default format is '%F %T'.\n\nThe final produced output for the following command is:\n\n```lua\n> cmd:addTime('your project name','%F %T')\n> print('Your log message')\n```\n\n```\n2012-02-07 08:21:56[your project name]: Your log message\n```\n\n<a name=\"torch.CmdLine.log\"></a>\n### log(filename, parameter_table) ###\n\nIt sets the log filename to `filename` and prints the values of\nparameters in the `parameter_table`. If filename is an open file\ndescriptor, it will write to the file instead of creating a new one.\n\n<a name=\"torch.CmdLine.option\"></a>\n### option(name, default, help) ###\n\nStores an option argument. The name should always start with '-'.\n\n<a name=\"torch.CmdLine.parse\"></a>\n### [table] parse(arg) ###\n\nParses a given table, `arg` is by default the argument table that \nis created by `lua` using the command line arguments passed to the \nexecutable. 
Returns a table of option values.\n\n<a name=\"torch.CmdLine.silent\"></a>\n### silent() ###\n\nSilences the output to standard output. The only output is written to\nthe log file.\n\n<a name=\"torch.CmdLine.string\"></a>\n### [string] string(prefix, params, ignore) ###\n\nReturns a string representation of the options by concatenating the\nnon-default options. `ignore` is a table `{dir=true}`, which will\nensure that option named `dir` will be ignored while creating the\nstring representation.\n\nThis function is useful for creating unique experiment directories that\ndepend on the parameter settings.\n\n<a name=\"torch.CmdLine.text\"></a>\n### text(string) ###\n\nLogs a custom text message.\n\n\n\n"
  },
  {
    "path": "doc/diskfile.md",
    "content": "<a name=\"torch.DiskFile.dok\"></a>\n# DiskFile #\n\nParent classes: [File](file.md)\n\nA `DiskFile` is a particular `File` which is able to perform basic read/write operations\non a file stored on disk. It implements all methods described in [File](file.md), and\nsome additional methods relative to _endian_ encoding.\n\nBy default, a `DiskFile` is in [ASCII](file.md#torch.File.ascii) mode. If changed to\nthe [binary](file.md#torch.File.binary) mode, the default endian encoding is the native\ncomputer one.\n\nThe file might be open in read, write, or read-write mode, depending on the parameter\n`mode` (which can take the value `\"r\"`, `\"w\"` or `\"rw\"` respectively)\ngiven to the [torch.DiskFile(fileName, mode)](#torch.DiskFile).\n\n<a name=\"torch.DiskFile\"></a>\n### torch.DiskFile(fileName, [mode], [quiet]) ###\n\n_Constructor_ which opens `fileName` on disk, using the given `mode`. Valid `mode` are\n`\"r\"` (read), `\"w\"` (write) or `\"rw\"` (read-write). Default is read mode.\n\nIf read-write mode, the file _will be created_ if it does not exists. 
If it\nexists, it will be positioned at the beginning of the file after opening.\n\nIf (and only if) `quiet` is `true`, no error will be raised in case of\nproblem opening the file: instead `nil` will be returned.\n\nThe file is opened in [ASCII](file.md#torch.File.ascii) mode by default.\n\n<a name=\"torch.DiskFile.bigEndianEncoding\"></a>\n### bigEndianEncoding() ###\n\nIn [binary](file.md#torch.File.binary) mode, force encoding in _big endian_.\n(_big end first_: decreasing numeric significance with increasing memory\naddresses)\n\n<a name=\"torch.DiskFile.isBigEndianCPU\"></a>\n### [boolean] isBigEndianCPU() ###\n\nReturns `true` if, and only if, the computer CPU operates in _big endian_.\n_Big end first_: decreasing numeric significance with increasing\nmemory addresses.\n\n<a name=\"torch.DiskFile.isLittleEndianCPU\"></a>\n### [boolean] isLittleEndianCPU() ###\n\nReturns `true` if, and only if, the computer CPU operates in _little endian_.\n_Little end first_: increasing numeric significance with increasing\nmemory addresses.\n\n<a name=\"torch.DiskFile.littleEndianEncoding\"></a>\n### littleEndianEncoding() ###\n\nIn [binary](file.md#torch.File.binary) mode, force encoding in _little endian_.\n(_little end first_: increasing numeric significance with increasing memory\naddresses)\n\n<a name=\"torch.DiskFile.nativeEndianEncoding\"></a>\n### nativeEndianEncoding() ###\n\nIn [binary](file.md#torch.File.binary) mode, force encoding in _native endian_.\n\n<a name=\"torch.DiskFile.longSize\"></a>\n### longSize([size]) ###\n\nLongs will be written and read from the file as `size` bytes long, which\ncan be 0, 4 or 8. 0 means system default.\n\n<a name=\"torch.DiskFile.noBuffer\"></a>\n### noBuffer() ###\n\nDisables read and write buffering on the `DiskFile`.\n"
  },
  {
    "path": "doc/file.md",
    "content": "<a name=\"torch.File.dok\"></a>\n# File #\n\nThis is an _abstract_ class. It defines most methods implemented by its\nchild classes, like [DiskFile](diskfile.md),\n[MemoryFile](memoryfile.md) and [PipeFile](pipefile.md).\n\nMethods defined here are intended for basic read/write functionalities.\nRead/write methods might write in [ASCII](#torch.File.ascii) mode or\n[binary](#torch.File.binary) mode.\n\nIn [ASCII](#torch.File.ascii) mode, numbers are converted in human readable\nformat (characters). Booleans are converted into `0` (false) or `1` (true).\nIn [binary](#torch.File.binary) mode, numbers and boolean are directly encoded\nas represented in a register of the computer. While not being human\nreadable and less portable, the binary mode is obviously faster.\n\nIn [ASCII](#torch.File.ascii) mode, if the default option\n[autoSpacing()](#torch.File.autoSpacing) is chosen, a space will be generated\nafter each written number or boolean. A carriage return will also be added\nafter each call to a write method. With this option, the spaces are\nsupposed to exist while reading. This option can be deactivated with\n[noAutoSpacing()](#torch.File.noAutoSpacing).\n\nA `Lua` error might or might not be generated in case of read/write error\nor problem in the file. This depends on the choice made between\n[quiet()](#torch.File.quiet) and [pedantic()](#torch.File.pedantic) options. 
It\nis possible to query if an error occurred in the last operation by calling\n[hasError()](#torch.File.hasError).\n\n<a name=\"torch.File.read\"></a>\n## Read methods ##\n<a name=\"torch.File.readByte\"></a>\n<a name=\"torch.File.readBool\"></a>\n<a name=\"torch.File.readShort\"></a>\n<a name=\"torch.File.readChar\"></a>\n<a name=\"torch.File.readLong\"></a>\n<a name=\"torch.File.readInt\"></a>\n<a name=\"torch.File.readDouble\"></a>\n<a name=\"torch.File.readFloat\"></a>\n\nThey are three types of reading methods:\n\n  - `[number] readTYPE()`\n  - `[TYPEStorage] readTYPE(n)`\n  - `[number] readTYPE(TYPEStorage)`\n\nwhere `TYPE` can be either `Byte`, `Char`, `Short`, `Int`, `Long`, `Float` or `Double`.\n\nA convenience method also exist for boolean types: `[boolean] readBool()`. It reads\na value on the file with `readInt()` and returns `true` if and only if this value is `1`. It is not possible\nto read storages of booleans.\n\nAll these methods depends on the encoding choice: [ASCII](#torch.File.ascii)\nor [binary](#torch.File.binary) mode.  In [ASCII](#torch.File.ascii) mode, the\noption [autoSpacing()](#torch.File.autoSpacing) and\n[noAutoSpacing()](#torch.File.noAutoSpacing) have also an effect on these\nmethods.\n\nIf no parameter is given, one element is returned. This element is\nconverted to a `Lua` number when reading.\n\nIf `n` is given, `n` values of the specified type are read\nand returned in a new [Storage](storage.md) of that particular type.\nThe storage size corresponds to the number of elements actually read.\n\nIf a `Storage` is given, the method will attempt to read a number of elements\nequals to the size of the given storage, and fill up the storage with these elements.\nThe number of elements actually read is returned.\n\nIn case of read error, these methods will call the `Lua` error function using the default\n[pedantic](#torch.File.pedantic) option, or stay quiet with the [quiet](#torch.File.quiet)\noption. 
In the latter case, one can check if an error occurred with\n[hasError()](#torch.File.hasError).\n\n<a name=\"torch.File.write\"></a>\n## Write methods ##\n<a name=\"torch.File.writeByte\"></a>\n<a name=\"torch.File.writeBool\"></a>\n<a name=\"torch.File.writeShort\"></a>\n<a name=\"torch.File.writeChar\"></a>\n<a name=\"torch.File.writeLong\"></a>\n<a name=\"torch.File.writeInt\"></a>\n<a name=\"torch.File.writeDouble\"></a>\n<a name=\"torch.File.writeFloat\"></a>\n\nThey are two types of writing methods:\n\n  - `[number] writeTYPE(number)`\n  - `[number] writeTYPE(TYPEStorage)`\n\nwhere `TYPE` can be either `Byte`, `Char`, `Short`, `Int`, `Long`, `Float` or `Double`.\n\nA convenience method also exist for boolean types: `writeBool(value)`. If `value` is `nil` or\nnot `true` a it is equivalent to a `writeInt(0)` call, else to `writeInt(1)`. It is not possible\nto write storages of booleans.\n\nAll these methods depends on the encoding choice: [ASCII](#torch.File.ascii)\nor [binary](#torch.File.ascii) mode.  In [ASCII](#torch.File.ascii) mode, the\noption [autoSpacing()](#torch.File.autoSpacing) and\n[noAutoSpacing()](#torch.File.noAutoSpacing) have also an effect on these\nmethods.\n\nIf one `Lua` number is given, this number is converted according to the\nname of the method when writing (e.g. `writeInt(3.14)` will write `3`).\n\nIf a `Storage` is given, the method will attempt to write all the elements contained\nin the storage.\n\nThese methods return the number of elements actually written.\n\nIn case of write error, these methods will call the `Lua` error function using the default\n[pedantic](#torch.File.pedantic) option, or stay quiet with the [quiet](#torch.File.quiet)\noption. 
In the latter case, one can check if an error occurred with\n[hasError()](#torch.File.hasError).\n\n<a name=\"torch.File.serialization\"></a>\n## Serialization methods ##\n\nThese methods allow the user to save any serializable objects on disk and\nreload it later in its original state. In other words, it can perform a\n_deep_ copy of an object into a given `File`.\n\nSerializable objects are `Torch` objects having a `read()` and\n`write()` method. `Lua` objects such as `table`, `number` or\n`string` or _pure Lua_ functions are also serializable.\n\nIf the object to save contains several other objects (let say it is a tree\nof objects), then objects appearing several times in this tree will be\n_saved only once_. This saves disk space, speeds up loading/saving and\nrespects the dependencies between objects.\n\nInterestingly, if the `File` is a [MemoryFile](memoryfile.md), it allows\nthe user to easily make a _clone_ of any serializable object:\n```lua\nfile = torch.MemoryFile() -- creates a file in memory\nfile:writeObject(object) -- writes the object into file\nfile:seek(1) -- comes back at the beginning of the file\nobjectClone = file:readObject() -- gets a clone of object\n```\n\n<a name=\"torch.File.readObject\"></a>\n### readObject() ###\n\nReturns the next [serializable](#torch.File.serialization) object saved beforehand\nin the file with [writeObject()](#torch.File.writeObject).\n\nNote that objects which were [written](#torch.File.writeObject) with the same\nreference have still the same reference after loading.\n\nExample:\n```lua\n-- creates an array which contains twice the same tensor\narray = {}\nx = torch.Tensor(1)\ntable.insert(array, x)\ntable.insert(array, x)\n\n-- array[1] and array[2] refer to the same address\n-- x[1] == array[1][1] == array[2][1] == 3.14\narray[1][1] = 3.14\n\n-- write the array on disk\nfile = torch.DiskFile('foo.asc', 'w')\nfile:writeObject(array)\nfile:close() -- make sure the data is written\n\n-- reload the array\nfile = 
torch.DiskFile('foo.asc', 'r')\narrayNew = file:readObject()\n\n-- arrayNew[1] and arrayNew[2] refer to the same address!\n-- arrayNew[1][1] == arrayNew[2][1] == 3.14\n-- so if we do now:\narrayNew[1][1] = 2.72\n-- arrayNew[1][1] == arrayNew[2][1] == 2.72 !\n```\n\n<a name=\"torch.File.writeObject\"></a>\n### writeObject(object) ###\n\nWrites `object` into the file. This object can be read later using\n[readObject()](#torch.File.readObject). Serializable objects are `Torch`\nobjects having a `read()` and `write()` method. `Lua` objects such as\n`table`, `number` or `string` or pure Lua functions are also serializable.\n\nIf the object has been already written in the file, only a _reference_ to\nthis already saved object will be written: this saves space an speed-up\nwriting; it also allows to keep the dependencies between objects intact.\n\nIn returns, if one writes an object, modifies its member, and writes the\nobject again in the same file, the modifications will not be recorded\nin the file, as only a reference to the original will be written. See\n[readObject()](#torch.File.readObject) for an example.\n\n<a name=\"torch.File.readString\"></a>\n### [string] readString(format) ###\n\nIf `format` starts with `\"*l\"` then returns the next line in the `File`. The end-of-line character is skipped.\n\nIf `format` starts with `\"*a\"` then returns all the remaining contents of the `File`.\n\nIf no data is available, then an error is raised, except if `File` is in [quiet()](#torch.File.quiet) mode where\nit then returns an empty string `''` and after that you'll be able to see that last reading failed due to end of file with your_file:[hasError()](#torch.File.hasError).\n\nBecause Torch is more precise on number typing, the `Lua` format `\"*n\"` is not supported:\ninstead use one of the [number read methods](#torch.File.read).\n\n<a name=\"torch.File.writeString\"></a>\n### [number] writeString(str) ###\n\nWrites the string `str` in the `File`. 
If the string cannot be written completely an error is raised, except\nif `File` is in [quiet()](#torch.File.quiet) mode where it returns the number of character actually written.\n\n## General Access and Control Methods ##\n\n<a name=\"torch.File.ascii\"></a>\n### ascii() [default] ###\n\nThe data read or written will be in `ASCII` mode: all numbers are converted\nto characters (human readable format) and boolean are converted to `0`\n(false) or `1` (true). The input-output format in this mode depends on the\noptions [autoSpacing()](#torch.File.autoSpacing) and\n[noAutoSpacing()](#torch.File.noAutoSpacing).\n\n<a name=\"torch.File.autoSpacing\"></a>\n### autoSpacing() [default] ###\n\nIn [ASCII](#torch.File.ascii) mode, write additional spaces around the elements\nwritten on disk: if writing a [Storage](storage.md), a space will be\ngenerated between each _element_ and a _return line_ after the last\nelement. If only writing one element, a _return line_ will be generated\nafter this element.\n\nThose spaces are supposed to exist while reading in this mode.\n\nThis is the default behavior. You can de-activate this option with the\n[noAutoSpacing()](#torch.File.noAutoSpacing) method.\n\n<a name=\"torch.File.binary\"></a>\n### binary() ###\n\nThe data read or written will be in binary mode: the representation in the\n`File` is the same that the one in the computer memory/register (not human\nreadable).  This mode is faster than [ASCII](#torch.File.ascii) but less\nportable.\n\n<a name=\"torch.File.clearError\"></a>\n### clearError() ###\n\nClear the error.flag returned by [hasError()](#torch.File.hasError).\n\n<a name=\"torch.File.close\"></a>\n### close() ###\n\nClose the file. Any subsequent operation will generate a `Lua` error.\n\n<a name=\"torch.File.noAutoSpacing\"></a>\n### noAutoSpacing() ###\n\nIn [ASCII](#torch.File.ascii) mode, do not put extra spaces between element\nwritten on disk. 
This is the contrary of the option\n[autoSpacing()](#torch.File.autoSpacing).\n\n<a name=\"torch.File.synchronize\"></a>\n### synchronize() ###\n\nIf the child class bufferize the data while writing, ensure that the data\nis actually written.\n\n\n<a name=\"torch.File.pedantic\"></a>\n### pedantic() [default] ###\n\nIf this mode is chosen (which is the default), a `Lua` error will be\ngenerated in case of error (which will cause the program to stop).\n\nIt is possible to use [quiet()](#torch.File.quiet) to avoid `Lua` error generation\nand set a flag instead.\n\n<a name=\"torch.File.position\"></a>\n### [number] position() ###\n\nReturns the current position (in bytes) in the file.\nThe first position is `1` (following Lua standard indexing).\n\n<a name=\"torch.File.quiet\"></a>\n### quiet() ###\n\nIf this mode is chosen instead of [pedantic()](#torch.File.pedantic), no `Lua`\nerror will be generated in case of read/write error. Instead, a flag will\nbe raised, readable through [hasError()](#torch.File.hasError). This flag can\nbe cleared with [clearError()](#torch.File.clearError)\n\nChecking if a file is quiet can be performed using [isQuiet()](#torch.File.isQuiet).\n\n<a name=\"torch.File.seek\"></a>\n### seek(position) ###\n\nJump into the file at the given `position` (in byte). Might generate/raise\nan error in case of problem. The first position is `1` (following Lua standard indexing).\n\n<a name=\"torch.File.seekEnd\"></a>\n### seekEnd() ###\n\nJump at the end of the file. 
Might generate/raise an error in case of\nproblem.\n\n## File state query ##\n\nThese methods allow the user to query the state of the given `File`.\n\n<a name=\"torch.File.hasError\"></a>\n### [boolean] hasError() ###\n\nReturns if an error occurred since the last [clearError()](#torch.File.clearError) call, or since\nthe opening of the file if `clearError()` has never been called.\n\n<a name=\"torch.File.isQuiet\"></a>\n### [boolean] isQuiet() ###\n\nReturns a boolean which tells if the file is in [quiet](#torch.File.quiet) mode or not.\n\n<a name=\"torch.File.isReadable\"></a>\n### [boolean] isReadable() ###\n\nTells if one can read the file or not.\n\n<a name=\"torch.File.isWritable\"></a>\n### [boolean] isWritable() ###\n\nTells if one can write in the file or not.\n\n<a name=\"torch.File.isAutoSpacing\"></a>\n### [boolean] isAutoSpacing() ###\n\nReturn `true` if [autoSpacing](#torch.File.autoSpacing) has been chosen.\n\n<a name=\"torch.File.referenced\"></a>\n### referenced(ref) ###\n\nSets the referenced property of the File to `ref`. `ref` has to be `true`\nor `false`.\n\nBy default `ref` is true, which means that a File object keeps track of\nobjects written (using [writeObject](#torch.File.writeObject) method) or\nread (using [readObject](#torch.File.readObject) method). Objects with the\nsame address will be written or read only once, meaning that this approach\npreserves shared memory structured.\n\nKeeping track of references has a cost: every object which is serialized in\nthe file is kept alive (even if one discards the object after\nwriting/reading) as File needs to track their pointer. 
This is not always a\ndesirable behavior, especially when dealing with large data structures.\n\nAnother typical example when one does not want reference tracking is when\none needs to push the same tensor repeatedly into a file but every time\nchanging its contents: calling `referenced(false)` ensures the desired\nbehaviour.\n\n<a name=\"torch.File.isReferenced\"></a>\n### isReferenced() ###\n\nReturns the state set by [referenced](#torch.File.referenced).\n"
  },
  {
    "path": "doc/index.md",
    "content": "<a name=\"torch.reference.dok\"></a>\n# Torch Package Reference Manual #\n\n[![Join the chat at https://gitter.im/torch/torch7](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/torch/torch7?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)\n[![Build Status](https://travis-ci.org/torch/torch7.svg)](https://travis-ci.org/torch/torch7)\n\n__Torch__ is the main package in [Torch7](http://torch.ch) where data\nstructures for multi-dimensional tensors and mathematical operations\nover these are defined. Additionally, it provides many utilities for\naccessing files, serializing objects of arbitrary types and other\nuseful utilities.\n\n<a name=\"torch.reference.dok\"></a>\n## Torch Packages ##\n\n  * Tensor Library\n    * [Tensor](tensor.md) defines the _all powerful_ tensor object that provides multi-dimensional numerical arrays with type templating.\n    * [Mathematical operations](maths.md) that are defined for the tensor object types.\n    * [Storage](storage.md) defines a simple storage interface that controls the underlying storage for any tensor object.\n  * File I/O Interface Library\n    * [File](file.md) is an abstract interface for common file operations.\n    * [Disk File](diskfile.md) defines operations on files stored on disk.\n    * [Memory File](memoryfile.md) defines operations on stored in RAM.\n    * [Pipe File](pipefile.md) defines operations for using piped commands.\n    * [High-Level File operations](serialization.md) defines higher-level serialization functions.\n  * Useful Utilities\n    * [Timer](timer.md) provides functionality for _measuring time_.\n    * [Tester](tester.md) is a generic tester framework.\n    * [CmdLine](cmdline.md) is a command line argument parsing utility.\n    * [Random](random.md) defines a random number generator package with various distributions.\n    * Finally useful [utility](utility.md) functions are provided for easy handling of torch tensor types and class 
inheritance.\n\n"
  },
  {
    "path": "doc/maths.md",
    "content": "<a name=\"torch.maths.dok\"></a>\n# Math Functions #\n\nTorch provides MATLAB-like functions for manipulating [`Tensor`](tensor.md) objects.  Functions fall into several types of categories:\n\n  * [Constructors](#torch.construction.dok) like [`zeros`](#torch.zeros), [`ones`](#torch.ones);\n  * Extractors like [`diag`](#torch.diag)  and [`triu`](#torch.triu);\n  * [Element-wise](#torch.elementwise.dok) mathematical operations like [`abs`](#torch.abs) and [`pow`](#torch.pow);\n  * [BLAS](#torch.basicoperations.dok) operations;\n  * [Column or row-wise operations](#torch.columnwise.dok) like [`sum`](#torch.sum) and [`max`](#torch.max);\n  * [Matrix-wide operations](#torch.matrixwide.dok) like [`trace`](#torch.trace) and [`norm`](#torch.norm);\n  * [Convolution and cross-correlation](#torch.conv.dok) operations like [`conv2`](#torch.conv2);\n  * [Basic linear algebra operations](#torch.linalg.dok) like [`eig`](#torch.eig);\n  * [Logical operations](#torch.logical.dok) on `Tensor`s.\n\nBy default, all operations allocate a new `Tensor` to return the result.\nHowever, all functions also support passing the target `Tensor`(s) as the first argument(s), in which case the target `Tensor`(s) will be resized accordingly and filled with result.\nThis property is especially useful when one wants have tight control over when memory is allocated.\n\nThe *Torch* package adopts the same concept, so that calling a function directly on the `Tensor` itself using an object-oriented syntax is equivalent to passing the `Tensor` as the optional resulting `Tensor`.\nThe following two calls are equivalent.\n\n```lua\ntorch.log(x, x)\nx:log()\n```\n\nSimilarly, `torch.conv2` function can be used in the following manner.\n\n```lua\n> x = torch.rand(100, 100)\n> k = torch.rand(10, 10)\n> res1 = torch.conv2(x, k)   -- case 1\n\n> res2 = torch.Tensor()\n> torch.conv2(res2, x, k)     -- case 2\n\n> res2:dist(res1)\n0\n```\n\nThe advantage of second case is, same `res2` `Tensor` 
can be used successively in a loop without any new allocation.\n\n```lua\n-- no new memory allocations...\n> for i = 1, 100 do\n     torch.conv2(res2, x, k)\n  end\n\n> res2:dist(res1)\n0\n```\n\n<a name=\"torch.construction.dok\"></a>\n## Construction or extraction functions ##\n\n<a name=\"torch.cat\"></a>\n### [res] torch.cat( [res,] x_1, x_2, [dimension] ) ###\n### [res] torch.cat( [res,] {x_1, x_2, ...}, [dimension] ) ###\n<a name=\"torch.cat\"></a>\n`x = torch.cat(x_1, x_2, [dimension])` returns a `Tensor` `x` which is the concatenation of `Tensor`s `x_1` and `x_2` along dimension `dimension`.\n\nIf `dimension` is not specified or if it is `-1`, it is the maximum last dimension over all input tensors, except if all tensors are empty, then it is `1`.\n\nThe other dimensions of `x_1` and `x_2` have to be equal.\n\nAlso supports arrays with arbitrary numbers of `Tensor`s as inputs.\n\nEmpty tensors are ignored during catting, and thus do not throw an error. Performing cat on empty tensors only will always result in an empty tensor.\n\nExamples:\n```lua\n> torch.cat(torch.ones(3), torch.zeros(2))\n 1\n 1\n 1\n 0\n 0\n[torch.DoubleTensor of size 5]\n\n> torch.cat(torch.ones(3, 2), torch.zeros(2, 2), 1)\n 1  1\n 1  1\n 1  1\n 0  0\n 0  0\n[torch.DoubleTensor of size 5x2]\n\n> torch.cat(torch.ones(2, 2), torch.zeros(2, 2), 1)\n 1  1\n 1  1\n 0  0\n 0  0\n[torch.DoubleTensor of size 4x2]\n\n> torch.cat(torch.ones(2, 2), torch.zeros(2, 2), 2)\n 1  1  0  0\n 1  1  0  0\n[torch.DoubleTensor of size 2x4]\n\n> torch.cat(torch.cat(torch.ones(2, 2), torch.zeros(2, 2), 1), torch.rand(3, 2), 1)\n 1.0000  1.0000\n 1.0000  1.0000\n 0.0000  0.0000\n 0.0000  0.0000\n 0.3227  0.0493\n 0.9161  0.1086\n 0.2206  0.7449\n[torch.DoubleTensor of size 7x2]\n\n> torch.cat({torch.ones(2, 2), torch.zeros(2, 2), torch.rand(3, 2)}, 1)\n 1.0000  1.0000\n 1.0000  1.0000\n 0.0000  0.0000\n 0.0000  0.0000\n 0.3227  0.0493\n 0.9161  0.1086\n 0.2206  0.7449\n[torch.DoubleTensor of size 7x2]\n\n> 
torch.cat({torch.Tensor(), torch.rand(3, 2)}, 1)\n 0.3227  0.0493\n 0.9161  0.1086\n 0.2206  0.7449\n[torch.DoubleTensor of size 3x2]\n\n```\n\n\n<a name=\"torch.diag\"></a>\n### [res] torch.diag([res,] x [,k]) ###\n<a name=\"torch.diag\"></a>\n\n`y = torch.diag(x)` when `x` is of dimension 1 returns a diagonal matrix with diagonal elements constructed from `x`.\n\n`y = torch.diag(x)` when `x` is of dimension 2 returns a `Tensor` of dimension 1 with elements constructed from the diagonal of `x`.\n\n`y = torch.diag(x, k)` returns the k-th diagonal of `x`, where `k = 0` is the main diagonal, `k > 0` is above the main diagonal and `k < 0` is below the main diagonal.\n\n<a name=\"torch.eye\"></a>\n### [res] torch.eye([res,] n [,m]) ###\n<a name=\"torch.eye\"></a>\n\n`y = torch.eye(n)` returns the `n × n` identity matrix.\n\n`y = torch.eye(n, m)` returns an `n × m` identity matrix with ones on the diagonal and zeros elsewhere.\n\n\n<a name=\"torch.histc\"></a>\n### [res] torch.histc([res,] x [,nbins, min_value, max_value]) ###\n<a name=\"torch.histc\"></a>\n\n`y = torch.histc(x)` returns the histogram of the elements in `x`.\nBy default the elements are sorted into 100 equally spaced bins between the minimum and maximum values of `x`.\n\n`y = torch.histc(x, n)` same as above with `n` bins.\n\n`y = torch.histc(x, n, min, max)` same as above with `n` bins and `[min, max]` as elements range.\n\n\n<a name=\"torch.bhistc\"></a>\n### [res] torch.bhistc([res,] x [,nbins, min_value, max_value]) ###\n<a name=\"torch.bhistc\"></a>\n\n`y = torch.bhistc(x)` returns the histogram of the elements in 2d tensor `x` along the last dimension.\nBy default the elements are sorted into 100 equally spaced bins between the minimum and maximum values of `x`.\n\n`y = torch.bhistc(x, n)` same as above with `n` bins.\n\n`y = torch.bhistc(x, n, min, max)` same as above with `n` bins and `[min, max]` as elements range.\n\n```lua\nx = torch.Tensor(3, 6)\n\n> x[1] = torch.Tensor{ 2, 4, 2, 2, 5, 4 
}\n> x[2] = torch.Tensor{ 3, 5, 1, 5, 3, 5 }\n> x[3] = torch.Tensor{ 3, 4, 2, 5, 5, 1 }\n\n> x\n 2  4  2  2  5  4\n 3  5  1  5  3  5\n 3  4  2  5  5  1\n[torch.DoubleTensor of size 3x6]\n\n> torch.bhistc(x, 5, 1, 5)\n 0  3  0  2  1\n 1  0  2  0  3\n 1  1  1  1  2\n[torch.DoubleTensor of size 3x5]\n\n> y = torch.Tensor(1, 6):copy(x[1])\n\n> torch.bhistc(y, 5)\n 3  0  2  0  1\n[torch.DoubleTensor of size 1x5]\n```\n\n<a name=\"torch.linspace\"></a>\n### [res] torch.linspace([res,] x1, x2, [,n]) ###\n<a name=\"torch.linspace\"></a>\n\n`y = torch.linspace(x1, x2)` returns a one-dimensional `Tensor` of size 100 equally spaced points between `x1` and `x2`.\n\n`y = torch.linspace(x1, x2, n)` returns a one-dimensional `Tensor` of `n` equally spaced points between `x1` and `x2`.\n\n\n<a name=\"torch.logspace\"></a>\n### [res] torch.logspace([res,] x1, x2, [,n]) ###\n<a name=\"torch.logspace\"></a>\n\n`y = torch.logspace(x1, x2)` returns a one-dimensional `Tensor` of `100` logarithmically eqally spaced points between `10^x1` and `10^x2`.\n\n`y = torch.logspace(x1, x2, n)` returns a one-dimensional `Tensor` of `n` logarithmically equally spaced points between `10^x1` and `10^x2`.\n\n<a name=\"torch.multinomial\"></a>\n### [res] torch.multinomial([res,], p, n, [,replacement]) ###\n<a name=\"torch.multinomial\"></a>\n\n`y = torch.multinomial(p, n)` returns a `Tensor` `y` where each row contains `n` indices sampled from the [multinomial probability distribution](http://en.wikipedia.org/wiki/Multinomial_distribution) located in the corresponding row of `Tensor` `p`.\n\nThe rows of `p` do not need to sum to one (in which case we use the values as weights), but must be non-negative and have a non-zero sum.\nIndices are ordered from left to right according to when each was sampled (first samples are placed in first column).\n\nIf `p` is a vector, `y` is a vector size `n`.\n\nIf `p` is a m-rows matrix, `y` is an `m × n` matrix.\n\nIf `replacement` is `true`, samples are drawn **with 
replacement**.\nIf not, they are drawn **without replacement**, which means that when a sample index is drawn for a row, it cannot be drawn again for that row.\nThis implies the constraint that `n` must be lower than `p` length (or number of columns of `p` if it is a matrix).\n\nThe default value for `replacement` is `false`.\n\n\n```lua\np = torch.Tensor{1, 1, 0.5, 0}\na = torch.multinomial(p, 10000, true)\n\n> a\n...\n[torch.LongTensor of dimension 10000]\n\n> for i = 1, 4 do print(a:eq(i):sum()) end\n3967\n4016\n2017\n0\n```\n\nNote: If you use the function with a given result `Tensor`, i.e. of the function prototype: `torch.multinomial(res, p, n [, replacement])` then you will have to call it slightly differently as:\n\n```lua\np.multinomial(res, p, n, replacement) -- p.multinomial instead of torch.multinomial\n```\n\nThis is due to the fact that the result here is of a `LongTensor` type, and we do not define a `torch.multinomial` over long `Tensor`s.\n\n<a name=\"torch.multinomialAlias()\"></a>\n### [state] torch.multinomialAliasSetup(probs) ###\n### [res] torch.multinomialAlias(output, state)\n`state = torch.multinomialAliasSetup(probs)` returns a table `state` consisting of two `tensors` : `probability table` and an `alias table`. This is required once for each `probs` vectors. We can then sample from the multinomial distribution multiple times by consulting these tensors `state` table.\n\n`torch.multinomialAlias(output, state)` returns `output` filled with indices drawn from the multinomial distribution `probs`. `output` itself is filled with the indices and it is not necessary to get the return value of the statement.\n\nThe sampling is done through a technique defined in a very simple way in this blog about [The Alias Method](https://hips.seas.harvard.edu/blog/2013/03/03/the-alias-method-efficient-sampling-with-many-discrete-outcomes/). 
The paper that describes this technique is present [here](http://www.tandfonline.com/doi/abs/10.1080/00031305.1979.10482697). This can only sample with replacement.\n\nThe `output` `Tensor` that is fed into the `multinomialAlias` method need not be contiguous. The `output` tensor can only be a 1d tensor. If you need to fill an nd tensor, pass a 1d view of the same tensor. This method is exceptionally faster than `torch.multinomial` when you want to sample a lot of samples from the same distribution or sample from the same distribution a large number of times. `torch.multinomial` is faster for sampling few samples from a distribution once because the `multinomialAliasSetup` method takes some time in this case. To see and compare how these two methods differ in speed run `th test/test_aliasMultinomial.lua`.\n\n```lua\n> state = torch.multinomialAliasSetup(probs)\n> state\n{\n  1 : LongTensor - size: 4\n  2 : DoubleTensor - size: 4\n}\n> output = torch.LongTensor(2,3)\n> torch.multinomialAlias(output:view(-1), state)\n 4\n 1\n 2\n 3\n 2\n 2\n[torch.LongTensor of size 6]\n> output\n 4  1  2\n 3  2  2\n[torch.LongTensor of size 2x3]\n```\n\nYou can also allocate memory and reuse it for the state table.\n\n```lua\n> state = {torch.LongTensor(), torch.DoubleTensor()}\n> probs = torch.DoubleTensor({0.2, 0.3, 0.5})\n> state = torch.multinomialAliasSetup(probs, state)\n> state\n{\n  1 : LongTensor - size: 3\n  2 : DoubleTensor - size: 3\n}\n> output = torch.LongTensor(7)\n> torch.multinomialAlias(output, state)\n 2\n 2\n 3\n 1\n 2\n 2\n 2\n[torch.LongTensor of size 7]\n```\n\n<a name=\"torch.ones\"></a>\n### [res] torch.ones([res,] m [,n...]) ###\n<a name=\"torch.ones\"></a>\n\n`y = torch.ones(n)` returns a one-dimensional `Tensor` of size `n` filled with ones.\n\n`y = torch.ones(m, n)` returns a `m × n` `Tensor` filled with ones.\n\nFor more than `4` dimensions, you can use a storage as argument: `y = torch.ones(torch.LongStorage{m, n, k, l, o})`.\n\n\n<a 
name=\"torch.rand\"></a>\n### [res] torch.rand([res,] [gen,] m [,n...]) ###\n<a name=\"torch.rand\"></a>\n\n`y = torch.rand(n)` returns a one-dimensional `Tensor` of size `n` filled with random numbers from a uniform distribution on the interval `[0, 1)`.\n\n`y = torch.rand(m, n)` returns a `m × n` `Tensor` of random numbers from a uniform distribution on the interval `[0, 1)`.\n\nFor more than 4 dimensions, you can use a storage as argument: `y = torch.rand(torch.LongStorage{m, n, k, l, o})`.\n\n`y = torch.rand(gen, m, n)` returns a `m × n` `Tensor` of random numbers from a uniform distribution on the interval `[0, 1)`, using a non-global random number generator `gen` created by [torch.Generator()](random.md#torch.Generator).\n\n<a name=\"torch.randn\"></a>\n### [res] torch.randn([res,] [gen,] m [,n...]) ###\n<a name=\"torch.randn\"></a>\n\n`y = torch.randn(n)` returns a one-dimensional `Tensor` of size `n` filled with random numbers from a normal distribution with mean zero and variance one.\n\n`y = torch.randn(m, n)` returns a `m × n` `Tensor` of random numbers from a normal distribution with mean zero and variance one.\n\nFor more than 4 dimensions, you can use a storage as argument: `y = torch.randn(torch.LongStorage{m, n, k, l, o})`.\n\n`y = torch.randn(gen, m, n)` returns a `m × n` `Tensor` of random numbers from a normal distribution with mean zero and variance one, using a non-global random number generator `gen` created by [torch.Generator()](random.md#torch.Generator).\n\n<a name=\"torch.range\"></a>\n### [res] torch.range([res,] x, y [,step]) ###\n<a name=\"torch.range\"></a>\n\n`y = torch.range(x, y)` returns a `Tensor` of size `floor((y - x) / step) + 1` with values from `x` to `y` with step `step` (default to 1).\n\n```lua\n> torch.range(2, 5)\n 2\n 3\n 4\n 5\n[torch.DoubleTensor of size 4]\n\n> torch.range(2, 5, 1.2)\n 2.0000\n 3.2000\n 4.4000\n[torch.DoubleTensor of size 3]\n```\n\n\n<a name=\"torch.randperm\"></a>\n### [res] torch.randperm([res,] 
[gen,] n) ###\n<a name=\"torch.randperm\"></a>\n\n`y = torch.randperm(n)` returns a random permutation of integers from 1 to `n`.\n\n`y = torch.randperm(gen, n)` returns a random permutation of integers from 1 to `n`, using a non-global random number generator `gen` created by [torch.Generator()](random.md#torch.Generator).\n\n<a name=\"torch.reshape\"></a>\n### [res] torch.reshape([res,] x, m [,n...]) ###\n<a name=\"torch.reshape\"></a>\n\n`y = torch.reshape(x, m, n)` returns a new `m × n` `Tensor` y whose elements are taken rowwise from `x`, which must have `m * n` elements. The elements are copied into the new `Tensor`.\n\nFor more than 4 dimensions, you can use a storage: `y = torch.reshape(x, torch.LongStorage{m, n, k, l, o})`.\n\n\n<a name=\"torch.tril\"></a>\n### [res] torch.tril([res,] x [,k]) ###\n<a name=\"torch.tril\"></a>\n\n`y = torch.tril(x)` returns the lower triangular part of `x`, the other elements of `y` are set to 0.\n\n`torch.tril(x, k)` returns the elements on and below the k-th diagonal of `x` as non-zero.\n`k = 0` is the main diagonal, `k > 0` is above the main diagonal and `k < 0` is below the main diagonal.\n\n\n<a name=\"torch.triu\"></a>\n### [res] torch.triu([res,] x, [,k]) ###\n<a name=\"torch.triu\"></a>\n\n`y = torch.triu(x)` returns the upper triangular part of `x`, the other elements of `y` are set to 0.\n\n`torch.triu(x, k)` returns the elements on and above the k-th diagonal of `x` as non-zero.\n`k = 0` is the main diagonal, `k > 0` is above the main diagonal and `k < 0` is below the main diagonal.\n\n\n<a name=\"torch.zeros\"></a>\n### [res] torch.zeros([res,] x) ###\n<a name=\"torch.zeros\"></a>\n\n`y = torch.zeros(n)` returns a one-dimensional `Tensor` of size n filled with zeros.\n\n`y = torch.zeros(m, n)` returns a `m × n` `Tensor` filled with zeros.\n\nFor more than 4 dimensions, you can use a storage: `y = torch.zeros(torch.LongStorage{m, n, k, l, o})`.\n\n\n<a name=\"torch.elementwise.dok\"></a>\n## Element-wise 
Mathematical Operations ##\n\n<a name=\"torch.abs\"></a>\n### [res] torch.abs([res,] x) ###\n<a name=\"torch.abs\"></a>\n\n`y = torch.abs(x)` returns a new `Tensor` with the absolute values of the elements of `x`.\n\n`x:abs()` replaces all elements in-place with the absolute values of the elements of `x`.\n\n\n<a name=\"torch.sign\"></a>\n### [res] torch.sign([res,] x) ###\n<a name=\"torch.sign\"></a>\n\n`y = torch.sign(x)` returns a new `Tensor` with the sign (`+/- 1`) of the elements of `x`.\n\n`x:sign()` replaces all elements in-place with the sign of the elements of `x`.\n\n\n<a name=\"torch.acos\"></a>\n### [res] torch.acos([res,] x) ###\n<a name=\"torch.acos\"></a>\n\n`y = torch.acos(x)` returns a new `Tensor` with the arccosine of the elements of `x`.\n\n`x:acos()` replaces all elements in-place with the arccosine of the elements of `x`.\n\n\n<a name=\"torch.asin\"></a>\n### [res] torch.asin([res,] x) ###\n<a name=\"torch.asin\"></a>\n\n`y = torch.asin(x)` returns a new `Tensor` with the arcsine  of the elements of `x`.\n\n`x:asin()` replaces all elements in-place with the arcsine  of the elements of `x`.\n\n\n<a name=\"torch.atan\"></a>\n### [res] torch.atan([res,] x) ###\n<a name=\"torch.atan\"></a>\n\n`y = torch.atan(x)` returns a new `Tensor` with the arctangent of the elements of `x`.\n\n`x:atan()` replaces all elements in-place with the arctangent of the elements of `x`.\n\n<a name=\"torch.atan2\"></a>\n### [res] torch.atan2([res,] x, y) ###\n<a name=\"torch.atan2\"></a>\n\n`y = torch.atan2(x, y)` returns a new `Tensor` with the arctangent of the elements of `x` and `y`. 
\nNote that the arctangent of the elements `x` and `y` refers to the signed angle in radians between the rays ending at origin where the first one starts at (1, 0) and the second at (y, x).\n\n`x:atan2()` replaces all elements in-place with the arctangent of the elements of `x` and `y`.\n\n<a name=\"torch.ceil\"></a>\n### [res] torch.ceil([res,] x) ###\n<a name=\"torch.ceil\"></a>\n\n`y = torch.ceil(x)` returns a new `Tensor` with the values of the elements of `x` rounded up to the nearest integers.\n\n`x:ceil()` replaces all elements in-place with the values of the elements of `x` rounded up to the nearest integers.\n\n\n<a name=\"torch.cos\"></a>\n### [res] torch.cos([res,] x) ###\n<a name=\"torch.cos\"></a>\n\n`y = torch.cos(x)` returns a new `Tensor` with the cosine of the elements of `x`.\n\n`x:cos()` replaces all elements in-place with the cosine of the elements of `x`.\n\n\n<a name=\"torch.cosh\"></a>\n### [res] torch.cosh([res,] x) ###\n<a name=\"torch.cosh\"></a>\n\n`y = torch.cosh(x)` returns a new `Tensor` with the hyperbolic cosine of the elements of `x`.\n\n`x:cosh()` replaces all elements in-place with the hyperbolic cosine of the elements of `x`.\n\n\n<a name=\"torch.exp\"></a>\n### [res] torch.exp([res,] x) ###\n<a name=\"torch.exp\"></a>\n\n`y = torch.exp(x)` returns, for each element in `x`,  *e* (*Neper number*, the base of natural logarithms) raised to the power of the element in `x`.\n\n`x:exp()` returns, for each element in `x`,  *e* raised to the power of the element in `x`.\n\n\n<a name=\"torch.floor\"></a>\n### [res] torch.floor([res,] x) ###\n<a name=\"torch.floor\"></a>\n\n`y = torch.floor(x)` returns a new `Tensor` with the values of the elements of `x` rounded down to the nearest integers.\n\n`x:floor()` replaces all elements in-place with the values of the elements of `x` rounded down to the nearest integers.\n\n\n<a name=\"torch.log\"></a>\n### [res] torch.log([res,] x) ###\n<a name=\"torch.log\"></a>\n\n`y = torch.log(x)` returns a 
new `Tensor` with the natural logarithm of the elements of `x`.\n\n`x:log()` replaces all elements in-place with the natural logarithm of the elements of `x`.\n\n\n<a name=\"torch.log1p\"></a>\n### [res] torch.log1p([res,] x) ###\n<a name=\"torch.log1p\"></a>\n\n`y = torch.log1p(x)` returns a new `Tensor` with the natural logarithm of the elements of `x + 1`.\n\n`x:log1p()` replaces all elements in-place with the natural logarithm of the elements of `x + 1`.\nThis function is more accurate than [`log`](#torch.log) for small values of `x`.\n\n\n<a name=\"torch.neg\"></a>\n### x:neg() ###\n\n`x:neg()` replaces all elements in-place with the sign-reversed values of the elements of `x`.\n\n<a name=\"torch.cinv\"></a>\n### x:cinv() ###\n<a name=\"torch.cinv\"></a>\n\n`x:cinv()` replaces all elements in-place with `1.0 / x`.\n\n<a name=\"torch.pow\"></a>\n### [res] torch.pow([res,] x, n) ###\n<a name=\"torch.pow\"></a>\n\nLet `x` be a `Tensor` and `n` a number.\n\n`y = torch.pow(x, n)` returns a new `Tensor` with the elements of `x` to the power of `n`.\n\n`y = torch.pow(n, x)` returns, a new `Tensor` with `n` to the power of the elements of `x`.\n\n`x:pow(n)` replaces all elements in-place with the elements of `x` to the power of `n`.\n\n`torch.pow(x, n, x)` replaces all elements in-place with `n` to the power of the elements of `x`.\n\n<a name=\"torch.round\"></a>\n### [res] torch.round([res,] x) ###\n<a name=\"torch.round\"></a>\n\n`y = torch.round(x)` returns a new `Tensor` with the values of the elements of `x` rounded to the nearest integers.\n\n`x:round()` replaces all elements in-place with the values of the elements of `x` rounded to the nearest integers.\n\n\n<a name=\"torch.sin\"></a>\n### [res] torch.sin([res,] x) ###\n<a name=\"torch.sin\"></a>\n\n`y = torch.sin(x)` returns a new `Tensor` with the sine of the elements of `x`.\n\n`x:sin()` replaces all elements in-place with the sine of the elements of `x`.\n\n\n<a name=\"torch.sinh\"></a>\n### [res] 
torch.sinh([res,] x) ###\n<a name=\"torch.sinh\"></a>\n\n`y = torch.sinh(x)` returns a new `Tensor` with the hyperbolic sine of the elements of `x`.\n\n`x:sinh()` replaces all elements in-place with the hyperbolic sine of the elements of `x`.\n\n\n<a name=\"torch.sqrt\"></a>\n### [res] torch.sqrt([res,] x) ###\n<a name=\"torch.sqrt\"></a>\n\n`y = torch.sqrt(x)` returns a new `Tensor` with the square root of the elements of `x`.\n\n`x:sqrt()` replaces all elements in-place with the square root of the elements of `x`.\n\n\n<a name=\"torch.rsqrt\"></a>\n### [res] torch.rsqrt([res,] x) ###\n<a name=\"torch.rsqrt\"></a>\n\n`y = torch.rsqrt(x)` returns a new `Tensor` with the reciprocal of the square root of the elements of `x`.\n\n`x:rsqrt()` replaces all elements in-place with the reciprocal of the square root of the elements of `x`.\n\n\n<a name=\"torch.tan\"></a>\n### [res] torch.tan([res,] x) ###\n<a name=\"torch.tan\"></a>\n\n`y = torch.tan(x)` returns a new `Tensor` with the tangent of the elements of `x`.\n\n`x:tan()` replaces all elements in-place with the tangent of the elements of `x`.\n\n\n<a name=\"torch.tanh\"></a>\n### [res] torch.tanh([res,] x) ###\n<a name=\"torch.tanh\"></a>\n\n`y = torch.tanh(x)` returns a new `Tensor` with the hyperbolic tangent of the elements of `x`.\n\n`x:tanh()` replaces all elements in-place with the hyperbolic tangent of the elements of `x`.\n\n\n<a name=\"torch.sigmoid\"></a>\n### [res] torch.sigmoid([res,] x) ###\n<a name=\"torch.sigmoid\"></a>\n\n`y = torch.sigmoid(x)` returns a new `Tensor` with the sigmoid of the elements of `x`.\n\n`x:sigmoid()` replaces all elements in-place with the sigmoid of the elements of `x`.\n\n\n<a name=\"torch.trunc\"></a>\n### [res] torch.trunc([res,] x) ###\n<a name=\"torch.trunc\"></a>\n\n`y = torch.trunc(x)` returns a new `Tensor` with the truncated integer values of the elements of `x`.\n\n`x:trunc()` replaces all elements in-place with the truncated integer values of the elements of 
`x`.\n\n\n<a name=\"torch.frac\"></a>\n### [res] torch.frac([res,] x) ###\n<a name=\"torch.frac\"></a>\n\n`y = torch.frac(x)` returns a new `Tensor` with the fractional portion of the elements of `x`.\n\n`x:frac()` replaces all elements in-place with the fractional portion of the elements of `x`.\n\n\n<a name=\"torch.basicoperations.dok\"></a>\n## Basic operations ##\n\nIn this section, we explain basic mathematical operations for `Tensor`s.\n\n<a name=\"torch.equal\"></a>\n### [boolean] equal([tensor1,] tensor2) ###\n<a name=\"torch.equal\"></a>\n\nReturns `true` iff the dimensions and values of `tensor1` and `tensor2` are exactly the same.\n\n```lua\nx = torch.Tensor{1,2,3}\ny = torch.Tensor{1,2,3}\n> x:equal(y)\ntrue\n\ny = torch.Tensor{1,2,4}\n> x:equal(y)\nfalse\n```\n\nNote that `a:equal(b)` is more efficient than `a:eq(b):all()` as it avoids allocation of a temporary tensor and can short-circuit.\n\n<a name=\"torch.add\"></a>\n### [res] torch.add([res,] tensor, value) ###\n<a name=\"torch.add\"></a>\n\nAdd the given value to all elements in the `Tensor`.\n\n`y = torch.add(x, value)` returns a new `Tensor`.\n\n`x:add(value)` adds `value` to all elements in place.\n\n\n<a name=\"torch.add\"></a>\n### [res] torch.add([res,] tensor1, tensor2) ###\n<a name=\"torch.add\"></a>\n\nAdd `tensor1` to `tensor2` and put result into `res`.\nThe number of elements must match, but sizes do not matter.\n\n```lua\n> x = torch.Tensor(2, 2):fill(2)\n> y = torch.Tensor(4):fill(3)\n> x:add(y)\n> x\n 5  5\n 5  5\n[torch.DoubleTensor of size 2x2]\n```\n\n`y = torch.add(a, b)` returns a new `Tensor`.\n\n`torch.add(y, a, b)` puts `a + b` in `y`.\n\n`a:add(b)` accumulates all elements of `b` into `a`.\n\n`y:add(a, b)` puts `a + b` in `y`.\n\n\n<a name=\"torch.add\"></a>\n### [res] torch.add([res,] tensor1, value, tensor2) ###\n<a name=\"torch.add\"></a>\n\nMultiply elements of `tensor2` by the scalar `value` and add it to `tensor1`.\nThe number of elements must match, but sizes do not 
matter.\n\n```lua\n> x = torch.Tensor(2, 2):fill(2)\n> y = torch.Tensor(4):fill(3)\n> x:add(2, y)\n> x\n 8  8\n 8  8\n[torch.DoubleTensor of size 2x2]\n```\n\n`x:add(value, y)` multiply-accumulates values of `y` into `x`.\n\n`z:add(x, value, y)` puts the result of `x + value * y` in `z`.\n\n`torch.add(x, value, y)` returns a new `Tensor` `x + value * y`.\n\n`torch.add(z, x, value, y)` puts the result of `x + value * y` in `z`.\n\n\n<a name=\"torch.csub\"></a>\n### tensor:csub(value) ###\n<a name=\"torch.csub\"></a>\n\nSubtracts the given value from all elements in the `Tensor`, in place.\n\n<a name=\"torch.csub\"></a>\n### tensor:csub(tensor2) ###\n<a name=\"torch.csub\"></a>\n\nSubtracts `tensor2` from `tensor`, in place.\nThe number of elements must match, but sizes do not matter.\n\n```lua\n> x = torch.Tensor(2, 2):fill(8)\n> y = torch.Tensor(4):fill(3)\n> x:csub(y)\n> x\n 5  5\n 5  5\n[torch.DoubleTensor of size 2x2]\n```\n\n`a:csub(b)` puts `a - b` into `a`.\n\n\n<a name=\"torch.mul\"></a>\n### [res] torch.mul([res,] tensor1, value) ###\n<a name=\"torch.mul\"></a>\n\nMultiply all elements in the `Tensor` by the given `value`.\n\n`z = torch.mul(x, 2)` will return a new `Tensor` with the result of `x * 2`.\n\n`torch.mul(z, x, 2)` will put the result of `x * 2` in `z`.\n\n`x:mul(2)` will multiply all elements of `x` with `2` in-place.\n\n`z:mul(x, 2)` will put the result of `x * 2` in `z`.\n\n\n<a name=\"torch.clamp\"></a>\n### [res] torch.clamp([res,] tensor, min_value, max_value) ###\n<a name=\"torch.clamp\"></a>\n\nClamp all elements in the `Tensor` into the range `[min_value, max_value]`.  
ie:\n\n```\n      ⎧ min_value, if x_i < min_value\ny_i = ⎨ x_i,       if min_value ≤ x_i ≤ max_value\n      ⎩ max_value, if x_i > max_value\n```\n\n`z = torch.clamp(x, 0, 1)` will return a new `Tensor` with the result of `x` bounded between `0` and `1`.\n\n`torch.clamp(z, x, 0, 1)` will put the result in `z`.\n\n`x:clamp(0, 1)` will perform the clamp operation in place (putting the result in `x`).\n\n`z:clamp(x, 0, 1)` will put the result in `z`.\n\n\n<a name=\"torch.cmul\"></a>\n### [res] torch.cmul([res,] tensor1, tensor2) ###\n<a name=\"torch.cmul\"></a>\n\nElement-wise multiplication of `tensor1` by `tensor2`.\nThe number of elements must match, but sizes do not matter.\n\n```lua\n> x = torch.Tensor(2, 2):fill(2)\n> y = torch.Tensor(4):fill(3)\n> x:cmul(y)\n> = x\n 6  6\n 6  6\n[torch.DoubleTensor of size 2x2]\n```\n\n`z = torch.cmul(x, y)` returns a new `Tensor`.\n\n`torch.cmul(z, x, y)` puts the result in `z`.\n\n`y:cmul(x)` multiplies all elements of `y` with corresponding elements of `x`.\n\n`z:cmul(x, y)` puts the result in `z`.\n\n\n<a name=\"torch.cpow\"></a>\n### [res] torch.cpow([res,] tensor1, tensor2) ###\n<a name=\"torch.cpow\"></a>\n\nElement-wise power operation, taking the elements of `tensor1` to the powers given by elements of `tensor2`.\nThe number of elements must match, but sizes do not matter.\n\n```lua\n> x = torch.Tensor(2, 2):fill(2)\n> y = torch.Tensor(4):fill(3)\n> x:cpow(y)\n> x\n 8  8\n 8  8\n[torch.DoubleTensor of size 2x2]\n```\n\n`z = torch.cpow(x, y)` returns a new `Tensor`.\n\n`torch.cpow(z, x, y)` puts the result in `z`.\n\n`y:cpow(x)` takes all elements of `y` to the powers given by the corresponding elements of `x`.\n\n`z:cpow(x, y)` puts the result in `z`.\n\n\n<a name=\"torch.addcmul\"></a>\n### [res] torch.addcmul([res,] x [,value], tensor1, tensor2) ###\n<a name=\"torch.addcmul\"></a>\n\nPerforms the element-wise multiplication of `tensor1` by `tensor2`, multiply the result by the scalar `value` (1 if not present) and add 
it to `x`.\nThe number of elements must match, but sizes do not matter.\n\n```lua\n> x = torch.Tensor(2, 2):fill(2)\n> y = torch.Tensor(4):fill(3)\n> z = torch.Tensor(2, 2):fill(5)\n> x:addcmul(2, y, z)\n> x\n 32  32\n 32  32\n[torch.DoubleTensor of size 2x2]\n```\n\n`z:addcmul(value, x, y)` accumulates the result in `z`.\n\n`torch.addcmul(z, value, x, y)` returns a new `Tensor` with the result.\n\n`torch.addcmul(z, z, value, x, y)` puts the result in `z`.\n\n\n<a name=\"torch.div\"></a>\n### [res] torch.div([res,] tensor, value) ###\n<a name=\"torch.div\"></a>\n\nDivide all elements in the `Tensor` by the given `value`.\n\n`z = torch.div(x, 2)` will return a new `Tensor` with the result of `x / 2`.\n\n`torch.div(z, x, 2)` will put the result of `x / 2` in `z`.\n\n`x:div(2)` will divide all elements of `x` with `2` in-place.\n\n`z:div(x, 2)` puts the result of `x / 2` in `z`.\n\n\n<a name=\"torch.cdiv\"></a>\n### [res] torch.cdiv([res,] tensor1, tensor2) ###\n<a name=\"torch.cdiv\"></a>\n\nPerforms the element-wise division of `tensor1` by `tensor2`.\nThe number of elements must match, but sizes do not matter.\n\n```lua\n> x = torch.Tensor(2, 2):fill(1)\n> y = torch.range(1, 4)\n> x:cdiv(y)\n> x\n 1.0000  0.5000\n 0.3333  0.2500\n[torch.DoubleTensor of size 2x2]\n```\n\n`z = torch.cdiv(x, y)` returns a new `Tensor`.\n\n`torch.cdiv(z, x, y)` puts the result in `z`.\n\n`y:cdiv(x)` divides all elements of `y` with corresponding elements of `x`.\n\n`z:cdiv(x, y)` puts the result in `z`.\n\n<a name=\"torch.lshift\"></a>\n### [res] torch.lshift([res,] tensor, value) ###\n<a name=\"torch.lshift\"></a>\n\nLeft shift all elements in the `Tensor` by the given `value`.\n\n`z = torch.lshift(x, 2)` will return a new `Tensor` with the result of `x << 2`.\n\n`torch.lshift(z, x, 2)` will put the result of `x << 2` in `z`.\n\n`x:lshift(2)` will perform left shift operation all elements of `x` by `2` bits.\n\n`z:lshift(x, 2)` puts the result of `x << 2` in `z`.\n\nNote: For float 
type tensors, `x:lshift(value)` evaluates `x:mul(math.pow(2, value))` internally.\n\n<a name=\"torch.clshift\"></a>\n### [res] torch.clshift([res,] tensor1, tensor2) ###\n<a name=\"torch.clshift\"></a>\n\nPerforms the left shift operation of each element in `tensor1` by each element in `tensor2`.\nThe number of elements must match, but sizes do not matter.\n\n```lua\n> x = torch.LongTensor(2, 2):fill(1)\n> y = torch.LongTensor(2, 2):range(1, 4)\n> x:clshift(y)\n> x\n 2  4\n 8 16\n[torch.LongTensor of size 2x2]\n```\n\n`z = torch.clshift(x, y)` returns a new `Tensor`.\n\n`torch.clshift(z, x, y)` puts the result in `z`.\n\n`y:clshift(x)` left shifts all elements of `y` with corresponding elements of `x`.\n\n`z:clshift(x, y)` puts the result in `z`.\n\n<a name=\"torch.rshift\"></a>\n### [res] torch.rshift([res,] tensor, value) ###\n<a name=\"torch.rshift\"></a>\n\nRight shift all elements in the `Tensor` by the given `value`.\n\n`z = torch.rshift(x, 2)` will return a new `Tensor` with the result of `x >> 2`.\n\n`torch.rshift(z, x, 2)` will put the result of `x >> 2` in `z`.\n\n`x:rshift(2)` will perform right shift operation on all elements of `x` by `2` bits.\n\n`z:rshift(x, 2)` puts the result of `x >> 2` in `z`.\n\nNote: For float type tensors, `x:rshift(value)` evaluates `x:div(math.pow(2, value))` internally.\n\n<a name=\"torch.crshift\"></a>\n### [res] torch.crshift([res,] tensor1, tensor2) ###\n<a name=\"torch.crshift\"></a>\n\nPerforms the right shift operation of each element in `tensor1` by each element in `tensor2`.\nThe number of elements must match, but sizes do not matter.\n\n```lua\n> x = torch.LongTensor(2, 2):fill(32)\n> y = torch.LongTensor(2, 2):range(1, 4)\n> x:crshift(y)\n> x\n 16 8\n  4 2\n[torch.LongTensor of size 2x2]\n```\n\n`z = torch.crshift(x, y)` returns a new `Tensor`.\n\n`torch.crshift(z, x, y)` puts the result in `z`.\n\n`y:crshift(x)` right shifts all elements of `y` with corresponding elements of `x`.\n\n`z:crshift(x, y)` puts the result 
in `z`.\n\n<a name=\"torch.addcdiv\"></a>\n### [res] torch.addcdiv([res,] x [,value], tensor1, tensor2) ###\n<a name=\"torch.addcdiv\"></a>\n\nPerforms the element-wise division of `tensor1` by `tensor2`, multiply the result by the scalar `value` and add it to `x`.\nThe number of elements must match, but sizes do not matter.\n\n```lua\n> x = torch.Tensor(2, 2):fill(1)\n> y = torch.range(1, 4)\n> z = torch.Tensor(2, 2):fill(5)\n> x:addcdiv(2, y, z)\n> x\n 1.4000  1.8000\n 2.2000  2.6000\n[torch.DoubleTensor of size 2x2]\n```\n\n`z:addcdiv(value, x, y)` accumulates the result in `z`.\n\n`torch.addcdiv(z, value, x, y)` returns a new `Tensor` with the result.\n\n`torch.addcdiv(z, z, value, x, y)` puts the result in `z`.\n\n\n<a name=\"torch.fmod\"></a>\n### [res] torch.fmod([res,] tensor, value) ###\n<a name=\"torch.fmod\"></a>\n\nComputes remainder of division (rounded towards zero) of all elements in the `Tensor` by `value`.\nThis works both for integer and floating point numbers. It behaves the same as Lua built-in function `math.fmod()` and a little bit different from `torch.remainder()` and `%` operator. 
For example:\n\n```lua\n> x = torch.Tensor({-3, 3})\n> torch.fmod(x, 2)\n-1\n 1\n[torch.DoubleTensor of size 2]\n\n> torch.fmod(x, -2)\n-1\n 1\n[torch.DoubleTensor of size 2]\n\n> torch.remainder(x, 2)\n 1\n 1\n[torch.DoubleTensor of size 2]\n\n> torch.remainder(x, -2)\n-1\n-1\n[torch.DoubleTensor of size 2]\n```\n\n`z = torch.fmod(x, 2)` will return a new `Tensor` with the result of `math.fmod(x, 2)`.\n\n`torch.fmod(z, x, 2)` will put the result of `math.fmod(x, 2)` in `z`.\n\n`x:fmod(2)` will replace all elements of `x` with the result of `math.fmod(x, 2)` in-place.\n\n`z:fmod(x, 2)` puts the result of `math.fmod(x, 2)` in `z`.\n\n\n<a name=\"torch.remainder\"></a>\n### [res] torch.remainder([res,] tensor, value) ###\n<a name=\"torch.remainder\"></a>\n\nComputes remainder of division (rounded towards negative infinity) of all elements in the `Tensor` by `value`.\nThis works both for integer and floating point numbers. It behaves the same as `%` operator and can be expressed as `a % b = a - b * floor(a/b)`. See `torch.fmod()` for comparison.\n\n`z = torch.remainder(x, 2)` will return a new `Tensor` with the result of `x % 2`.\n\n`torch.remainder(z, x, 2)` will put the result of `x % 2` in `z`.\n\n`x:remainder(2)` will replace all elements of `x` with the result of `x % 2` in-place.\n\n`z:remainder(x, 2)` puts the result of `x % 2` in `z`.\n\n\n<a name=\"torch.mod\"></a>\n### [res] torch.mod([res,] tensor, value) ###\n<a name=\"torch.mod\"></a>\n\nThis function is deprecated and exists only for compatibility with previous versions. 
Please use `torch.fmod()` or `torch.remainder()` instead.\n\n\n<a name=\"torch.cfmod\"></a>\n### [res] torch.cfmod([res,] tensor1, tensor2) ###\n<a name=\"torch.cfmod\"></a>\n\nComputes the element-wise remainder of the division (rounded towards zero) of `tensor1` by `tensor2`.\nThe number of elements must match, but sizes do not matter.\n\n```lua\n> x = torch.Tensor({{3, 3}, {-3, -3}})\n> y = torch.Tensor({{2, -2}, {2, -2}})\n> x:cfmod(y)\n 1  1\n-1 -1\n[torch.DoubleTensor of size 2x2]\n```\n\n`z = torch.cfmod(x, y)` returns a new `Tensor`.\n\n`torch.cfmod(z, x, y)` puts the result in `z`.\n\n`y:cfmod(x)` replaces all elements of `y` by their remainders of division (rounded towards zero) by\ncorresponding elements of `x`.\n\n`z:cfmod(x, y)` puts the result in `z`.\n\n\n<a name=\"torch.cremainder\"></a>\n### [res] torch.cremainder([res,] tensor1, tensor2) ###\n<a name=\"torch.cremainder\"></a>\n\nComputes element-wise remainder of the division (rounded towards negative infinity) of `tensor1` by `tensor2`.\nThe number of elements must match, but sizes do not matter.\n\n```lua\n> x = torch.Tensor({{3, 3}, {-3, -3}})\n> y = torch.Tensor({{2, -2}, {2, -2}})\n> x:cremainder(y)\n 1 -1\n 1 -1\n[torch.DoubleTensor of size 2x2]\n```\n\n`z = torch.cremainder(x, y)` returns a new `Tensor`.\n\n`torch.cremainder(z, x, y)` puts the result in `z`.\n\n`y:cremainder(x)` replaces all elements of `y` by their remainders of division (rounded towards negative infinity) by\ncorresponding elements of `x`.\n\n`z:cremainder(x, y)` puts the result in `z`.\n\n\n<a name=\"torch.cmod\"></a>\n### [res] torch.cmod([res,] tensor1, tensor2) ###\n<a name=\"torch.cmod\"></a>\n\nThis function is deprecated and exists only for compatibility with previous versions. 
Please use `torch.cfmod()` or `torch.cremainder()` instead.\n\n<a name=\"torch.bitand\"></a>\n### [res] torch.bitand([res,] tensor, value) ###\n<a name=\"torch.bitand\"></a>\n\nPerforms bitwise `and` operation on all elements in the `Tensor` by the given `value`.\n\n`z = torch.bitand(x, value)` will return a new `Tensor` with the result of `x & value`.\n\n`torch.bitand(z, x, value)` will put the result of `x & value` in `z`.\n\n`x:bitand(value)` will perform bitwise `and` operation on all elements of `x` with `value` in-place.\n\n`z:bitand(x, value)` puts the result of `x & value` in `z`.\n\nNote: This function is only supported for [Int|Long|Byte]Tensors\n\n<a name=\"torch.cbitand\"></a>\n### [res] torch.cbitand([res,] tensor1, tensor2) ###\n<a name=\"torch.cbitand\"></a>\n\nPerforms bitwise `and` operation of each element in `tensor1` by each element in `tensor2`.\nThe number of elements must match, but sizes do not matter.\n\n```lua\n> x = torch.LongTensor(4):fill(6)\n> y = torch.LongTensor{1, 2, 4, 8}\n> x:cbitand(y)\n> x\n  0\n  2\n  4\n  0\n[torch.LongTensor of size 4]\n```\n`z = torch.cbitand(x, y)` returns a new `Tensor`.\n\n`torch.cbitand(z, x, y)` puts the result in `z`.\n\n`y:cbitand(x)` performs bitwise `and` all elements of `y` with corresponding elements of `x`.\n\n`z:cbitand(x, y)` puts the result in `z`.\n\n\nNote: This function is only supported for [Int|Long|Byte]Tensors\n\n<a name=\"torch.bitor\"></a>\n### [res] torch.bitor([res,] tensor, value) ###\n<a name=\"torch.bitor\"></a>\n\nPerforms bitwise `or` operation on all elements in the `Tensor` by the given `value`.\n\n`z = torch.bitor(x, value)` will return a new `Tensor` with the result of `x | value`.\n\n`torch.bitor(z, x, value)` will put the result of `x | value` in `z`.\n\n`x:bitor(value)` will perform bitwise `or` operation on all elements of `x` with `value` in-place.\n\n`z:bitor(x, value)` puts the result of `x | value` in `z`.\n\nNote: This function is only supported for [Int|Long|Byte]Tensors\n\n<a 
name=\"torch.cbitor\"></a>\n### [res] torch.cbitor([res,] tensor1, tensor2) ###\n<a name=\"torch.cbitor\"></a>\n\nPerforms bitwise `or` operation of each element in `tensor1` by each element in `tensor2`.\nThe number of elements must match, but sizes do not matter.\n\n```lua\n> x = torch.LongTensor(4):fill(3)\n> y = torch.LongTensor{1, 2, 4, 8}\n> x:cbitor(y)\n> x\n  3\n  3\n  7\n 11\n[torch.LongTensor of size 4]\n```\n`z = torch.cbitor(x, y)` returns a new `Tensor`.\n\n`torch.cbitor(z, x, y)` puts the result in `z`.\n\n`y:cbitor(x)` performs bitwise `or` all elements of `y` with corresponding elements of `x`.\n\n`z:cbitor(x, y)` puts the result in `z`.\n\nNote: This function is only supported for [Int|Long|Byte]Tensors\n\n<a name=\"torch.bitxor\"></a>\n### [res] torch.bitxor([res,] tensor, value) ###\n<a name=\"torch.bitxor\"></a>\n\nPerforms bitwise `xor` operation on all elements in the `Tensor` by the given `value`.\n\n`z = torch.bitxor(x, value)` will return a new `Tensor` with the result of `x ^ value`.\n\n`torch.bitxor(z, x, value)` will put the result of `x ^ value` in `z`.\n\n`x:bitxor(value)` will perform bitwise `xor` operation on all elements of `x` with `value` in-place.\n\n`z:bitxor(x, value)` puts the result of `x ^ value` in `z`.\n\nNote: This function is only supported for [Int|Long|Byte]Tensors\n\n<a name=\"torch.cbitxor\"></a>\n### [res] torch.cbitxor([res,] tensor1, tensor2) ###\n<a name=\"torch.cbitxor\"></a>\n\nPerforms bitwise `xor` operation of each element in `tensor1` by each element in `tensor2`.\nThe number of elements must match, but sizes do not matter.\n\n```lua\n> x = torch.LongTensor(4):fill(15)\n> y = torch.LongTensor{1, 2, 4, 8}\n> x:cbitxor(y)\n> x\n  14\n  13\n  11\n   7\n[torch.LongTensor of size 4]\n```\n`z = torch.cbitxor(x, y)` returns a new `Tensor`.\n\n`torch.cbitxor(z, x, y)` puts the result in `z`.\n\n`y:cbitxor(x)` performs bitwise `xor` all elements of `y` with corresponding elements of `x`.\n\n`z:cbitxor(x, y)` puts the result 
in `z`.\n\nNote: This function is only supported for [Int|Long|Byte]Tensors\n\n<a name=\"torch.dot\"></a>\n### [number] torch.dot(tensor1, tensor2) ###\n<a name=\"torch.dot\"></a>\n\nPerforms the dot product between `tensor1` and `tensor2`.\nThe number of elements must match: both `Tensor`s are seen as a 1D vector.\n\n```lua\n> x = torch.Tensor(2, 2):fill(2)\n> y = torch.Tensor(4):fill(3)\n> x:dot(y)\n24\n```\n\n`torch.dot(x, y)` returns dot product of `x` and `y`.\n`x:dot(y)` returns dot product of `x` and `y`.\n\n\n<a name=\"torch.addmv\"></a>\n### [res] torch.addmv([res,] [v1,] vec1, [v2,] mat, vec2) ###\n<a name=\"torch.addmv\"></a>\n\nPerforms a matrix-vector multiplication between `mat` (2D `Tensor`) and `vec2` (1D `Tensor`) and add it to `vec1`.\n\nOptional values `v1` and `v2` are scalars that multiply `vec1` and `vec2` respectively.\n\nIn other words,\n\n```\nres = (v1 * vec1) + (v2 * (mat * vec2))\n```\n\nSizes must respect the matrix-multiplication operation: if `mat` is a `n × m` matrix, `vec2` must be vector of size `m` and `vec1` must be a vector of size `n`.\n\n```lua\n> x = torch.Tensor(3):fill(0)\n> M = torch.Tensor(3, 2):fill(3)\n> y = torch.Tensor(2):fill(2)\n> x:addmv(M, y)\n> x\n 12\n 12\n 12\n[torch.DoubleTensor of size 3]\n```\n\n`torch.addmv(x, y, z)` returns a new `Tensor` with the result.\n\n`torch.addmv(r, x, y, z)` puts the result in `r`.\n\n**Differences when used as a method**\n\n`x:addmv(y, z)` does `x = x + y * z`\n\n`r:addmv(x, y, z)`  does `r = x + y * z` if x is a vector\n\n`r:addmv(s, y, z)`   does `r = r + s * y * z` if `s` is a scalar.\n\n`r:addmv(x, s, y, z)`   does `r = x + s * y * z` if `s` is a scalar and `x` is a vector.\n\n`r:addmv(s1, s2, y, z)`   does `r = s1 * r + s2 * y * z` if `s1` and `s2` are scalars.\n\nThe last example does not accurately fit into the function signature, and needs a special mention. 
It changes the function signature to:\n\n`[vec1] = vec1:addmv([v1,] [v2,] mat, vec2)`\n\n<a name=\"torch.addr\"></a>\n### [res] torch.addr([res,] [v1,] mat, [v2,] vec1, vec2) ###\n<a name=\"torch.addr\"></a>\n\nPerforms the outer-product between `vec1` (1D `Tensor`) and `vec2` (1D `Tensor`).\n\nOptional values `v1` and `v2` are scalars that multiply `mat` and `vec1 [out] vec2` respectively.\n\nIn other words,\n\n```\nres_ij = (v1 * mat_ij) + (v2 * vec1_i * vec2_j)\n```\n\nIf `vec1` is a vector of size `n` and `vec2` is a vector of size `m`, then `mat` must be a matrix of size `n × m`.\n\n```lua\n> x = torch.range(1, 3)\n> y = torch.range(1, 2)\n> M = torch.Tensor(3, 2):zero()\n> M:addr(x, y)\n 1  2         --     |0 0|     |1 2|\n 2  4         -- = 1*|0 0| + 1*|2 4|\n 3  6         --     |0 0|     |3 6|\n[torch.DoubleTensor of size 3x2]\n-- default values of v1 and v2 are 1.\n\n> M:addr(2, 1, x, y)\n  3   6        --     |1 2|     |1 2|\n  6  12        -- = 2*|2 4| + 1*|2 4|\n  9  18        --     |3 6|     |3 6|\n[torch.DoubleTensor of size 3x2]\n\n> A = torch.range(1, 6):resize(3, 2)\n> A\n 1  2\n 3  4\n 5  6\n[torch.DoubleTensor of size 3x2]\n> M:addr(2, A, 1, x, y)\n  3   6        --   |1 2|     |1 2|\n  8  12        -- 2*|3 4| + 1*|2 4|\n 13  18        --   |5 6|     |3 6|\n[torch.DoubleTensor of size 3x2]\n```\n\n`torch.addr(M, x, y)` returns the result in a new `Tensor`.\n\n`torch.addr(r, M, x, y)` puts the result in `r`.\n\n`M:addr(x, y)` puts the result in `M`.\n\n`r:addr(M, x, y)` puts the result in `r`.\n\n\n<a name=\"torch.addmm\"></a>\n### [res] torch.addmm([res,] [v1,] M, [v2,] mat1, mat2) ###\n<a name=\"torch.addmm\"></a>\n\nPerforms a matrix-matrix multiplication between `mat1` (2D `Tensor`) and `mat2` (2D `Tensor`).\n\nOptional values `v1` and `v2` are scalars that multiply `M` and `mat1 * mat2` respectively.\n\nIn other words,\n\n```\nres = (v1 * M) + (v2 * mat1 * mat2)\n```\n\nIf `mat1` is a `n × m` matrix, `mat2` a `m × p` matrix, `M` must be a 
`n × p` matrix.\n\n`torch.addmm(M, mat1, mat2)` returns the result in a new `Tensor`.\n\n`torch.addmm(r, M, mat1, mat2)` puts the result in `r`.\n\n**Differences when used as a method**\n\n`M:addmm(mat1, mat2)` does `M = M + mat1 * mat2`.\n\n`r:addmm(M, mat1, mat2)`  does `r = M + mat1 * mat2`.\n\n`r:addmm(v1, M, v2, mat1, mat2)` does `r = (v1 * M) + (v2 * mat1 * mat2)`.\n\n`M:addmm(v1, v2, mat1, mat2)` does `M = (v1 * M) + (v2 * mat1 * mat2)`.\n\nThe last example does not accurately fit into the function signature, and needs a special mention. It changes the function signature to:\n\n`[M] = M:addmm([v1,] [v2,] mat1, mat2)`\n\n\n<a name=\"torch.addbmm\"></a>\n### [res] torch.addbmm([res,] [v1,] M, [v2,] batch1, batch2) ###\n<a name=\"torch.addbmm\"></a>\n\nBatch matrix matrix product of matrices stored in `batch1` and `batch2`, with a reduced add step (all matrix multiplications get accumulated in a single place).\n\n`batch1` and `batch2` must be 3D `Tensor`s each containing the same number of matrices.\nIf `batch1` is a `b × n × m` `Tensor`, `batch2` a `b × m × p` `Tensor`, res will be a `n × p` `Tensor`.\n\nIn other words,\n\n```\nres = (v1 * M) + (v2 * sum(batch1_i * batch2_i, i = 1, b))\n```\n\n`torch.addbmm(M, x, y)` puts the result in a new `Tensor`.\n\n`M:addbmm(x, y)` puts the result in `M`, resizing `M` if necessary.\n\n`M:addbmm(beta, M2, alpha, x, y)` puts the result in `M`, resizing `M` if necessary.\n\n\n<a name=\"torch.baddbmm\"></a>\n### [res] torch.baddbmm([res,] [v1,] M, [v2,] batch1, batch2) ###\n<a name=\"torch.baddbmm\"></a>\n\nBatch matrix matrix product of matrices stored in `batch1` and `batch2`, with batch add.\n\n`batch1` and `batch2` must be 3D `Tensor`s each containing the same number of matrices.\nIf `batch1` is a `b × n × m` `Tensor`, `batch2` a `b × m × p` `Tensor`, res will be a `b × n × p` `Tensor`.\n\nIn other words,\n\n```\nres_i = (v1 * M_i) + (v2 * batch1_i * batch2_i)\n```\n\n`torch.baddbmm(M, x, y)` puts the result in a new 
`Tensor`.\n\n`M:baddbmm(x, y)` puts the result in `M`, resizing `M` if necessary.\n\n`M:baddbmm(beta, M2, alpha, x, y)` puts the result in `M`, resizing `M` if necessary.\n\n\n<a name=\"torch.mv\"></a>\n### [res] torch.mv([res,] mat, vec) ###\n<a name=\"torch.mv\"></a>\n\nMatrix vector product of `mat` and `vec`.\nSizes must respect the matrix-multiplication operation: if `mat` is a `n × m` matrix, `vec` must be vector of size `m` and `res` must be a vector of size `n`.\n\n`torch.mv(x, y)` puts the result in a new `Tensor`.\n\n`torch.mv(M, x, y)` puts the result in `M`.\n\n`M:mv(x, y)` puts the result in `M`.\n\n\n<a name=\"torch.mm\"></a>\n### [res] torch.mm([res,] mat1, mat2) ###\n<a name=\"torch.mm\"></a>\n\nMatrix matrix product of `mat1` and `mat2`.\nIf `mat1` is a `n × m` matrix, `mat2` a `m × p` matrix, `res` must be a `n × p` matrix.\n\n`torch.mm(x, y)` puts the result in a new `Tensor`.\n\n`torch.mm(M, x, y)` puts the result in `M`.\n\n`M:mm(x, y)` puts the result in `M`.\n\n\n<a name=\"torch.bmm\"></a>\n### [res] torch.bmm([res,] batch1, batch2) ###\n<a name=\"torch.bmm\"></a>\n\nBatch matrix matrix product of matrices stored in `batch1` and `batch2`.\n`batch1` and `batch2` must be 3D `Tensor`s each containing the same number of matrices.\nIf `batch1` is a `b × n × m` `Tensor`, `batch2` a `b × m × p` `Tensor`, `res` will be a `b × n × p` `Tensor`.\n\n`torch.bmm(x, y)` puts the result in a new `Tensor`.\n\n`torch.bmm(M, x, y)` puts the result in `M`, resizing `M` if necessary.\n\n`M:bmm(x, y)` puts the result in `M`, resizing `M` if necessary.\n\n\n<a name=\"torch.ger\"></a>\n### [res] torch.ger([res,] vec1, vec2) ###\n<a name=\"torch.ger\"></a>\n\nOuter product of `vec1` and `vec2`.\nIf `vec1` is a vector of size `n` and `vec2` is a vector of size `m`, then `res` must be a matrix of size `n × m`.\n\n`torch.ger(x, y)` puts the result in a new `Tensor`.\n\n`torch.ger(M, x, y)` puts the result in `M`.\n\n`M:ger(x, y)` puts the result in `M`.\n\n\n<a 
name=\"torch.lerp\"></a>\n### [res] torch.lerp([res,] a, b, weight) ###\n<a name=\"torch.lerp\"></a>\n\nLinear interpolation of two scalars or tensors based on a weight: `res = a + weight * (b - a)`\n\n`torch.lerp(a, b, weight)` puts the result in a new `Tensor` if `a` and `b` are tensors. If `a` and `b` are scalars the functions returns a number.\n\n`torch.lerp(M, a, b, weight)` puts the result in `M`.\n\n`M:lerp(a, b, weight)` puts the result in `M`.\n\n\n## Overloaded operators ##\n\nIt is possible to use basic mathematical operators like `+`, `-`, `/`, `*` and `%` with `Tensor`s.\nThese operators are provided as a convenience.\nWhile they might be handy, they create and return a new `Tensor` containing the results.\nThey are thus not as fast as the operations available in the [previous section](#torch.BasicOperations.dok).\n\nAnother important point to note is that these operators are only overloaded when the first operand is a `Tensor`.\nFor example, this will NOT work:\n\n```lua\n> x = 5 + torch.rand(3)\n```\n\n\n### Addition and subtraction ###\n\nYou can add a `Tensor` to another one with the `+` operator.\nSubtraction is done with `-`.\nThe number of elements in the `Tensor`s must match, but the sizes do not matter.\nThe size of the returned `Tensor` will be the size of the first `Tensor`.\n\n```lua\n> x = torch.Tensor(2, 2):fill(2)\n> y = torch.Tensor(4):fill(3)\n> = x + y\n 5  5\n 5  5\n[torch.DoubleTensor of size 2x2]\n\n> = y - x\n 1\n 1\n 1\n 1\n[torch.DoubleTensor of size 4]\n```\n\nA scalar might also be added or subtracted to a `Tensor`.\nThe scalar needs to be on the right of the operator.\n\n```lua\n> x = torch.Tensor(2, 2):fill(2)\n> = x + 3\n 5  5\n 5  5\n[torch.DoubleTensor of size 2x2]\n```\n\n\n### Negation ###\n\nA `Tensor` can be negated with the `-` operator placed in front:\n\n```lua\n> x = torch.Tensor(2, 2):fill(2)\n> = -x\n-2 -2\n-2 -2\n[torch.DoubleTensor of size 2x2]\n```\n\n\n### Multiplication ###\n\nMultiplication between two 
`Tensor`s is supported with the `*` operator.\nThe result of the multiplication depends on the sizes of the `Tensor`s.\n\n - 1D and 1D: Returns the dot product between the two `Tensor`s (scalar).\n - 2D and 1D: Returns the matrix-vector operation between the two `Tensor`s (1D `Tensor`).\n - 2D and 2D: Returns the matrix-matrix operation between the two `Tensor`s (2D `Tensor`).\n\nSizes must be conformant for the corresponding operation.\n\nA `Tensor` might also be multiplied by a scalar.\nThe scalar might be on the right or left of the operator.\n\nExamples:\n\n```lua\n> M = torch.Tensor(2, 2):fill(2)\n> N = torch.Tensor(2, 4):fill(3)\n> x = torch.Tensor(2):fill(4)\n> y = torch.Tensor(2):fill(5)\n> = x * y -- dot product\n40\n\n> = M * x --- matrix-vector\n 16\n 16\n[torch.DoubleTensor of size 2]\n\n> = M * N -- matrix-matrix\n 12  12  12  12\n 12  12  12  12\n[torch.DoubleTensor of size 2x4]\n```\n\n\n### Division and Modulo (remainder) ###\n\nOnly the division of a `Tensor` by a scalar is supported with the operator `/`.\n\nExample:\n\n```lua\n> x = torch.Tensor(2, 2):fill(2)\n> = x/3\n 0.6667  0.6667\n 0.6667  0.6667\n[torch.DoubleTensor of size 2x2]\n```\n\nSimilarly, the remainder of the division of a `Tensor`'s elements by a scalar\ncan be obtained with the operator `%`.\n\nExample:\n\n```lua\n> x = torch.Tensor{{1,2},{3,4}}\n> = x % 3\n 1  2\n 0  1\n[torch.Tensor of size 2x2]\n```\n\n\n<a name=\"torch.columnwise.dok\"></a>\n## Column or row-wise operations  (dimension-wise operations) ##\n\n\n<a name=\"torch.cross\"></a>\n### [res] torch.cross([res,] a, b [,n]) ###\n\n`y = torch.cross(a, b)` returns the cross product of `a` and `b` along the first dimension of length 3.\n\n`y = torch.cross(a, b, n)`  returns the cross product of vectors in dimension `n` of `a` and `b`.\n\n`a` and `b` must have the same size, and both `a:size(n)` and `b:size(n)` must be 3.\n\n\n<a name=\"torch.cumprod\"></a>\n### [res] torch.cumprod([res,] x [,dim]) ###\n\n`y = torch.cumprod(x)` returns the 
cumulative product of the elements of `x`, performing the operation over the first dimension.\n\n`y = torch.cumprod(x, n)` returns the cumulative product of the elements of `x`, performing the operation over dimension `n`.\n\n```lua\n-- 1. cumulative product for a vector\n> A = torch.range(1, 5)\n> A\n 1\n 2\n 3\n 4\n 5\n[torch.DoubleTensor of size 5]\n\n> B = torch.cumprod(A)\n> B\n   1     -- B(1) = A(1) = 1\n   2     -- B(2) = A(1)*A(2) = 1*2 = 2\n   6     -- B(3) = A(1)*A(2)*A(3) = 1*2*3 = 6\n  24     -- B(4) = A(1)*A(2)*A(3)*A(4) = 1*2*3*4 = 24\n 120     -- B(5) = A(1)*A(2)*A(3)*A(4)*A(5) = 1*2*3*4*5 = 120\n[torch.DoubleTensor of size 5]\n\n-- 2. cumulative product for a matrix\n> A = torch.LongTensor{{1, 4, 7}, {2, 5, 8}, {3, 6, 9}}\n> A\n 1  4  7\n 2  5  8\n 3  6  9\n[torch.LongTensor of size 3x3]\n\n> B = torch.cumprod(A)\n> B\n   1    4    7\n   2   20   56\n   6  120  504\n[torch.LongTensor of size 3x3]\n\n-- Why?\n-- B(1, 1) = A(1, 1) = 1\n-- B(2, 1) = A(1, 1)*A(2, 1) = 1*2 = 2\n-- B(3, 1) = A(1, 1)*A(2, 1)*A(3, 1) = 1*2*3 = 6\n-- B(1, 2) = A(1, 2) = 4\n-- B(2, 2) = A(1, 2)*A(2, 2) = 4*5 = 20\n-- B(3, 2) = A(1, 2)*A(2, 2)*A(3, 2) = 4*5*6 = 120\n-- B(1, 3) = A(1, 3) = 7\n-- B(2, 3) = A(1, 3)*A(2, 3) = 7*8 = 56\n-- B(3, 3) = A(1, 3)*A(2, 3)*A(3, 3) = 7*8*9 = 504\n\n-- 3. 
cumulative product along 2-dim\n> B = torch.cumprod(A, 2)\n> B\n   1    4   28\n   2   10   80\n   3   18  162\n[torch.LongTensor of size 3x3]\n\n-- Why?\n-- B(1, 1) = A(1, 1) = 1\n-- B(1, 2) = A(1, 1)*A(1, 2) = 1*4 = 4\n-- B(1, 3) = A(1, 1)*A(1, 2)*A(1, 3) = 1*4*7 = 28\n-- B(2, 1) = A(2, 1) = 2\n-- B(2, 2) = A(2, 1)*A(2, 2) = 2*5 = 10\n-- B(2, 3) = A(2, 1)*A(2, 2)*A(2, 3) = 2*5*8 = 80\n-- B(3, 1) = A(3, 1) = 3\n-- B(3, 2) = A(3, 1)*A(3, 2) = 3*6 = 18\n-- B(3, 3) = A(3, 1)*A(3, 2)*A(3, 3) = 3*6*9 = 162\n```\n\n\n<a name=\"torch.cumsum\"></a>\n### [res] torch.cumsum([res,] x [,dim]) ###\n\n`y = torch.cumsum(x)` returns the cumulative sum of the elements of `x`, performing the operation over the first dimension.\n\n`y = torch.cumsum(x, n)` returns the cumulative sum of the elements of `x`, performing the operation over dimension `n`.\n\n\n<a name=\"torch.max\"></a>\n### torch.max([resval, resind,] x [,dim]) ###\n\n`y = torch.max(x)` returns the single largest element of `x`.\n\n`y, i = torch.max(x, 1)` returns the largest element in each column (across rows) of `x`, and a `Tensor` `i` of their corresponding indices in `x`.\n\n`y, i = torch.max(x, 2)` performs the max operation for each row.\n\n`y, i = torch.max(x, n)` performs the max operation over the dimension `n`.\n\n```lua\n> x = torch.randn(3, 3)\n> x\n 1.1994 -0.6290  0.6888\n-0.0038 -0.0908 -0.2075\n 0.3437 -0.9948  0.1216\n[torch.DoubleTensor of size 3x3]\n\n> torch.max(x)\n1.1993977428735\n\n> torch.max(x, 1)\n 1.1994 -0.0908  0.6888\n[torch.DoubleTensor of size 1x3]\n\n 1  2  1\n[torch.LongTensor of size 1x3]\n\n> torch.max(x, 2)\n 1.1994\n-0.0038\n 0.3437\n[torch.DoubleTensor of size 3x1]\n\n 1\n 1\n 1\n[torch.LongTensor of size 3x1]\n```\n\n\n<a name=\"torch.mean\"></a>\n### [res] torch.mean([res,] x [,dim]) ###\n\n`y = torch.mean(x)` returns the mean of all elements of `x`.\n\n`y = torch.mean(x, 1)` returns a `Tensor` `y` of the mean of the elements in each column of `x`.\n\n`y = torch.mean(x, 2)` 
performs the mean operation for each row.\n\n`y = torch.mean(x, n)` performs the mean operation over the dimension `n`.\n\n\n<a name=\"torch.min\"></a>\n### torch.min([resval, resind,] x [,dim]) ###\n\n`y = torch.min(x)` returns the single smallest element of `x`.\n\n`y, i = torch.min(x, 1)` returns the smallest element in each column (across rows) of `x`, and a `Tensor` `i` of their corresponding indices in `x`.\n\n`y, i = torch.min(x, 2)` performs the min operation for each row.\n\n`y, i = torch.min(x, n)` performs the min operation over the dimension `n`.\n\n\n<a name=\"torch.cmax\"></a>\n### [res] torch.cmax([res,] tensor1, tensor2) ###\n\nCompute the maximum of each pair of values in `tensor1` and `tensor2`.\n\n`c = torch.cmax(a, b)` returns a new `Tensor` containing the element-wise maximum of `a` and `b`.\n\n`a:cmax(b)` stores the element-wise maximum of `a` and `b` in `a`.\n\n`c:cmax(a, b)` stores the element-wise maximum of `a` and `b` in `c`.\n\n```lua\n> a = torch.Tensor{1, 2, 3}\n> b = torch.Tensor{3, 2, 1}\n> torch.cmax(a, b)\n 3\n 2\n 3\n[torch.DoubleTensor of size 3]\n```\n\n\n<a name=\"torch.cmax\"></a>\n### [res] torch.cmax([res,] tensor, value) ###\n\nCompute the maximum between each value in `tensor` and `value`.\n\n`c = torch.cmax(a, v)` returns a new `Tensor` containing the maxima of each element in `a` and `v`.\n\n`a:cmax(v)` stores the maxima of each element in `a` and `v` in `a`.\n\n`c:cmax(a, v)` stores the maxima of each element in `a` and `v` in `c`.\n\n```lua\n> a = torch.Tensor{1, 2, 3}\n> torch.cmax(a, 2)\n 2\n 2\n 3\n[torch.DoubleTensor of size 3]\n```\n\n\n<a name=\"torch.cmin\"></a>\n### [res] torch.cmin([res,] tensor1, tensor2) ###\n\nCompute the minimum of each pair of values in `tensor1` and `tensor2`.\n\n`c = torch.cmin(a, b)` returns a new `Tensor` containing the element-wise minimum of `a` and `b`.\n\n`a:cmin(b)` stores the element-wise minimum of `a` and `b` in `a`.\n\n`c:cmin(a, b)` stores the element-wise minimum of `a` and 
`b` in `c`.\n\n```lua\n> a = torch.Tensor{1, 2, 3}\n> b = torch.Tensor{3, 2, 1}\n> torch.cmin(a, b)\n 1\n 2\n 1\n[torch.DoubleTensor of size 3]\n```\n\n\n<a name=\"torch.cmin\"></a>\n### [res] torch.cmin([res,] tensor, value) ###\n\nCompute the minimum between each value in `tensor` and `value`.\n\n`c = torch.cmin(a, v)` returns a new `Tensor` containing the minima of each element in `a` and `v`.\n\n`a:cmin(v)` stores the minima of each element in `a` and `v` in `a`.\n\n`c:cmin(a, v)` stores the minima of each element in `a` and `v` in `c`.\n\n```lua\n> a = torch.Tensor{1, 2, 3}\n> torch.cmin(a, 2)\n 1\n 2\n 2\n[torch.DoubleTensor of size 3]\n```\n\n\n<a name=\"torch.median\"></a>\n### torch.median([resval, resind,] x [,dim]) ###\n\n`y = torch.median(x)` performs the median operation over the last dimension of `x` (one-before-middle in the case of an even number of elements).\n\n`y, i = torch.median(x, 1)` returns the median element in each column (across rows) of `x`, and a `Tensor` `i` of their corresponding indices in `x`.\n\n`y, i = torch.median(x, 2)` performs the median operation for each row.\n\n`y, i = torch.median(x, n)` performs the median operation over the dimension `n`.\n\n```lua\n> x = torch.randn(3, 3)\n> x\n 0.7860  0.7687 -0.9362\n 0.0411  0.5407 -0.3616\n-0.0129 -0.2499 -0.5786\n[torch.DoubleTensor of size 3x3]\n\n> y, i = torch.median(x)\n> y\n 0.7687\n 0.0411\n-0.2499\n[torch.DoubleTensor of size 3x1]\n\n> i\n 2\n 1\n 2\n[torch.LongTensor of size 3x1]\n\n> y, i = torch.median(x, 1)\n> y\n 0.0411  0.5407 -0.5786\n[torch.DoubleTensor of size 1x3]\n\n> i\n 2  2  3\n[torch.LongTensor of size 1x3]\n\n> y, i = torch.median(x, 2)\n> y\n 0.7687\n 0.0411\n-0.2499\n[torch.DoubleTensor of size 3x1]\n\n> i\n 2\n 1\n 2\n[torch.LongTensor of size 3x1]\n```\n\n\n<a name=\"torch.mode\"></a>\n### torch.mode([resval, resind,] x [,dim]) ###\n\n`y = torch.mode(x)` returns the most frequent element of `x` over its last dimension.\n\n`y, i = torch.mode(x, 1)` returns 
the mode element in each column (across rows) of `x`, and a `Tensor` `i` of their corresponding indices in `x`.\n\n`y, i = torch.mode(x, 2)` performs the mode operation for each row.\n\n`y, i = torch.mode(x, n)` performs the mode operation over the dimension `n`.\n\n\n<a name=\"torch.kthvalue\"></a>\n### torch.kthvalue([resval, resind,] x, k [,dim]) ###\n\n`y = torch.kthvalue(x, k)` returns the `k`-th smallest element of `x` over its last dimension.\n\n`y, i = torch.kthvalue(x, k, 1)` returns the `k`-th smallest element in each column (across rows) of `x`, and a `Tensor` `i` of their corresponding indices in `x`.\n\n`y, i = torch.kthvalue(x, k, 2)` performs the `k`-th value operation for each row.\n\n`y, i = torch.kthvalue(x, k, n)` performs the `k`-th value operation over the dimension `n`.\n\n\n<a name=\"torch.prod\"></a>\n### [res] torch.prod([res,] x [,n]) ###\n\n`y = torch.prod(x)` returns the product of all elements in `x`.\n\n`y = torch.prod(x, n)` returns a `Tensor` `y` whose size in dimension `n` is 1 and whose elements are the product of elements of `x` with respect to dimension `n`.\n\n```lua\n> a = torch.Tensor{{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}\n> a\n(1,.,.) =\n  1  2\n  3  4\n\n(2,.,.) =\n  5  6\n  7  8\n[torch.DoubleTensor of dimension 2x2x2]\n\n> torch.prod(a, 1)\n(1,.,.) =\n   5  12\n  21  32\n[torch.DoubleTensor of dimension 1x2x2]\n\n> torch.prod(a, 2)\n(1,.,.) =\n   3   8\n\n(2,.,.) =\n  35  48\n[torch.DoubleTensor of size 2x1x2]\n\n> torch.prod(a, 3)\n(1,.,.) =\n   2\n  12\n\n(2,.,.) 
=\n  30\n  56\n[torch.DoubleTensor of size 2x2x1]\n```\n\n\n<a name=\"torch.sort\"></a>\n### torch.sort([resval, resind,] x [,d] [,flag]) ###\n\n`y, i = torch.sort(x)` returns a `Tensor` `y` where all entries are sorted along the last dimension, in **ascending** order.\nIt also returns a `Tensor` `i` that provides the corresponding indices from `x`.\n\n`y, i = torch.sort(x, d)` performs the sort operation along a specific dimension `d`.\n\n`y, i = torch.sort(x)` is therefore equivalent to `y, i = torch.sort(x, x:dim())`.\n\n`y, i = torch.sort(x, d, true)` performs the sort operation along a specific dimension `d`, in **descending** order.\n\n```lua\n> x = torch.randn(3, 3)\n> x\n-1.2470 -0.4288 -0.5337\n 0.8836 -0.1622  0.9604\n 0.6297  0.2397  0.0746\n[torch.DoubleTensor of size 3x3]\n\n> torch.sort(x)\n-1.2470 -0.5337 -0.4288\n-0.1622  0.8836  0.9604\n 0.0746  0.2397  0.6297\n[torch.DoubleTensor of size 3x3]\n\n 1  3  2\n 2  1  3\n 3  2  1\n[torch.LongTensor of size 3x3]\n```\n\n<a name=\"torch.topk\"></a>\n### torch.topk([resval, resind,] x, k [,dim] [,dir] [,sort]) ###\n\n`y, i = torch.topk(x, k)` returns all `k` smallest elements in `x` over its last dimension including their indices, in unsorted order.\n\n`y, i = torch.topk(x, k, dim)` performs the same operation except over dimension `dim`.\n\n`y, i = torch.topk(x, k, dim, dir)` adds a sorting direction that has the same sense as `torch.sort`; `false` returns the `k` smallest elements in the slice, `true` returns the `k` largest elements in the slice.\n\n`y, i = torch.topk(x, k, dim, dir, true)` specifies that the results in `y` should be sorted with respect to `dir`; by default, the results are potentially unsorted since the computation may be faster, but if sorting is desired, the sort flag may be passed, in which case the results are returned from smallest to `k`-th smallest (`dir == false`) or highest to `k`-th highest (`dir == true`).\n\nThe implementation provides no guarantee of the order of selection 
(indices) among equivalent elements (e.g., topk `k == 2` selection of a vector `{1, 2, 1, 1}`; the values returned could be any pair of `1` entries in the vector).\n\n<a name=\"torch.std\"></a>\n### [res] torch.std([res,] x [,dim] [,flag]) ###\n\n`y = torch.std(x)` returns the standard deviation of the elements of `x`.\n\n`y = torch.std(x, dim)` performs the `std` operation over the dimension `dim`.\n\n`y = torch.std(x, dim, false)` performs the `std` operation normalizing by `n-1` (this is the default).\n\n`y = torch.std(x, dim, true)` performs the `std` operation normalizing by `n` instead of `n-1`.\n\n\n<a name=\"torch.sum\"></a>\n### [res] torch.sum([res,] x [,dim]) ###\n\n`y = torch.sum(x)` returns the sum of the elements of `x`.\n\n`y = torch.sum(x, 2)` performs the sum operation for each row.\n\n`y = torch.sum(x, n)` performs the sum operation over the dimension `n`.\n\n\n<a name=\"torch.var\"></a>\n### [res] torch.var([res,] x [,dim] [,flag]) ###\n\n`y = torch.var(x)` returns the variance of the elements of `x`.\n\n`y = torch.var(x, dim)` performs the `var` operation over the dimension `dim`.\n\n`y = torch.var(x, dim, false)` performs the `var` operation normalizing by `n-1` (this is the default).\n\n`y = torch.var(x, dim, true)` performs the `var` operation normalizing by `n` instead of `n-1`.\n\n\n<a name=\"torch.matrixwide.dok\"></a>\n## Matrix-wide operations  (`Tensor`-wide operations) ##\n\nNote that many of the operations in [dimension-wise operations](#torch.columnwise.dok) can also be used as matrix-wide operations, by just omitting the `dim` parameter.\n\n\n<a name=\"torch.norm\"></a>\n### torch.norm(x [,p] [,dim]) ###\n\n`y = torch.norm(x)` returns the `2`-norm of the `Tensor` `x`.\n\n`y = torch.norm(x, p)` returns the `p`-norm of the `Tensor` `x`.\n\n`y = torch.norm(x, p, dim)` returns the `p`-norms of the `Tensor` `x` computed over the dimension `dim`.\n\n\n<a name=\"torch.renorm\"></a>\n### torch.renorm([res,] x, p, dim, maxnorm) ###\n\nRenormalizes 
the sub-`Tensor`s along dimension `dim` such that they do not exceed norm `maxnorm`.\n\n`y = torch.renorm(x, p, dim, maxnorm)` returns a version of `x` with `p`-norms lower than `maxnorm` over non-`dim` dimensions.\nThe `dim` argument is not to be confused with the argument of the same name in function [`norm`](#torch.norm).\nIn this case, the `p`-norm is measured for each `i`-th sub-`Tensor` `x:select(dim, i)`.\nThis function is equivalent to (but faster than) the following:\n\n```lua\nfunction renorm(matrix, value, dim, maxnorm)\n   local m1 = matrix:transpose(dim, 1):contiguous()\n   -- collapse non-dim dimensions:\n   m2 = m1:reshape(m1:size(1), m1:nElement()/m1:size(1))\n   local norms = m2:norm(value, 2)\n   -- clip\n   local new_norms = norms:clone()\n   new_norms[torch.gt(norms, maxnorm)] = maxnorm\n   new_norms:cdiv(norms:add(1e-7))\n   -- renormalize\n   m1:cmul(new_norms:expandAs(m1))\n   return m1:transpose(dim, 1)\nend\n```\n\n`x:renorm(p, dim, maxnorm)` returns the equivalent of `x:copy(torch.renorm(x, p, dim, maxnorm))`.\n\nNote: this function is particularly useful as a regularizer for constraining the norm of parameter `Tensor`s.\nSee [Hinton et al. 2012, p. 
2](http://arxiv.org/pdf/1207.0580.pdf).\n\n\n<a name=\"torch.dist\"></a>\n### torch.dist(x, y [,p]) ###\n\n`d = torch.dist(x, y)` returns the `2`-norm of `x - y`.\n\n`d = torch.dist(x, y, p)` returns the `p`-norm of `x - y`.\n\n\n<a name=\"torch.numel\"></a>\n### torch.numel(x) ###\n\n`y = torch.numel(x)` returns the count of the number of elements in the matrix `x`.\n\n\n<a name=\"torch.trace\"></a>\n### torch.trace(x) ###\n\n`y = torch.trace(x)` returns the trace (sum of the diagonal elements) of a matrix `x`.\nThis is equal to the sum of the eigenvalues of `x`.\nThe returned value `y` is a number, not a `Tensor`.\n\n\n<a name=\"torch.conv.dok\"></a>\n## Convolution Operations ##\n\nThese functions implement convolution or cross-correlation of an input image (or set of input images) with a kernel (or set of kernels).\nThe convolution function in Torch can handle different types of input/kernel dimensions and produces corresponding outputs.\nThe general form of operations always remains the same.\n\n\n<a name=\"torch.conv2\"></a>\n### [res] torch.conv2([res,] x, k, [, 'F' or 'V']) ###\n<a name=\"torch.conv2\"></a>\n\nThis function computes 2 dimensional convolutions between `x` and `k`.\nThese operations are similar to BLAS operations when number of dimensions of input and kernel are reduced by `2`.\n\n  * `x`  and `k` are 2D: convolution of a single image with a single kernel (2D output). This operation is similar to multiplication of two scalars.\n  * `x` (`p × m × n`)  and `k` (`p × ki × kj`) are 3D: convolution of each input slice with corresponding kernel (3D output).\n  * `x` (`p × m × n`) 3D, `k` (`q × p × ki × kj`) 4D: convolution of all input slices with the corresponding slice of kernel. Output is 3D (`q × m × n`). 
This operation is similar to matrix vector product of matrix `k` and vector `x`.\n\nThe last argument controls if the convolution is a full (`'F'`) or valid (`'V'`) convolution.\nThe default is **valid** convolution.\n\n```lua\nx = torch.rand(100, 100)\nk = torch.rand(10, 10)\nc = torch.conv2(x, k)\n> c:size()\n 91\n 91\n[torch.LongStorage of size 2]\n\nc = torch.conv2(x, k, 'F')\n> c:size()\n 109\n 109\n[torch.LongStorage of size 2]\n```\n\n\n<a name=\"torch.xcorr2\"></a>\n### [res] torch.xcorr2([res,] x, k, [, 'F' or 'V']) ###\n<a name=\"torch.xcorr2\"></a>\n\nThis function operates with same options and input/output configurations as [`torch.conv2`](#torch.conv2), but performs cross-correlation of the input with the kernel `k`.\n\n\n<a name=\"torch.conv3\"></a>\n### [res] torch.conv3([res,] x, k, [, 'F' or 'V']) ###\n<a name=\"torch.conv3\"></a>\n\nThis function computes 3 dimensional convolutions between `x` and `k`.\nThese operations are similar to BLAS operations when number of dimensions of input and kernel are reduced by `3`.\n\n  * `x`  and `k` are 3D: convolution of a single image with a single kernel (3D output). This operation is similar to multiplication of two scalars.\n  * `x` (`p × m × n × o`)  and `k` (`p × ki × kj × kk`) are 4D: convolution of each input slice with corresponding kernel (4D output).\n  * `x` (`p × m × n × o`) 4D, `k` (`q × p × ki × kj × kk`) 5D: convolution of all input slices with the corresponding slice of kernel. Output is 4D `q × m × n × o`. 
This operation is similar to matrix vector product of matrix `k` and vector `x`.\n\nThe last argument controls if the convolution is a full (`'F'`) or valid (`'V'`) convolution.\nThe default is **valid** convolution.\n\n```lua\nx = torch.rand(100, 100, 100)\nk = torch.rand(10, 10, 10)\nc = torch.conv3(x, k)\n> c:size()\n 91\n 91\n 91\n[torch.LongStorage of size 3]\n\nc = torch.conv3(x, k, 'F')\n> c:size()\n 109\n 109\n 109\n[torch.LongStorage of size 3]\n\n```\n\n\n<a name=\"torch.xcorr3\"></a>\n### [res] torch.xcorr3([res,] x, k, [, 'F' or 'V']) ###\n<a name=\"torch.xcorr3\"></a>\n\nThis function operates with same options and input/output configurations as [`torch.conv3`](#torch.conv3), but performs cross-correlation of the input with the kernel `k`.\n\n\n<a name=\"torch.linalg.dok\"></a>\n## Eigenvalues, SVD, Linear System Solution ##\n\nFunctions in this section are implemented with an interface to [LAPACK](https://en.wikipedia.org/wiki/LAPACK) libraries.\nIf LAPACK libraries are not found during compilation step, then these functions will not be available.\n\n\n<a name=\"torch.gesv\"></a>\n### [x, lu] torch.gesv([resb, resa,] B, A) ###\n\n`X, LU = torch.gesv(B, A)` returns the solution of `AX = B` and `LU` contains `L` and `U` factors for `LU` factorization of `A`.\n\n`A` has to be a square and non-singular matrix (2D `Tensor`).\n`A` and `LU` are `m × m`, `X` is `m × k` and `B` is `m × k`.\n\nIf `resb` and `resa` are given, then they will be used for temporary storage and returning the result.\n\n  * `resa` will contain `L` and `U` factors for `LU` factorization of `A`.\n  * `resb` will contain the solution `X`.\n\nNote: Irrespective of the original strides, the returned matrices `resb` and `resa` will be transposed, i.e. 
with strides `1, m` instead of `m, 1`.\n\n```lua\n> a = torch.Tensor({{6.80, -2.11,  5.66,  5.97,  8.23},\n                  {-6.05, -3.30,  5.36, -4.44,  1.08},\n                  {-0.45,  2.58, -2.70,  0.27,  9.04},\n                  {8.32,  2.71,  4.35,  -7.17,  2.14},\n                  {-9.67, -5.14, -7.26,  6.08, -6.87}}):t()\n\n> b = torch.Tensor({{4.02,  6.19, -8.22, -7.57, -3.03},\n                  {-1.56,  4.00, -8.67,  1.75,  2.86},\n                  {9.81, -4.09, -4.57, -8.61,  8.99}}):t()\n\n> b\n 4.0200 -1.5600  9.8100\n 6.1900  4.0000 -4.0900\n-8.2200 -8.6700 -4.5700\n-7.5700  1.7500 -8.6100\n-3.0300  2.8600  8.9900\n[torch.DoubleTensor of dimension 5x3]\n\n> a\n 6.8000 -6.0500 -0.4500  8.3200 -9.6700\n-2.1100 -3.3000  2.5800  2.7100 -5.1400\n 5.6600  5.3600 -2.7000  4.3500 -7.2600\n 5.9700 -4.4400  0.2700 -7.1700  6.0800\n 8.2300  1.0800  9.0400  2.1400 -6.8700\n[torch.DoubleTensor of dimension 5x5]\n\n\n> x = torch.gesv(b, a)\n> x\n-0.8007 -0.3896  0.9555\n-0.6952 -0.5544  0.2207\n 0.5939  0.8422  1.9006\n 1.3217 -0.1038  5.3577\n 0.5658  0.1057  4.0406\n[torch.DoubleTensor of dimension 5x3]\n\n> b:dist(a * x)\n1.1682163181673e-14\n```\n\n\n<a name=\"torch.trtrs\"></a>\n### [x] torch.trtrs([resb, resa,] b, a [, 'U' or 'L'] [, 'N' or 'T'] [, 'N' or 'U']) ###\n\n`X = torch.trtrs(B, A)` returns the solution of `AX = B` where `A` is upper-triangular.\n\n`A` has to be a square, triangular, non-singular matrix (2D `Tensor`).\n`A` and `resa` are `m × m`, `X` and `B` are `m × k`.\n(To be very precise: `A` does not have to be triangular and non-singular, rather only its upper or lower triangle will be taken into account and that part has to be non-singular.)\n\nThe function has several options:\n\n* `uplo` (`'U'` or `'L'`) specifies whether `A` is upper or lower triangular; the default value is `'U'`.\n* `trans` (`'N'` or `'T'`) specifies the system of equations: `'N'` for `A * X = B` (no transpose), or `'T'` for `A^T * X = B` (transpose); the default 
value is `'N'`.\n* `diag` (`'N'` or `'U'`) `'U'` specifies that `A` is unit triangular, i.e., it has ones on its diagonal; `'N'` specifies that `A` is not (necessarily) unit triangular; the default value is `'N'`.\n\nIf `resb` and `resa` are given, then they will be used for temporary storage and returning the result.\n`resb` will contain the solution `X`.\n\nNote: Irrespective of the original strides, the returned matrices `resb` and `resa` will be transposed, i.e. with strides `1, m` instead of `m, 1`.\n\n```lua\n> a = torch.Tensor({{6.80, -2.11,  5.66,  5.97,  8.23},\n                  {0, -3.30,  5.36, -4.44,  1.08},\n                  {0,  0, -2.70,  0.27,  9.04},\n                  {0,  0,  0,  -7.17,  2.14},\n                  {0,  0,  0,  0, -6.87}})\n\n> b = torch.Tensor({{4.02,  6.19, -8.22, -7.57, -3.03},\n                  {-1.56,  4.00, -8.67,  1.75,  2.86},\n                  {9.81, -4.09, -4.57, -8.61,  8.99}}):t()\n\n> b\n 4.0200 -1.5600  9.8100\n 6.1900  4.0000 -4.0900\n-8.2200 -8.6700 -4.5700\n-7.5700  1.7500 -8.6100\n-3.0300  2.8600  8.9900\n[torch.DoubleTensor of dimension 5x3]\n\n> a\n 6.8000 -2.1100  5.6600  5.9700  8.2300\n 0.0000 -3.3000  5.3600 -4.4400  1.0800\n 0.0000  0.0000 -2.7000  0.2700  9.0400\n 0.0000  0.0000  0.0000 -7.1700  2.1400\n 0.0000  0.0000  0.0000  0.0000 -6.8700\n[torch.DoubleTensor of dimension 5x5]\n\n> x = torch.trtrs(b, a)\n> x\n-3.5416 -0.2514  3.0847\n 4.2072  2.0391 -4.5146\n 4.6399  1.7804 -2.6077\n 1.1874 -0.3683  0.8103\n 0.4410 -0.4163 -1.3086\n[torch.DoubleTensor of size 5x3]\n\n> b:dist(a*x)\n4.1895292266754e-15\n```\n\n\n<a name=\"torch.potrf\"></a>\n### torch.potrf([res,] A [, 'U' or 'L'] ) ###\n\nCholesky Decomposition of 2D `Tensor` `A`.\nThe matrix `A` has to be a positive-definite and either symmetric or complex Hermitian.\n\nThe factorization has the form\n\n     A = U**T * U,   if UPLO = 'U', or\n     A = L  * L**T,  if UPLO = 'L',\n\nwhere `U` is an upper triangular matrix and `L` is lower 
triangular.\n\nThe optional character `uplo` = {'U', 'L'} specifies whether the upper or lower triangular decomposition should be returned. By default, `uplo` = 'U'.\n\n`U = torch.potrf(A, 'U')` returns the upper triangular Cholesky decomposition of `A`.\n\n`L = torch.potrf(A, 'L')` returns the lower triangular Cholesky decomposition of `A`.\n\nIf `Tensor` `res` is provided, the resulting decomposition will be stored therein.\n\n```lua\n> A = torch.Tensor({\n    {1.2705,  0.9971,  0.4948,  0.1389,  0.2381},\n    {0.9971,  0.9966,  0.6752,  0.0686,  0.1196},\n    {0.4948,  0.6752,  1.1434,  0.0314,  0.0582},\n    {0.1389,  0.0686,  0.0314,  0.0270,  0.0526},\n    {0.2381,  0.1196,  0.0582,  0.0526,  0.3957}})\n\n> chol = torch.potrf(A)\n> chol\n 1.1272  0.8846  0.4390  0.1232  0.2112\n 0.0000  0.4626  0.6200 -0.0874 -0.1453\n 0.0000  0.0000  0.7525  0.0419  0.0738\n 0.0000  0.0000  0.0000  0.0491  0.2199\n 0.0000  0.0000  0.0000  0.0000  0.5255\n[torch.DoubleTensor of size 5x5]\n\n> torch.potrf(chol, A, 'L')\n> chol\n 1.1272  0.0000  0.0000  0.0000  0.0000\n 0.8846  0.4626  0.0000  0.0000  0.0000\n 0.4390  0.6200  0.7525  0.0000  0.0000\n 0.1232 -0.0874  0.0419  0.0491  0.0000\n 0.2112 -0.1453  0.0738  0.2199  0.5255\n[torch.DoubleTensor of size 5x5]\n```\n\n<a name=\"torch.pstrf\"></a>\n### torch.pstrf([res, piv, ] A [, 'U' or 'L'] ) ###\n\nCholesky factorization with complete pivoting of a real symmetric positive semidefinite 2D `Tensor` `A`.\nThe matrix `A` has to be positive semi-definite and symmetric. The factorization has the form\n\n    P**T * A * P = U**T * U ,  if UPLO = 'U',\n    P**T * A * P = L  * L**T,  if UPLO = 'L',\n\nwhere `U` is an upper triangular matrix and `L` is lower triangular, and\n`P` is stored as the vector `piv`. More specifically, `piv` is such that the nonzero entries are `P[piv[k], k] = 1`.\n\nThe optional character argument `uplo` = {'U', 'L'} specifies whether the upper or lower triangular decomposition should be returned. 
By default, `uplo` = 'U'.\n\n`U, piv = torch.pstrf(A, 'U')` returns the upper triangular Cholesky decomposition of `A`.\n\n`L, piv = torch.pstrf(A, 'L')` returns the lower triangular Cholesky decomposition of `A`.\n\nIf tensors `res` and `piv` (an `IntTensor`) are provided, the resulting decomposition will be stored therein.\n\n```lua\n> A = torch.Tensor({\n    {1.2705,  0.9971,  0.4948,  0.1389,  0.2381},\n    {0.9971,  0.9966,  0.6752,  0.0686,  0.1196},\n    {0.4948,  0.6752,  1.1434,  0.0314,  0.0582},\n    {0.1389,  0.0686,  0.0314,  0.0270,  0.0526},\n    {0.2381,  0.1196,  0.0582,  0.0526,  0.3957}})\n\n> U, piv = torch.pstrf(A)\n> U\n 1.1272  0.4390  0.2112  0.8846  0.1232\n 0.0000  0.9750 -0.0354  0.2942 -0.0233\n 0.0000  0.0000  0.5915 -0.0961  0.0435\n 0.0000  0.0000  0.0000  0.3439 -0.0854\n 0.0000  0.0000  0.0000  0.0000  0.0456\n[torch.DoubleTensor of size 5x5]\n\n> piv\n 1\n 3\n 5\n 2\n 4\n[torch.IntTensor of size 5]\n\n> Ap = U:t() * U\n> Ap\n 1.2705  0.4948  0.2381  0.9971  0.1389\n 0.4948  1.1434  0.0582  0.6752  0.0314\n 0.2381  0.0582  0.3957  0.1196  0.0526\n 0.9971  0.6752  0.1196  0.9966  0.0686\n 0.1389  0.0314  0.0526  0.0686  0.0270\n[torch.DoubleTensor of size 5x5]\n\n> -- Permute rows and columns\n> Ap:indexCopy(1, piv:long(), Ap:clone())\n> Ap:indexCopy(2, piv:long(), Ap:clone())\n> (Ap - A):norm()\n1.5731560566382e-16\n```\n\n<a name=\"torch.potrs\"></a>\n### torch.potrs([res,] B, chol [, 'U' or 'L'] ) ###\n\nReturns the solution to linear system `AX = B` using the Cholesky decomposition `chol` of 2D `Tensor` `A`.\n\nSquare matrix `chol` should be triangular; and, righthand side matrix `B` should be of full rank.\n\nOptional character `uplo` = {'U', 'L'} specifies matrix `chol` as either upper or lower triangular; and, by default, equals 'U'.\n\nIf `Tensor` `res` is provided, the resulting solution will be stored therein.\n\n```lua\n> A = torch.Tensor({\n    {1.2705,  0.9971,  0.4948,  0.1389,  0.2381},\n    {0.9971,  0.9966,  
0.6752,  0.0686,  0.1196},\n    {0.4948,  0.6752,  1.1434,  0.0314,  0.0582},\n    {0.1389,  0.0686,  0.0314,  0.0270,  0.0526},\n    {0.2381,  0.1196,  0.0582,  0.0526,  0.3957}})\n\n> B = torch.Tensor({\n    {0.6219,  0.3439,  0.0431},\n    {0.5642,  0.1756,  0.0153},\n    {0.2334,  0.8594,  0.4103},\n    {0.7556,  0.1966,  0.9637},\n    {0.1420,  0.7185,  0.7476}})\n\n> chol = torch.potrf(A)\n> chol\n 1.1272  0.8846  0.4390  0.1232  0.2112\n 0.0000  0.4626  0.6200 -0.0874 -0.1453\n 0.0000  0.0000  0.7525  0.0419  0.0738\n 0.0000  0.0000  0.0000  0.0491  0.2199\n 0.0000  0.0000  0.0000  0.0000  0.5255\n[torch.DoubleTensor of size 5x5]\n\n> solve = torch.potrs(B, chol)\n> solve\n  12.1945   61.8622   92.6882\n -11.1782  -97.0303 -138.4874\n -15.3442  -76.6562 -116.8218\n   6.1930   13.5238   25.2056\n  29.9678  251.7346  360.2301\n[torch.DoubleTensor of size 5x3]\n\n> A*solve\n 0.6219  0.3439  0.0431\n 0.5642  0.1756  0.0153\n 0.2334  0.8594  0.4103\n 0.7556  0.1966  0.9637\n 0.1420  0.7185  0.7476\n[torch.DoubleTensor of size 5x3]\n\n> B:dist(A*solve)\n4.6783066076306e-14\n```\n\n\n<a name=\"torch.potri\"></a>\n### torch.potri([res,] chol [, 'U' or 'L'] ) ###\n\nReturns the inverse of 2D `Tensor` `A` given its Cholesky decomposition `chol`.\n\nSquare matrix `chol` should be triangular.\n\nOptional character `uplo` = {'U', 'L'} specifies matrix `chol` as either upper or lower triangular; and, by default, equals 'U'.\n\nIf `Tensor` `res` is provided, the resulting inverse will be stored therein.\n\n```lua\n> A = torch.Tensor({\n    {1.2705,  0.9971,  0.4948,  0.1389,  0.2381},\n    {0.9971,  0.9966,  0.6752,  0.0686,  0.1196},\n    {0.4948,  0.6752,  1.1434,  0.0314,  0.0582},\n    {0.1389,  0.0686,  0.0314,  0.0270,  0.0526},\n    {0.2381,  0.1196,  0.0582,  0.0526,  0.3957}})\n\n> chol = torch.potrf(A)\n> chol\n 1.1272  0.8846  0.4390  0.1232  0.2112\n 0.0000  0.4626  0.6200 -0.0874 -0.1453\n 0.0000  0.0000  0.7525  0.0419  0.0738\n 0.0000  0.0000  0.0000  0.0491 
 0.2199\n 0.0000  0.0000  0.0000  0.0000  0.5255\n[torch.DoubleTensor of size 5x5]\n\n> inv = torch.potri(chol)\n> inv\n  42.2781  -39.0824    8.3019 -133.4998    2.8980\n -39.0824   38.1222   -8.7468  119.4247   -2.5944\n   8.3019   -8.7468    3.1104  -25.1405    0.5327\n-133.4998  119.4247  -25.1405  480.7511  -15.9747\n   2.8980   -2.5944    0.5327  -15.9747    3.6127\n[torch.DoubleTensor of size 5x5]\n\n> inv:dist(torch.inverse(A))\n2.8525852877633e-12\n```\n\n\n<a name=\"torch.gels\"></a>\n### torch.gels([resb, resa,] b, a) ###\n\nSolution of least squares and least norm problems for a full rank `m × n` matrix `A`.\n\n  * If `n ≤ m`, then solve `min ||AX-B||_F`.\n  * If `n > m`, then solve `min ||X||_F` s.t. `AX = B`.\n\nOn return, the first `n` rows of the `x` matrix contain the solution and the remaining rows contain residual information.\nThe square root of the sum of squares of the elements of each column of `x` starting at row `n + 1` is the residual for the corresponding column.\n\nNote: Irrespective of the original strides, the returned matrices `resb` and `resa` will be transposed, i.e. 
with strides `1, m` instead of `m, 1`.\n\n```lua\n> a = torch.Tensor({{ 1.44, -9.96, -7.55,  8.34,  7.08, -5.45},\n                  {-7.84, -0.28,  3.24,  8.09,  2.52, -5.70},\n                  {-4.39, -3.24,  6.27,  5.28,  0.74, -1.19},\n                  {4.53,  3.83, -6.64,  2.06, -2.47,  4.70}}):t()\n\n> b = torch.Tensor({{8.58,  8.26,  8.48, -5.28,  5.72,  8.93},\n                  {9.35, -4.43, -0.70, -0.26, -7.36, -2.52}}):t()\n\n> a\n 1.4400 -7.8400 -4.3900  4.5300\n-9.9600 -0.2800 -3.2400  3.8300\n-7.5500  3.2400  6.2700 -6.6400\n 8.3400  8.0900  5.2800  2.0600\n 7.0800  2.5200  0.7400 -2.4700\n-5.4500 -5.7000 -1.1900  4.7000\n[torch.DoubleTensor of dimension 6x4]\n\n> b\n 8.5800  9.3500\n 8.2600 -4.4300\n 8.4800 -0.7000\n-5.2800 -0.2600\n 5.7200 -7.3600\n 8.9300 -2.5200\n[torch.DoubleTensor of dimension 6x2]\n\n> x = torch.gels(b, a)\n> x\n -0.4506   0.2497\n -0.8492  -0.9020\n  0.7066   0.6323\n  0.1289   0.1351\n 13.1193  -7.4922\n -4.8214  -7.1361\n[torch.DoubleTensor of dimension 6x2]\n\n> b:dist(a*x:narrow(1, 1, 4))\n17.390200628863\n\n> math.sqrt(x:narrow(1, 5, 2):pow(2):sumall())\n17.390200628863\n```\n\n\n<a name=\"torch.symeig\"></a>\n### torch.symeig([rese, resv,] a [, 'N' or 'V'] [, 'U' or 'L']) ###\n\n`e, V = torch.symeig(A)` returns eigenvalues and eigenvectors of a symmetric real matrix `A`.\n\n`A` and `V` are `m × m` matrices and `e` is a `m` dimensional vector.\n\nThis function calculates all eigenvalues (and vectors) of `A` such that `A = V diag(e) V'`.\n\nThird argument defines computation of eigenvectors or eigenvalues only.\nIf it is `'N'`, only eigenvalues are computed.\nIf it is `'V'`, both eigenvalues and eigenvectors are computed.\n\nSince the input matrix `A` is supposed to be symmetric, only upper triangular portion is used by default.\nIf the 4th argument is `'L'`, then lower triangular portion is used.\n\nNote: Irrespective of the original strides, the returned matrix `V` will be transposed, i.e. 
with strides `1, m` instead of `m, 1`.\n\n```lua\n> a = torch.Tensor({{ 1.96,  0.00,  0.00,  0.00,  0.00},\n                  {-6.49,  3.80,  0.00,  0.00,  0.00},\n                  {-0.47, -6.39,  4.17,  0.00,  0.00},\n                  {-7.20,  1.50, -1.51,  5.70,  0.00},\n                  {-0.65, -6.34,  2.67,  1.80, -7.10}}):t()\n\n> a\n 1.9600 -6.4900 -0.4700 -7.2000 -0.6500\n 0.0000  3.8000 -6.3900  1.5000 -6.3400\n 0.0000  0.0000  4.1700 -1.5100  2.6700\n 0.0000  0.0000  0.0000  5.7000  1.8000\n 0.0000  0.0000  0.0000  0.0000 -7.1000\n[torch.DoubleTensor of dimension 5x5]\n\n> e = torch.symeig(a)\n> e\n-11.0656\n -6.2287\n  0.8640\n  8.8655\n 16.0948\n[torch.DoubleTensor of dimension 5]\n\n> e, v = torch.symeig(a, 'V')\n> e\n-11.0656\n -6.2287\n  0.8640\n  8.8655\n 16.0948\n[torch.DoubleTensor of dimension 5]\n\n> v\n-0.2981 -0.6075  0.4026 -0.3745  0.4896\n-0.5078 -0.2880 -0.4066 -0.3572 -0.6053\n-0.0816 -0.3843 -0.6600  0.5008  0.3991\n-0.0036 -0.4467  0.4553  0.6204 -0.4564\n-0.8041  0.4480  0.1725  0.3108  0.1622\n[torch.DoubleTensor of dimension 5x5]\n\n> v*torch.diag(e)*v:t()\n 1.9600 -6.4900 -0.4700 -7.2000 -0.6500\n-6.4900  3.8000 -6.3900  1.5000 -6.3400\n-0.4700 -6.3900  4.1700 -1.5100  2.6700\n-7.2000  1.5000 -1.5100  5.7000  1.8000\n-0.6500 -6.3400  2.6700  1.8000 -7.1000\n[torch.DoubleTensor of dimension 5x5]\n\n> a:dist(torch.triu(v*torch.diag(e)*v:t()))\n1.0219480822443e-14\n```\n\n\n<a name=\"torch.eig\"></a>\n### torch.eig([rese, resv,] a [, 'N' or 'V']) ###\n\n`e, V = torch.eig(A)` returns eigenvalues and eigenvectors of a general real square matrix `A`.\n\n`A` and `V` are `m × m` matrices and `e` is a `m` dimensional vector.\n\nThis function calculates all right eigenvalues (and vectors) of `A` such that `A = V diag(e) V'`.\n\nThird argument defines computation of eigenvectors or eigenvalues only.\nIf it is `'N'`, only eigenvalues are computed.\nIf it is `'V'`, both eigenvalues and eigenvectors are computed.\n\nThe eigen values returned 
follow [LAPACK convention](https://software.intel.com/sites/products/documentation/hpc/mkl/mklman/GUID-16EB5901-5644-4DA6-A332-A052309010C4.htm) and are returned as complex (real/imaginary) pairs of numbers (`2 * m` dimensional `Tensor`).\n\nNote: Irrespective of the original strides, the returned matrix `V` will be transposed, i.e. with strides `1, m` instead of `m, 1`.\n\n```lua\n> a = torch.Tensor({{ 1.96,  0.00,  0.00,  0.00,  0.00},\n                  {-6.49,  3.80,  0.00,  0.00,  0.00},\n                  {-0.47, -6.39,  4.17,  0.00,  0.00},\n                  {-7.20,  1.50, -1.51,  5.70,  0.00},\n                  {-0.65, -6.34,  2.67,  1.80, -7.10}}):t()\n\n> a\n 1.9600 -6.4900 -0.4700 -7.2000 -0.6500\n 0.0000  3.8000 -6.3900  1.5000 -6.3400\n 0.0000  0.0000  4.1700 -1.5100  2.6700\n 0.0000  0.0000  0.0000  5.7000  1.8000\n 0.0000  0.0000  0.0000  0.0000 -7.1000\n[torch.DoubleTensor of dimension 5x5]\n\n> b = a + torch.triu(a, 1):t()\n> b\n\n  1.9600 -6.4900 -0.4700 -7.2000 -0.6500\n -6.4900  3.8000 -6.3900  1.5000 -6.3400\n -0.4700 -6.3900  4.1700 -1.5100  2.6700\n -7.2000  1.5000 -1.5100  5.7000  1.8000\n -0.6500 -6.3400  2.6700  1.8000 -7.1000\n[torch.DoubleTensor of dimension 5x5]\n\n> e = torch.eig(b)\n> e\n 16.0948   0.0000\n-11.0656   0.0000\n -6.2287   0.0000\n  0.8640   0.0000\n  8.8655   0.0000\n[torch.DoubleTensor of dimension 5x2]\n\n> e, v = torch.eig(b, 'V')\n> e\n 16.0948   0.0000\n-11.0656   0.0000\n -6.2287   0.0000\n  0.8640   0.0000\n  8.8655   0.0000\n[torch.DoubleTensor of dimension 5x2]\n\n> v\n-0.4896  0.2981 -0.6075 -0.4026 -0.3745\n 0.6053  0.5078 -0.2880  0.4066 -0.3572\n-0.3991  0.0816 -0.3843  0.6600  0.5008\n 0.4564  0.0036 -0.4467 -0.4553  0.6204\n-0.1622  0.8041  0.4480 -0.1725  0.3108\n[torch.DoubleTensor of dimension 5x5]\n\n> v * torch.diag(e:select(2, 1))*v:t()\n 1.9600 -6.4900 -0.4700 -7.2000 -0.6500\n-6.4900  3.8000 -6.3900  1.5000 -6.3400\n-0.4700 -6.3900  4.1700 -1.5100  2.6700\n-7.2000  1.5000 -1.5100  5.7000  
1.8000\n-0.6500 -6.3400  2.6700  1.8000 -7.1000\n[torch.DoubleTensor of dimension 5x5]\n\n> b:dist(v * torch.diag(e:select(2, 1)) * v:t())\n3.5423944346685e-14\n```\n\n\n<a name=\"torch.svd\"></a>\n### torch.svd([resu, ress, resv,] a [, 'S' or 'A']) ###\n\n`U, S, V = torch.svd(A)` returns the singular value decomposition of a real matrix `A` of size `n × m` such that `A = USV'*`.\n\n`U` is `n × n`, `S` is `n × m` and `V` is `m × m`.\n\nThe last argument, if it is string, represents the number of singular values to be computed.\n`'S'` stands for *some* and `'A'` stands for *all*.\n\nNote: Irrespective of the original strides, the returned matrix `U` will be transposed, i.e. with strides `1, n` instead of `n, 1`.\n\n```lua\n> a = torch.Tensor({{8.79,  6.11, -9.15,  9.57, -3.49,  9.84},\n                  {9.93,  6.91, -7.93,  1.64,  4.02,  0.15},\n                  {9.83,  5.04,  4.86,  8.83,  9.80, -8.99},\n                  {5.45, -0.27,  4.85,  0.74, 10.00, -6.02},\n                  {3.16,  7.98,  3.01,  5.80,  4.27, -5.31}}):t()\n\n> a\n  8.7900   9.9300   9.8300   5.4500   3.1600\n  6.1100   6.9100   5.0400  -0.2700   7.9800\n -9.1500  -7.9300   4.8600   4.8500   3.0100\n  9.5700   1.6400   8.8300   0.7400   5.8000\n -3.4900   4.0200   9.8000  10.0000   4.2700\n  9.8400   0.1500  -8.9900  -6.0200  -5.3100\n\n> u, s, v = torch.svd(a)\n> u\n-0.5911  0.2632  0.3554  0.3143  0.2299\n-0.3976  0.2438 -0.2224 -0.7535 -0.3636\n-0.0335 -0.6003 -0.4508  0.2334 -0.3055\n-0.4297  0.2362 -0.6859  0.3319  0.1649\n-0.4697 -0.3509  0.3874  0.1587 -0.5183\n 0.2934  0.5763 -0.0209  0.3791 -0.6526\n[torch.DoubleTensor of dimension 6x5]\n\n> s\n 27.4687\n 22.6432\n  8.5584\n  5.9857\n  2.0149\n[torch.DoubleTensor of dimension 5]\n\n> v\n-0.2514  0.8148 -0.2606  0.3967 -0.2180\n-0.3968  0.3587  0.7008 -0.4507  0.1402\n-0.6922 -0.2489 -0.2208  0.2513  0.5891\n-0.3662 -0.3686  0.3859  0.4342 -0.6265\n-0.4076 -0.0980 -0.4933 -0.6227 -0.4396\n[torch.DoubleTensor of dimension 5x5]\n\n> 
u * torch.diag(s) * v:t()\n  8.7900   9.9300   9.8300   5.4500   3.1600\n  6.1100   6.9100   5.0400  -0.2700   7.9800\n -9.1500  -7.9300   4.8600   4.8500   3.0100\n  9.5700   1.6400   8.8300   0.7400   5.8000\n -3.4900   4.0200   9.8000  10.0000   4.2700\n  9.8400   0.1500  -8.9900  -6.0200  -5.3100\n[torch.DoubleTensor of dimension 6x5]\n\n> a:dist(u * torch.diag(s) * v:t())\n2.8923773593204e-14\n```\n\n\n<a name=\"torch.inverse\"></a>\n### torch.inverse([res,] x) ###\n\nComputes the inverse of square matrix `x`.\n\n`torch.inverse(x)` returns the result as a new matrix.\n\n`torch.inverse(y, x)` puts the result in `y`.\n\nNote: Irrespective of the original strides, the returned matrix `y` will be transposed, i.e. with strides `1, m` instead of `m, 1`.\n\n```lua\n> x = torch.rand(10, 10)\n> y = torch.inverse(x)\n> z = x * y\n> z\n 1.0000 -0.0000  0.0000 -0.0000  0.0000  0.0000  0.0000 -0.0000  0.0000  0.0000\n 0.0000  1.0000 -0.0000 -0.0000  0.0000  0.0000 -0.0000 -0.0000 -0.0000  0.0000\n 0.0000 -0.0000  1.0000 -0.0000  0.0000  0.0000 -0.0000 -0.0000  0.0000  0.0000\n 0.0000 -0.0000 -0.0000  1.0000 -0.0000  0.0000  0.0000 -0.0000 -0.0000  0.0000\n 0.0000 -0.0000  0.0000 -0.0000  1.0000  0.0000  0.0000 -0.0000 -0.0000  0.0000\n 0.0000 -0.0000  0.0000 -0.0000  0.0000  1.0000  0.0000 -0.0000 -0.0000  0.0000\n 0.0000 -0.0000  0.0000 -0.0000  0.0000  0.0000  1.0000 -0.0000  0.0000  0.0000\n 0.0000 -0.0000 -0.0000 -0.0000  0.0000  0.0000  0.0000  1.0000  0.0000  0.0000\n 0.0000 -0.0000 -0.0000 -0.0000  0.0000  0.0000 -0.0000 -0.0000  1.0000  0.0000\n 0.0000 -0.0000  0.0000 -0.0000  0.0000  0.0000  0.0000 -0.0000  0.0000  1.0000\n[torch.DoubleTensor of dimension 10x10]\n\n> torch.max(torch.abs(z - torch.eye(10))) -- Max nonzero\n2.3092638912203e-14\n```\n\n\n<a name=\"torch.qr\"></a>\n### torch.qr([q, r], x) ###\n\nCompute a QR decomposition of the matrix `x`: matrices `q` and `r` such that `x = q * r`, with `q` orthogonal and `r` upper triangular.\nThis returns the thin 
(reduced) QR factorization.\n\n`torch.qr(x)` returns the Q and R components as new matrices.\n\n`torch.qr(q, r, x)` stores them in existing `Tensor`s `q` and `r`.\n\nNote that precision may be lost if the magnitudes of the elements of `x` are large.\n\nNote also that, while it should always give you a valid decomposition, it may not give you the same one across platforms - it will depend on your LAPACK implementation.\n\nNote: Irrespective of the original strides, the returned matrix `q` will be transposed, i.e. with strides `1, m` instead of `m, 1`.\n\n```lua\n> a = torch.Tensor{{12, -51, 4}, {6, 167, -68}, {-4, 24, -41}}\n> a\n  12  -51    4\n   6  167  -68\n  -4   24  -41\n[torch.DoubleTensor of dimension 3x3]\n\n> q, r = torch.qr(a)\n> q\n-0.8571  0.3943  0.3314\n-0.4286 -0.9029 -0.0343\n 0.2857 -0.1714  0.9429\n[torch.DoubleTensor of dimension 3x3]\n\n> r\n -14.0000  -21.0000   14.0000\n   0.0000 -175.0000   70.0000\n   0.0000    0.0000  -35.0000\n[torch.DoubleTensor of dimension 3x3]\n\n> (q * r):round()\n  12  -51    4\n   6  167  -68\n  -4   24  -41\n[torch.DoubleTensor of dimension 3x3]\n\n> (q:t() * q):round()\n 1  0  0\n 0  1  0\n 0  0  1\n[torch.DoubleTensor of dimension 3x3]\n```\n\n\n<a name=\"torch.geqrf\"></a>\n### torch.geqrf([m, tau], a) ###\n\nThis is a low-level function for calling LAPACK directly.\nYou'll generally want to use `torch.qr()` instead.\n\nComputes a QR decomposition of `a`, but without constructing Q and R as explicit separate matrices.\nRather, this directly calls the underlying LAPACK function `?geqrf` which produces a sequence of 'elementary reflectors'.\nSee [LAPACK documentation](https://software.intel.com/en-us/node/521004) for further details.\n\n\n<a name=\"torch.orgqr\"></a>\n### torch.orgqr([q], m, tau) ###\n\nThis is a low-level function for calling LAPACK directly.\nYou'll generally want to use `torch.qr()` instead.\n\nConstructs a Q matrix from a sequence of elementary reflectors, such as that given by 
`torch.geqrf`.\nSee [LAPACK documentation](https://software.intel.com/en-us/node/521010) for further details.\n\n\n<a name=\"torch.ormqr\"></a>\n### torch.ormqr([res], m, tau, mat [, 'L' or 'R'] [, 'N' or 'T']) ###\n\nMultiply a matrix with `Q` as defined by the elementary reflectors and scalar factors returned by `geqrf`.\nThis is a low-level function for calling LAPACK directly.\nYou'll generally want to use `torch.qr()` instead.\n\n* `side` (`'L'` or `'R'`) specifies whether `mat` should be left-multiplied by `Q`, i.e. `Q * mat`, or right-multiplied by `Q`, i.e. `mat * Q`.\n* `trans` (`'N'` or `'T'`) specifies whether `Q` should be transposed before being multiplied.\n\nSee [LAPACK documentation](https://software.intel.com/en-us/node/521011) for further details.\n\n\n<a name=\"torch.logical.dok\"></a>\n## Logical Operations on `Tensor`s ##\n\nThese functions implement logical comparison operators that take a `Tensor` as input and another `Tensor` or a number as the comparison target.\nThey return a `ByteTensor` in which each element is `0` or `1` indicating if the comparison for the corresponding element was `false` or `true` respectively.\n\n\n<a name=\"torch.lt\"></a>\n### torch.lt(a, b) ###\n\nImplements `<` operator comparing each element in `a` with `b` (if `b` is a number) or each element in `a` with corresponding element in `b`.\n\n\n<a name=\"torch.le\"></a>\n### torch.le(a, b) ###\n\nImplements `<=` operator comparing each element in `a` with `b` (if `b` is a number) or each element in `a` with corresponding element in `b`.\n\n\n<a name=\"torch.gt\"></a>\n### torch.gt(a, b) ###\n\nImplements `>` operator comparing each element in `a` with `b` (if `b` is a number) or each element in `a` with corresponding element in `b`.\n\n\n<a name=\"torch.ge\"></a>\n### torch.ge(a, b) ###\n\nImplements `>=` operator comparing each element in `a` with `b` (if `b` is a number) or each element in `a` with corresponding element in `b`.\n\n\n<a name=\"torch.eq\"></a>\n### torch.eq(a, b) 
###\n\nImplements `==` operator comparing each element in `a` with `b` (if `b` is a number) or each element in `a` with corresponding element in `b`.\n\n\n<a name=\"torch.ne\"></a>\n### torch.ne(a, b) ###\n\nImplements `~=` operator comparing each element in `a` with `b` (if `b` is a number) or each element in `a` with corresponding element in `b`.\n\n\n<a name=\"torch.all\"></a>\n<a name=\"torch.any\"></a>\n### torch.all(a) ###\n### torch.any(a) ###\n\nAdditionally, `any` and `all` logically sum a `ByteTensor` returning `true` if any or all elements are logically true respectively.\nNote that logically true here is meant in the C sense (zero is `false`, non-zero is `true`) such as the output of the `Tensor` element-wise logical operations.\n\n```lua\n> a = torch.rand(10)\n> b = torch.rand(10)\n> a\n 0.5694\n 0.5264\n 0.3041\n 0.4159\n 0.1677\n 0.7964\n 0.0257\n 0.2093\n 0.6564\n 0.0740\n[torch.DoubleTensor of dimension 10]\n\n> b\n 0.2950\n 0.4867\n 0.9133\n 0.1291\n 0.1811\n 0.3921\n 0.7750\n 0.3259\n 0.2263\n 0.1737\n[torch.DoubleTensor of dimension 10]\n\n> torch.lt(a, b)\n 0\n 0\n 1\n 0\n 1\n 0\n 1\n 1\n 0\n 1\n[torch.ByteTensor of dimension 10]\n\n> torch.eq(a, b)\n0\n0\n0\n0\n0\n0\n0\n0\n0\n0\n[torch.ByteTensor of dimension 10]\n\n> torch.ne(a, b)\n 1\n 1\n 1\n 1\n 1\n 1\n 1\n 1\n 1\n 1\n[torch.ByteTensor of dimension 10]\n\n> torch.gt(a, b)\n 1\n 1\n 0\n 1\n 0\n 1\n 0\n 0\n 1\n 0\n[torch.ByteTensor of dimension 10]\n\n> a[torch.gt(a, b)] = 10\n> a\n 10.0000\n 10.0000\n  0.3041\n 10.0000\n  0.1677\n 10.0000\n  0.0257\n  0.2093\n 10.0000\n  0.0740\n[torch.DoubleTensor of dimension 10]\n\n> a[torch.gt(a, 1)] = -1\n> a\n-1.0000\n-1.0000\n 0.3041\n-1.0000\n 0.1677\n-1.0000\n 0.0257\n 0.2093\n-1.0000\n 0.0740\n[torch.DoubleTensor of dimension 10]\n\n> a = torch.ones(3):byte()\n> torch.all(a)\ntrue\n\n> a[2] = 0\n> torch.all(a)\nfalse\n\n> torch.any(a)\ntrue\n\n> a:zero()\n> torch.any(a)\nfalse\n```\n"
  },
  {
    "path": "doc/memoryfile.md",
    "content": "<a name=\"torch.MemoryFile.dok\"></a>\n# MemoryFile #\n\nParent classes: [File](file.md)\n\nA `MemoryFile` is a particular `File` which is able to perform basic\nread/write operations on a buffer in `RAM`. It implements all methods\ndescribed in [File](file.md).\n\nThe data of the `File` is contained in a `NULL`-terminated\n[CharStorage](storage.md).\n\n<a name=\"torch.MemoryFile\"></a>\n### torch.MemoryFile([mode]) ###\n\n_Constructor_ which returns a new `MemoryFile` object using `mode`. Valid\n`mode` are `\"r\"` (read), `\"w\"` (write) or `\"rw\"` (read-write). Default is `\"rw\"`.\n\n\n<a name=\"torch.MemoryFile\"></a>\n### torch.MemoryFile(storage, mode) ###\n\n_Constructor_ which returns a new `MemoryFile` object, using the given\n[storage](storage.md) (which must be a `CharStorage`) and `mode`. Valid\n`mode` are `\"r\"` (read), `\"w\"` (write) or `\"rw\"` (read-write). The last character\nin this storage _must_ be `NULL` or an error will be generated. This allows\nreading existing memory. If used for writing, note that the `storage` might\nbe resized by this class if needed.\n\n<a name=\"torch.MemoryFile.storage\"></a>\n### [CharStorage] storage() ###\n\nReturns the [storage](storage.md) which contains all the data of the\n`File` (note: this is _not_ a copy, but a _reference_ to this storage). The\nsize of the storage is the size of the data in the `File`, plus one, the\nlast character being `NULL`.\n\n<a name=\"torch.MemoryFile.longSize\"></a>\n### longSize([size]) ###\n\nLongs will be written and read from the file as `size` bytes long, which\ncan be 0, 4 or 8. 0 means system default.\n"
  },
  {
    "path": "doc/pipefile.md",
    "content": "<a name=\"torch.PipeFile.dok\"></a>\n# PipeFile #\n\nParent classes: [DiskFile](diskfile.md)\n\nA `PipeFile` is a particular `File` which is able to perform basic read/write operations\non a command pipe. It implements all methods described in [DiskFile](diskfile.md) and [File](file.md).\n\nThe pipe may be opened in read or write mode, depending on the parameter\n`mode` (which can take the value `\"r\"` or `\"w\"`)\ngiven to the [torch.PipeFile(command, mode)](#torch.PipeFile) constructor. Read-write mode is not allowed.\n\n<a name=\"torch.PipeFile\"></a>\n### torch.PipeFile(command, [mode], [quiet]) ###\n\n_Constructor_ which executes `command` by opening a pipe in read or write\n`mode`. Valid `mode`s are `\"r\"` (read) or `\"w\"` (write). Default is read\nmode.\n\nIf (and only if) `quiet` is `true`, no error will be raised in case of a\nproblem opening the pipe: instead `nil` will be returned.\n\n"
  },
  {
    "path": "doc/random.md",
    "content": "<a name=\"torch.random.dok\"></a>\n# Random Numbers #\n\nTorch provides accurate mathematical random generation, based on\n[Mersenne Twister](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html)\nrandom number generator.\n\n<a name=\":torch.gen.dok\"></a>\n## Generator handling ##\n\nAll of the below functions, as well as [randn()](maths.md#torch.randn),\n[rand()](maths.md#torch.rand) and [randperm()](maths.md#torch.randperm),\ntake as optional first argument a random number generator.\nIf this argument is not provided, the default global RNG is used.\n\nA non-global RNG can be obtained with [Generator()](#torch.Generator).\nEach RNG has its own state, independent from all other RNG's states.\n\n```\n-- Seed the global RNG\n> torch.manualSeed(0)\n> torch.random()\n2357136044\n-- Creates and seed a non-global RNG\n> gen = torch.Generator()\n> torch.manualSeed(gen, 0)\n> torch.random(gen)\n2357136044\n> torch.random(gen)\n2546248239\n> torch.random()\n2546248239\n```\n\n<a name=\":torch.seed.dok\"></a>\n## Seed Handling ##\n\nThe random number generator is provided with a random seed via\n[seed()](#torch.seed) when torch is being initialized. It can be\nreinitialized using [seed()](#torch.seed) or [manualSeed()](#torch.manualSeed).\n\nInitial seed can be obtained using [initialSeed()](#torch.initialSeed).\n\nSetting a particular seed allows the user to (re)-generate a particular sequence\nof random numbers. 
Example:\n\n```\n> torch.manualSeed(123)\n> = torch.uniform()\n0.69646918727085\n> return  torch.uniform()\n0.71295532141812\n> return  torch.uniform()\n0.28613933874294\n> torch.manualSeed(123)\n> return  torch.uniform()\n0.69646918727085\n> return  torch.uniform()\n0.71295532141812\n> return  torch.uniform()\n0.28613933874294\n> torch.manualSeed(torch.initialSeed())\n> return  torch.uniform()\n0.69646918727085\n> return  torch.uniform()\n0.71295532141812\n> return  torch.uniform()\n0.28613933874294\n```\n\nTo regenerate a sequence of random numbers starting from a specific point\nin the sequence, one can save the state of the random number generator\nusing [getRNGState()](#torch.getRNGState) and then reset the random number\ngenerator to that state using [setRNGState()](#torch.setRNGState). Example:\n\n```\n> torch.manualSeed(123)\n> = torch.uniform()\n0.69646918727085\n> s = torch.getRNGState()\n> return  torch.uniform()\n0.71295532141812\n> return  torch.uniform()\n0.28613933874294\n> torch.setRNGState(s)\n> return  torch.uniform()\n0.71295532141812\n> return  torch.uniform()\n0.28613933874294\n```\n\n<a name=\"torch.Generator\"></a>\n### [Generator] Generator() ###\n\nCreates a non-global random generator that carries its own state and can be\npassed as the first argument to any function that generates a random number.\n\n<a name=\"torch.seed\"></a>\n### [number] seed([gen,]) ###\n\nSet the seed of the random number generator using `/dev/urandom`\n(on Windows the time of the computer with granularity of seconds is used).\nReturns the seed obtained.\n\n<a name=\"torch.manualSeed\"></a>\n### manualSeed([gen,] number) ###\n\nSet the seed of the random number generator to the given `number`.\n\n<a name=\"torch.initialSeed\"></a>\n### initialSeed([gen]) ###\n\nReturns the initial seed used to initialize the random generator.\n\n<a name=\"torch.getRNGState\"></a>\n### [Tensor] getRNGState([gen]) ###\nReturns the current state of the random number generator as a 
torch.ByteTensor.\nThis can then be used to set the state of the RNG so that the same sequence of\nrandom numbers is produced.\n\n<a name=\"torch.setRNGState\"></a>\n### [Tensor] setRNGState([gen,] state) ###\nSets the state of the random number generator. If `state` was obtained earlier\nusing `getRNGState` then the random number generator should now generate the\nsame numbers as it did from the point where `state` was obtained. This function\nreturns its argument `state`.\n\n<a name=\"torch.random\"></a>\n### [number] random([gen,] [a], [b]) ###\n\nReturns an unsigned 32 bit integer random number from `[a,b]`. By default `a` is `1` and `b` is `2^32`.\n\n<a name=\"torch.uniform\"></a>\n### [number] uniform([gen,] [a],[b]) ###\n\nReturns a random real number according to uniform distribution on `[a,b)`. By default `a` is `0` and `b` is `1`.\n\n<a name=\"torch.normal\"></a>\n### [number] normal([gen,] [mean],[stdv]) ###\n\nReturns a random real number according to a normal distribution with the given `mean` and standard deviation `stdv`.\n`stdv` must be positive.\n\n<a name=\"torch.exponential\"></a>\n### [number] exponential([gen,] lambda) ###\n\nReturns a random real number according to the exponential distribution\n`p(x) = lambda * exp(-lambda * x)`\n\n<a name=\"torch.cauchy\"></a>\n### [number] cauchy([gen,] median, sigma) ###\n\nReturns a random real number according to the Cauchy distribution\n`p(x) = sigma/(pi*(sigma^2 + (x-median)^2))`\n\n<a name=\"torch.logNormal\"></a>\n### [number] logNormal([gen,] mean, stdv) ###\n\nReturns a random real number according to the log-normal distribution, with\nthe given `mean` and standard deviation `stdv`.\n`mean` and `stdv` are the corresponding mean and standard deviation of the underlying normal distribution, \nand not of the returned distribution.\n\n`stdv` must be positive.\n\n<a name=\"torch.geometric\"></a>\n### [number] geometric([gen,] p) ###\n\nReturns a random integer number according to a geometric 
distribution\n`p(i) = (1-p) * p^(i-1)`. `p` must satisfy `0 < p < 1`.\n\n<a name=\"torch.bernoulli\"></a>\n### [number] bernoulli([gen,] [p]) ###\n\nReturns `1` with probability `p` and `0` with probability `1-p`. `p` must satisfy `0 <= p <= 1`.\nBy default `p` is equal to `0.5`.\n"
  },
  {
    "path": "doc/serialization.md",
    "content": "\n<a name=\"torch.serialization.dok\"></a>\n# Serialization #\n\nTorch provides 4 high-level methods to serialize/deserialize arbitrary Lua/Torch objects.\nThese functions are just abstractions over the [File](file.md) object, and were created\nfor convenience (these are very common routines).\n\nThe first two functions are useful to serialize/deserialize data to/from files:\n\n  - `torch.save(filename, object [, format, referenced])`\n  - `[object] torch.load(filename [, format, referenced])`\n\nThe next two functions are useful to serialize/deserialize data to/from strings:\n\n  - `[str] torch.serialize(object)`\n  - `[object] torch.deserialize(str)`\n\nSerializing to files is useful to save arbitrary data structures, or share them with other people.\nSerializing to strings is useful to store arbitrary data structures in databases, or 3rd party\nsoftware.\n\n<a name=\"torch.save\"></a>\n### torch.save(filename, object [, format, referenced]) ###\n\nWrites `object` into a file named `filename`. The `format` can be set to\n`ascii` or `binary` (default is binary). Binary format is platform\ndependent, but typically more compact and faster to read/write. The ASCII\nformat is platform-independent, and should be used to share data structures\nacross platforms. 
The option `referenced` specifies if\n[object references](file.md#torch.File.referenced) should be tracked or not\n(`true` by default).\n\n```\n-- arbitrary object:\nobj = {\n   mat = torch.randn(10,10),\n   name = '10',\n   test = {\n      entry = 1\n   }\n}\n\n-- save to disk:\ntorch.save('test.dat', obj)\n```\n\n<a name=\"torch.load\"></a>\n### [object] torch.load(filename [, format, referenced]) ###\n\nReads `object` from a file named `filename`.\nThe `format` can be set to `ascii`, `binary`, `b32` or `b64` (default is binary).\nBinary format is platform dependent, but typically more compact and faster to read/write.\nUse `b32`/`b64`, instead of `binary`, for loading files saved on a 32/64 bit OS.\nThe ASCII format is platform-independent, and may be used to share data structures across platforms.\nThe option `referenced` specifies if [object references](file.md#torch.File.referenced) should be tracked or not (`true` by default).\nNote that files written with `referenced` at `true` cannot be loaded with `referenced` at `false`.\n\n```\n-- given serialized object from section above, reload:\nobj = torch.load('test.dat')\n\nprint(obj)\n-- will print:\n-- {[mat]  = DoubleTensor - size: 10x10\n--  [name] = string : \"10\"\n--  [test] = table - size: 0}\n```\n\n<a name=\"torch.serialize\"></a>\n### [str] torch.serialize(object [, format]) ###\n\nSerializes `object` into a string. The `format` can be set\nto `ascii` or `binary` (default is binary). Binary format is platform\ndependent, but typically more compact and faster to read/write. The ASCII\nformat is platform-independent, and should be used to share data structures\nacross platforms.\n\n```\n-- arbitrary object:\nobj = {\n   mat = torch.randn(10,10),\n   name = '10',\n   test = {\n      entry = 1\n   }\n}\n\n-- serialize:\nstr = torch.serialize(obj)\n```\n\n<a name=\"torch.deserialize\"></a>\n### [object] torch.deserialize(str [, format]) ###\n\nDeserializes `object` from a string. 
The `format` can be set\nto `ascii` or `binary` (default is binary). Binary format is platform\ndependent, but typically more compact and faster to read/write. The ASCII\nformat is platform-independent, and should be used to share data structures\nacross platforms.\n\n```\n-- given serialized object from section above, deserialize:\nobj = torch.deserialize(str)\n\nprint(obj)\n-- will print:\n-- {[mat]  = DoubleTensor - size: 10x10\n--  [name] = string : \"10\"\n--  [test] = table - size: 0}\n```\n\n"
  },
  {
    "path": "doc/storage.md",
    "content": "<a name=\"torch.Storage.dok\"></a>\n# Storage #\n<a name=\"torch.CharStorage.dok\"></a>\n<a name=\"torch.ByteStorage.dok\"></a>\n<a name=\"torch.IntStorage.dok\"></a>\n<a name=\"torch.ShortStorage.dok\"></a>\n<a name=\"torch.FloatStorage.dok\"></a>\n<a name=\"torch.LongStorage.dok\"></a>\n<a name=\"torch.DoubleStorage.dok\"></a>\n\n_Storages_ are basically a way for `Lua` to access memory of a `C` pointer\nor array. _Storages_ can also [map the contents of a file to memory](#__torch.StorageMap).\nA `Storage` is an array of _basic_ `C` types. For arrays of `Torch` objects,\nuse the `Lua` tables.\n\nSeveral `Storage` classes for all the basic `C` types exist and have the\nfollowing self-explanatory names: `ByteStorage`, `CharStorage`, `ShortStorage`,\n`IntStorage`, `LongStorage`, `FloatStorage`, `DoubleStorage`.\n\nNote that `ByteStorage` and `CharStorage` represent both arrays of bytes. `ByteStorage` represents an array of\n_unsigned_ chars, while `CharStorage` represents an array of _signed_ chars.\n\nConversions between two `Storage` type might be done using `copy`:\n```lua\nx = torch.IntStorage(10):fill(1)\ny = torch.DoubleStorage(10):copy(x)\n```\n\n[Classical storages](#torch.Storage) are [serializable](file.md#torch.File.serialization).\n[Storages mapping a file](#__torch.StorageMap) are also [serializable](file.md#torch.File.serialization),\nbut _will be saved as a normal storage_. High-level serialization commands are described in the\n[serialization](serialization.md) section.\n\nAn alias `torch.Storage()` is made over your preferred Storage type,\ncontrolled by the\n[torch.setdefaulttensortype](utility.md#torch.setdefaulttensortype)\nfunction. By default, this \"points\" on `torch.DoubleStorage`.\n\n## Constructors and Access Methods ##\n\n<a name=\"torch.Storage\"></a>\n### torch.TYPEStorage([size [, ptr]]) ###\n\nReturns a new `Storage` of type `TYPE`. Valid `TYPE` are `Byte`, `Char`, `Short`,\n`Int`, `Long`, `Float`, and `Double`. 
If `size` is given, resize the\n`Storage` accordingly, else create an empty `Storage`.\n\nExample:\n```lua\n-- Creates a Storage of 10 double:\nx = torch.DoubleStorage(10)\n```\n\nThe data in the `Storage` is _uninitialized_.\n\nThe optional second argument `ptr` is a number whose value is a\npointer to a memory chunk of size `size*sizeof(TYPE)` (for example coming from the\n[`torch.data()`](tensor.md#result-datatensor-asnumber)\nmethod). The caller remains responsible for the memory chunk and must ensure it remains stable as the storage only keeps a pointer to it (the memory is _not_ copied and will _not_ be freed at storage deletion).\n\n<a name=\"torch.Storage\"></a>\n### torch.TYPEStorage(table) ###\n\n`table` is assumed to be a Lua array of numbers. The constructor returns a new storage of the specified `TYPE`,\nof the size of the table, containing all the table elements converted to that type.\n\nExample:\n```lua\n> = torch.IntStorage({1,2,3,4})\n\n 1\n 2\n 3\n 4\n[torch.IntStorage of size 4]\n```\n\n<a name=\"torch.Storage\"></a>\n### torch.TYPEStorage(storage [, offset [, size]]) ###\n\nReturns a new `Storage` of type `TYPE`, which is a view on the first argument. The first argument must be of the same type `TYPE`. An optional `offset` can be provided (defaults to 1). 
An optional `size` can also be provided to restrict the size of the new storage (defaults to `storage:size()-(offset-1)`).\n\nExample:\n```lua\n-- Creates a Storage of 10 double:\n> x = torch.DoubleStorage(10)\n\n-- Creates a view on this Storage, starting at offset 3, with a size of 5:\n> y = torch.DoubleStorage(x, 3, 5)\n\n-- Modifying elements of y will modify x:\n> x:fill(0)\n> y:fill(1)\n> print(x)\n 0\n 0\n 1\n 1\n 1\n 1\n 1\n 0\n 0\n 0\n[torch.DoubleStorage of size 10]\n```\n\n<a name=\"torch.Storage\"></a>\n### torch.TYPEStorage(filename [, shared [, size [, sharedMem]]]) ###\n<a name=\"__torch.StorageMap\"></a>\n\nReturns a new kind of `Storage` which maps the contents of the given\n`filename` to memory. Valid `TYPE` are `Byte`, `Char`, `Short`, `Int`, `Long`,\n`Float`, and `Double`. If the optional boolean argument `shared` is `true`,\nthe mapped memory is shared amongst all processes on the computer.\n\nWhen `shared` is `true`, the file must be accessible in read-write mode. Any\nchanges on the storage will be written in the file. The changes might be written\nonly after destruction of the storage.\n\nWhen `shared` is `false` (or not provided), the file must be at least\nreadable. Any changes on the storage will not affect the file. Note:\nchanges made on the file after creation of the storage have an unspecified\neffect on the storage contents.\n\nIf `size` is specified, it is the [size](#torch.Storage.size) of the returned\n`Storage` (in elements). In this case, if `shared` is `false` then the file must\nalready contain at least\n```lua\nsize*(size of TYPE)\n```\nbytes. 
If `shared` is `true` then the file will be created if necessary, and\nextended if necessary to that many bytes in length.\n\nIf `size` is not specified then the [size](#torch.Storage.size) of the returned\n`Storage`  will be\n```lua\n(size of file in byte)/(size of TYPE)\n```\nelements provided a non empty file already exists.\n\nIf `sharedMem` is true then, the file will be created (or mapped) from the shared\nmemory area using [`shm_open()`](http://linux.die.net/man/3/shm_open). On Linux systems\nthis is implemented at `/dev/shm` partition on RAM for interprocess communication.\n\n\nExample:\n```lua\n$ echo \"Hello World\" > hello.txt\n$ lua\nLua 5.1.3  Copyright (C) 1994-2008 Lua.org, PUC-Rio\n> require 'torch'\n> x = torch.CharStorage('hello.txt')\n> = x\n  72\n 101\n 108\n 108\n 111\n  32\n  87\n 111\n 114\n 108\n 100\n  10\n[torch.CharStorage of size 12]\n\n> = x:string()\nHello World\n\n> = x:fill(42):string()\n************\n>\n$ cat hello.txt\nHello World\n$ lua\nLua 5.1.3  Copyright (C) 1994-2008 Lua.org, PUC-Rio\n> require 'torch'\n> x = torch.CharStorage('hello.txt', true)\n> = x:string()\nHello World\n\n> x:fill(42)\n>\n$ cat hello.txt\n************\n```\n\n<a name=\"__torch.StorageSharp\"></a>\n### [number] #self ###\n\nReturns the number of elements in the storage. Equivalent to [size()](#torch.Storage.size).\n\n<a name=\"torch.Storage.__index__\"></a>\n### [number] self[index] ###\n\nReturns or set the element at position `index` in the storage. Valid range\nof `index` is 1 to [size()](#torch.Storage.size).\n\nExample:\n```lua\nx = torch.DoubleStorage(10)\nprint(x[5])\n```\n\n<a name=\"torch.Storage.copy\"></a>\n### [self] copy(storage) ###\n\nCopy another `storage`. 
The types of the two storages might be different: in that case\na conversion of types occur (which might result, of course, in loss of precision or rounding).\nThis method returns self, allowing things like:\n```lua\nx = torch.IntStorage(10):fill(1)\ny = torch.DoubleStorage(10):copy(x) -- y won't be nil!\n```\n\n<a name=\"torch.Storage.fill\"></a>\n### [self] fill(value) ###\n\nFill the `Storage` with the given value. This method returns self, allowing things like:\n```lua\nx = torch.IntStorage(10):fill(0) -- x won't be nil!\n```\n\n<a name=\"torch.Storage.resize\"></a>\n### [self] resize(size) ###\n\nResize the storage to the provided `size`. _The new contents are undetermined_.\n\nThis function returns self, allowing things like:\n```lua\nx = torch.DoubleStorage(10):fill(1)\ny = torch.DoubleStorage():resize(x:size()):copy(x) -- y won't be nil!\n```\n\n<a name=\"torch.Storage.size\"></a>\n### [number] size() ###\n\nReturns the number of elements in the storage. Equivalent to [#](#__torch.StorageSharp).\n\n<a name=\"torch.Storage.string\"></a>\n### [self] string(str) ###\n\nThis function is available only on `ByteStorage` and `CharStorage`.\n\nThis method resizes the storage to the length of the provided\nstring `str`, and copy the contents of `str` into the storage. The `NULL` terminating character is not copied,\nbut `str` might contain `NULL` characters. The method returns the `Storage`.\n```lua\n> x = torch.CharStorage():string(\"blah blah\")\n> print(x)\n  98\n 108\n  97\n 104\n  32\n  98\n 108\n  97\n 104\n[torch.CharStorage of size 9]\n```\n\n<a name=\"torch.Storage.string\"></a>\n### [string] string() ###\n\nThis function is available only on `ByteStorage` and `CharStorage`.\n\nThe contents of the storage viewed as a string are returned. The string might contain\n`NULL` characters.\n```lua\n> x = torch.CharStorage():string(\"blah blah\")\n> print(x:string())\nblah blah\n```\n\n## Reference counting methods ##\n\nStorages are reference-counted. 
It means that each time an object (C or the\nLua state) needs to keep a reference over a storage, the corresponding\nstorage reference counter will be [increased](#torch.Storage.retain). The\nreference counter is [decreased](#torch.Storage.free) when the object\ndoes not need the storage anymore.\n\nThese methods should be used with extreme care. In general, they should\nnever be called, except if you know what you are doing, as the handling of\nreferences is done automatically. They can be useful in threaded\nenvironments. Note that these methods are atomic operations.\n\n<a name=\"torch.Storage.retain\"></a>\n### retain() ###\n\nIncrement the reference counter of the storage.\n\n<a name=\"torch.Storage.free\"></a>\n### free() ###\n\nDecrement the reference counter of the storage. Free the storage if the\ncounter is at 0.\n"
  },
  {
    "path": "doc/tensor.md",
    "content": "<a name=\"torch.Tensor.dok\"></a>\n# Tensor #\n\nThe `Tensor` class is probably the most important class in\n`Torch`. Almost every package depends on this class. It is *__the__*\nclass for handling numeric data. As with   pretty much anything in\n[Torch7](./index.md), tensors are\n[serializable](file.md#torch.File.serialization).\n\n__Multi-dimensional matrix__\n\nA `Tensor` is a multi-dimensional matrix. The number of\ndimensions is unlimited (up to what can be created using\n[LongStorage](storage.md)).\n\nExample:\n```lua\n --- creation of a 4D-tensor 4x5x6x2\n z = torch.Tensor(4,5,6,2)\n --- for more dimensions, (here a 6D tensor) one can do:\n s = torch.LongStorage(6)\n s[1] = 4; s[2] = 5; s[3] = 6; s[4] = 2; s[5] = 7; s[6] = 3;\n x = torch.Tensor(s)\n```\n\nThe number of dimensions of a `Tensor` can be queried by\n[nDimension()](#torch.nDimension) or\n[dim()](#torch.Tensor.dim). Size of the `i-th` dimension is\nreturned by [size(i)](#torch.Tensor.size). A [LongStorage](storage.md)\ncontaining all the dimensions can be returned by\n[size()](#torch.Tensor.size).\n\n```lua\n> x:nDimension()\n6\n> x:size()\n 4\n 5\n 6\n 2\n 7\n 3\n[torch.LongStorage of size 6]\n```\n\n__Internal data representation__\n\nThe actual data of a `Tensor` is contained into a\n[Storage](storage.md). It can be accessed using\n[`storage()`](#torch.storage). While the memory of a\n`Tensor` has to be contained in this unique `Storage`, it might\nnot be contiguous: the first position used in the `Storage` is given\nby [`storageOffset()`](#torch.storageOffset) (starting at\n`1`). And the _jump_ needed to go from one element to another\nelement in the `i-th` dimension is given by\n[`stride(i)`](#torch.Tensor.stride). 
In other words, given a 3D\ntensor\n\n```lua\nx = torch.Tensor(7,7,7)\n```\naccessing the element `(3,4,5)` can be done by\n```lua\n> x[3][4][5]\n```\nor equivalently (but slowly!)\n```lua\n> x:storage()[x:storageOffset()\n              +(3-1)*x:stride(1)+(4-1)*x:stride(2)+(5-1)*x:stride(3)]\n```\nOne could say that a `Tensor` is a particular way of _viewing_ a\n`Storage`: a `Storage` only represents a chunk of memory, while the\n`Tensor` interprets this chunk of memory as having dimensions:\n```lua\nx = torch.Tensor(4,5)\ns = x:storage()\nfor i=1,s:size() do -- fill up the Storage\n  s[i] = i\nend\n> x -- s is interpreted by x as a 2D matrix\n  1   2   3   4   5\n  6   7   8   9  10\n 11  12  13  14  15\n 16  17  18  19  20\n[torch.DoubleTensor of dimension 4x5]\n```\n\nNote also that in Torch7 ___elements in the same row___ [elements along the __last__ dimension]\nare contiguous in memory for a matrix [tensor]:\n```lua\nx = torch.Tensor(4,5)\ni = 0\n\nx:apply(function()\n  i = i + 1\n  return i\nend)\n\n> x\n  1   2   3   4   5\n  6   7   8   9  10\n 11  12  13  14  15\n 16  17  18  19  20\n[torch.DoubleTensor of dimension 4x5]\n\n> x:stride()\n 5\n 1  -- element in the last dimension are contiguous!\n[torch.LongStorage of size 2]\n```\nThis is exactly like in C (and not `Fortran`).\n\n__Tensors of different types__\n\nActually, several types of `Tensor` exists:\n```lua\nByteTensor -- contains unsigned chars\nCharTensor -- contains signed chars\nShortTensor -- contains shorts\nIntTensor -- contains ints\nLongTensor -- contains longs\nFloatTensor -- contains floats\nDoubleTensor -- contains doubles\n```\n\nMost numeric operations are implemented _only_ for `FloatTensor` and `DoubleTensor`.\nOther Tensor types are useful if you want to save memory space.\n\n__Default Tensor type__\n\nFor convenience, _an alias_ `torch.Tensor` is provided, which allows the user to write\ntype-independent scripts, which can then ran after choosing the desired Tensor type with\na call 
like\n```lua\ntorch.setdefaulttensortype('torch.FloatTensor')\n```\nSee [torch.setdefaulttensortype](utility.md#torch.setdefaulttensortype) for more details.\nBy default, the alias \"points\" on `torch.DoubleTensor`.\n\n__Efficient memory management__\n\n_All_ tensor operations in this class do _not_ make any memory copy. All\nthese methods transform the existing tensor, or return a new tensor\nreferencing _the same storage_. This magical behavior is internally\nobtained by good usage of the [stride()](#torch.Tensor.stride) and\n[storageOffset()](#torch.storageOffset). Example:\n```lua\nx = torch.Tensor(5):zero()\n> x\n0\n0\n0\n0\n0\n[torch.DoubleTensor of dimension 5]\n> x:narrow(1, 2, 3):fill(1) -- narrow() returns a Tensor\n                            -- referencing the same Storage as x\n> x\n 0\n 1\n 1\n 1\n 0\n[torch.Tensor of dimension 5]\n```\n\nIf you really need to copy a `Tensor`, you can use the [copy()](#torch.Tensor.copy) method:\n```lua\ny = torch.Tensor(x:size()):copy(x)\n```\nOr the convenience method\n```lua\ny = x:clone()\n```\n\nWe now describe all the methods for `Tensor`. If you want to specify the Tensor type,\njust replace `Tensor` by the name of the Tensor variant (like `CharTensor`).\n\n<a name=\"torch.Tensor\"></a>\n## Tensor constructors ##\n\nTensor constructors, create new Tensor object, optionally, allocating\nnew memory. By default the elements of a newly allocated memory are\nnot initialized, therefore, might contain arbitrary numbers. Here are\nseveral ways to construct a new `Tensor`.\n\n<a name=\"torch.Tensor\"></a>\n### torch.Tensor() ###\n\nReturns an empty tensor.\n\n<a name=\"torch.Tensor\"></a>\n### torch.Tensor(tensor) ###\n\nReturns a new tensor which reference the same\n[Storage](#torch.storage) than the given `tensor`. 
The\n[size](#torch.Tensor.size), [stride](#torch.Tensor.stride), and\n[storage offset](#torch.storageOffset) are the same as the\ngiven tensor.\n\nThe new `Tensor` is now going to \"view\" the same [storage](storage.md)\nas the given `tensor`. As a result, any modification in the elements\nof the `Tensor` will have an impact on the elements of the given\n`tensor`, and vice-versa. No memory copy!\n\n```lua\nx = torch.Tensor(2,5):fill(3.14)\n> x\n 3.1400  3.1400  3.1400  3.1400  3.1400\n 3.1400  3.1400  3.1400  3.1400  3.1400\n[torch.DoubleTensor of dimension 2x5]\n\ny = torch.Tensor(x)\n> y\n 3.1400  3.1400  3.1400  3.1400  3.1400\n 3.1400  3.1400  3.1400  3.1400  3.1400\n[torch.DoubleTensor of dimension 2x5]\n\ny:zero()\n> x -- elements of x are the same as y!\n0 0 0 0 0\n0 0 0 0 0\n[torch.DoubleTensor of dimension 2x5]\n```\n\n\n<a name=\"torch.Tensor\"></a>\n### torch.Tensor(sz1 [,sz2 [,sz3 [,sz4]]]) ###\n\nCreate a tensor of up to 4 dimensions. The tensor size will be `sz1 x sz2 x sz3 x sz4`.\n\n<a name=\"torch.Tensor\"></a>\n### torch.Tensor(sizes, [strides]) ###\n\nCreate a tensor of any number of dimensions. The\n[LongStorage](storage.md) `sizes` gives the size in each dimension of\nthe tensor. The optional [LongStorage](storage.md) `strides` gives the\njump necessary to go from one element to the next one in each\ndimension. Of course, `sizes` and `strides` must have the same\nnumber of elements. 
If not given, or if some elements of `strides`\nare _negative_, the [stride()](#torch.Tensor.stride) will be\ncomputed such that the tensor is as contiguous as possible in memory.\n\nExample, create a 4D 4x4x3x2 tensor:\n```lua\nx = torch.Tensor(torch.LongStorage({4,4,3,2}))\n```\n\nPlaying with the strides can give some interesting things:\n```lua\nx = torch.Tensor(torch.LongStorage({4}), torch.LongStorage({0})):zero() -- zeroes the tensor\nx[1] = 1 -- all elements point to the same address!\n> x\n 1\n 1\n 1\n 1\n[torch.DoubleTensor of dimension 4]\n```\n\nNote that _negative strides are not allowed_, and, if given as\nargument when constructing the Tensor, will be interpreted as _choose\nthe right stride such that the Tensor is contiguous in memory_.\n\nNote _this method cannot be used to create `torch.LongTensor`s_.\nThe constructor [from a storage](tensor.md#torchtensorstorage-storageoffset-sizes-strides) will be used:\n```lua\na = torch.LongStorage({1,2}) -- We have a torch.LongStorage containing the values 1 and 2\n-- General case for TYPE ~= Long, e.g. for TYPE = Float:\nb = torch.FloatTensor(a)\n-- Creates a new torch.FloatTensor with 2 dimensions, the first of size 1 and the second of size 2\n> b:size()\n 1\n 2\n[torch.LongStorage of size 2]\n\n-- Special case of torch.LongTensor\nc = torch.LongTensor(a)\n-- Creates a new torch.LongTensor that uses a as storage and thus contains the values 1 and 2\n> c\n 1\n 2\n[torch.LongTensor of size 2]\n```\n\n<a name=\"torch.Tensor\"></a>\n### torch.Tensor(storage, [storageOffset, sizes, [strides]]) ###\n\nReturns a tensor which uses the existing [Storage](storage.md)\n`storage`, starting at position `storageOffset` (>=1).  
The size\nof each dimension of the tensor is given by the\n[LongStorage](storage.md) `sizes`.\n\nIf only `storage` is provided, it will create a 1D Tensor viewing\nthe all Storage.\n\nThe jump necessary to go from one element to the next one in each\ndimension is given by the optional argument [LongStorage](storage.md)\n`strides`. If not given, or if some elements of `strides` are\nnegative, the [stride()](#torch.Tensor.stride) will be computed such\nthat the tensor is as contiguous as possible in memory.\n\nAny modification in the elements of the `Storage` will have an\nimpact on the elements of the new `Tensor`, and vice-versa. There is\nno memory copy!\n\n```lua\n-- creates a storage with 10 elements\ns = torch.Storage(10):fill(1)\n\n-- we want to see it as a 2x5 tensor\nx = torch.Tensor(s, 1, torch.LongStorage{2,5})\n> x\n 1  1  1  1  1\n 1  1  1  1  1\n[torch.DoubleTensor of dimension 2x5]\n\nx:zero()\n> s -- the storage contents have been modified\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n[torch.DoubleStorage of size 10]\n```\n\n<a name=\"torch.Tensor\"></a>\n### torch.Tensor(storage, [storageOffset, sz1 [, st1 ... [, sz4 [, st4]]]]) ###\n\nConvenience constructor (for the previous constructor) assuming a\nnumber of dimensions inferior or equal to 4. `szi` is the size in\nthe `i-th` dimension, and `sti` is the stride in the `i-th`\ndimension.\n\n<a name=\"torch.Tensor\"></a>\n### torch.Tensor(table) ###\n\nThe argument is assumed to be a Lua array of numbers. The constructor\nreturns a new Tensor of the size of the table, containing all the table\nelements. The table might be multi-dimensional.\n\nExample:\n```lua\n> torch.Tensor({{1,2,3,4}, {5,6,7,8}})\n 1  2  3  4\n 5  6  7  8\n[torch.DoubleTensor of dimension 2x4]\n```\n\n## A note on function calls ##\n\nThe rest of this guide will present many functions that can be used to manipulate tensors. 
Most functions have been\ndefined so that they can be called flexibly, either in an object-oriented \"method call\" style i.e. `src:function(...)`\nor a more \"functional\" style `torch.function(src, ...)`, where `src` is a tensor. Note that these different invocations\nmay differ in whether they modify the tensor in-place, or create a new tensor. Additionally, some functions can be\ncalled in the form `dst:function(src, ...)` which usually suggests that the result of the operation on the `src` tensor\nwill be stored in the tensor `dst`.  Further details are given in the individual function definitions, below, but it\nshould be noted that the documentation is currently incomplete in this regard, and readers are encouraged to experiment\nin an interactive session.\n\n## Cloning ##\n\n<a name=\"torch.Tensor.clone\"></a>\n### [Tensor] clone() ###\n\nReturns a clone of a tensor. The memory is copied.\n\n```lua\ni = 0\nx = torch.Tensor(5):apply(function(x)\n  i = i + 1\n  return i\nend)\n> x\n 1\n 2\n 3\n 4\n 5\n[torch.DoubleTensor of dimension 5]\n\n-- create a clone of x\ny = x:clone()\n> y\n 1\n 2\n 3\n 4\n 5\n[torch.DoubleTensor of dimension 5]\n\n-- fill up y with 1\ny:fill(1)\n> y\n 1\n 1\n 1\n 1\n 1\n[torch.DoubleTensor of dimension 5]\n\n-- the contents of x were not changed:\n> x\n 1\n 2\n 3\n 4\n 5\n[torch.DoubleTensor of dimension 5]\n```\n\n<a name=\"torch.Tensor.contiguous\"></a>\n### [Tensor] contiguous ###\n\n  * If the given Tensor contents are contiguous in memory, returns the exact same Tensor (no memory copy).\n  * Otherwise (_not contiguous in memory_), returns a [clone](#torch.Tensor.clone) (memory _copy_).\n\n```lua\nx = torch.Tensor(2,3):fill(1)\n> x\n 1  1  1\n 1  1  1\n[torch.DoubleTensor of dimension 2x3]\n\n-- x is contiguous, so y points to the same thing\ny = x:contiguous():fill(2)\n> y\n 2  2  2\n 2  2  2\n[torch.DoubleTensor of dimension 2x3]\n\n-- contents of x have been changed\n> x\n 2  2  2\n 2  2  2\n[torch.DoubleTensor of dimension 
2x3]\n\n-- x:t() is not contiguous, so z is a clone\nz = x:t():contiguous():fill(3.14)\n> z\n 3.1400  3.1400\n 3.1400  3.1400\n 3.1400  3.1400\n[torch.DoubleTensor of dimension 3x2]\n\n-- contents of x have not been changed\n> x\n 2  2  2\n 2  2  2\n[torch.DoubleTensor of dimension 2x3]\n```\n\n<a name=\"torch.type\"></a>\n### [Tensor or string] type(type) ###\n\n__If `type` is `nil`__, returns a string containing the type name of\n  the given tensor.\n\n```lua\n= torch.Tensor():type()\ntorch.DoubleTensor\n```\n\n__If `type` is a string__ describing a Tensor type, and is equal to\nthe given tensor typename, returns the exact same tensor (_no memory\ncopy_).\n\n```lua\nx = torch.Tensor(3):fill(3.14)\n> x\n 3.1400\n 3.1400\n 3.1400\n[torch.DoubleTensor of dimension 3]\n\ny = x:type('torch.DoubleTensor')\n> y\n 3.1400\n 3.1400\n 3.1400\n[torch.DoubleTensor of dimension 3]\n\n-- zero y contents\ny:zero()\n\n-- contents of x have been changed\n> x\n 0\n 0\n 0\n[torch.DoubleTensor of dimension 3]\n\n```\n\n__If `type` is a string__ describing a Tensor type, different from\nthe type name of the given Tensor, returns a new Tensor of the\nspecified type, whose contents correspond to the contents of the\noriginal Tensor, cast to the given type (_memory copy occurs, with\npossible loss of precision_).\n\n```lua\nx = torch.Tensor(3):fill(3.14)\n> x\n 3.1400\n 3.1400\n 3.1400\n[torch.DoubleTensor of dimension 3]\n\ny = x:type('torch.IntTensor')\n> y\n 3\n 3\n 3\n[torch.IntTensor of dimension 3]\n\n```\n\n<a name=\"torch.Tensor.typeAs\"></a>\n### [Tensor] typeAs(tensor) ###\n\nConvenience method for the [type](#torch.type) method. 
Equivalent to\n```lua\ntype(tensor:type())\n```\n\n<a name=\"torch.isTensor\"></a>\n### [boolean] isTensor(object) ###\n\nReturns `true` iff the provided `object` is one of the `torch.*Tensor` types.\n\n```lua\n> torch.isTensor(torch.randn(3,4))\ntrue\n\n> torch.isTensor(torch.randn(3,4)[1])\ntrue\n\n> torch.isTensor(torch.randn(3,4)[1][2])\nfalse\n```\n\n<a name=\"torch.byte\"></a>\n### [Tensor] byte(), char(), short(), int(), long(), float(), double() ###\n<a name=\"torch.Tensor.short\"></a>\n<a name=\"torch.Tensor.char\"></a>\n<a name=\"torch.Tensor.long\"></a>\n<a name=\"torch.Tensor.int\"></a>\n<a name=\"torch.Tensor.double\"></a>\n<a name=\"torch.Tensor.float\"></a>\n\nConvenience methods for the [type](#torch.type) method. For example:\n```lua\nx = torch.Tensor(3):fill(3.14)\n> x\n 3.1400\n 3.1400\n 3.1400\n[torch.DoubleTensor of dimension 3]\n\n-- calling type('torch.IntTensor')\n> x:type('torch.IntTensor')\n 3\n 3\n 3\n[torch.IntTensor of dimension 3]\n\n\n-- is equivalent to calling int()\n> x:int()\n 3\n 3\n 3\n[torch.IntTensor of dimension 3]\n```\n\n## Querying the size and structure ##\n\n<a name=\"torch.nDimension\"></a>\n### [number] nDimension() ###\n\nReturns the number of dimensions in a `Tensor`.\n```lua\nx = torch.Tensor(4,5) -- a matrix\n> x:nDimension()\n2\n```\n\n<a name=\"torch.Tensor.dim\"></a>\n### [number] dim() ###\n\nSame as [nDimension()](#torch.nDimension).\n\n<a name=\"torch.Tensor.size\"></a>\n### [number] size(dim) ###\n\nReturns the size of the specified dimension `dim`. 
Example:\n```lua\nx = torch.Tensor(4,5):zero()\n> x\n 0 0 0 0 0\n 0 0 0 0 0\n 0 0 0 0 0\n 0 0 0 0 0\n[torch.DoubleTensor of dimension 4x5]\n\n> x:size(2) -- gets the number of columns\n5\n```\n\n<a name=\"torch.Tensor.size\"></a>\n### [LongStorage] size() ###\n\nReturns a [LongStorage](storage.md) containing the size of each dimension\nof the tensor.\n```lua\nx = torch.Tensor(4,5):zero()\n> x\n 0 0 0 0 0\n 0 0 0 0 0\n 0 0 0 0 0\n 0 0 0 0 0\n[torch.DoubleTensor of dimension 4x5]\n\n> x:size()\n 4\n 5\n[torch.LongStorage of size 2]\n```\n\n<a name=\"torch.Tensor.size\"></a>\n### [LongStorage] #self ###\n\nSame as [size()](#torch.Tensor.size) method.\n\n<a name=\"torch.Tensor.stride\"></a>\n### [number] stride(dim) ###\n\nReturns the jump necessary to go from one element to the next one in the\nspecified dimension `dim`. Example:\n```lua\nx = torch.Tensor(4,5):zero()\n> x\n 0 0 0 0 0\n 0 0 0 0 0\n 0 0 0 0 0\n 0 0 0 0 0\n[torch.DoubleTensor of dimension 4x5]\n\n-- elements in a row are contiguous in memory\n> x:stride(2)\n1\n\n-- to go from one element to the next one in a column\n-- we need here to jump the size of the row\n> x:stride(1)\n5\n```\n\nNote also that in `Torch` _elements in the same row_ [elements along the __last__ dimension]\nare contiguous in memory for a matrix [tensor].\n\n<a name=\"torch.Tensor.stride\"></a>\n### [LongStorage] stride() ###\n\nReturns the jump necessary to go from one element to the next one in each dimension. 
Example:\n```lua\nx = torch.Tensor(4,5):zero()\n> x\n 0 0 0 0 0\n 0 0 0 0 0\n 0 0 0 0 0\n 0 0 0 0 0\n[torch.DoubleTensor of dimension 4x5]\n\n> x:stride()\n 5\n 1 -- elements are contiguous in a row [last dimension]\n[torch.LongStorage of size 2]\n```\n\nNote also that in `Torch` _elements in the same row_ [elements along the __last__ dimension]\nare contiguous in memory for a matrix [tensor].\n\n<a name=\"torch.storage\"></a>\n### [Storage] storage() ###\n\nReturns the [Storage](storage.md) used to store all the elements of the `Tensor`.\nBasically, a `Tensor` is a particular way of _viewing_ a `Storage`.\n```lua\nx = torch.Tensor(4,5)\ns = x:storage()\nfor i=1,s:size() do -- fill up the Storage\n  s[i] = i\nend\n\n> x -- s is interpreted by x as a 2D matrix\n  1   2   3   4   5\n  6   7   8   9  10\n 11  12  13  14  15\n 16  17  18  19  20\n[torch.DoubleTensor of dimension 4x5]\n```\n\n<a name=\"torch.Tensor.isContiguous\"></a>\n### [boolean] isContiguous() ###\n\nReturns `true` iff the elements of the `Tensor` are contiguous in memory.\n```lua\n-- normal tensors are contiguous in memory\nx = torch.randn(4,5)\n> x:isContiguous()\ntrue\n\n-- y now \"views\" the 3rd column of x\n-- the storage of y is the same than x\n-- so the memory cannot be contiguous\ny = x:select(2, 3)\n> y:isContiguous()\nfalse\n\n-- indeed, to jump to one element to\n-- the next one, the stride is 5\n> y:stride()\n 5\n[torch.LongStorage of size 1]\n```\n\n<a name=\"torch.Tensor.isSize\"></a>\n### [boolean] isSize(storage) ###\n\nReturns `true` iff the dimensions of the `Tensor` match the elements of the `storage`.\n```lua\nx = torch.Tensor(4,5)\ny = torch.LongStorage({4,5})\nz = torch.LongStorage({5,4,1})\n> x:isSize(y)\ntrue\n\n> x:isSize(z)\nfalse\n\n> x:isSize(x:size())\ntrue\n```\n\n<a name=\"torch.Tensor.isSameSizeAs\"></a>\n### [boolean] isSameSizeAs(tensor) ###\n\nReturns `true` iff the dimensions of the `Tensor` and the argument `Tensor` are exactly the same.\n```lua\nx = 
torch.Tensor(4,5)\ny = torch.Tensor(4,5)\n> x:isSameSizeAs(y)\ntrue\n\ny = torch.Tensor(4,6)\n> x:isSameSizeAs(y)\nfalse\n```\n\n<a name=\"torch.Tensor.nElement\"></a>\n### [number] nElement() ###\n\nReturns the number of elements of a tensor.\n```lua\nx = torch.Tensor(4,5)\n> x:nElement() -- 4x5 = 20!\n20\n```\n\n<a name=\"torch.storageOffset\"></a>\n### [number] storageOffset() ###\n\nReturn the first index (starting at 1) used in the tensor's [storage](#torch.storage).\n\n<a name=\"torch.__index__\"></a>\n## Querying elements ##\n\nElements of a tensor can be retrieved with the `[index]` operator.\n\nIf `index` is a number, `[index]` operator is equivalent to a\n[`select(1, index)`](#torch.Tensor.select). If the tensor has more\nthan one dimension, this operation returns a slice of the tensor that\nshares the same underlying storage. If the tensor is a 1D tensor, it\nreturns the value at `index` in this tensor.\n\nIf `index` is a table, the table must contain _n_ numbers, where\n_n_ is the [number of dimensions](#torch.nDimension) of the\nTensor. It will return the element at the given position.\n\nIn the same spirit, `index` might be a [LongStorage](storage.md),\nspecifying the position (in the Tensor) of the element to be\nretrieved.\n\nIf `index` is a `ByteTensor` in which each element is 0 or 1 then it acts as a\nselection mask used to extract a subset of the original tensor. 
This is\nparticularly useful with [logical operators](maths.md#logical-operations-on-tensors)\nlike [`torch.le`](maths.md#torchlea-b).\n\nExample:\n```lua\nx = torch.Tensor(3,3)\ni = 0; x:apply(function() i = i + 1; return i end)\n> x\n 1  2  3\n 4  5  6\n 7  8  9\n[torch.DoubleTensor of dimension 3x3]\n\n> x[2] -- returns row 2\n 4\n 5\n 6\n[torch.DoubleTensor of dimension 3]\n\n> x[2][3] -- returns row 2, column 3\n6\n\n> x[{2,3}] -- another way to return row 2, column 3\n6\n\n> x[torch.LongStorage{2,3}] -- yet another way to return row 2, column 3\n6\n\n> x[torch.le(x,3)] -- torch.le returns a ByteTensor that acts as a mask\n 1\n 2\n 3\n[torch.DoubleTensor of dimension 3]\n```\n\n<a name=\"torch.Tensor.set\"></a>\n## Referencing a tensor to an existing tensor or chunk of memory ##\n\nA `Tensor` being a way of _viewing_ a [Storage](storage.md), it is\npossible to \"set\" a `Tensor` such that it views an existing [Storage](storage.md).\n\nNote that if you want to perform a set on an empty `Tensor` like\n```lua\ny = torch.Storage(10)\nx = torch.Tensor()\nx:set(y, 1, 10)\n```\nyou might want in that case to use one of the [equivalent constructors](#torch.Tensor).\n```lua\ny = torch.Storage(10)\nx = torch.Tensor(y, 1, 10)\n```\n\n<a name=\"torch.Tensor.set\"></a>\n### [self] set(tensor) ###\n\nThe `Tensor` is now going to \"view\" the same [storage](#torch.storage)\nas the given `tensor`. As a result, any modification in the elements of\nthe `Tensor` will have an impact on the elements of the given `tensor`, and\nvice-versa. 
This is an efficient method, as there is no memory copy!\n\n```lua\nx = torch.Tensor(2,5):fill(3.14)\n> x\n 3.1400  3.1400  3.1400  3.1400  3.1400\n 3.1400  3.1400  3.1400  3.1400  3.1400\n[torch.DoubleTensor of dimension 2x5]\n\ny = torch.Tensor():set(x)\n> y\n 3.1400  3.1400  3.1400  3.1400  3.1400\n 3.1400  3.1400  3.1400  3.1400  3.1400\n[torch.DoubleTensor of dimension 2x5]\n\ny:zero()\n> x -- elements of x are the same as those of y!\n 0 0 0 0 0\n 0 0 0 0 0\n[torch.DoubleTensor of dimension 2x5]\n```\n\n<a name=\"torch.Tensor.isSetTo\"></a>\n### [boolean] isSetTo(tensor) ###\n\nReturns true iff the `Tensor` is set to the argument `Tensor`. Note: this is\nonly true if the tensors are the same size, have the same strides and share the\nsame storage and offset.\n\n```lua\nx = torch.Tensor(2,5)\ny = torch.Tensor()\n> y:isSetTo(x)\n false\n> y:set(x)\n> y:isSetTo(x)\n  true\n> y:t():isSetTo(x)\n  false -- x and y:t() have different strides\n```\n\n<a name=\"torch.Tensor.set\"></a>\n### [self] set(storage, [storageOffset, sizes, [strides]]) ###\n\nThe `Tensor` is now going to \"view\" the given\n[`storage`](storage.md), starting at position `storageOffset` (>=1)\nwith the given [dimension `sizes`](#torch.Tensor.size) and the optional given\n[`strides`](#torch.Tensor.stride). As a result, any modification in the\nelements of the `Storage` will have an impact on the elements of the\n`Tensor`, and vice-versa. 
This is an efficient method, as there is no\nmemory copy!\n\nIf only `storage` is provided, the whole storage will be viewed as a 1D Tensor.\n\n```lua\n-- creates a storage with 10 elements\ns = torch.Storage(10):fill(1)\n\n-- we want to see it as a 2x5 tensor\nsz = torch.LongStorage({2,5})\nx = torch.Tensor()\nx:set(s, 1, sz)\n> x\n 1  1  1  1  1\n 1  1  1  1  1\n[torch.DoubleTensor of dimension 2x5]\n\nx:zero()\n> s -- the storage contents have been modified\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n[torch.DoubleStorage of size 10]\n```\n\n<a name=\"torch.Tensor.set\"></a>\n### [self] set(storage, [storageOffset, sz1 [, st1 ... [, sz4 [, st4]]]]) ###\n\nThis is a \"shortcut\" for the previous method.\nIt works for up to 4 dimensions. `szi` is the size of the `i`-th dimension of the tensor.\n`sti` is the stride in the `i`-th dimension.\n\n## Copying and initializing ##\n\n<a name=\"torch.Tensor.copy\"></a>\n### [self] copy(tensor) ###\n\nReplace the elements of the `Tensor` by copying the elements of the given `tensor`. 
The\n[number of elements](#torch.Tensor.nElement) must match, but the\nsizes might be different.\n\n```lua\nx = torch.Tensor(4):fill(1)\ny = torch.Tensor(2,2):copy(x)\n> x\n 1\n 1\n 1\n 1\n[torch.DoubleTensor of dimension 4]\n\n> y\n 1  1\n 1  1\n[torch.DoubleTensor of dimension 2x2]\n```\n\nIf a different type of `tensor` is given, then a type conversion occurs,\nwhich, of course, might result in loss of precision.\n\n<a name=\"torch.fill\"></a>\n### [self] fill(value) ###\n\nFill the tensor with the given `value`.\n```lua\n> torch.DoubleTensor(4):fill(3.14)\n 3.1400\n 3.1400\n 3.1400\n 3.1400\n[torch.DoubleTensor of dimension 4]\n```\n\n<a name=\"torch.zero\"></a>\n### [self] zero() ###\n\nFill the tensor with zeros.\n```lua\n> torch.Tensor(4):zero()\n 0\n 0\n 0\n 0\n[torch.DoubleTensor of dimension 4]\n```\n\n<a name=\"torch.resize.dok\"></a>\n## Resizing ##\n\n__When resizing to a larger size__, the underlying [Storage](storage.md) is resized to fit\nall the elements of the `Tensor`.\n\n__When resizing to a smaller size__, the underlying [Storage](storage.md) is not resized.\n\n__Important note:__ the content of a `Tensor` after resizing is _undetermined_ as [strides](#torch.Tensor.stride)\nmight have been completely changed. In particular, _the elements of the resized tensor are contiguous in memory_.\n\n<a name=\"torch.Tensor.resizeAs\"></a>\n### [self] resizeAs(tensor) ###\n\nResize the `Tensor` to the same size as the given `tensor` (of the same type).\n\n<a name=\"torch.resize\"></a>\n### [self] resize(sizes) ###\n\nResize the `tensor` according to the given [LongStorage](storage.md) `sizes`.\n\n<a name=\"torch.resize\"></a>\n### [self] resize(sz1 [,sz2 [,sz3 [,sz4]]]) ###\n\nConvenience form of the previous method, working for a number of dimensions up to 4.\n\n## Extracting sub-tensors ##\n\nEach of these methods returns a `Tensor` which is a sub-tensor of the given\ntensor. 
\n\nFor methods `narrow`, `select` and `sub` the returned tensor _shares the same `Storage`_ as the original. Hence, any modification in the memory of the sub-tensor will have an impact on the primary tensor, and vice-versa. These methods are very fast, as they do not involve any memory copy.\n\nFor all other methods in this section such as `index`, `indexCopy` etc., since you cannot extract a shared subtensor (technically), a new tensor is returned. If you make changes in this new tensor, they are not reflected in the original tensor.\n\n<a name=\"torch.Tensor.narrow\"></a>\n### [Tensor] narrow(dim, index, size) ###\n\nReturns a new `Tensor` which is a narrowed version of the current one: the dimension `dim` is narrowed\nfrom `index` to `index+size-1`.\n\n```lua\nx = torch.Tensor(5, 6):zero()\n> x\n\n0 0 0 0 0 0\n0 0 0 0 0 0\n0 0 0 0 0 0\n0 0 0 0 0 0\n0 0 0 0 0 0\n[torch.DoubleTensor of dimension 5x6]\n\ny = x:narrow(1, 2, 3) -- narrow dimension 1 from index 2 to index 2+3-1\ny:fill(1) -- fill with 1\n> y\n 1  1  1  1  1  1\n 1  1  1  1  1  1\n 1  1  1  1  1  1\n[torch.DoubleTensor of dimension 3x6]\n\n> x -- memory in x has been modified!\n 0  0  0  0  0  0\n 1  1  1  1  1  1\n 1  1  1  1  1  1\n 1  1  1  1  1  1\n 0  0  0  0  0  0\n[torch.DoubleTensor of dimension 5x6]\n```\n\n<a name=\"torch.Tensor.sub\"></a>\n### [Tensor] sub(dim1s, dim1e ... [, dim4s [, dim4e]]) ###\n\nThis method is equivalent to doing a series of\n[narrow](#torch.Tensor.narrow) operations on up to the first 4 dimensions.  It\nreturns a new `Tensor` which is a sub-tensor going from index\n`dimis` to `dimie` in the `i`-th dimension. 
Negative values are\ninterpreted as indices starting from the end: `-1` is the last index,\n`-2` is the index before the last index, ...\n\n```lua\nx = torch.Tensor(5, 6):zero()\n> x\n 0 0 0 0 0 0\n 0 0 0 0 0 0\n 0 0 0 0 0 0\n 0 0 0 0 0 0\n 0 0 0 0 0 0\n[torch.DoubleTensor of dimension 5x6]\n\ny = x:sub(2,4):fill(1) -- y is sub-tensor of x:\n> y                    -- dimension 1 starts at index 2, ends at index 4\n 1  1  1  1  1  1\n 1  1  1  1  1  1\n 1  1  1  1  1  1\n[torch.DoubleTensor of dimension 3x6]\n\n> x                    -- x has been modified!\n 0  0  0  0  0  0\n 1  1  1  1  1  1\n 1  1  1  1  1  1\n 1  1  1  1  1  1\n 0  0  0  0  0  0\n[torch.DoubleTensor of dimension 5x6]\n\nz = x:sub(2,4,3,4):fill(2) -- we now take a new sub-tensor\n> z                        -- dimension 1 starts at index 2, ends at index 4\n                           -- dimension 2 starts at index 3, ends at index 4\n 2  2\n 2  2\n 2  2\n[torch.DoubleTensor of dimension 3x2]\n\n> x                        -- x has been modified\n 0  0  0  0  0  0\n 1  1  2  2  1  1\n 1  1  2  2  1  1\n 1  1  2  2  1  1\n 0  0  0  0  0  0\n[torch.DoubleTensor of dimension 5x6]\n\n> y                        -- y has been modified\n 1  1  2  2  1  1\n 1  1  2  2  1  1\n 1  1  2  2  1  1\n[torch.DoubleTensor of dimension 3x6]\n\n> y:sub(-1, -1, 3, 4)      -- negative values = bounds\n 2  2\n[torch.DoubleTensor of dimension 1x2]\n```\n\n<a name=\"torch.Tensor.select\"></a>\n### [Tensor] select(dim, index) ###\n\nReturns a new `Tensor` which is a tensor slice at the given `index` in the\ndimension `dim`. The returned tensor has one less dimension: the dimension\n`dim` is removed.  
As a result, it is not possible to `select()` on a 1D\ntensor.\n\nNote that \"selecting\" on the first dimension is equivalent to using the [[] operator](#torch.__index__).\n\n```lua\nx = torch.Tensor(5,6):zero()\n> x\n 0 0 0 0 0 0\n 0 0 0 0 0 0\n 0 0 0 0 0 0\n 0 0 0 0 0 0\n 0 0 0 0 0 0\n[torch.DoubleTensor of dimension 5x6]\n\ny = x:select(1, 2):fill(2) -- select row 2 and fill up\n> y\n 2\n 2\n 2\n 2\n 2\n 2\n[torch.DoubleTensor of dimension 6]\n\n> x\n 0  0  0  0  0  0\n 2  2  2  2  2  2\n 0  0  0  0  0  0\n 0  0  0  0  0  0\n 0  0  0  0  0  0\n[torch.DoubleTensor of dimension 5x6]\n\nz = x:select(2,5):fill(5) -- select column 5 and fill up\n> z\n 5\n 5\n 5\n 5\n 5\n[torch.DoubleTensor of dimension 5]\n\n> x\n 0  0  0  0  5  0\n 2  2  2  2  5  2\n 0  0  0  0  5  0\n 0  0  0  0  5  0\n 0  0  0  0  5  0\n[torch.DoubleTensor of dimension 5x6]\n```\n\n<a name=\"torch.Tensor.indexing\"></a>\n### [Tensor] [{ dim1,dim2,... }] or [{ {dim1s,dim1e}, {dim2s,dim2e} }] ###\n\nThe indexing operator [] can be used to combine narrow/sub and\nselect in a concise and efficient way. 
It can also be used\nto copy, and fill (sub) tensors.\n\nThis operator also works with an input mask made of a `ByteTensor` with 0 and 1\nelements, e.g with a [logical operator](maths.md#logical-operations-on-tensors).\n\n```lua\nx = torch.Tensor(5, 6):zero()\n> x\n 0 0 0 0 0 0\n 0 0 0 0 0 0\n 0 0 0 0 0 0\n 0 0 0 0 0 0\n 0 0 0 0 0 0\n[torch.DoubleTensor of dimension 5x6]\n\nx[{ 1,3 }] = 1 -- sets element at (i=1,j=3) to 1\n> x\n 0  0  1  0  0  0\n 0  0  0  0  0  0\n 0  0  0  0  0  0\n 0  0  0  0  0  0\n 0  0  0  0  0  0\n[torch.DoubleTensor of dimension 5x6]\n\nx[{ 2,{2,4} }] = 2  -- sets a slice of 3 elements to 2\n> x\n 0  0  1  0  0  0\n 0  2  2  2  0  0\n 0  0  0  0  0  0\n 0  0  0  0  0  0\n 0  0  0  0  0  0\n[torch.DoubleTensor of dimension 5x6]\n\nx[{ {},4 }] = -1 -- sets the full 4th column to -1\n> x\n 0  0  1 -1  0  0\n 0  2  2 -1  0  0\n 0  0  0 -1  0  0\n 0  0  0 -1  0  0\n 0  0  0 -1  0  0\n[torch.DoubleTensor of dimension 5x6]\n\nx[{ {},2 }] = torch.range(1,5) -- copy a 1D tensor to a slice of x\n> x\n\n 0  1  1 -1  0  0\n 0  2  2 -1  0  0\n 0  3  0 -1  0  0\n 0  4  0 -1  0  0\n 0  5  0 -1  0  0\n[torch.DoubleTensor of dimension 5x6]\n\nx[torch.lt(x,0)] = -2 -- sets all negative elements to -2 via a mask\n> x\n\n 0  1  1 -2  0  0\n 0  2  2 -2  0  0\n 0  3  0 -2  0  0\n 0  4  0 -2  0  0\n 0  5  0 -2  0  0\n[torch.DoubleTensor of dimension 5x6]\n```\n\n<a name=\"torch.Tensor.index\"></a>\n### [Tensor] index(dim, index) ###\n\nReturns a new `Tensor` which indexes the original `Tensor` along dimension `dim`\nusing the entries in `torch.LongTensor` `index`.\nThe returned `Tensor` has the same number of dimensions as the original `Tensor`.\nThe returned `Tensor` does __not__ use the same storage as the original `Tensor` -- see below for storing the result\n in an existing `Tensor`.\n\n```lua\nx = torch.rand(5,5)\n> x\n 0.8020  0.7246  0.1204  0.3419  0.4385\n 0.0369  0.4158  0.0985  0.3024  0.8186\n 0.2746  0.9362  0.2546  0.8586  0.6674\n 0.7473  0.9028  
0.1046  0.9085  0.6622\n 0.1412  0.6784  0.1624  0.8113  0.3949\n[torch.DoubleTensor of dimension 5x5]\n\ny = x:index(1,torch.LongTensor{3,1})\n> y\n 0.2746  0.9362  0.2546  0.8586  0.6674\n 0.8020  0.7246  0.1204  0.3419  0.4385\n[torch.DoubleTensor of dimension 2x5]\n\ny:fill(1)\n> y\n 1  1  1  1  1\n 1  1  1  1  1\n[torch.DoubleTensor of dimension 2x5]\n\n> x\n 0.8020  0.7246  0.1204  0.3419  0.4385\n 0.0369  0.4158  0.0985  0.3024  0.8186\n 0.2746  0.9362  0.2546  0.8586  0.6674\n 0.7473  0.9028  0.1046  0.9085  0.6622\n 0.1412  0.6784  0.1624  0.8113  0.3949\n[torch.DoubleTensor of dimension 5x5]\n\n```\n\nNote the explicit `index` function is different than the indexing operator `[]`.\nThe indexing operator `[]` is a syntactic shortcut for a series of select and narrow operations,\ntherefore it always returns a new view on the original tensor that shares the same storage.\nHowever, the explicit `index` function can not use the same storage.\n\nIt is possible to store the result into an existing Tensor with `result:index(source, ...)`:\n\n```lua\nx = torch.rand(5,5)\n> x\n 0.8020  0.7246  0.1204  0.3419  0.4385\n 0.0369  0.4158  0.0985  0.3024  0.8186\n 0.2746  0.9362  0.2546  0.8586  0.6674\n 0.7473  0.9028  0.1046  0.9085  0.6622\n 0.1412  0.6784  0.1624  0.8113  0.3949\n[torch.DoubleTensor of dimension 5x5]\n\ny = torch.Tensor()\ny:index(x,1,torch.LongTensor{3,1})\n> y\n 0.2746  0.9362  0.2546  0.8586  0.6674\n 0.8020  0.7246  0.1204  0.3419  0.4385\n[torch.DoubleTensor of dimension 2x5]\n```\n\n\n<a name=\"torch.Tensor.indexCopy\"></a>\n### [Tensor] indexCopy(dim, index, tensor) ###\n\nCopies the elements of `tensor` into the original tensor by selecting the indices in the order\ngiven in `index`. 
The shape of `tensor` must exactly match the elements indexed or an error will be thrown.\n\n```lua\n> x\n 0.8020  0.7246  0.1204  0.3419  0.4385\n 0.0369  0.4158  0.0985  0.3024  0.8186\n 0.2746  0.9362  0.2546  0.8586  0.6674\n 0.7473  0.9028  0.1046  0.9085  0.6622\n 0.1412  0.6784  0.1624  0.8113  0.3949\n[torch.DoubleTensor of dimension 5x5]\n\nz=torch.Tensor(5,2)\nz:select(2,1):fill(-1)\nz:select(2,2):fill(-2)\n> z\n-1 -2\n-1 -2\n-1 -2\n-1 -2\n-1 -2\n[torch.DoubleTensor of dimension 5x2]\n\nx:indexCopy(2,torch.LongTensor{5,1},z)\n> x\n-2.0000  0.7246  0.1204  0.3419 -1.0000\n-2.0000  0.4158  0.0985  0.3024 -1.0000\n-2.0000  0.9362  0.2546  0.8586 -1.0000\n-2.0000  0.9028  0.1046  0.9085 -1.0000\n-2.0000  0.6784  0.1624  0.8113 -1.0000\n[torch.DoubleTensor of dimension 5x5]\n\n```\n\n<a name=\"torch.Tensor.indexAdd\"></a>\n### [Tensor] indexAdd(dim, index, tensor) ###\n\nAccumulate the elements of `tensor` into the original tensor by adding to the indices in the order\ngiven in `index`. 
The shape of `tensor` must exactly match the elements indexed or an error will be thrown.\n\n```lua\nExample 1\n\n> x\n-2.1742  0.5688 -1.0201  0.1383  1.0504\n 0.0970  0.2169  0.1324  0.9553 -1.9518\n-0.7607  0.8947  0.1658 -0.2181 -2.1237\n-1.4099  0.2342  0.4549  0.6316 -0.2608\n 0.0349  0.4713  0.0050  0.1677  0.2103\n[torch.DoubleTensor of size 5x5]\n\nz=torch.Tensor(5, 2)\nz:select(2,1):fill(-1)\nz:select(2,2):fill(-2)\n> z\n-1 -2\n-1 -2\n-1 -2\n-1 -2\n-1 -2\n[torch.DoubleTensor of dimension 5x2]\n\n> x:indexAdd(2,torch.LongTensor{5,1},z)\n> x\n-4.1742  0.5688 -1.0201  0.1383  0.0504\n-1.9030  0.2169  0.1324  0.9553 -2.9518\n-2.7607  0.8947  0.1658 -0.2181 -3.1237\n-3.4099  0.2342  0.4549  0.6316 -1.2608\n-1.9651  0.4713  0.0050  0.1677 -0.7897\n[torch.DoubleTensor of size 5x5]\n\nExample 2\n\n> a = torch.range(1, 5)\n> a\n 1\n 2\n 3\n 4\n 5\n[torch.DoubleTensor of size 5]\n\n> a:indexAdd(1, torch.LongTensor{1, 1, 3, 3}, torch.range(1, 4))\n> a\n  4\n  2\n 10\n  4\n  5\n[torch.DoubleTensor of size 5]\n\n```\n\n<a name=\"torch.Tensor.indexFill\"></a>\n### [Tensor] indexFill(dim, index, val) ###\n\nFills the elements of the original `Tensor` with value `val` by selecting the indices in the order\ngiven in `index`.\n\n```lua\nx=torch.rand(5,5)\n> x\n 0.8414  0.4121  0.3934  0.5600  0.5403\n 0.3029  0.2040  0.7893  0.6079  0.6334\n 0.3743  0.1389  0.1573  0.1357  0.8460\n 0.2838  0.9925  0.0076  0.7220  0.5185\n 0.8739  0.6887  0.4271  0.0385  0.9116\n[torch.DoubleTensor of dimension 5x5]\n\nx:indexFill(2,torch.LongTensor{4,2},-10)\n> x\n  0.8414 -10.0000   0.3934 -10.0000   0.5403\n  0.3029 -10.0000   0.7893 -10.0000   0.6334\n  0.3743 -10.0000   0.1573 -10.0000   0.8460\n  0.2838 -10.0000   0.0076 -10.0000   0.5185\n  0.8739 -10.0000   0.4271 -10.0000   0.9116\n[torch.DoubleTensor of dimension 5x5]\n\n```\n\n<a name=\"torch.Tensor.gather\"></a>\n### [Tensor] gather(dim, index) ###\n\nCreates a new `Tensor` from the original tensor by gathering a number of 
values from\neach \"row\", where the rows are along the dimension `dim`. The values in a `LongTensor`, passed as `index`,\nspecify which values to take from each row. Specifically, the resulting `Tensor`, which will have the same size as\nthe `index` tensor, is given by\n\n```lua\n-- dim = 1\nresult[i][j][k]... = src[index[i][j][k]...][j][k]...\n\n-- dim = 2\nresult[i][j][k]... = src[i][index[i][j][k]...][k]...\n\n-- etc.\n```\nwhere `src` is the original `Tensor`.\n\nThe same number of values are selected from each row, and the same value cannot be selected from a row more than\nonce. The values in the `index` tensor must not be larger than the length of the row, that is they must be between\n1 and `src:size(dim)` inclusive. It can be somewhat confusing to ensure that the `index` tensor has the correct shape.\nViewed pictorially:\n\n![The gather operation](gather.png)\n\nNumerically, to give an example, if `src` has size `n x m x p x q`, we are gathering along `dim = 3`, and we wish to\ngather `k` elements from each row (where `k <= p`) then `index` must have size `n x m x k x q`.\n\nIt is possible to store the result into an existing Tensor with `result:gather(src, ...)`.\n\n```lua\nx = torch.rand(5, 5)\n> x\n 0.7259  0.5291  0.4559  0.4367  0.4133\n 0.0513  0.4404  0.4741  0.0658  0.0653\n 0.3393  0.1735  0.6439  0.1011  0.7923\n 0.7606  0.5025  0.5706  0.7193  0.1572\n 0.1720  0.3546  0.8354  0.8339  0.3025\n[torch.DoubleTensor of size 5x5]\n\ny = x:gather(1, torch.LongTensor{{1, 2, 3, 4, 5}, {2, 3, 4, 5, 1}})\n> y\n 0.7259  0.4404  0.6439  0.7193  0.3025\n 0.0513  0.1735  0.5706  0.8339  0.4133\n[torch.DoubleTensor of size 2x5]\n\nz = x:gather(2, torch.LongTensor{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 1}})\n> z\n 0.7259  0.5291\n 0.4404  0.4741\n 0.6439  0.1011\n 0.7193  0.1572\n 0.3025  0.1720\n[torch.DoubleTensor of size 5x2]\n\n```\n\n<a name=\"torch.Tensor.scatter\"></a>\n### [Tensor] scatter(dim, index, src|val) ###\n\nWrites all values from tensor `src` or 
the scalar `val` into `self` at the specified indices. The indices are specified\nwith respect to the given dimension, `dim`, in the manner described in [gather](#torch.Tensor.gather). Note that, as\nfor gather, the values of index must be between 1 and `self:size(dim)` inclusive and all values in a row along the\nspecified dimension must be unique.\n\n```lua\nx = torch.rand(2, 5)\n> x\n 0.3227  0.4294  0.8476  0.9414  0.1159\n 0.7338  0.5185  0.2947  0.0578  0.1273\n[torch.DoubleTensor of size 2x5]\n\ny = torch.zeros(3, 5):scatter(1, torch.LongTensor{{1, 2, 3, 1, 1}, {3, 1, 1, 2, 3}}, x)\n> y\n 0.3227  0.5185  0.2947  0.9414  0.1159\n 0.0000  0.4294  0.0000  0.0578  0.0000\n 0.7338  0.0000  0.8476  0.0000  0.1273\n[torch.DoubleTensor of size 3x5]\n\nz = torch.zeros(2, 4):scatter(2, torch.LongTensor{{3}, {4}}, 1.23)\n> z\n 0.0000  0.0000  1.2300  0.0000\n 0.0000  0.0000  0.0000  1.2300\n[torch.DoubleTensor of size 2x4]\n\n```\n\n<a name=\"torch.Tensor.maskedSelect\"></a>\n### [Tensor] maskedSelect(mask) ###\n\nReturns a new Tensor which contains all elements aligned to a `1` in the corresponding\n`mask`. This `mask` is a `torch.ByteTensor` of zeros and ones. The `mask` and\n`Tensor` must have the same number of elements. 
The resulting Tensor will\nbe a 1D tensor of the same type as `Tensor` having size `mask:sum()`.\n\n```lua\nx = torch.range(1,12):double():resize(3,4)\n> x\n  1   2   3   4\n  5   6   7   8\n  9  10  11  12\n[torch.DoubleTensor of dimension 3x4]\n\nmask = torch.ByteTensor(2,6):bernoulli()\n> mask\n 1  0  1  0  0  0\n 1  1  0  0  0  1\n[torch.ByteTensor of dimension 2x6]\n\ny = x:maskedSelect(mask)\n> y\n  1\n  3\n  7\n  8\n 12\n[torch.DoubleTensor of dimension 5]\n\nz = torch.DoubleTensor()\nz:maskedSelect(x, mask)\n> z\n  1\n  3\n  7\n  8\n 12\n```\n\nNote how the dimensions of the above `x`, `mask` and `y` do not match.\nAlso note how an existing tensor `z` can be used to store the results.\n\n\n<a name=\"torch.Tensor.maskedCopy\"></a>\n### [Tensor] maskedCopy(mask, tensor) ###\n\nCopies the elements of `tensor` into `mask` locations of itself. The masked elements are those elements having a\ncorresponding `1` in the `mask` Tensor. This `mask` is a `torch.ByteTensor`\nof zeros and ones. 
The destination `Tensor` and the `mask` Tensor should have the same number of elements.\nThe source `tensor` should have at least as many elements as the number of 1s in the `mask`.\n\n```lua\nx = torch.Tensor({0, 0, 0, 0})\nmask = torch.ByteTensor({0, 1, 0, 1})\ny = torch.Tensor({10, 20})\nx:maskedCopy(mask,y)\nprint(x)\n\n  0\n 10\n  0\n 20\n[torch.DoubleTensor of size 4]\n```\n\n```lua\nx = torch.range(1,4):double():resize(2,2)\n> x\n 1  2\n 3  4\n[torch.DoubleTensor of dimension 2x2]\n\nmask = torch.ByteTensor(1,8):bernoulli()\n> mask\n 0  0  1  1  1  0  1  0\n[torch.ByteTensor of dimension 1x8]\n\ny = torch.DoubleTensor(2,4):fill(-1)\n> y\n-1 -1 -1 -1\n-1 -1 -1 -1\n[torch.DoubleTensor of dimension 2x4]\n\ny:maskedCopy(mask, x)\n> y\n -1 -1  1  2\n  3 -1  4 -1\n[torch.DoubleTensor of dimension 2x4]\n```\n\nNote how the dimensions of the above `x`, `mask` and `y` do not match:\n`mask` and `y` only need the same number of elements, and the source `x` only needs\nat least as many elements as there are 1s in the `mask`.\n\n<a name=\"torch.Tensor.maskedFill\"></a>\n### [Tensor] maskedFill(mask, val) ###\n\nFills the masked elements of itself with value `val`. The masked elements are those elements having a\ncorresponding `1` in the `mask` Tensor. This `mask` is a `torch.ByteTensor`\nof zeros and ones. 
The `mask` and `Tensor` must have the same number of elements.\n\n```lua\nx = torch.range(1,4):double():resize(1,4)\n> x\n 1  2  3  4\n[torch.DoubleTensor of dimension 1x4]\n\nmask = torch.ByteTensor(2,2):bernoulli()\n> mask\n 0  0\n 1  1\n[torch.ByteTensor of dimension 2x2]\n\nx:maskedFill(mask, -1)\n> x\n 1  2 -1 -1\n[torch.DoubleTensor of dimension 1x4]\n\n```\nNote how the dimensions of the above `x` and `mask` do not match,\nbut the number of elements do.\n\n## Search ##\n\nEach of these methods returns a `LongTensor` corresponding to the indices of the\ngiven search operation.\n\n<a name=\"torch.Tensor.nonzero\"></a>\n### [LongTensor] nonzero(tensor) ###\n\nFinds and returns a `LongTensor` corresponding to the *subscript* indices of all\nnon-zero elements in `tensor`.\n\nNote that torch uses the first argument on dispatch to determine the return\ntype. Since the first argument is any `torch.TensorType`, but the return type\nis always `torch.LongTensor`, the function call\n`torch.nonzero(torch.LongTensor(), tensor)` does not work. 
However,\n`tensor.nonzero(torch.LongTensor(), tensor)` does work.\n\n```lua\n> x = torch.rand(4, 4):mul(3):floor():int()\n> x\n 2  0  2  0\n 0  0  1  2\n 0  2  2  1\n 2  1  2  2\n[torch.IntTensor of dimension 4x4]\n\n> torch.nonzero(x)\n 1  1\n 1  3\n 2  3\n 2  4\n 3  2\n 3  3\n 3  4\n 4  1\n 4  2\n 4  3\n 4  4\n[torch.LongTensor of dimension 11x2]\n\n> x:nonzero()\n 1  1\n 1  3\n 2  3\n 2  4\n 3  2\n 3  3\n 3  4\n 4  1\n 4  2\n 4  3\n 4  4\n[torch.LongTensor of dimension 11x2]\n\n> indices = torch.LongTensor()\n> x.nonzero(indices, x)\n 1  1\n 1  3\n 2  3\n 2  4\n 3  2\n 3  3\n 3  4\n 4  1\n 4  2\n 4  3\n 4  4\n[torch.LongTensor of dimension 11x2]\n\n> x:eq(1):nonzero()\n 2  3\n 3  4\n 4  2\n[torch.LongTensor of dimension 3x2]\n\n```\n\n## Expanding/Replicating/Squeezing Tensors ##\n\nThese methods return a Tensor which is created by replications of the\noriginal tensor.\n\n<a name=\"torch.expand\"></a>\n### [result] expand([result,] sizes) ###\n\n`sizes` can either be a `torch.LongStorage` or numbers. Expanding a tensor\ndoes not allocate new memory, but only creates a new view on the existing tensor where\nsingleton dimensions can be expanded to multiple ones by setting the `stride` to 0.\nAny dimension that has size 1 can be expanded to an arbitrary value without any new memory allocation. 
Attempting to\nexpand along a dimension that does not have size 1 will result in an error.\n\n```lua\nx = torch.rand(10,1)\n> x\n 0.3837\n 0.5966\n 0.0763\n 0.1896\n 0.4958\n 0.6841\n 0.4038\n 0.4068\n 0.1502\n 0.2239\n[torch.DoubleTensor of dimension 10x1]\n\ny = torch.expand(x,10,2)\n> y\n 0.3837  0.3837\n 0.5966  0.5966\n 0.0763  0.0763\n 0.1896  0.1896\n 0.4958  0.4958\n 0.6841  0.6841\n 0.4038  0.4038\n 0.4068  0.4068\n 0.1502  0.1502\n 0.2239  0.2239\n[torch.DoubleTensor of dimension 10x2]\n\ny:fill(1)\n> y\n 1  1\n 1  1\n 1  1\n 1  1\n 1  1\n 1  1\n 1  1\n 1  1\n 1  1\n 1  1\n[torch.DoubleTensor of dimension 10x2]\n\n> x\n 1\n 1\n 1\n 1\n 1\n 1\n 1\n 1\n 1\n 1\n[torch.DoubleTensor of dimension 10x1]\n\ni=0; y:apply(function() i=i+1;return i end)\n> y\n  2   2\n  4   4\n  6   6\n  8   8\n 10  10\n 12  12\n 14  14\n 16  16\n 18  18\n 20  20\n[torch.DoubleTensor of dimension 10x2]\n\n> x\n  2\n  4\n  6\n  8\n 10\n 12\n 14\n 16\n 18\n 20\n[torch.DoubleTensor of dimension 10x1]\n\n```\n\n<a name=\"torch.Tensor.expandAs\"></a>\n### [result] expandAs([result,] tensor) ###\n\nThis is equivalent to `self:expand(tensor:size())`\n\n<a name=\"torch.repeatTensor\"></a>\n### [Tensor] repeatTensor([result,] sizes) ###\n\n`sizes` can either be a `torch.LongStorage` or numbers. Repeating a tensor allocates\n new memory, unless `result` is provided, in which case its memory is\n resized. `sizes` specify the number of times the tensor is repeated in each dimension.\n\n ```lua\nx = torch.rand(5)\n> x\n 0.7160\n 0.6514\n 0.0704\n 0.7856\n 0.7452\n[torch.DoubleTensor of dimension 5]\n\n> torch.repeatTensor(x,3,2)\n 0.7160  0.6514  0.0704  0.7856  0.7452  0.7160  0.6514  0.0704  0.7856  0.7452\n 0.7160  0.6514  0.0704  0.7856  0.7452  0.7160  0.6514  0.0704  0.7856  0.7452\n 0.7160  0.6514  0.0704  0.7856  0.7452  0.7160  0.6514  0.0704  0.7856  0.7452\n[torch.DoubleTensor of dimension 3x10]\n\n> torch.repeatTensor(x,3,2,1)\n(1,.,.) 
=\n  0.7160  0.6514  0.0704  0.7856  0.7452\n  0.7160  0.6514  0.0704  0.7856  0.7452\n\n(2,.,.) =\n  0.7160  0.6514  0.0704  0.7856  0.7452\n  0.7160  0.6514  0.0704  0.7856  0.7452\n\n(3,.,.) =\n  0.7160  0.6514  0.0704  0.7856  0.7452\n  0.7160  0.6514  0.0704  0.7856  0.7452\n[torch.DoubleTensor of dimension 3x2x5]\n\n ```\n\n<a name=\"torch.squeeze\"></a>\n### [Tensor] squeeze([dim]) ###\n\nRemoves all singleton dimensions of the tensor.\nIf `dim` is given, squeezes only that particular dimension of the tensor.\n\n ```lua\nx=torch.rand(2,1,2,1,2)\n> x\n(1,1,1,.,.) =\n  0.6020  0.8897\n\n(2,1,1,.,.) =\n  0.4713  0.2645\n\n(1,1,2,.,.) =\n  0.4441  0.9792\n\n(2,1,2,.,.) =\n  0.5467  0.8648\n[torch.DoubleTensor of dimension 2x1x2x1x2]\n\n> torch.squeeze(x)\n(1,.,.) =\n  0.6020  0.8897\n  0.4441  0.9792\n\n(2,.,.) =\n  0.4713  0.2645\n  0.5467  0.8648\n[torch.DoubleTensor of dimension 2x2x2]\n\n> torch.squeeze(x,2)\n(1,1,.,.) =\n  0.6020  0.8897\n\n(2,1,.,.) =\n  0.4713  0.2645\n\n(1,2,.,.) =\n  0.4441  0.9792\n\n(2,2,.,.) =\n  0.5467  0.8648\n[torch.DoubleTensor of dimension 2x2x1x2]\n\n ```\n\n## Manipulating the tensor view ##\n\nEach of these methods returns a `Tensor` which is another way of viewing\nthe `Storage` of the given tensor. Hence, any modification in the memory of\nthe sub-tensor will have an impact on the primary tensor, and vice-versa.\n\nThese methods are very fast, because they do not involve any memory copy.\n\n<a name=\"torch.view\"></a>\n### [result] view([result,] tensor, sizes) ###\n\nCreates a view with different dimensions of the storage associated with `tensor`.\nIf `result` is not passed, then a new tensor is returned, otherwise its storage is\nmade to point to storage of `tensor`.\n\n`sizes` can either be a `torch.LongStorage` or numbers. 
If one of the dimensions\nis -1, the size of that dimension is inferred from the rest of the elements.\n\n\n```lua\nx = torch.zeros(4)\n> x:view(2,2)\n 0 0\n 0 0\n[torch.DoubleTensor of dimension 2x2]\n\n> x:view(2,-1)\n 0 0\n 0 0\n[torch.DoubleTensor of dimension 2x2]\n\n> x:view(torch.LongStorage{2,2})\n 0 0\n 0 0\n[torch.DoubleTensor of dimension 2x2]\n\n> x\n 0\n 0\n 0\n 0\n[torch.DoubleTensor of dimension 4]\n```\n\n<a name=\"torch.viewAs\"></a>\n### [result] viewAs([result,] tensor, template) ###\n\nCreates a view with the same dimensions as `template` of the storage associated\nwith `tensor`. If `result` is not passed, then a new tensor is returned, otherwise its storage is\nmade to point to storage of `tensor`.\n\n\n```lua\nx = torch.zeros(4)\ny = torch.Tensor(2,2)\n> x:viewAs(y)\n 0 0\n 0 0\n[torch.DoubleTensor of dimension 2x2]\n```\n\n\n<a name=\"torch.Tensor.transpose\"></a>\n### [Tensor] transpose(dim1, dim2) ###\n\nReturns a tensor where dimensions `dim1` and `dim2` have been swapped. For 2D tensors,\nthe convenience method of [t()](#torch.Tensor.t) is available.\n```lua\nx = torch.Tensor(3,4):zero()\nx:select(2,3):fill(7) -- fill column 3 with 7\n> x\n 0  0  7  0\n 0  0  7  0\n 0  0  7  0\n[torch.DoubleTensor of dimension 3x4]\n\ny = x:transpose(1,2) -- swap dimension 1 and 2\n> y\n 0  0  0\n 0  0  0\n 7  7  7\n 0  0  0\n[torch.DoubleTensor of dimension 4x3]\n\ny:select(2, 3):fill(8) -- fill column 3 with 8\n> y\n 0  0  8\n 0  0  8\n 7  7  8\n 0  0  8\n[torch.DoubleTensor of dimension 4x3]\n\n> x -- contents of x have changed as well\n 0  0  7  0\n 0  0  7  0\n 8  8  8  8\n[torch.DoubleTensor of dimension 3x4]\n```\n\n\n<a name=\"torch.Tensor.t\"></a>\n### [Tensor] t() ###\n\nConvenience method of [transpose()](#torch.Tensor.transpose) for 2D\ntensors. The given tensor must be 2 dimensional. 
Swap dimensions 1 and 2.\n```lua\nx = torch.Tensor(3,4):zero()\nx:select(2,3):fill(7)\ny = x:t()\n> y\n 0  0  0\n 0  0  0\n 7  7  7\n 0  0  0\n[torch.DoubleTensor of dimension 4x3]\n\n> x\n 0  0  7  0\n 0  0  7  0\n 0  0  7  0\n[torch.DoubleTensor of dimension 3x4]\n```\n\n\n<a name=\"torch.Tensor.permute\"></a>\n### [Tensor] permute(dim1, dim2, ..., dimn) ###\n\nGeneralizes the function [transpose()](#torch.Tensor.transpose) and can be used\nas a convenience method replacing a sequence of transpose() calls.\nReturns a tensor where the dimensions were permuted according to the permutation\ngiven by (dim1, dim2, ... , dimn). The permutation must be specified fully, i.e.\nthere must be as many parameters as the tensor has dimensions.\n```lua\nx = torch.Tensor(3,4,2,5)\n> x:size()\n 3\n 4\n 2\n 5\n[torch.LongStorage of size 4]\n\ny = x:permute(2,3,1,4) -- equivalent to y = x:transpose(1,3):transpose(1,2)\n> y:size()\n 4\n 2\n 3\n 5\n[torch.LongStorage of size 4]\n\n```\n\n\n<a name=\"torch.Tensor.unfold\"></a>\n### [Tensor] unfold(dim, size, step) ###\n\nReturns a tensor which contains all slices of size `size` in the dimension `dim`. Step between\ntwo slices is given by `step`.\n\nIf `sizedim` is the original size of dimension `dim`, the size of dimension\n`dim` in the returned tensor will be `(sizedim - size) / step + 1`.\n\nAn additional dimension of size `size` is appended in the returned tensor.\n\n```lua\nx = torch.Tensor(7)\nfor i=1,7 do x[i] = i end\n> x\n 1\n 2\n 3\n 4\n 5\n 6\n 7\n[torch.DoubleTensor of dimension 7]\n\n> x:unfold(1, 2, 1)\n 1  2\n 2  3\n 3  4\n 4  5\n 5  6\n 6  7\n[torch.DoubleTensor of dimension 6x2]\n\n> x:unfold(1, 2, 2)\n 1  2\n 3  4\n 5  6\n[torch.DoubleTensor of dimension 3x2]\n```\n\n## Applying a function to a tensor ##\n\nThese functions apply a function to each element of the tensor on which the method is\ncalled (self). These methods are much faster than using a `for`\nloop in `Lua`. 
The results are stored in `self` (if the function returns\nsomething).\n\n<a name=\"torch.Tensor.apply\"></a>\n### [self] apply(function) ###\n\nApply the given function to all elements of self.\n\nThe function takes a number (the current element of the tensor) and might return\na number, in which case it will be stored in self.\n\nExamples:\n```lua\ni = 0\nz = torch.Tensor(3,3)\nz:apply(function(x)\n  i = i + 1\n  return i\nend) -- fill up the tensor\n> z\n 1  2  3\n 4  5  6\n 7  8  9\n[torch.DoubleTensor of dimension 3x3]\n\nz:apply(math.sin) -- apply the sin function\n> z\n 0.8415  0.9093  0.1411\n-0.7568 -0.9589 -0.2794\n 0.6570  0.9894  0.4121\n[torch.DoubleTensor of dimension 3x3]\n\nsum = 0\nz:apply(function(x)\n  sum = sum + x\nend) -- compute the sum of the elements\n> sum\n1.9552094821074\n\n> z:sum() -- it is indeed correct!\n1.9552094821074\n```\n\n<a name=\"torch.Tensor.map\"></a>\n### [self] map(tensor, function(xs, xt)) ###\n\nApply the given function to all elements of self and `tensor`. The number of elements of both tensors\nmust match, but sizes do not matter.\n\nThe function takes two numbers (the current element of self and `tensor`) and might return\na number, in which case it will be stored in self.\n\nExample:\n```lua\nx = torch.Tensor(3,3)\ny = torch.Tensor(9)\ni = 0\nx:apply(function() i = i + 1; return i end) -- fill-up x\ni = 0\ny:apply(function() i = i + 1; return i end) -- fill-up y\n> x\n 1  2  3\n 4  5  6\n 7  8  9\n[torch.DoubleTensor of dimension 3x3]\n\n> y\n 1\n 2\n 3\n 4\n 5\n 6\n 7\n 8\n 9\n[torch.DoubleTensor of dimension 9]\n\nx:map(y, function(xx, yy) return xx*yy end) -- element-wise multiplication\n> x\n  1   4   9\n 16  25  36\n 49  64  81\n[torch.DoubleTensor of dimension 3x3]\n```\n\n<a name=\"torch.Tensor.map2\"></a>\n### [self] map2(tensor1, tensor2, function(x, xt1, xt2)) ###\n\nApply the given function to all elements of self, `tensor1` and `tensor2`. 
The number of elements of all tensors\nmust match, but sizes do not matter.\n\nThe function takes three numbers (the current element of self, `tensor1` and `tensor2`) and might return\na number, in which case it will be stored in self.\n\nExample:\n```lua\nx = torch.Tensor(3,3)\ny = torch.Tensor(9)\nz = torch.Tensor(3,3)\n\ni = 0; x:apply(function() i = i + 1; return math.cos(i)*math.cos(i) end)\ni = 0; y:apply(function() i = i + 1; return i end)\ni = 0; z:apply(function() i = i + 1; return i end)\n\n> x\n 0.2919  0.1732  0.9801\n 0.4272  0.0805  0.9219\n 0.5684  0.0212  0.8302\n[torch.DoubleTensor of dimension 3x3]\n\n> y\n 1\n 2\n 3\n 4\n 5\n 6\n 7\n 8\n 9\n[torch.DoubleTensor of dimension 9]\n\n> z\n 1  2  3\n 4  5  6\n 7  8  9\n[torch.DoubleTensor of dimension 3x3]\n\nx:map2(y, z, function(xx, yy, zz) return xx+yy*zz end)\n> x\n  1.2919   4.1732   9.9801\n 16.4272  25.0805  36.9219\n 49.5684  64.0212  81.8302\n[torch.DoubleTensor of dimension 3x3]\n```\n\n\n## Dividing a tensor into a table of tensors ##\n\nThese functions divide a Tensor into a table of Tensors.\n\n<a name=\"torch.split\"></a>\n### [result] split([result,] tensor, size, [dim]) ###\n\nSplits Tensor `tensor` along dimension `dim`\ninto a `result` table of Tensors of size `size` (a number)\nor less (in the case of the last Tensor). The sizes of the non-`dim`\ndimensions remain unchanged. Internally, a series of\n[narrows](#torch.Tensor.narrow) are performed along\ndimensions `dim`. 
Argument `dim` defaults to 1.\n\nIf `result` is not passed, then a new table is returned, otherwise it\nis emptied and reused.\n\nExample:\n```lua\nx = torch.randn(3,4,5)\n\n> x:split(2,1)\n{\n  1 : DoubleTensor - size: 2x4x5\n  2 : DoubleTensor - size: 1x4x5\n}\n\n> x:split(3,2)\n{\n  1 : DoubleTensor - size: 3x3x5\n  2 : DoubleTensor - size: 3x1x5\n}\n\n> x:split(2,3)\n{\n  1 : DoubleTensor - size: 3x4x2\n  2 : DoubleTensor - size: 3x4x2\n  3 : DoubleTensor - size: 3x4x1\n}\n```\n\n\n<a name=\"torch.chunk\"></a>\n### [result] chunk([result,] tensor, n, [dim]) ###\n\nSplits Tensor `tensor` into `n` chunks of approximately equal size along\ndimensions `dim` and returns these as a `result` table of Tensors.\nArgument `dim` defaults to 1.\n\nThis function uses [split](#torch.split) internally:\n```lua\ntorch.split(result, tensor, math.ceil(tensor:size(dim)/n), dim)\n```\n\nExample:\n```lua\nx = torch.randn(3,4,5)\n\n> x:chunk(2,1)\n{\n  1 : DoubleTensor - size: 2x4x5\n  2 : DoubleTensor - size: 1x4x5\n}\n\n> x:chunk(2,2)\n{\n  1 : DoubleTensor - size: 3x2x5\n  2 : DoubleTensor - size: 3x2x5\n}\n\n> x:chunk(2,3)\n{\n  1 : DoubleTensor - size: 3x4x3\n  2 : DoubleTensor - size: 3x4x2\n}\n```\n\n## LuaJIT FFI access ##\nThese functions expose Torch's Tensor and Storage data structures, through\n[LuaJIT FFI](http://luajit.org/ext_ffi_api.html).\nThis allows extremely fast access to Tensors and Storages, all from Lua.\n\n<a name=\"torch.data\"></a>\n### [result] data(tensor, [asnumber]) ###\n\nReturns a LuaJIT FFI pointer to the raw data of the tensor.\nIf `asnumber` is true, then returns the pointer as a `intptr_t` cdata\nthat you can transform to a plain lua number with `tonumber()`.\n\nAccessing the raw data of a Tensor like this is extremely efficient, in fact, it's\nalmost as fast as C in lots of cases.\n\nExample:\n```lua\nt = torch.randn(3,2)\n> t\n 0.8008 -0.6103\n 0.6473 -0.1870\n-0.0023 -0.4902\n[torch.DoubleTensor of dimension 3x2]\n\nt_data = torch.data(t)\nfor 
i = 0,t:nElement()-1 do t_data[i] = 0 end\n> t\n 0 0\n 0 0\n 0 0\n[torch.DoubleTensor of dimension 3x2]\n```\n\nWARNING: bear in mind that accessing the raw data like this is dangerous, and should\nonly be done on contiguous tensors (if a tensor is not contiguous, then you have to\nuse its size and stride information). Making sure a tensor is contiguous is easy:\n```lua\nt = torch.randn(3,2)\nt_noncontiguous = t:transpose(1,2)\n\n-- it would be unsafe to work with torch.data(t_noncontiguous)\nt_transposed_and_contiguous = t_noncontiguous:contiguous()\n\n-- it is now safe to work with the raw pointer\ndata = torch.data(t_transposed_and_contiguous)\n```\n\nLast, the pointer can be returned as a plain `intptr_t` cdata. This can be useful\nto share pointers between threads (warning: this is dangerous, as the second\ntensor doesn't increment the reference counter on the storage. If the first tensor\ngets freed, then the data of the second tensor becomes a dangling pointer):\n\n```lua\nt = torch.randn(10)\np = tonumber(torch.data(t,true))\ns = torch.Storage(10, p)\ntt = torch.Tensor(s)\n-- tt and t are a view on the same data.\n```\n\n<a name=\"torch.cdata\"></a>\n### [result] cdata(tensor, [asnumber]) ###\n\nReturns a LuaJIT FFI pointer to the C structure of the tensor.\nUse this with caution, and look at [FFI.lua](https://github.com/torch/torch7/blob/master/FFI.lua)\nfor the members of the tensor.\n\n## Reference counting ##\n\nTensors are reference-counted. It means that each time an object (C or the\nLua state) needs to keep a reference to a tensor, the corresponding\ntensor reference counter will be [increased](#torch.Tensor.retain). The\nreference counter is [decreased](#torch.Tensor.free) when the object\ndoes not need the tensor anymore.\n\nThese methods should be used with extreme care. In general, they should\nnever be called, except if you know what you are doing, as the handling of\nreferences is done automatically. 
They can be useful in threaded\nenvironments. Note that these methods are atomic operations.\n\n<a name=\"torch.Tensor.retain\"></a>\n### retain() ###\n\nIncrement the reference counter of the tensor.\n\n<a name=\"torch.Tensor.free\"></a>\n### free() ###\n\nDecrement the reference counter of the tensor. Free the tensor if the\ncounter is at 0.\n"
  },
  {
    "path": "doc/tester.md",
    "content": "<a name=\"torch.Tester.dok\"></a>\n# Tester #\n\nThis class provides a generic unit testing framework. It is already\nbeing used in [nn](../index.md) package to verify the correctness of classes.\n\nThe framework is generally used as follows.\n\n```lua\nlocal mytest = torch.TestSuite()\n\nlocal tester = torch.Tester()\n\nfunction mytest.testA()\n   local a = torch.Tensor{1, 2, 3}\n   local b = torch.Tensor{1, 2, 4}\n   tester:eq(a, b, \"a and b should be equal\")\nend\n\nfunction mytest.testB()\n   local a = {2, torch.Tensor{1, 2, 2}}\n   local b = {2, torch.Tensor{1, 2, 2.001}}\n   tester:eq(a, b, 0.01, \"a and b should be approximately equal\")\nend\n\nfunction mytest.testC()\n   local function myfunc()\n      return \"hello \" .. world\n   end\n   tester:assertNoError(myfunc, \"myfunc shouldn't give an error\")\nend\n\ntester:add(mytest)\ntester:run()\n```\n\nRunning this code will report two test failures (and one test success).\nGenerally it is  better to put a single test case in each test function unless\nseveral very related test cases exist.\nThe error report includes the message and line number of the error.\n\n```\nRunning 3 tests\n1/3 testB ............................................................... [PASS]\n2/3 testA ............................................................... [FAIL]\n3/3 testC ............................................................... 
[FAIL]\nCompleted 3 asserts in 3 tests with 2 failures and 0 errors\n--------------------------------------------------------------------------------\ntestA\na and b should be equal\nTensorEQ(==) violation: max diff=1, tolerance=0\nstack traceback:\n        ./test.lua:8: in function <./test.lua:5>\n\n--------------------------------------------------------------------------------\ntestC\nmyfunc shouldn't give an error\nERROR violation: err=./test.lua:19: attempt to concatenate global 'world' (a nil value)\nstack traceback:\n        ./test.lua:21: in function <./test.lua:17>\n\n--------------------------------------------------------------------------------\ntorch/torch/Tester.lua:383: An error was found while running tests!\nstack traceback:\n        [C]: in function 'assert'\n        torch/torch/Tester.lua:383: in function 'run'\n        ./test.lua:25: in main chunk\n```\n\nHistorically, Tester has supported a variety of equality checks\n([asserteq](#torch.Tester.asserteq),\n[assertalmosteq](#torch.Tester.assertalmosteq),\n[assertTensorEq](#torch.Tester.assertTensorEq),\n[assertTableEq](#torch.Tester.assertTableEq), and their negations). In general\nhowever, you should just use [eq](#torch.Tester.eq) (or its negation\n[ne](#torch.Tester.ne)).  These functions do deep checking of many object types\nincluding recursive tables and tensors, and support a\ntolerance parameter for comparing numerical values (including tensors).\n\nMany of the tester functions accept both an optional `tolerance` parameter and a\n`message` to display if the test case fails. For both convenience and backwards\ncompatibility, these arguments can be supplied in either order.\n\n<a name=\"torch.Tester\"></a>\n### torch.Tester() ###\n\nReturns a new instance of `torch.Tester` class.\n\n<a name=\"torch.Tester.add\"></a>\n### add(f, 'name') ###\n\nAdds `f`, either a test function or a table of test functions, to the tester.\n\nIf `f` is a function then names should be unique. 
There are a couple of special\nvalues for `name`: if it is `_setUp` or `_tearDown`, then the function will be\ncalled either *before* or *after* every test respectively, with the name of the\ntest passed as a parameter.\n\nIf `f` is a table then `name` should be nil, and the names of the individual\ntests in the table will be taken from the corresponding table key. It's\nrecommended you use [TestSuite](#torch.TestSuite.dok) for tables of tests.\n\nReturns the torch.Tester instance.\n\n<a name=\"torch.Tester.run\"></a>\n### run(testNames) ###\n\nRuns tests that have been added by [add(f, 'name')](#torch.Tester.add).\nWhile running it reports progress, and at the end gives a summary of all errors.\n\nIf a list of names `testNames` is passed, then all tests matching these names\n(using `string.match`) will be run; otherwise all tests will be run.\n\n```lua\ntester:run() -- runs all tests\ntester:run(\"test1\") -- runs the test named \"test1\"\ntester:run({\"test2\", \"test3\"}) -- runs the tests named \"test2\" and \"test3\"\n```\n\n<a name=\"torch.Tester.disable\"></a>\n### disable(testNames) ###\n\nPrevents the given tests from running, where `testNames` can be a single string\nor list of strings. More precisely, when [run](#torch.Tester.run)\nis invoked, it will skip these tests, while still printing out an indication of\nskipped tests. This is useful for temporarily disabling tests without\ncommenting out the code (for example, if they depend on upstream code that is\ncurrently broken), and explicitly flagging them as skipped.\n\nReturns the torch.Tester instance.\n\n```lua\nlocal tester = torch.Tester()\nlocal tests = torch.TestSuite()\n\nfunction tests.brokenTest()\n  -- ...\nend\n\ntester:add(tests):disable('brokenTest'):run()\n```\n\n```\nRunning 1 test\n1/1 brokenTest .......................................................... 
[SKIP]\nCompleted 0 asserts in 1 test with 0 failures and 0 errors and 1 disabled\n```\n\n<a name=\"torch.Tester.assert\"></a>\n### assert(condition [, message]) ###\n\nChecks that `condition` is true (using the optional `message` if the test\nfails).\nReturns whether the test passed.\n\n<a name=\"torch.Tester.assertGeneralEq\"></a>\n### assertGeneralEq(got, expected [, tolerance] [, message]) ###\n\nGeneral equality check between numbers, tables, strings, `torch.Tensor`\nobjects, `torch.Storage` objects, etc.\n\nChecks that `got` and `expected` have the same contents, where tables are\ncompared recursively, tensors and storages are compared elementwise, and numbers\nare compared within `tolerance` (default value `0`). Other types are compared by\nstrict equality. The optional `message` is used if the test fails.\nReturns whether the test passed.\n\n<a name=\"torch.Tester.eq\"></a>\n### eq(got, expected  [, tolerance] [, message]) ###\n\nConvenience function; does the same as\n[assertGeneralEq](#torch.Tester.assertGeneralEq).\n\n<a name=\"torch.Tester.assertGeneralNe\"></a>\n### assertGeneralNe(got, unexpected  [, tolerance] [, message]) ###\n\nGeneral inequality check between numbers, tables, strings, `torch.Tensor`\nobjects, `torch.Storage` objects, etc.\n\nChecks that `got` and `unexpected` have different contents, where tables are\ncompared recursively, tensors and storages are compared elementwise, and numbers\nare compared within `tolerance` (default value `0`). Other types are compared by\nstrict equality. 
The optional `message` is used if the test fails.\nReturns whether the test passed.\n\n<a name=\"torch.Tester.ne\"></a>\n### ne(got, unexpected  [, tolerance] [, message]) ###\n\nConvenience function; does the same as\n[assertGeneralNe](#torch.Tester.assertGeneralNe).\n\n<a name=\"torch.Tester.assertlt\"></a>\n### assertlt(a, b [, message]) ###\n\nChecks that `a < b` (using the optional `message` if the test fails),\nwhere `a` and `b` are numbers.\nReturns whether the test passed.\n\n<a name=\"torch.Tester.assertgt\"></a>\n### assertgt(a, b [, message]) ###\n\nChecks that `a > b` (using the optional `message` if the test fails),\nwhere `a` and `b` are numbers.\nReturns whether the test passed.\n\n<a name=\"torch.Tester.assertle\"></a>\n### assertle(a, b [, message]) ###\n\nChecks that `a <= b` (using the optional `message` if the test fails),\nwhere `a` and `b` are numbers.\nReturns whether the test passed.\n\n<a name=\"torch.Tester.assertge\"></a>\n### assertge(a, b [, message]) ###\n\nChecks that `a >= b` (using the optional `message` if the test fails),\nwhere `a` and `b` are numbers.\nReturns whether the test passed.\n\n<a name=\"torch.Tester.asserteq\"></a>\n### asserteq(a, b [, message]) ###\n\nChecks that `a == b` (using the optional `message` if the test fails).\nNote that this uses the generic lua equality check, so objects such as tensors\nthat have the same content but are distinct objects will fail this test;\nconsider using [assertGeneralEq()](#torch.Tester.assertGeneralEq) instead.\nReturns whether the test passed.\n\n<a name=\"torch.Tester.assertne\"></a>\n### assertne(a, b [, message]) ###\n\nChecks that `a ~= b` (using the optional `message` if the test fails).\nNote that this uses the generic lua inequality check, so objects such as tensors\nthat have the same content but are distinct objects will pass this test;\nconsider using [assertGeneralNe()](#torch.Tester.assertGeneralNe) instead.\nReturns whether the test passed.\n\n<a 
name=\"torch.Tester.assertalmosteq\"></a>\n### assertalmosteq(a, b [, tolerance] [, message]) ###\n\nChecks that `|a - b| <= tolerance` (using the optional `message` if the\ntest fails), where `a` and `b` are numbers, and `tolerance` is an optional\nnumber (default `1e-16`).\nReturns whether the test passed.\n\n<a name=\"torch.Tester.assertTensorEq\"></a>\n### assertTensorEq(ta, tb [, tolerance] [, message]) ###\n\nChecks that `max(abs(ta - tb)) <= tolerance` (using the optional `message`\nif the test fails), where `ta` and `tb` are tensors, and `tolerance` is an\noptional number (default `1e-16`). Tensors that are different types or sizes\nwill cause this check to fail.\nReturns whether the test passed.\n\n<a name=\"torch.Tester.assertTensorNe\"></a>\n### assertTensorNe(ta, tb [, tolerance] [, message]) ###\n\nChecks that `max(abs(ta - tb)) > tolerance` (using the optional `message`\nif the test fails), where `ta` and `tb` are tensors, and `tolerance` is an\noptional number (default `1e-16`). 
Tensors that are different types or sizes\nwill cause this check to pass.\nReturns whether the test passed.\n\n<a name=\"torch.Tester.assertTableEq\"></a>\n### assertTableEq(ta, tb [, tolerance] [, message]) ###\n\nChecks that the two tables have the same contents, comparing them\nrecursively, where objects such as tensors are compared using their contents.\nNumbers (such as those appearing in tensors) are considered equal if\ntheir difference is at most the given tolerance.\n\n<a name=\"torch.Tester.assertTableNe\"></a>\n### assertTableNe(ta, tb [, tolerance] [, message]) ###\n\nChecks that the two tables have distinct contents, comparing them\nrecursively, where objects such as tensors are compared using their contents.\nNumbers (such as those appearing in tensors) are considered equal if\ntheir difference is at most the given tolerance.\n\n<a name=\"torch.Tester.assertError\"></a>\n### assertError(f [, message]) ###\n\nChecks that calling `f()` (via `pcall`) raises an error (using the\noptional `message` if the test fails).\nReturns whether the test passed.\n\n<a name=\"torch.Tester.assertNoError\"></a>\n### assertNoError(f [, message]) ###\n\nCheck that calling `f()` (via `pcall`) does not raise an error (using the\noptional `message` if the test fails).\nReturns whether the test passed.\n\n<a name=\"torch.Tester.assertErrorMsg\"></a>\n### assertErrorMsg(f, errmsg [, message]) ###\n\nChecks that calling `f()` (via `pcall`) raises an error with the specific error\nmessage `errmsg` (using the optional `message` if the test fails).\nReturns whether the test passed.\n\n<a name=\"torch.Tester.assertErrorPattern\"></a>\n### assertErrorPattern(f, errPattern [, message]) ###\n\nChecks that calling `f()` (via `pcall`) raises an error matching `errPattern`\n(using the optional `message` if the test fails).\nThe matching is done using `string.find`; in particular substrings will match.\nReturns whether the test passed.\n\n<a name=\"torch.Tester.assertErrorObj\"></a>\n### 
assertErrorObj(f, errcomp [, message]) ###\n\nChecks that calling `f()` (via `pcall`) raises an error object `err` such that\ncalling `errcomp(err)` returns true (using the optional `message` if the test\nfails).\nReturns whether the test passed.\n\n<a name=\"torch.Tester.setEarlyAbort\"></a>\n### setEarlyAbort(earlyAbort) ###\n\nIf `earlyAbort == true` then the testing will stop on the first test failure.\nBy default this is off.\n\n<a name=\"torch.Tester.setRethrowErrors\"></a>\n### setRethrowErrors(rethrowErrors) ###\n\nIf `rethrowErrors == true` then lua errors encountered during the execution of\nthe tests will be rethrown, instead of being caught by the tester.\nBy default this is off.\n\n<a name=\"torch.Tester.setSummaryOnly\"></a>\n### setSummaryOnly(summaryOnly) ###\n\nIf `summaryOnly == true`, then only the pass / fail status of the tests will be\nprinted out, rather than full error messages. By default, this is off.\n\n\n<a name=\"torch.TestSuite.dok\"></a>\n# TestSuite #\n\nA TestSuite is used in conjunction with [Tester](#torch.Tester.dok). It is\ncreated via `torch.TestSuite()`, and behaves like a plain lua table,\nexcept that it also checks that duplicate tests are not created.\nIt is recommended that you always use a TestSuite instead of a plain table for\nyour tests.\n\nThe following example code attempts to add a function with the same name\ntwice to a TestSuite (a surprisingly common mistake), which gives an error.\n\n```lua\n> test = torch.TestSuite()\n>\n> function test.myTest()\n>    -- ...\n> end\n>\n> -- ...\n>\n> function test.myTest()\n>    -- ...\n> end\ntorch/TestSuite.lua:16: Test myTest is already defined.\n```\n\n"
  },
  {
    "path": "doc/timer.md",
    "content": "<a name=\"torch.Timer.dok\"></a>\n# Timer #\n\nThis class is able to measure time (in seconds) elapsed in a particular period. Example:\n```lua\n  timer = torch.Timer() -- the Timer starts to count now\n  x = 0\n  for i=1,1000000 do\n    x = x + math.sin(x)\n  end\n  print('Time elapsed for 1,000,000 sin: ' .. timer:time().real .. ' seconds')\n```\n\n<a name=\"torch.Timer\"></a>\n## Timer Class Constructor and Methods ##\n\n<a name=\"torch.Timer\"></a>\n### torch.Timer() ###\n\nReturns a new `Timer`. The timer starts to count the time now.\n\n<a name=\"torch.Timer.reset\"></a>\n### [self] reset() ###\n\nResets the timer's accumulated time to `0`. If the timer was running, the timer\nrestarts to count the time now. If the timer was stopped, it stays stopped.\n\n<a name=\"torch.Timer.resume\"></a>\n### [self] resume() ###\n\nResumes a stopped timer. The timer starts counting again, and the newly elapsed\ntime is added to the time already accumulated before it was stopped.\n\n<a name=\"torch.Timer.stop\"></a>\n### [self] stop() ###\n\nStops the timer. The accumulated time counted until now is stored.\n\n<a name=\"torch.Timer.time\"></a>\n### [table] time() ###\n\nReturns a table reporting the accumulated time elapsed until now. Following the UNIX shell `time` command,\nthere are three fields in the table:\n  * `real`: the wall-clock elapsed time.\n  * `user`: the elapsed CPU time. Note that the CPU time of a threaded program sums time spent in all threads.\n  * `sys`: the time spent in system usage.\n\n"
  },
  {
    "path": "doc/utility.md",
    "content": "<a name=\"torch.utility.dok\"></a>\n# Torch utility functions #\n\nThese functions are used in all Torch packages for creating and handling classes.\nThe most interesting function is probably [`torch.class()`](#torch.class) which allows\nthe user to easily create new classes. [`torch.typename()`](#torch.typename) might\nalso be useful to check the class of a given *Torch7* object.\n\nThe other functions are for more advanced users.\n\n\n<a name=\"torch.class\"></a>\n### [metatable] torch.class(name, [parentName], [module]) ###\n\nCreates a new `Torch` class called `name`. If `parentName` is provided, the class will inherit\n`parentName` methods. A class is a table which has a particular metatable.\n\nIf `module` is not provided and if `name` is of the form\n`package.className` then the class `className` will be added to the\nspecified `package`. In that case, `package` has to be a valid (and\nalready loaded) package. If `name` does not contain any `.`, then the class\nwill be defined in the global environment.\n\nIf `module` is provided (as a table), the class will be defined in this table at\nkey `className`.\n\nOne \\[or two\\] (meta)tables are returned. These tables contain all the methods\nprovided by the class [and its parent class if it has been provided]. 
After\na call to `torch.class()` you have to properly fill up the metatable.\n\nAfter the class definition is complete, constructing a new instance of class `name` is achieved by a call to `name()`.\nThis call will first call the method `__init()` if it exists, passing all arguments of `name()`.\n\n```lua\n-- for naming convenience\ndo\n   --- creates a class \"Foo\"\n   local Foo = torch.class('Foo')\n\n   --- the initializer\n   function Foo:__init()\n      self.contents = 'this is some text'\n   end\n\n   --- a method\n   function Foo:print()\n      print(self.contents)\n   end\n\n   --- another one\n   function Foo:bip()\n      print('bip')\n   end\n\nend\n\n--- now create an instance of Foo\nfoo = Foo()\n\n--- try it out\nfoo:print()\n\n--- create a class torch.Bar which\n--- inherits from Foo\ndo\n   local Bar, parent = torch.class('torch.Bar', 'Foo')\n\n   --- the initializer\n   function Bar:__init(stuff)\n      --- call the parent initializer on ourself\n      parent.__init(self)\n\n      --- do some stuff\n      self.stuff = stuff\n   end\n\n   --- a new method\n   function Bar:boing()\n      print('boing!')\n   end\n\n   --- override parent's method\n   function Bar:print()\n      print(self.contents)\n      print(self.stuff)\n   end\nend\n\n--- create a new instance and use it\nbar = torch.Bar('ha ha!')\nbar:print() -- overridden method\nbar:boing() -- child method\nbar:bip()   -- parent's method\n```\n\nFor advanced users, it is worth mentioning that `torch.class()` actually\ncalls [`torch.newmetatable()`](#torch.newmetatable) with a particular\nconstructor. The constructor creates a Lua table and sets the right\nmetatable on it, and then calls `__init()` if it exists in the\nmetatable. It also sets a [factory](#torch.factory) field `__factory` such that it\nis possible to create an empty object of this class.\n\n\n<a name=\"torch.type\"></a>\n### [string] torch.type(object) ###\n\nChecks if `object` has a metatable. 
If it does, and if it corresponds to a\n`Torch` class, then returns a string containing the name of the\nclass. Otherwise, it returns the Lua `type(object)` of the object.\nUnlike [`torch.typename()`](#torch.typename), all outputs are strings:\n\n```lua\n> torch.type(torch.Tensor())\ntorch.DoubleTensor\n> torch.type({})\ntable\n> torch.type(7)\nnumber\n```\n\n\n<a name=\"torch.typename\"></a>\n### [string] torch.typename(object) ###\n\nChecks if `object` has a metatable. If it does, and if it corresponds to a\n`Torch` class, then returns a string containing the name of the\nclass. Returns `nil` in any other cases.\n\n```lua\n> torch.typename(torch.Tensor())\ntorch.DoubleTensor\n> torch.typename({})\n\n> torch.typename(7)\n\n```\n\nA Torch class is a class created with [`torch.class()`](#torch.class) or\n[`torch.newmetatable()`](#torch.newmetatable).\n\n\n<a name=\"torch.typename2id\"></a>\n### [userdata] torch.typename2id(string) ###\n\nGiven a Torch class name specified by `string`, returns a unique\ncorresponding id (defined by a `lightuserdata` pointing on the internal\nstructure of the class). This might be useful to do a *fast* check of the\nclass of an object (if used with [`torch.id()`](#torch.id)), avoiding string\ncomparisons.\n\nReturns `nil` if `string` does not specify a Torch object.\n\n\n<a name=\"torch.id\"></a>\n### [userdata] torch.id(object) ###\n\nReturns a unique id corresponding to the `class` of the given *Torch7* object.\nThe id is defined by a `lightuserdata` pointing on the internal structure\nof the class.\n\nReturns `nil` if `object` is not a Torch object.\n\nThis is different from the `object` id returned by [`torch.pointer()`](#torch.pointer).\n\n\n<a name=\"torch.isTypeOf\"></a>\n### [boolean] isTypeOf(object, typeSpec) ###\n\nChecks if a given `object` is an instance of the type specified by `typeSpec`.\n`typeSpec` can be a string (including a `string.find` pattern) or the constructor\nobject for a Torch class. 
This function traverses up the class hierarchy,\nso if b is an instance of B which is a subclass of A, then\n`torch.isTypeOf(b, B)` and `torch.isTypeOf(b, A)` will both return `true`.\n\n\n<a name=\"torch.newmetatable\"></a>\n### [table] torch.newmetatable(name, parentName, constructor) ###\n\nRegister a new metatable as a Torch type with the given string `name`. The new metatable is returned.\n\nIf the string `parentName` is not `nil` and is a valid Torch type (previously created\nby `torch.newmetatable()`) then set the corresponding metatable as a metatable to the returned new\nmetatable.\n\nIf the given `constructor` function is not `nil`, then assign to the variable `name` the given constructor.\nThe given `name` might be of the form `package.className`, in which case the `className` will be local to the\nspecified `package`. In that case, `package` must be a valid and already loaded package.\n\n\n<a name=\"torch.factory\"></a>\n### [function] torch.factory(name) ###\n\nReturns the factory function of the Torch class `name`. If the class name is invalid or if the class\nhas no factory, then returns `nil`.\n\nA Torch class is a class created with [`torch.class()`](#torch.class) or\n[`torch.newmetatable()`](#torch.newmetatable).\n\nA factory function is able to return a new (empty) object of its corresponding class. This is helpful for\n[object serialization](file.md#torch.File.serialization).\n\n\n<a name=\"torch.getmetatable\"></a>\n### [table] torch.getmetatable(string) ###\n\nGiven a `string`, returns a metatable corresponding to the Torch class described\nby `string`. 
Returns `nil` if the class does not exist.\n\nA Torch class is a class created with [`torch.class()`](#torch.class) or\n[`torch.newmetatable()`](#torch.newmetatable).\n\nExample:\n\n```lua\n> for k, v in pairs(torch.getmetatable('torch.CharStorage')) do print(k, v) end\n\n__index__       function: 0x1a4ba80\n__typename      torch.CharStorage\nwrite           function: 0x1a49cc0\n__tostring__    function: 0x1a586e0\n__newindex__    function: 0x1a4ba40\nstring          function: 0x1a4d860\n__version       1\nread            function: 0x1a4d840\ncopy            function: 0x1a49c80\n__len__         function: 0x1a37440\nfill            function: 0x1a375c0\nresize          function: 0x1a37580\n__index         table: 0x1a4a080\nsize            function: 0x1a4ba20\n```\n\n\n<a name=\"torch.isequal\"></a>\n### [boolean] torch.isequal(object1, object2) ###\n\nIf the two objects given as arguments are *Lua* tables (or *Torch7* objects), then returns `true` if and only if the\ntables (or Torch objects) have the same address in memory. Returns `false` in all other cases.\n\nA Torch class is a class created with [`torch.class()`](#torch.class) or\n[`torch.newmetatable()`](#torch.newmetatable).\n\n\n<a name=\"torch.getdefaulttensortype\"></a>\n### [string] torch.getdefaulttensortype() ###\n\nReturns a string representing the default tensor type currently in use\nby *Torch7*.\n\n\n<a name=\"torch.getenv\"></a>\n### [table] torch.getenv(function or userdata) ###\n\nReturns the Lua `table` environment of the given `function` or the given\n`userdata`.  To know more about environments, please read the documentation\nof [`lua_setfenv()`](http://www.lua.org/manual/5.1/manual.html#lua_setfenv)\nand [`lua_getfenv()`](http://www.lua.org/manual/5.1/manual.html#lua_getfenv).\n\n\n<a name=\"torch.version\"></a>\n### [number] torch.version(object) ###\n\nReturns the field `__version` of a given object. 
This might\nbe helpful to handle variations in a class over time.\n\n\n<a name=\"torch.pointer\"></a>\n### [number] torch.pointer(object) ###\n\nReturns a unique id (pointer) of the given `object`, which can be a *Torch7*\nobject, a table, a thread or a function.\n\nThis is different from the `class` id returned by [`torch.id()`](#torch.id).\n\n\n<a name=\"torch.setdefaulttensortype\"></a>\n### torch.setdefaulttensortype([typename]) ###\n\nSets the default tensor type for all the tensors allocated from this\npoint on. Valid types are:\n\n  * `torch.ByteTensor`\n  * `torch.CharTensor`\n  * `torch.ShortTensor`\n  * `torch.IntTensor`\n  * `torch.FloatTensor`\n  * `torch.DoubleTensor`\n\n\n<a name=\"torch.setenv\"></a>\n### torch.setenv(function or userdata, table) ###\n\nAssign `table` as the Lua environment of the given `function` or the given\n`userdata`.  To know more about environments, please read the documentation\nof [`lua_setfenv()`](http://www.lua.org/manual/5.1/manual.html#lua_setfenv)\nand [`lua_getfenv()`](http://www.lua.org/manual/5.1/manual.html#lua_getfenv).\n\n\n<a name=\"torch.setmetatable\"></a>\n### [object] torch.setmetatable(table, classname) ###\n\nSet the metatable of the given `table` to the metatable of the Torch\nobject named `classname`.  This function has to be used with a lot\nof care.\n\n\n<a name=\"torch.getconstructortable\"></a>\n### [table] torch.getconstructortable(string) ###\n\nBUGGY\nReturn the constructor table of the Torch class specified by `string`.\n\n\n<a name=\"torch.totable\"></a>\n### [table] torch.totable(object) ###\n\nConverts a Tensor or a Storage to a lua table. Also available as methods: `tensor:totable()` and `storage:totable()`.\nMultidimensional Tensors are converted to a set of nested tables, matching the shape of the source Tensor.\n\n```lua\n> print(torch.totable(torch.Tensor({1, 2, 3})))\n{\n  1 : 1\n  2 : 2\n  3 : 3\n}\n```\n"
  },
  {
    "path": "general.h",
    "content": "#ifndef TORCH_GENERAL_INC\n#define TORCH_GENERAL_INC\n\n#include <stdlib.h>\n#include <string.h>\n#include <stddef.h>\n\n#include \"luaT.h\"\n#include \"TH.h\"\n\n#if (defined(_MSC_VER) || defined(__MINGW32__))\n\n#define snprintf _snprintf\n#define popen _popen\n#define pclose _pclose\n\n#endif\n\n#if LUA_VERSION_NUM >= 503\n/* one can simply enable LUA_COMPAT_5_2 to be backward compatible.\nHowever, this does not work when we are trying to use system-installed lua,\nhence these redefines\n*/\n#define luaL_optlong(L,n,d)     ((long)luaL_optinteger(L, (n), (d)))\n#define luaL_checklong(L,n)     ((long)luaL_checkinteger(L, (n)))\n#define luaL_checkint(L,n)      ((int)luaL_checkinteger(L, (n)))\n#endif\n\n#endif\n"
  },
  {
    "path": "generic/Storage.c",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/Storage.c\"\n#else\n\n#include \"luaG.h\"\n\nstatic int torch_Storage_(new)(lua_State *L)\n{\n  int index = 1;\n  THStorage *storage;\n  THAllocator *allocator = luaT_toudata(L, index, \"torch.Allocator\");\n  if (allocator) index++;\n\n  if(lua_type(L, index) == LUA_TSTRING)\n  {\n    if (allocator)\n      THError(\"Passing allocator not supported when using file mapping\");\n\n    const char *fileName = luaL_checkstring(L, index);\n    int isShared = 0;\n    if(luaT_optboolean(L, index + 1, 0))\n      isShared = TH_ALLOCATOR_MAPPED_SHARED;\n    ptrdiff_t size = luaL_optinteger(L, index + 2, 0);\n    if (isShared && luaT_optboolean(L, index + 3, 0))\n      isShared = TH_ALLOCATOR_MAPPED_SHAREDMEM;\n    storage = THStorage_(newWithMapping)(fileName, size, isShared);\n  }\n  else if(lua_type(L, index) == LUA_TTABLE)\n  {\n    ptrdiff_t size = lua_objlen(L, index);\n    ptrdiff_t i;\n    if (allocator)\n      storage = THStorage_(newWithAllocator)(size, allocator, NULL);\n    else\n      storage = THStorage_(newWithSize)(size);\n    for(i = 1; i <= size; i++)\n    {\n      lua_rawgeti(L, index, i);\n      if(!lua_isnumber(L, -1))\n      {\n        THStorage_(free)(storage);\n        luaL_error(L, \"element at index %d is not a number\", i);\n      }\n      THStorage_(set)(storage, i-1, LUA_NUMBER_TO_REAL(lua_tonumber(L, -1)));\n      lua_pop(L, 1);\n    }\n  }\n  else if(lua_type(L, index) == LUA_TUSERDATA)\n  {\n    if (allocator)\n      THError(\"Passing allocator not supported when using storage views\");\n\n    THStorage *src = luaT_checkudata(L, index, torch_Storage);\n    real *ptr = src->data;\n    ptrdiff_t offset = luaL_optinteger(L, index + 1, 1) - 1;\n    if (offset < 0 || offset >= src->size) {\n      luaL_error(L, \"offset out of bounds\");\n    }\n    ptrdiff_t size = luaL_optinteger(L, index + 2, src->size - offset);\n    if (size < 1 || size > (src->size - offset)) {\n      
luaL_error(L, \"size out of bounds\");\n    }\n    storage = THStorage_(newWithData)(ptr + offset, size);\n    storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_VIEW;\n    storage->view = src;\n    THStorage_(retain)(storage->view);\n  }\n  else if(lua_type(L, index + 1) == LUA_TNUMBER)\n  {\n    ptrdiff_t size = luaL_optinteger(L, index, 0);\n    real *ptr = (real *)luaL_optinteger(L, index + 1, 0);\n    if (allocator)\n      storage = THStorage_(newWithDataAndAllocator)(ptr, size, allocator, NULL);\n    else\n      storage = THStorage_(newWithData)(ptr, size);\n    storage->flag = TH_STORAGE_REFCOUNTED;\n  }\n  else\n  {\n    ptrdiff_t size = luaL_optinteger(L, index, 0);\n    if (allocator)\n      storage = THStorage_(newWithAllocator)(size, allocator, NULL);\n    else\n      storage = THStorage_(newWithSize)(size);\n  }\n  luaT_pushudata(L, storage, torch_Storage);\n  return 1;\n}\n\nstatic int torch_Storage_(retain)(lua_State *L)\n{\n  THStorage *storage = luaT_checkudata(L, 1, torch_Storage);\n  THStorage_(retain)(storage);\n  return 0;\n}\n\nstatic int torch_Storage_(free)(lua_State *L)\n{\n  THStorage *storage = luaT_checkudata(L, 1, torch_Storage);\n  THStorage_(free)(storage);\n  return 0;\n}\n\nstatic int torch_Storage_(resize)(lua_State *L)\n{\n  THStorage *storage = luaT_checkudata(L, 1, torch_Storage);\n  ptrdiff_t size = luaL_checkinteger(L, 2);\n/*  int keepContent = luaT_optboolean(L, 3, 0); */\n  THStorage_(resize)(storage, size);/*, keepContent); */\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_Storage_(copy)(lua_State *L)\n{\n  THStorage *storage = luaT_checkudata(L, 1, torch_Storage);\n  void *src;\n  if( (src = luaT_toudata(L, 2, torch_Storage)) )\n    THStorage_(copy)(storage, src);\n  else if( (src = luaT_toudata(L, 2, \"torch.ByteStorage\")) )\n    THStorage_(copyByte)(storage, src);\n  else if( (src = luaT_toudata(L, 2, \"torch.CharStorage\")) )\n    THStorage_(copyChar)(storage, src);\n  else if( (src = luaT_toudata(L, 2, 
\"torch.ShortStorage\")) )\n    THStorage_(copyShort)(storage, src);\n  else if( (src = luaT_toudata(L, 2, \"torch.IntStorage\")) )\n    THStorage_(copyInt)(storage, src);\n  else if( (src = luaT_toudata(L, 2, \"torch.LongStorage\")) )\n    THStorage_(copyLong)(storage, src);\n  else if( (src = luaT_toudata(L, 2, \"torch.FloatStorage\")) )\n    THStorage_(copyFloat)(storage, src);\n  else if( (src = luaT_toudata(L, 2, \"torch.DoubleStorage\")) )\n    THStorage_(copyDouble)(storage, src);\n  else if( (src = luaT_toudata(L, 2, \"torch.HalfStorage\")) )\n    THStorage_(copyHalf)(storage, src);\n  else\n    luaL_typerror(L, 2, \"torch.*Storage\");\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_Storage_(fill)(lua_State *L)\n{\n  THStorage *storage = luaT_checkudata(L, 1, torch_Storage);\n  real value = luaG_(checkreal)(L, 2);\n  THStorage_(fill)(storage, value);\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_Storage_(elementSize)(lua_State *L)\n{\n  luaT_pushinteger(L, THStorage_(elementSize)());\n  return 1;\n}\n\nstatic int torch_Storage_(__len__)(lua_State *L)\n{\n  THStorage *storage = luaT_checkudata(L, 1, torch_Storage);\n  luaT_pushinteger(L, storage->size);\n  return 1;\n}\n\nstatic int torch_Storage_(__newindex__)(lua_State *L)\n{\n  if(lua_isnumber(L, 2))\n  {\n    THStorage *storage = luaT_checkudata(L, 1, torch_Storage);\n    ptrdiff_t index = luaL_checkinteger(L, 2) - 1;\n    real number = luaG_(checkreal)(L, 3);\n    THStorage_(set)(storage, index, number);\n    lua_pushboolean(L, 1);\n  }\n  else\n    lua_pushboolean(L, 0);\n\n  return 1;\n}\n\nstatic int torch_Storage_(__index__)(lua_State *L)\n{\n  if(lua_isnumber(L, 2))\n  {\n    THStorage *storage = luaT_checkudata(L, 1, torch_Storage);\n    ptrdiff_t index = luaL_checkinteger(L, 2) - 1;\n    luaG_(pushreal)(L, THStorage_(get)(storage, index));\n    lua_pushboolean(L, 1);\n    return 2;\n  }\n  else\n  {\n    lua_pushboolean(L, 0);\n    return 1;\n  }\n}\n\n#if 
defined(TH_REAL_IS_CHAR) || defined(TH_REAL_IS_BYTE)\nstatic int torch_Storage_(string)(lua_State *L)\n{\n  THStorage *storage = luaT_checkudata(L, 1, torch_Storage);\n  if(lua_isstring(L, -1))\n  {\n    size_t len = 0;\n    const char *str = lua_tolstring(L, -1, &len);\n    THStorage_(resize)(storage, len);\n    memmove(storage->data, str, len);\n    lua_settop(L, 1);\n  }\n  else\n    lua_pushlstring(L, (char*)storage->data, storage->size);\n\n  return 1; /* either storage or string */\n}\n#endif\n\nstatic int torch_Storage_(totable)(lua_State *L)\n{\n  THStorage *storage = luaT_checkudata(L, 1, torch_Storage);\n  ptrdiff_t i;\n\n  lua_newtable(L);\n  for(i = 0; i < storage->size; i++)\n  {\n    luaG_(pushreal)(L, storage->data[i]);\n    lua_rawseti(L, -2, i+1);\n  }\n  return 1;\n}\n\nstatic int torch_Storage_(factory)(lua_State *L)\n{\n  THStorage *storage = THStorage_(new)();\n  luaT_pushudata(L, storage, torch_Storage);\n  return 1;\n}\n\nstatic int torch_Storage_(write)(lua_State *L)\n{\n  THStorage *storage = luaT_checkudata(L, 1, torch_Storage);\n  THFile *file = luaT_checkudata(L, 2, \"torch.File\");\n\n#ifdef DEBUG\n  THAssert(storage->size < LONG_MAX);\n#endif\n  THFile_writeLongScalar(file, storage->size);\n  THFile_writeRealRaw(file, storage->data, storage->size);\n\n  return 0;\n}\n\nstatic int torch_Storage_(read)(lua_State *L)\n{\n  THStorage *storage = luaT_checkudata(L, 1, torch_Storage);\n  THFile *file = luaT_checkudata(L, 2, \"torch.File\");\n  ptrdiff_t size = THFile_readLongScalar(file);\n\n  THStorage_(resize)(storage, size);\n  THFile_readRealRaw(file, storage->data, storage->size);\n\n  return 0;\n}\n\nstatic const struct luaL_Reg torch_Storage_(_) [] = {\n  {\"retain\", torch_Storage_(retain)},\n  {\"free\", torch_Storage_(free)},\n  {\"size\", torch_Storage_(__len__)},\n  {\"elementSize\", torch_Storage_(elementSize)},\n  {\"__len__\", torch_Storage_(__len__)},\n  {\"__newindex__\", torch_Storage_(__newindex__)},\n  {\"__index__\", 
torch_Storage_(__index__)},\n  {\"resize\", torch_Storage_(resize)},\n  {\"fill\", torch_Storage_(fill)},\n  {\"copy\", torch_Storage_(copy)},\n  {\"totable\", torch_Storage_(totable)},\n  {\"write\", torch_Storage_(write)},\n  {\"read\", torch_Storage_(read)},\n#if defined(TH_REAL_IS_CHAR) || defined(TH_REAL_IS_BYTE)\n  {\"string\", torch_Storage_(string)},\n#endif\n  {NULL, NULL}\n};\n\nvoid torch_Storage_(init)(lua_State *L)\n{\n  luaT_newmetatable(L, torch_Storage, NULL,\n                    torch_Storage_(new), torch_Storage_(free), torch_Storage_(factory));\n  luaT_setfuncs(L, torch_Storage_(_), 0);\n  lua_pop(L, 1);\n}\n\n#endif\n"
  },
  {
    "path": "generic/Tensor.c",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/Tensor.c\"\n#else\n\n#include \"luaG.h\"\n\nstatic void torch_Tensor_(c_readTensorStorageSizeStride)(lua_State *L, int index, int allowNone, int allowTensor, int allowStorage, int allowStride,\n                                                         THStorage **storage_, ptrdiff_t *storageOffset_, THLongStorage **size_, THLongStorage **stride_);\n\nstatic void torch_Tensor_(c_readSizeStride)(lua_State *L, int index, int allowStride, THLongStorage **size_, THLongStorage **stride_);\n\nstatic int torch_Tensor_(size)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  if(lua_isnumber(L,2))\n  {\n    int dim = luaL_checkint(L, 2)-1;\n    THArgCheck(dim >= 0 && dim < tensor->nDimension, 2, \"dimension %d out of range of %dD tensor\",\n        dim+1, THTensor_(nDimension)(tensor));\n    luaT_pushlong(L, tensor->size[dim]);\n  }\n  else\n  {\n    THLongStorage *size = THTensor_(newSizeOf)(tensor);\n    luaT_pushudata(L, size, \"torch.LongStorage\");\n  }\n  return 1;\n}\n\nstatic int torch_Tensor_(elementSize)(lua_State *L)\n{\n  luaT_pushinteger(L, THStorage_(elementSize)());\n  return 1;\n}\n\nstatic int torch_Tensor_(stride)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  if(lua_isnumber(L,2))\n  {\n    int dim = luaL_checkint(L, 2)-1;\n    THArgCheck(dim >= 0 && dim < tensor->nDimension, 2, \"dimension %d out of range of %dD tensor\",\n        dim+1, THTensor_(nDimension)(tensor));\n    luaT_pushlong(L, tensor->stride[dim]);\n  }\n  else\n  {\n    THLongStorage *storage = THLongStorage_newWithSize(tensor->nDimension);\n    memmove(storage->data, tensor->stride, sizeof(long)*tensor->nDimension);\n    luaT_pushudata(L, storage, \"torch.LongStorage\");\n  }\n  return 1;\n}\n\nstatic int torch_Tensor_(nDimension)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  luaT_pushinteger(L, tensor->nDimension);\n  return 
1;\n}\n\nstatic int torch_Tensor_(storage)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  if(tensor->storage)\n  {\n    THStorage_(retain)(tensor->storage);\n    luaT_pushudata(L, tensor->storage, torch_Storage);\n  }\n  else\n    lua_pushnil(L);\n\n  return 1;\n}\n\nstatic int torch_Tensor_(storageOffset)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  luaT_pushinteger(L, tensor->storageOffset+1);\n  return 1;\n}\n\nstatic int torch_Tensor_(new)(lua_State *L)\n{\n  THTensor *tensor;\n  ptrdiff_t storageOffset;\n  THLongStorage *size, *stride;\n\n  if(lua_type(L, 1) == LUA_TTABLE)\n  {\n    ptrdiff_t i, j;\n    THLongStorage *counter;\n    ptrdiff_t si = 0;\n    int dimension = 0;\n    int is_finished = 0;\n\n    lua_settop(L, 1);\n    size = THLongStorage_new();\n\n    while( (lua_type(L, -1) == LUA_TTABLE) && (lua_objlen(L, -1) > 0) )\n    {\n      THLongStorage_resize(size, dimension+1);\n      size->data[dimension] = lua_objlen(L, -1);\n      dimension++;\n      lua_rawgeti(L, -1, 1);\n    }\n    lua_pop(L, 1);\n\n    counter = THLongStorage_newWithSize(size->size);\n    THLongStorage_fill(counter, 0);\n\n    tensor = THTensor_(newWithSize)(size, NULL);\n\n    if(size->size == 0)\n      is_finished = 1;\n\n    while(!is_finished)\n    {\n      if(!lua_istable(L, -1))\n      {\n        THLongStorage_free(size);\n        THLongStorage_free(counter);\n        THTensor_(free)(tensor);\n        THError(\"invalid tensor definition\");\n      }\n\n      if(lua_objlen(L, -1) != size->data[size->size-1])\n      {\n        THLongStorage_free(size);\n        THLongStorage_free(counter);\n        THTensor_(free)(tensor);\n        THError(\"invalid tensor sizes\");\n      }\n\n      for(i = 0; i < size->data[size->size-1]; i++)\n      {\n        lua_rawgeti(L, -1, i+1);\n        if(!lua_isnumber(L, -1))\n        {\n          THLongStorage_free(size);\n          THLongStorage_free(counter);\n          
THTensor_(free)(tensor);\n          THError(\"invalid element (not a number)\");\n        }\n        THStorage_(set)(THTensor_(storage)(tensor), si++, luaG_(checkreal)(L, -1));\n        lua_pop(L, 1);\n      }\n\n      if(size->size == 1)\n        break;\n\n      for(i = size->size-2; i >= 0; i--)\n      {\n        if(++counter->data[i] == size->data[i])\n        {\n          if(i == 0)\n          {\n            is_finished = 1;\n            break;\n          }\n          else\n          {\n            counter->data[i] = 0;\n            lua_pop(L, 1);\n          }\n        }\n        else\n        {\n          lua_pop(L, 1);\n          for(j = i; j < size->size-1; j++)\n          {\n            if(!lua_istable(L, -1))\n            {\n              THLongStorage_free(size);\n              THLongStorage_free(counter);\n              THTensor_(free)(tensor);\n              THError(\"invalid tensor definition\");\n            }\n            if(lua_objlen(L, -1) != size->data[j])\n            {\n              THLongStorage_free(size);\n              THLongStorage_free(counter);\n              THTensor_(free)(tensor);\n              THError(\"invalid tensor sizes\");\n            }\n            lua_rawgeti(L, -1, counter->data[j]+1);\n          }\n          break;\n        }\n      }\n    }\n\n    THLongStorage_free(size);\n    THLongStorage_free(counter);\n  }\n  else\n  {\n    THStorage *storage;\n\n    torch_Tensor_(c_readTensorStorageSizeStride)(L, 1, 1, 1, 1, 1,\n                                                 &storage, &storageOffset, &size, &stride);\n\n    tensor = THTensor_(newWithStorage)(storage, storageOffset, size, stride);\n\n    THLongStorage_free(size);\n    THLongStorage_free(stride);\n  }\n\n  luaT_pushudata(L, tensor, torch_Tensor);\n  return 1;\n}\n\nstatic int torch_Tensor_(set)(lua_State *L)\n{\n  THTensor *self = luaT_checkudata(L, 1, torch_Tensor);\n  THStorage *storage;\n  ptrdiff_t storageOffset;\n  THLongStorage *size, *stride;\n\n  
torch_Tensor_(c_readTensorStorageSizeStride)(L, 2, 1, 1, 1, 1,\n                                               &storage, &storageOffset, &size, &stride);\n\n  THTensor_(setStorage)(self, storage, storageOffset, size, stride);\n\n  THLongStorage_free(size);\n  THLongStorage_free(stride);\n\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_Tensor_(clone)(lua_State *L)\n{\n  THTensor *self = luaT_checkudata(L, 1, torch_Tensor);\n  self = THTensor_(newClone)(self);\n  luaT_pushudata(L, self, torch_Tensor);\n  return 1;\n}\n\nstatic int torch_Tensor_(contiguous)(lua_State *L)\n{\n  THTensor *self = luaT_checkudata(L, 1, torch_Tensor);\n  self = THTensor_(newContiguous)(self);\n  luaT_pushudata(L, self, torch_Tensor);\n  return 1;\n}\n\n/* Resize */\nstatic int torch_Tensor_(resizeAs)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  THTensor *src = luaT_checkudata(L, 2, torch_Tensor);\n  THTensor_(resizeAs)(tensor, src);\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_Tensor_(resize)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  THLongStorage *size, *stride;\n\n  torch_Tensor_(c_readSizeStride)(L, 2, 0, &size, &stride);\n\n  THTensor_(resize)(tensor, size, stride);\n\n  THLongStorage_free(size);\n  THLongStorage_free(stride);\n\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_Tensor_(narrow)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  int dimension = luaL_checkint(L, 2)-1;\n  long firstIndex = luaL_checklong(L, 3)-1;\n  long size = luaL_checklong(L, 4);\n\n/*  THArgCheck( (dimension >= 0) && (dimension < tensor->nDimension), 2, \"out of range\");\n  THArgCheck( (firstIndex >= 0) && (firstIndex < tensor->size[dimension]), 3, \"out of range\");\n  THArgCheck( (size > 0) && (firstIndex+size <= tensor->size[dimension]), 4, \"out of range\");\n*/\n  tensor = THTensor_(newWithTensor)(tensor);\n  THTensor_(narrow)(tensor, NULL, dimension, firstIndex, 
size);\n  luaT_pushudata(L, tensor, torch_Tensor);\n  return 1;\n}\n\nstatic int torch_Tensor_(sub)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  long d0s = -1, d0e = -1, d1s = -1, d1e = -1, d2s = -1, d2e = -1, d3s = -1, d3e = -1;\n\n  d0s = luaL_checklong(L, 2)-1;\n  d0e = luaL_checklong(L, 3)-1;\n  if(d0s < 0)\n    d0s += tensor->size[0]+1;\n  if(d0e < 0)\n    d0e += tensor->size[0]+1;\n  THArgCheck(tensor->nDimension > 0, 2, \"invalid dimension\");\n  THArgCheck(d0s >= 0 && d0s < tensor->size[0], 2, \"out of range\");\n  THArgCheck(d0e >= 0 && d0e < tensor->size[0], 3, \"out of range\");\n  THArgCheck(d0e >= d0s, 3, \"end smaller than beginning\");\n\n  if(!lua_isnone(L, 4))\n  {\n    d1s = luaL_checklong(L, 4)-1;\n    d1e = luaL_checklong(L, 5)-1;\n    if(d1s < 0)\n      d1s += tensor->size[1]+1;\n    if(d1e < 0)\n      d1e += tensor->size[1]+1;\n    THArgCheck(tensor->nDimension > 1, 4, \"invalid dimension\");\n    THArgCheck(d1s >= 0 && d1s < tensor->size[1], 4, \"out of range\");\n    THArgCheck(d1e >= 0 && d1e < tensor->size[1], 5, \"out of range\");\n    THArgCheck(d1e >= d1s, 5, \"end smaller than beginning\");\n\n    if(!lua_isnone(L, 6))\n    {\n      d2s = luaL_checklong(L, 6)-1;\n      d2e = luaL_checklong(L, 7)-1;\n      if(d2s < 0)\n        d2s += tensor->size[2]+1;\n      if(d2e < 0)\n        d2e += tensor->size[2]+1;\n      THArgCheck(tensor->nDimension > 2, 6, \"invalid dimension\");\n      THArgCheck(d2s >= 0 && d2s < tensor->size[2], 6, \"out of range\");\n      THArgCheck(d2e >= 0 && d2e < tensor->size[2], 7, \"out of range\");\n      THArgCheck(d2e >= d2s, 7, \"end smaller than beginning\");\n\n      if(!lua_isnone(L, 8))\n      {\n        d3s = luaL_checklong(L, 8)-1;\n        d3e = luaL_checklong(L, 9)-1;\n        if(d3s < 0)\n          d3s += tensor->size[3]+1;\n        if(d3e < 0)\n          d3e += tensor->size[3]+1;\n        THArgCheck(tensor->nDimension > 3, 8, \"invalid dimension\");\n        
THArgCheck(d3s >= 0 && d3s < tensor->size[3], 8, \"out of range\");\n        THArgCheck(d3e >= 0 && d3e < tensor->size[3], 9, \"out of range\");\n        THArgCheck(d3e >= d3s, 9, \"end smaller than beginning\");\n      }\n    }\n  }\n\n  tensor = THTensor_(newWithTensor)(tensor);\n  THTensor_(narrow)(tensor, NULL, 0, d0s, d0e-d0s+1);\n  if(d1s >= 0)\n    THTensor_(narrow)(tensor, NULL, 1, d1s, d1e-d1s+1);\n  if(d2s >= 0)\n    THTensor_(narrow)(tensor, NULL, 2, d2s, d2e-d2s+1);\n  if(d3s >= 0)\n    THTensor_(narrow)(tensor, NULL, 3, d3s, d3e-d3s+1);\n  luaT_pushudata(L, tensor, torch_Tensor);\n  return 1;\n}\n\nstatic int torch_Tensor_(select)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  int dimension = luaL_checkint(L, 2)-1;\n  long sliceIndex = luaL_checklong(L, 3)-1;\n\n/*   THArgCheck(src->nDimension > 1, 1, \"cannot select on a vector\");\n  THArgCheck((dimension >= 0) && (dimension < src->nDimension), 2, \"out of range\");\n  THArgCheck((sliceIndex >= 0) && (sliceIndex < src->size[dimension]), 3, \"out of range\");\n*/\n\n  if(tensor->nDimension > 1)\n  {\n    tensor = THTensor_(newWithTensor)(tensor);\n    THTensor_(select)(tensor, NULL, dimension, sliceIndex);\n    luaT_pushudata(L, tensor, torch_Tensor);\n  }\n  else\n  {\n    THArgCheck(tensor->nDimension == 1, 1, \"empty Tensor\");\n    luaG_(pushreal)(L, THTensor_(get1d)(tensor, sliceIndex));\n  }\n\n  return 1;\n}\n\n#ifndef TH_REAL_IS_HALF\nstatic int torch_Tensor_(indexSelect)(lua_State *L)\n{\n  int narg = lua_gettop(L);\n  THTensor *tensor, *src;\n  THLongTensor *index;\n  int dim;\n  if (narg == 3)\n  {\n    tensor = THTensor_(new)();\n    src = luaT_checkudata(L, 1, torch_Tensor);\n    dim = luaL_checkint(L, 2) - 1;\n    index = luaT_checkudata(L, 3, \"torch.LongTensor\");\n    luaT_pushudata(L,tensor,torch_Tensor);\n  }\n  else if(narg == 4)\n  {\n    src = luaT_checkudata(L, 2, torch_Tensor);\n    dim = luaL_checkint(L, 3) - 1;\n    index = luaT_checkudata(L, 
4, \"torch.LongTensor\");\n    tensor = luaT_checkudata(L,1,torch_Tensor);\n  }\n  else\n  {\n    THError(torch_Tensor \", number, torch.LongTensor | \" torch_Tensor \", \" torch_Tensor \", number, torch.LongTensor expected\");\n    return 0;\n  }\n\n  THTensor_(indexSelect)(tensor,src,dim,index);\n\n  return 1;\n}\n\nstatic int torch_Tensor_(indexCopy)(lua_State *L)\n{\n  int narg = lua_gettop(L);\n  THTensor *tensor, *src;\n  THLongTensor *index;\n  int dim;\n  if(narg == 4)\n  {\n    dim = luaL_checkint(L, 2) - 1;\n    index = luaT_checkudata(L, 3, \"torch.LongTensor\");\n    src = luaT_checkudata(L, 4, torch_Tensor);\n    tensor = luaT_checkudata(L,1,torch_Tensor);\n  }\n  else\n  {\n    THError( torch_Tensor \", number, torch.LongTensor, \" torch_Tensor \" expected\");\n    return 0;\n  }\n\n  THTensor_(indexCopy)(tensor,dim,index,src);\n\n  return 1;\n}\n\nstatic int torch_Tensor_(indexAdd)(lua_State *L)\n{\n  int narg = lua_gettop(L);\n  THTensor *tensor, *src;\n  THLongTensor *index;\n  int dim;\n  if(narg == 4)\n  {\n    dim = luaL_checkint(L, 2) - 1;\n    index = luaT_checkudata(L, 3, \"torch.LongTensor\");\n    src = luaT_checkudata(L, 4, torch_Tensor);\n    tensor = luaT_checkudata(L,1,torch_Tensor);\n  }\n  else\n  {\n    THError( torch_Tensor \", number, torch.LongTensor, \" torch_Tensor \" expected\");\n    return 0;\n  }\n\n  THTensor_(indexAdd)(tensor,dim,index,src);\n\n  return 1;\n}\n\nstatic int torch_Tensor_(indexFill)(lua_State *L)\n{\n  int narg = lua_gettop(L);\n  THTensor *tensor;\n  THLongTensor *index;\n  real val;\n  int dim;\n  if(narg == 4)\n  {\n    dim = luaL_checkint(L, 2) - 1;\n    index = luaT_checkudata(L, 3, \"torch.LongTensor\");\n    val = luaG_(checkreal)(L, 4);\n    tensor = luaT_checkudata(L,1,torch_Tensor);\n  }\n  else\n  {\n    THError( torch_Tensor \", number, torch.LongTensor, number expected\");\n    return 0;\n  }\n\n  THTensor_(indexFill)(tensor,dim,index,val);\n\n  return 1;\n}\n\nstatic int 
torch_Tensor_(maskedSelect)(lua_State *L)\n{\n  int narg = lua_gettop(L);\n  THTensor *tensor, *src;\n  THByteTensor *mask;\n\n  if (narg == 2)\n  {\n    tensor = THTensor_(new)();\n    src = luaT_checkudata(L, 1, torch_Tensor);\n    mask = luaT_checkudata(L, 2, \"torch.ByteTensor\");\n    luaT_pushudata(L,tensor,torch_Tensor);\n  }\n  else if(narg == 3)\n  {\n    src = luaT_checkudata(L, 2, torch_Tensor);\n    mask = luaT_checkudata(L, 3, \"torch.ByteTensor\");\n    tensor = luaT_checkudata(L,1,torch_Tensor);\n  }\n  else\n  {\n    THError( torch_Tensor \", torch.ByteTensor | \" torch_Tensor \", \" torch_Tensor \", torch.ByteTensor expected\");\n    return 0;\n  }\n\n  THTensor_(maskedSelect)(tensor,src,mask);\n\n  return 1;\n}\n\nstatic int torch_Tensor_(maskedCopy)(lua_State *L)\n{\n  int narg = lua_gettop(L);\n  THTensor *tensor, *src;\n  THByteTensor *mask;\n\n  if(narg == 3)\n  {\n    mask = luaT_checkudata(L, 2, \"torch.ByteTensor\");\n    src = luaT_checkudata(L, 3, torch_Tensor);\n    tensor = luaT_checkudata(L,1,torch_Tensor);\n  }\n  else\n  {\n    THError( torch_Tensor \", torch.ByteTensor, \" torch_Tensor \" expected\");\n    return 0;\n  }\n\n  THTensor_(maskedCopy)(tensor,mask,src);\n\n  /* return destination */\n  lua_pop(L, 2);\n\n  return 1;\n}\n\nstatic int torch_Tensor_(maskedFill)(lua_State *L)\n{\n  int narg = lua_gettop(L);\n  THTensor *tensor;\n  THByteTensor *mask;\n  real val;\n  if(narg == 3)\n  {\n    mask = luaT_checkudata(L, 2, \"torch.ByteTensor\");\n    val = luaG_(checkreal)(L, 3);\n    tensor = luaT_checkudata(L,1,torch_Tensor);\n  }\n  else\n  {\n    THError( torch_Tensor \", torch.ByteTensor, number expected\");\n    return 0;\n  }\n\n  THTensor_(maskedFill)(tensor,mask,val);\n\n  return 1;\n}\n#endif\n\nstatic int torch_Tensor_(transpose)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  int dimension1 = luaL_checkint(L, 2)-1;\n  int dimension2 = luaL_checkint(L, 3)-1;\n\n/*\n  THArgCheck( 
(dimension1 >= 0) && (dimension1 < src->nDimension), 2, \"out of range\");\n  THArgCheck( (dimension2 >= 0) && (dimension2 < src->nDimension), 3, \"out of range\");\n*/\n\n  tensor = THTensor_(newWithTensor)(tensor);\n  THTensor_(transpose)(tensor, NULL, dimension1, dimension2);\n  luaT_pushudata(L, tensor, torch_Tensor);\n  return 1;\n}\n\nstatic int torch_Tensor_(t)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n\n  THArgCheck(tensor->nDimension == 2, 1, \"Tensor must have 2 dimensions\");\n\n  tensor = THTensor_(newWithTensor)(tensor);\n  THTensor_(transpose)(tensor, NULL, 0, 1);\n  luaT_pushudata(L, tensor, torch_Tensor);\n  return 1;\n}\n\nstatic int torch_Tensor_(unfold)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  int dimension = luaL_checkint(L, 2)-1;\n  long size = luaL_checklong(L, 3);\n  long step = luaL_checklong(L, 4);\n\n/*\n  THArgCheck( (src->nDimension > 0), 1, \"cannot unfold an empty tensor\");\n  THArgCheck(dimension < src->nDimension, 2, \"out of range\");\n  THArgCheck(size <= src->size[dimension], 3, \"out of range\");\n*/\n\n  tensor = THTensor_(newWithTensor)(tensor);\n  THTensor_(unfold)(tensor, NULL, dimension, size, step);\n  luaT_pushudata(L, tensor, torch_Tensor);\n  return 1;\n}\n\n/* is contiguous? 
[a bit like in TnXIterator] */\nstatic int torch_Tensor_(isContiguous)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  lua_pushboolean(L, THTensor_(isContiguous)(tensor));\n  return 1;\n}\n\nstatic int torch_Tensor_(isSize)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  THLongStorage *size = luaT_checkudata(L, 2, \"torch.LongStorage\");\n  lua_pushboolean(L, THTensor_(isSize)(tensor, size));\n  return 1;\n}\n\nstatic int torch_Tensor_(isSameSizeAs)(lua_State *L)\n{\n  THTensor *tensor1 = luaT_checkudata(L, 1, torch_Tensor);\n  THTensor *tensor2 = luaT_checkudata(L, 2, torch_Tensor);\n  lua_pushboolean(L, THTensor_(isSameSizeAs)(tensor1, tensor2));\n  return 1;\n}\n\nstatic int torch_Tensor_(isSetTo)(lua_State *L)\n{\n  THTensor *tensor1 = luaT_checkudata(L, 1, torch_Tensor);\n  THTensor *tensor2 = luaT_checkudata(L, 2, torch_Tensor);\n  lua_pushboolean(L, THTensor_(isSetTo)(tensor1, tensor2));\n  return 1;\n}\n\nstatic int torch_Tensor_(nElement)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  luaT_pushinteger(L, THTensor_(nElement)(tensor));\n  return 1;\n}\n\nstatic int torch_Tensor_(copy)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  void *src;\n  if( (src = luaT_toudata(L, 2, torch_Tensor)) )\n    THTensor_(copy)(tensor, src);\n  else if( (src = luaT_toudata(L, 2, \"torch.ByteTensor\")) )\n    THTensor_(copyByte)(tensor, src);\n  else if( (src = luaT_toudata(L, 2, \"torch.CharTensor\")) )\n    THTensor_(copyChar)(tensor, src);\n  else if( (src = luaT_toudata(L, 2, \"torch.ShortTensor\")) )\n    THTensor_(copyShort)(tensor, src);\n  else if( (src = luaT_toudata(L, 2, \"torch.IntTensor\")) )\n    THTensor_(copyInt)(tensor, src);\n  else if( (src = luaT_toudata(L, 2, \"torch.LongTensor\")) )\n    THTensor_(copyLong)(tensor, src);\n  else if( (src = luaT_toudata(L, 2, \"torch.FloatTensor\")) )\n    THTensor_(copyFloat)(tensor, src);\n  else 
if( (src = luaT_toudata(L, 2, \"torch.DoubleTensor\")) )\n    THTensor_(copyDouble)(tensor, src);\n  else if( (src = luaT_toudata(L, 2, \"torch.HalfTensor\")) )\n    THTensor_(copyHalf)(tensor, src);\n  else\n    luaL_typerror(L, 2, \"torch.*Tensor\");\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_Tensor_(__newindex__)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  THLongStorage *idx = NULL;\n  THByteTensor *mask;\n\n  if(lua_isnumber(L, 2))\n  {\n    void *src;\n    long index = luaL_checklong(L,2)-1;\n    THArgCheck(tensor->nDimension > 0, 1, \"empty tensor\");\n    if (index < 0) index = tensor->size[0] + index + 1;\n\n    if (lua_isnumber(L,3)) {\n      real value = luaG_(checkreal)(L,3);\n      if (tensor->nDimension == 1) {\n        THArgCheck(index >= 0 && index < tensor->size[0], 2, \"out of range\");\n        THStorage_(set)(tensor->storage, tensor->storageOffset+index*tensor->stride[0], value);\n      } else {\n#ifndef TH_REAL_IS_HALF\n        tensor = THTensor_(newWithTensor)(tensor);\n        THTensor_(narrow)(tensor, NULL, 0, index, 1);\n        THTensor_(fill)(tensor, value);\n        THTensor_(free)(tensor);\n#else\n        THError(\"fill on torch.HalfTensor not yet supported\");\n#endif\n      }\n    } else if( (src = luaT_toudata(L, 3, torch_Tensor)) ) {\n      tensor = THTensor_(newWithTensor)(tensor);\n      THTensor_(narrow)(tensor, NULL, 0, index, 1);\n      THTensor_(copy)(tensor, src);\n      THTensor_(free)(tensor);\n    } else if( (src = luaT_toudata(L, 3, \"torch.ByteTensor\")) ) {\n      tensor = THTensor_(newWithTensor)(tensor);\n      THTensor_(narrow)(tensor, NULL, 0, index, 1);\n      THTensor_(copyByte)(tensor, src);\n      THTensor_(free)(tensor);\n    } else if( (src = luaT_toudata(L, 3, \"torch.CharTensor\")) ) {\n      tensor = THTensor_(newWithTensor)(tensor);\n      THTensor_(narrow)(tensor, NULL, 0, index, 1);\n      THTensor_(copyChar)(tensor, src);\n      THTensor_(free)(tensor);\n  
  } else if( (src = luaT_toudata(L, 3, \"torch.ShortTensor\")) ) {\n      tensor = THTensor_(newWithTensor)(tensor);\n      THTensor_(narrow)(tensor, NULL, 0, index, 1);\n      THTensor_(copyShort)(tensor, src);\n      THTensor_(free)(tensor);\n    } else if( (src = luaT_toudata(L, 3, \"torch.IntTensor\")) ) {\n      tensor = THTensor_(newWithTensor)(tensor);\n      THTensor_(narrow)(tensor, NULL, 0, index, 1);\n      THTensor_(copyInt)(tensor, src);\n      THTensor_(free)(tensor);\n    } else if( (src = luaT_toudata(L, 3, \"torch.LongTensor\")) ) {\n      tensor = THTensor_(newWithTensor)(tensor);\n      THTensor_(narrow)(tensor, NULL, 0, index, 1);\n      THTensor_(copyLong)(tensor, src);\n      THTensor_(free)(tensor);\n    } else if( (src = luaT_toudata(L, 3, \"torch.FloatTensor\")) ) {\n      tensor = THTensor_(newWithTensor)(tensor);\n      THTensor_(narrow)(tensor, NULL, 0, index, 1);\n      THTensor_(copyFloat)(tensor, src);\n      THTensor_(free)(tensor);\n    } else if( (src = luaT_toudata(L, 3, \"torch.DoubleTensor\")) ) {\n      tensor = THTensor_(newWithTensor)(tensor);\n      THTensor_(narrow)(tensor, NULL, 0, index, 1);\n      THTensor_(copyDouble)(tensor, src);\n      THTensor_(free)(tensor);\n    } else if( (src = luaT_toudata(L, 3, \"torch.HalfTensor\")) ) {\n      tensor = THTensor_(newWithTensor)(tensor);\n      THTensor_(narrow)(tensor, NULL, 0, index, 1);\n      THTensor_(copyHalf)(tensor, src);\n      THTensor_(free)(tensor);\n    } else {\n      luaL_typerror(L, 3, \"torch.*Tensor\");\n    }\n    lua_pushboolean(L, 1);\n  }\n  else if((idx = luaT_toudata(L, 2, \"torch.LongStorage\")))\n  {\n    ptrdiff_t index = THTensor_(storageOffset)(tensor);\n    real value = luaG_(checkreal)(L,3);\n    int dim;\n\n    THArgCheck(idx->size == tensor->nDimension, 2, \"invalid size\");\n\n    for(dim = 0; dim < idx->size; dim++)\n    {\n      long z = idx->data[dim]-1;\n      if (z < 0) z = tensor->size[dim] + z + 1;\n      THArgCheck((z >= 0) && (z < 
tensor->size[dim]), 2, \"index out of bound\");\n      index += z*tensor->stride[dim];\n    }\n\n    THStorage_(set)(tensor->storage, index, value);\n    lua_pushboolean(L, 1);\n  }\n  else if(lua_istable(L, 2))\n  {\n    int dim;\n    int cdim = 0;\n    int ndims;\n    int done = 0;\n    ndims = tensor->nDimension;\n    THArgCheck(lua_objlen(L, 2) <= ndims, 2, \"too many indices provided\");\n    tensor = THTensor_(newWithTensor)(tensor);\n    for(dim = 0; dim < ndims; dim++)\n    {\n      lua_rawgeti(L, 2, dim+1);\n      if(lua_isnumber(L, -1))\n      {\n        long z = lua_tonumber(L, -1)-1;\n        lua_pop(L, 1);\n        if (z < 0) z = tensor->size[cdim] + z + 1;\n        THArgCheck((z >= 0) && (z < tensor->size[cdim]), 2, \"index out of bound\");\n        if(tensor->nDimension == 1) {\n          real value = luaG_(checkreal)(L,3);\n          done = 1;\n          THStorage_(set)(tensor->storage, tensor->storageOffset+z*tensor->stride[0], value);\n        } else {\n          THTensor_(select)(tensor, NULL, cdim, z);\n        }\n      }\n      else if (lua_istable(L, -1))\n      {\n        long start = 0;\n        long end = tensor->size[cdim]-1;\n        lua_rawgeti(L, -1, 1);\n        if(lua_isnumber(L, -1)) {\n          start = lua_tonumber(L, -1)-1;\n          end = start;\n        }\n        lua_pop(L, 1);\n        if (start < 0) start = tensor->size[cdim] + start + 1;\n        THArgCheck((start >= 0) && (start < tensor->size[cdim]), 2, \"start index out of bound\");\n\n        lua_rawgeti(L, -1, 2);\n        if(lua_isnumber(L, -1)) {\n          end = lua_tonumber(L, -1)-1;\n        }\n        lua_pop(L, 2);\n        if (end < 0) end = tensor->size[cdim] + end + 1;\n        THArgCheck((end >= 0) && (end < tensor->size[cdim]), 2, \"end index out of bound\");\n\n        THArgCheck((end >= start), 2, \"end index must be greater or equal to start index\");\n\n        THTensor_(narrow)(tensor, NULL, cdim++, start, end-start+1);\n      }\n      else\n      {\n  
      break;\n      }\n    }\n    if(!done) {\n      /* doing a copy */\n      void *src;\n      if (lua_isnumber(L,3)) {\n#ifndef TH_REAL_IS_HALF\n        THTensor_(fill)(tensor, LUA_NUMBER_TO_REAL(lua_tonumber(L,3)));\n#else\n        THError(\"fill on torch.HalfTensor not yet supported\");\n#endif\n      } else if( (src = luaT_toudata(L, 3, torch_Tensor)) ) {\n        THTensor_(copy)(tensor, src);\n      } else if( (src = luaT_toudata(L, 3, \"torch.ByteTensor\")) ) {\n        THTensor_(copyByte)(tensor, src);\n      } else if( (src = luaT_toudata(L, 3, \"torch.CharTensor\")) ) {\n        THTensor_(copyChar)(tensor, src);\n      } else if( (src = luaT_toudata(L, 3, \"torch.ShortTensor\")) ) {\n        THTensor_(copyShort)(tensor, src);\n      } else if( (src = luaT_toudata(L, 3, \"torch.IntTensor\")) ) {\n        THTensor_(copyInt)(tensor, src);\n      } else if( (src = luaT_toudata(L, 3, \"torch.LongTensor\")) ) {\n        THTensor_(copyLong)(tensor, src);\n      } else if( (src = luaT_toudata(L, 3, \"torch.FloatTensor\")) ) {\n        THTensor_(copyFloat)(tensor, src);\n      } else if( (src = luaT_toudata(L, 3, \"torch.DoubleTensor\")) ) {\n        THTensor_(copyDouble)(tensor, src);\n      } else if( (src = luaT_toudata(L, 3, \"torch.HalfTensor\")) ) {\n        THTensor_(copyHalf)(tensor, src);\n      } else {\n        luaL_typerror(L, 3, \"torch.*Tensor\");\n      }\n    }\n    THTensor_(free)(tensor);\n    lua_pushboolean(L, 1);\n  }\n  else if((mask = luaT_toudata(L, 2, \"torch.ByteTensor\")))\n  {\n#ifndef TH_REAL_IS_HALF\n    THTensor *vals;\n    if (lua_isnumber(L, 3))\n    {\n      THTensor_(maskedFill)(tensor, mask, luaG_(checkreal)(L,3));\n    }\n    else if((vals = luaT_toudata(L, 3, torch_Tensor)))\n    {\n      THTensor_(maskedCopy)(tensor, mask, vals);\n    }\n    else\n    {\n      THError(\"number or \" torch_Tensor \" expected\");\n    }\n#else\n    THError(\"ByteTensor indexing not yet supported with half types\");\n#endif\n  }\n  else\n    
lua_pushboolean(L, 0);\n\n  return 1;\n}\n\nstatic int torch_Tensor_(__index__)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  THLongStorage *idx = NULL;\n  THByteTensor *mask;\n\n  if(lua_isnumber(L, 2))\n  {\n    long index = luaL_checklong(L,2)-1;\n\n    THArgCheck(tensor->nDimension > 0, 1, \"empty tensor\");\n    if (index < 0) index = tensor->size[0] + index + 1;\n    THArgCheck(index >= 0 && index < tensor->size[0], 2, \"out of range\");\n\n    if(tensor->nDimension == 1)\n    {\n      luaG_(pushreal)(L, THStorage_(get)(tensor->storage, tensor->storageOffset+index*tensor->stride[0]));\n    }\n    else\n    {\n      tensor = THTensor_(newWithTensor)(tensor);\n      THTensor_(select)(tensor, NULL, 0, index);\n      luaT_pushudata(L, tensor, torch_Tensor);\n    }\n    lua_pushboolean(L, 1);\n    return 2;\n  }\n  else if((idx = luaT_toudata(L, 2, \"torch.LongStorage\")))\n  {\n    ptrdiff_t index = THTensor_(storageOffset)(tensor);\n    int dim;\n\n    THArgCheck(idx->size == tensor->nDimension, 2, \"invalid size\");\n\n    for(dim = 0; dim < idx->size; dim++)\n    {\n      long z = idx->data[dim]-1;\n      if (z < 0) z = tensor->size[dim] + z + 1;\n      THArgCheck((z >= 0) && (z < tensor->size[dim]), 2, \"index out of bound\");\n      index += z*tensor->stride[dim];\n    }\n    luaG_(pushreal)(L, THStorage_(get)(THTensor_(storage)(tensor), index));\n    lua_pushboolean(L, 1);\n    return 2;\n  }\n  else if(lua_istable(L, 2))\n  {\n    int dim;\n    int cdim = 0;\n    int ndims;\n    int done = 0;\n\n    ndims = tensor->nDimension;\n    THArgCheck(lua_objlen(L, 2) <= ndims, 2, \"too many indices provided\");\n    tensor = THTensor_(newWithTensor)(tensor);\n\n    for(dim = 0; dim < ndims; dim++)\n    {\n      lua_rawgeti(L, 2, dim+1);\n      if(lua_isnumber(L, -1))\n      {\n        long z = lua_tonumber(L, -1)-1;\n        lua_pop(L, 1);\n        if (z < 0) z = tensor->size[cdim] + z + 1;\n        THArgCheck((z >= 0) && (z < 
tensor->size[cdim]), 2, \"index out of bound\");\n        if(tensor->nDimension == 1) {\n          done = 1;\n          luaG_(pushreal)(L, THStorage_(get)(tensor->storage, tensor->storageOffset+z*tensor->stride[0]));\n        } else {\n          THTensor_(select)(tensor, NULL, cdim, z);\n        }\n      }\n      else if (lua_istable(L, -1))\n      {\n        long start = 0;\n        long end = tensor->size[cdim]-1;\n        lua_rawgeti(L, -1, 1);\n        if(lua_isnumber(L, -1)) {\n          start = lua_tonumber(L, -1)-1;\n          end = start;\n        }\n        lua_pop(L, 1);\n        if (start < 0) start = tensor->size[cdim] + start + 1;\n        THArgCheck((start >= 0) && (start < tensor->size[cdim]), 2, \"start index out of bound\");\n\n        lua_rawgeti(L, -1, 2);\n        if(lua_isnumber(L, -1)) {\n          end = lua_tonumber(L, -1)-1;\n        }\n        lua_pop(L, 2);\n        if (end < 0) end = tensor->size[cdim] + end + 1;\n        THArgCheck((end >= 0) && (end < tensor->size[cdim]), 2, \"end index out of bound\");\n\n        THArgCheck((end >= start), 2, \"end index must be greater or equal to start index\");\n\n        THTensor_(narrow)(tensor, NULL, cdim++, start, end-start+1);\n      }\n      else\n      {\n        break;\n      }\n    }\n    if(!done) {\n      luaT_pushudata(L, tensor, torch_Tensor);\n    } else {\n      THTensor_(free)(tensor);\n    }\n    lua_pushboolean(L, 1);\n    return 2;\n  }\n  else if((mask = luaT_toudata(L, 2, \"torch.ByteTensor\")))\n  {\n#ifndef TH_REAL_IS_HALF\n    THTensor *vals = THTensor_(new)();\n    THTensor_(maskedSelect)(vals, tensor, mask);\n    luaT_pushudata(L, vals, torch_Tensor);\n    lua_pushboolean(L, 1);\n    return 2;\n#else\n    THError(\"ByteTensor based indexing not yetsupported with half type\");\n    return 0;\n#endif\n  }\n  else\n  {\n    lua_pushboolean(L, 0);\n    return 1;\n  }\n}\n\nstatic int torch_Tensor_(retain)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, 
torch_Tensor);\n  THTensor_(retain)(tensor);\n  return 0;\n}\n\nstatic int torch_Tensor_(free)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  THTensor_(free)(tensor);\n  return 0;\n}\n\n/* helpful functions */\nstatic void torch_Tensor_(c_readSizeStride)(lua_State *L, int index, int allowStride, THLongStorage **size_, THLongStorage **stride_)\n{\n  THLongStorage *size = NULL;\n  THLongStorage *stride = NULL;\n\n  if( (size = luaT_toudata(L, index, \"torch.LongStorage\")) )\n  {\n    if(!lua_isnoneornil(L, index+1))\n    {\n      if( (stride = luaT_toudata(L, index+1, \"torch.LongStorage\")) )\n        THArgCheck(stride->size == size->size, index+1, \"provided stride and size are inconsistent\");\n      else\n        THArgCheck(0, index+1, \"torch.LongStorage expected\");\n    }\n    THLongStorage_retain(size);\n    if(stride)\n      THLongStorage_retain(stride);\n  }\n  else\n  {\n    int i;\n\n    size = THLongStorage_newWithSize(8);\n    stride = THLongStorage_newWithSize(8);\n    THLongStorage_fill(size, -1);\n    THLongStorage_fill(stride, -1);\n\n    if(allowStride)\n    {\n      for(i = 0; i < 8; i++)\n      {\n        if(lua_isnone(L, index+2*i))\n          break;\n        size->data[i] = luaL_checklong(L, index+2*i);\n\n        if(lua_isnone(L, index+2*i+1))\n          break;\n        stride->data[i] = luaL_checklong(L, index+2*i+1);\n      }\n    }\n    else\n    {\n      for(i = 0; i < 8; i++)\n      {\n        if(lua_isnone(L, index+i))\n          break;\n        size->data[i] = luaL_checklong(L, index+i);\n      }\n    }\n  }\n\n  *size_ = size;\n  *stride_ = stride;\n}\n\nstatic void torch_Tensor_(c_readTensorStorageSizeStride)(lua_State *L, int index, int allowNone, int allowTensor, int allowStorage, int allowStride,\n                                                         THStorage **storage_, ptrdiff_t *storageOffset_, THLongStorage **size_, THLongStorage **stride_)\n{\n  THTensor *src = NULL;\n  THStorage *storage = 
NULL;\n\n  int arg1Type = lua_type(L, index);\n\n  if( allowNone && (arg1Type == LUA_TNONE) )\n  {\n    *storage_ = NULL;\n    *storageOffset_ = 0;\n    *size_ = NULL;\n    *stride_ = NULL;\n    return;\n  }\n  else if( allowTensor && (arg1Type == LUA_TUSERDATA) && (src = luaT_toudata(L, index, torch_Tensor)) )\n  {\n    *storage_ = src->storage;\n    *storageOffset_ = src->storageOffset;\n    *size_ = THTensor_(newSizeOf)(src);\n    *stride_ = THTensor_(newStrideOf)(src);\n    return;\n  }\n  else if( allowStorage && (arg1Type == LUA_TUSERDATA) && (storage = luaT_toudata(L, index, torch_Storage)) )\n  {\n    *storage_ = storage;\n    if(lua_isnone(L, index+1))\n    {\n      *storageOffset_ = 0;\n      *size_ = THLongStorage_newWithSize1(storage->size);\n      *stride_ = THLongStorage_newWithSize1(1);\n    }\n    else\n    {\n      *storageOffset_ = luaL_checkinteger(L, index+1)-1;\n      torch_Tensor_(c_readSizeStride)(L, index+2, allowStride, size_, stride_);\n    }\n    return;\n  }\n  else if( (arg1Type == LUA_TNUMBER) || (luaT_toudata(L, index, \"torch.LongStorage\")) )\n  {\n    *storage_ = NULL;\n    *storageOffset_ = 0;\n    torch_Tensor_(c_readSizeStride)(L, index, 0, size_, stride_);\n\n    return;\n  }\n\n  *storage_ = NULL;\n  *storageOffset_ = 0;\n  if(allowTensor && allowStorage)\n      THArgCheck(0, index, \"expecting number or \" torch_Tensor \" or \" torch_Storage );\n  else if(allowTensor)\n      THArgCheck(0, index, \"expecting number or \" torch_Tensor );\n  else if(allowStorage)\n      THArgCheck(0, index, \"expecting number or \" torch_Storage );\n  else\n      THArgCheck(0, index, \"expecting number\");\n}\n\n#ifndef TH_REAL_IS_HALF\nstatic int torch_Tensor_(apply)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  luaL_checktype(L, 2, LUA_TFUNCTION);\n  lua_settop(L, 2);\n\n  TH_TENSOR_APPLY(real, tensor,\n                  lua_pushvalue(L, 2);\n                  luaG_(pushreal)(L, *tensor_data);\n                 
 lua_call(L, 1, 1);\n                  if(lua_isnumber(L, 3))\n                  {\n                    *tensor_data = luaG_(checkreal)(L, 3);\n                    lua_pop(L, 1);\n                  }\n                  else if(lua_isnil(L, 3))\n                    lua_pop(L, 1);\n                  else\n                    THError(\"given function should return a number or nil\"););\n\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_Tensor_(map)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  THTensor *src = luaT_checkudata(L, 2, torch_Tensor);\n  luaL_checktype(L, 3, LUA_TFUNCTION);\n  lua_settop(L, 3);\n\n  TH_TENSOR_APPLY2(real, tensor, real, src,\n                  lua_pushvalue(L, 3);\n                  luaG_(pushreal)(L, *tensor_data);\n                  luaG_(pushreal)(L, *src_data);\n                  lua_call(L, 2, 1);\n                  if(lua_isnumber(L, 4))\n                  {\n                    *tensor_data = luaG_(checkreal)(L, 4);\n                    lua_pop(L, 1);\n                  }\n                  else if(lua_isnil(L, 4))\n                    lua_pop(L, 1);\n                  else\n                    THError(\"given function should return a number or nil\"););\n\n  lua_settop(L, 1);\n  return 1;\n}\n\nstatic int torch_Tensor_(map2)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  THTensor *src1 = luaT_checkudata(L, 2, torch_Tensor);\n  THTensor *src2 = luaT_checkudata(L, 3, torch_Tensor);\n  luaL_checktype(L, 4, LUA_TFUNCTION);\n  lua_settop(L, 4);\n\n  TH_TENSOR_APPLY3(real, tensor, real, src1, real, src2,\n                  lua_pushvalue(L, 4);\n                  luaG_(pushreal)(L, *tensor_data);\n                  luaG_(pushreal)(L, *src1_data);\n                  luaG_(pushreal)(L, *src2_data);\n                  lua_call(L, 3, 1);\n                  if(lua_isnumber(L, 5))\n                  {\n                    *tensor_data = luaG_(checkreal)(L, 5);\n              
      lua_pop(L, 1);\n                  }\n                  else if(lua_isnil(L, 5))\n                    lua_pop(L, 1);\n                  else\n                    THError(\"given function should return a number or nil\"););\n\n  lua_settop(L, 1);\n  return 1;\n}\n#endif\n\nstatic int torch_Tensor_(factory)(lua_State *L)\n{\n  THTensor *tensor = THTensor_(new)();\n  luaT_pushudata(L, tensor, torch_Tensor);\n  return 1;\n}\n\nstatic int torch_Tensor_(write)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  THFile *file = luaT_checkudata(L, 2, \"torch.File\");\n\n  THFile_writeIntScalar(file, tensor->nDimension);\n  THFile_writeLongRaw(file, tensor->size, tensor->nDimension);\n  THFile_writeLongRaw(file, tensor->stride, tensor->nDimension);\n  THFile_writeLongScalar(file, tensor->storageOffset+1); /* to respect Lua convention */\n\n  lua_getfield(L, 2, \"writeObject\"); /* the method */\n  lua_pushvalue(L, 2); /* the file */\n  /* the storage */\n  if(tensor->storage)\n  {\n    THStorage_(retain)(tensor->storage);\n    luaT_pushudata(L, tensor->storage, torch_Storage);\n  }\n  else\n    lua_pushnil(L);\n\n  lua_call(L, 2, 0); /* call the method */\n\n  return 0;\n}\n\nstatic int torch_Tensor_(read)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  THFile *file = luaT_checkudata(L, 2, \"torch.File\");\n\n  tensor->nDimension = THFile_readIntScalar(file);\n  tensor->size = THAlloc(sizeof(long)*tensor->nDimension);\n  tensor->stride = THAlloc(sizeof(long)*tensor->nDimension);\n  THFile_readLongRaw(file, tensor->size, tensor->nDimension);\n  THFile_readLongRaw(file, tensor->stride, tensor->nDimension);\n  tensor->storageOffset = THFile_readLongScalar(file);\n  tensor->storageOffset--;  /* to respect Lua convention */\n\n  lua_getfield(L, 2, \"readObject\"); /* the method */\n  lua_pushvalue(L, 2); /* the file */\n  lua_call(L, 1, 1); /* call the method */\n\n  tensor->storage = luaT_toudata(L, -1, 
torch_Storage);\n  if(tensor->storage)\n    THStorage_(retain)(tensor->storage);\n\n  return 0;\n}\n\nstatic const struct luaL_Reg torch_Tensor_(_) [] = {\n  {\"retain\", torch_Tensor_(retain)},\n  {\"free\", torch_Tensor_(free)},\n  {\"contiguous\", torch_Tensor_(contiguous)},\n  {\"size\", torch_Tensor_(size)},\n  {\"elementSize\", torch_Tensor_(elementSize)},\n  {\"__len__\", torch_Tensor_(size)},\n  {\"stride\", torch_Tensor_(stride)},\n  {\"dim\", torch_Tensor_(nDimension)},\n  {\"nDimension\", torch_Tensor_(nDimension)},\n  {\"set\", torch_Tensor_(set)},\n  {\"storage\", torch_Tensor_(storage)},\n  {\"storageOffset\", torch_Tensor_(storageOffset)},\n  {\"clone\", torch_Tensor_(clone)},\n  {\"contiguous\", torch_Tensor_(contiguous)},\n  {\"resizeAs\", torch_Tensor_(resizeAs)},\n  {\"resize\", torch_Tensor_(resize)},\n  {\"narrow\", torch_Tensor_(narrow)},\n  {\"sub\", torch_Tensor_(sub)},\n  {\"select\", torch_Tensor_(select)},\n#ifndef TH_REAL_IS_HALF\n  {\"index\", torch_Tensor_(indexSelect)},\n  {\"indexCopy\", torch_Tensor_(indexCopy)},\n  {\"indexAdd\", torch_Tensor_(indexAdd)},\n  {\"indexFill\", torch_Tensor_(indexFill)},\n  {\"maskedSelect\", torch_Tensor_(maskedSelect)},\n  {\"maskedCopy\", torch_Tensor_(maskedCopy)},\n  {\"maskedFill\", torch_Tensor_(maskedFill)},\n#endif\n  {\"transpose\", torch_Tensor_(transpose)},\n  {\"t\", torch_Tensor_(t)},\n  {\"unfold\", torch_Tensor_(unfold)},\n  {\"isContiguous\", torch_Tensor_(isContiguous)},\n  {\"isSameSizeAs\", torch_Tensor_(isSameSizeAs)},\n  {\"isSetTo\", torch_Tensor_(isSetTo)},\n  {\"isSize\", torch_Tensor_(isSize)},\n  {\"nElement\", torch_Tensor_(nElement)},\n  {\"copy\", torch_Tensor_(copy)},\n#ifndef TH_REAL_IS_HALF\n  {\"apply\", torch_Tensor_(apply)},\n  {\"map\", torch_Tensor_(map)},\n  {\"map2\", torch_Tensor_(map2)},\n#endif\n  {\"read\", torch_Tensor_(read)},\n  {\"write\", torch_Tensor_(write)},\n  {\"__index__\", torch_Tensor_(__index__)},\n  {\"__newindex__\", 
torch_Tensor_(__newindex__)},\n  {NULL, NULL}\n};\n\nvoid torch_Tensor_(init)(lua_State *L)\n{\n  luaT_newmetatable(L, torch_Tensor, NULL,\n                    torch_Tensor_(new), torch_Tensor_(free), torch_Tensor_(factory));\n  luaT_setfuncs(L, torch_Tensor_(_), 0);\n  lua_pop(L, 1);\n#ifndef TH_REAL_IS_HALF\n  THVector_(vectorDispatchInit)();\n#endif\n}\n\n#endif\n"
  },
  {
    "path": "generic/TensorOperator.c",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/TensorOperator.c\"\n#else\n\n#include \"luaG.h\"\n\nstatic int torch_TensorOperator_(__add__)(lua_State *L)\n{\n  THTensor *tensor1 = luaT_toudata(L, 1, torch_Tensor);\n  THTensor *tensor2 = luaT_toudata(L, 2, torch_Tensor);\n  THTensor *r;\n\n  if(!tensor1 && !tensor2)\n    luaL_error(L, \"expecting two \" torch_Tensor \"s or one \" torch_Tensor \" and one number\");\n  else\n  {\n    r = THTensor_(new)();\n    luaT_pushudata(L, r, torch_Tensor);\n\n    if(!tensor1 && tensor2)\n    {\n      THTensor_(resizeAs)(r, tensor2);\n      THTensor_(copy)(r, tensor2);\n      THTensor_(add)(r, r, luaG_(checkreal)(L, 1));\n    }\n    else if(tensor1 && !tensor2)\n    {\n      THTensor_(resizeAs)(r, tensor1);\n      THTensor_(copy)(r, tensor1);\n      THTensor_(add)(r, r, luaG_(checkreal)(L, 2));\n    }\n    else\n    {\n      THTensor_(resizeAs)(r, tensor1);\n      THTensor_(copy)(r, tensor1);\n      THTensor_(cadd)(r, r, 1, tensor2);\n    }\n  }\n  return 1;\n}\n\nstatic int torch_TensorOperator_(__sub__)(lua_State *L)\n{\n  THTensor *tensor1 = luaT_toudata(L, 1, torch_Tensor);\n  THTensor *tensor2 = luaT_toudata(L, 2, torch_Tensor);\n  THTensor *r;\n\n  if(!tensor1 && !tensor2)\n    luaL_error(L, \"expecting two \" torch_Tensor \"s or one \" torch_Tensor \" and one number\");\n  else\n  {\n    r = THTensor_(new)();\n    luaT_pushudata(L, r, torch_Tensor);\n\n    if(!tensor1 && tensor2)\n    {\n      THTensor_(resizeAs)(r, tensor2);\n      THTensor_(fill)(r, luaG_(checkreal)(L, 1));\n      THTensor_(cadd)(r, r, -1, tensor2);\n    }\n    else if(tensor1 && !tensor2)\n    {\n      THTensor_(resizeAs)(r, tensor1);\n      THTensor_(copy)(r, tensor1);\n      THTensor_(add)(r, r, -luaG_(checkreal)(L, 2));\n    }\n    else\n    {\n      THTensor_(resizeAs)(r, tensor1);\n      THTensor_(copy)(r, tensor1);\n      THTensor_(cadd)(r, r, -1, tensor2);\n    }\n  }\n  return 1;\n}\n\nstatic int 
torch_TensorOperator_(__unm__)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  THTensor *r;\n\n  r = THTensor_(new)();\n  luaT_pushudata(L, r, torch_Tensor);\n  THTensor_(resizeAs)(r, tensor);\n  THTensor_(copy)(r, tensor);\n  THTensor_(mul)(r, r, -1);\n\n  return 1;\n}\n\nstatic int torch_TensorOperator_(__mul__)(lua_State *L)\n{\n  THTensor *tensor1 = luaT_toudata(L, 1, torch_Tensor);\n  THTensor *tensor2 = luaT_toudata(L, 2, torch_Tensor);\n  THTensor *r;\n\n  if(!tensor1 && !tensor2)\n    luaL_error(L, \"expecting two \" torch_Tensor \"s or one \" torch_Tensor \" and one number\");\n  else\n  {\n    r = THTensor_(new)();\n    luaT_pushudata(L, r, torch_Tensor);\n\n    if(!tensor1 && tensor2)\n    {\n      THTensor_(resizeAs)(r, tensor2);\n      THTensor_(copy)(r, tensor2);\n      THTensor_(mul)(r, r, luaG_(checkreal)(L, 1));\n    }\n    else if(tensor1 && !tensor2)\n    {\n      THTensor_(resizeAs)(r, tensor1);\n      THTensor_(copy)(r, tensor1);\n      THTensor_(mul)(r, r, luaG_(checkreal)(L, 2));\n    }\n    else\n    {\n      int dimt = tensor1->nDimension;\n      int dims = tensor2->nDimension;\n\n      if(dimt == 1 && dims == 1)\n        luaG_(pushreal)(L, THTensor_(dot)(tensor1, tensor2)); /* ok, we wasted r, but who cares */\n      else if(dimt == 2 && dims == 1)\n      {\n        THTensor_(resize1d)(r, tensor1->size[0]);\n        THTensor_(zero)(r);\n        THTensor_(addmv)(r, 1, r, 1, tensor1, tensor2);\n      }\n      else if(dimt == 2 && dims == 2)\n      {\n        THTensor_(resize2d)(r, tensor1->size[0], tensor2->size[1]);\n        THTensor_(zero)(r);\n        THTensor_(addmm)(r, 1, r, 1, tensor1, tensor2);\n      }\n      else\n        luaL_error(L, \"multiplication between %dD and %dD tensors not yet supported\", tensor1->nDimension, tensor2->nDimension);\n    }\n  }\n  return 1;\n}\n\nstatic int torch_TensorOperator_(__div__)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  THTensor 
*r;\n\n  THArgCheck(lua_isnumber(L,2), 2, \"number expected\");\n\n  r = THTensor_(new)();\n  luaT_pushudata(L, r, torch_Tensor);\n\n  THTensor_(resizeAs)(r, tensor);\n  THTensor_(copy)(r, tensor);\n  THTensor_(div)(r, r, lua_tonumber(L, 2));\n\n  return 1;\n}\n\nstatic int torch_TensorOperator_(__mod__)(lua_State *L)\n{\n  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);\n  THTensor *r;\n\n  THArgCheck(lua_isnumber(L,2), 2, \"number expected\");\n\n  r = THTensor_(new)();\n  luaT_pushudata(L, r, torch_Tensor);\n\n  THTensor_(resizeAs)(r, tensor);\n  THTensor_(copy)(r, tensor);\n  THTensor_(remainder)(r, r, lua_tonumber(L, 2));\n\n  return 1;\n}\n\nstatic const struct luaL_Reg torch_TensorOperator_(_) [] = {\n  {\"__add__\", torch_TensorOperator_(__add__)},\n  {\"__sub__\", torch_TensorOperator_(__sub__)},\n  {\"__unm__\", torch_TensorOperator_(__unm__)},\n  {\"__mul__\", torch_TensorOperator_(__mul__)},\n  {\"__div__\", torch_TensorOperator_(__div__)},\n  {\"__mod__\", torch_TensorOperator_(__mod__)},\n  {NULL, NULL}\n};\n\nvoid torch_TensorOperator_(init)(lua_State *L)\n{\n  luaT_pushmetatable(L, torch_Tensor);\n  luaT_setfuncs(L, torch_TensorOperator_(_), 0);\n  lua_pop(L, 1);\n}\n\n#endif\n"
  },
  {
    "path": "generic/luaG.h",
    "content": "#if !defined(real) || !defined(TH_GENERIC_FILE)\n#error \"luaG.h must not be included outside of a generic file.\"\n#endif\n\n#ifndef luaG_\n#define luaG_(NAME) TH_CONCAT_3(luaG_,Real,NAME)\n#endif\n\n#undef REAL_TO_LUA_NUMBER\n#undef LUA_NUMBER_TO_REAL\n\n#if defined(TH_REAL_IS_HALF)\n# define REAL_TO_LUA_NUMBER(n)   (lua_Number)TH_half2float(n)\n# define LUA_NUMBER_TO_REAL(n)    TH_float2half((lua_Number)n)\n#else\n# define REAL_TO_LUA_NUMBER(n)   (lua_Number)(n)\n# define LUA_NUMBER_TO_REAL(n)   (real)n\n#endif\n\n\n\nstatic void luaG_(pushreal)(lua_State *L, real n) {\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF) || LUA_VERSION_NUM < 503\n  lua_pushnumber(L, REAL_TO_LUA_NUMBER(n));\n#elif defined(TH_REAL_IS_BYTE) || defined(TH_REAL_IS_CHAR) || defined(TH_REAL_IS_SHORT) \\\n  || defined(TH_REAL_IS_INT) || defined(TH_REAL_IS_LONG)\n\tlua_pushinteger(L, (lua_Integer)n);\n#else\n\t#error \"unhandled real type in luaG_pushreal\"\n#endif\n}\n\nstatic real luaG_(checkreal)(lua_State *L, int idx) {\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)\n  return LUA_NUMBER_TO_REAL(luaL_checknumber(L, idx));\n#elif defined(TH_REAL_IS_BYTE) || defined(TH_REAL_IS_CHAR) || defined(TH_REAL_IS_SHORT) || defined(TH_REAL_IS_INT) || defined(TH_REAL_IS_LONG)\n        int type = lua_type(L, idx);\n        if (type == LUA_TSTRING) {\n          const char *str = lua_tolstring(L, idx, NULL);\n          long int num = strtol(str, NULL, 0);\n          return (real) num;\n        } else {\n#if LUA_VERSION_NUM < 503\n          return (lua_Number)luaL_checkinteger(L, idx);\n#else\n          return (lua_Integer)luaL_checkinteger(L, idx);\n#endif\n        }\n#else\n\t#error \"unhandled real type in luaG_checkreal\"\n#endif\n}\n\nstatic real luaG_(optreal)(lua_State *L, int idx, real n) {\n#if defined(TH_REAL_IS_HALF) || defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || LUA_VERSION_NUM < 
503\n  return LUA_NUMBER_TO_REAL(luaL_optnumber(L, idx, REAL_TO_LUA_NUMBER(n)));\n#elif defined(TH_REAL_IS_BYTE) || defined(TH_REAL_IS_CHAR) || defined(TH_REAL_IS_SHORT) || defined(TH_REAL_IS_INT) || defined(TH_REAL_IS_LONG)\n\treturn (lua_Integer)luaL_optinteger(L, idx, (lua_Integer)n);\n#else\n\t#error \"unhandled real type in luaG_checkreal\"\n#endif\n}\n"
  },
  {
    "path": "init.c",
    "content": "#include \"general.h\"\n#include \"utils.h\"\n\nextern void torch_utils_init(lua_State *L);\nextern void torch_random_init(lua_State *L);\nextern void torch_File_init(lua_State *L);\nextern void torch_DiskFile_init(lua_State *L);\nextern void torch_MemoryFile_init(lua_State *L);\nextern void torch_PipeFile_init(lua_State *L);\nextern void torch_Timer_init(lua_State *L);\n\nextern void torch_ByteStorage_init(lua_State *L);\nextern void torch_CharStorage_init(lua_State *L);\nextern void torch_ShortStorage_init(lua_State *L);\nextern void torch_IntStorage_init(lua_State *L);\nextern void torch_LongStorage_init(lua_State *L);\nextern void torch_FloatStorage_init(lua_State *L);\nextern void torch_DoubleStorage_init(lua_State *L);\nextern void torch_HalfStorage_init(lua_State *L);\n\nextern void torch_ByteTensor_init(lua_State *L);\nextern void torch_CharTensor_init(lua_State *L);\nextern void torch_ShortTensor_init(lua_State *L);\nextern void torch_IntTensor_init(lua_State *L);\nextern void torch_LongTensor_init(lua_State *L);\nextern void torch_FloatTensor_init(lua_State *L);\nextern void torch_DoubleTensor_init(lua_State *L);\nextern void torch_HalfTensor_init(lua_State *L);\n\nextern void torch_ByteTensorOperator_init(lua_State *L);\nextern void torch_CharTensorOperator_init(lua_State *L);\nextern void torch_ShortTensorOperator_init(lua_State *L);\nextern void torch_IntTensorOperator_init(lua_State *L);\nextern void torch_LongTensorOperator_init(lua_State *L);\nextern void torch_FloatTensorOperator_init(lua_State *L);\nextern void torch_DoubleTensorOperator_init(lua_State *L);\n\n\nextern void torch_TensorMath_init(lua_State *L);\n\n\nLUA_EXTERNC DLL_EXPORT int luaopen_libtorch(lua_State *L);\n\nint luaopen_libtorch(lua_State *L)\n{\n\n  lua_newtable(L);\n  lua_pushvalue(L, -1);\n  lua_setglobal(L, \"torch\");\n\n  torch_utils_init(L);\n  torch_File_init(L);\n\n  torch_ByteStorage_init(L);\n  torch_CharStorage_init(L);\n  torch_ShortStorage_init(L);\n 
 torch_IntStorage_init(L);\n  torch_LongStorage_init(L);\n  torch_FloatStorage_init(L);\n  torch_DoubleStorage_init(L);\n  torch_HalfStorage_init(L);\n\n  torch_ByteTensor_init(L);\n  torch_CharTensor_init(L);\n  torch_ShortTensor_init(L);\n  torch_IntTensor_init(L);\n  torch_LongTensor_init(L);\n  torch_FloatTensor_init(L);\n  torch_DoubleTensor_init(L);\n  torch_HalfTensor_init(L);\n\n  torch_ByteTensorOperator_init(L);\n  torch_CharTensorOperator_init(L);\n  torch_ShortTensorOperator_init(L);\n  torch_IntTensorOperator_init(L);\n  torch_LongTensorOperator_init(L);\n  torch_FloatTensorOperator_init(L);\n  torch_DoubleTensorOperator_init(L);\n\n  torch_Timer_init(L);\n  torch_DiskFile_init(L);\n  torch_PipeFile_init(L);\n  torch_MemoryFile_init(L);\n\n  torch_TensorMath_init(L);\n\n  torch_random_init(L);\n\n  // Create 'torch.Allocator' type.\n  luaT_newmetatable(L, \"torch.Allocator\", NULL, NULL, NULL, NULL);\n\n  return 1;\n}\n"
  },
  {
    "path": "init.lua",
    "content": "-- We are using paths.require to appease mkl\n\n-- Make this work with LuaJIT in Lua 5.2 compatibility mode, which\n-- renames string.gfind (already deprecated in 5.1)\nif not string.gfind then\n   string.gfind = string.gmatch\nend\nif not table.unpack then\n   table.unpack = unpack\nend\n\nrequire \"paths\"\npaths.require \"libtorch\"\n\n-- Keep track of all thread local variables torch.\n-- if a Lua VM is passed to another thread thread local\n-- variables need to be updated.\nfunction torch.updatethreadlocals()\n   torch.updateerrorhandlers()\n   local tracking = torch._heaptracking\n   if tracking == nil then tracking = false end\n   torch.setheaptracking(tracking)\nend\n\n--- package stuff\nfunction torch.packageLuaPath(name)\n   if not name then\n      local ret = string.match(torch.packageLuaPath('torch'), '(.*)/')\n      if not ret then --windows?\n         ret = string.match(torch.packageLuaPath('torch'), '(.*)\\\\')\n      end\n      return ret\n   end\n   for path in string.gmatch(package.path, \"[^;]+\") do\n      path = string.gsub(path, \"%?\", name)\n      local f = io.open(path)\n      if f then\n         f:close()\n         local ret = string.match(path, \"(.*)/\")\n         if not ret then --windows?\n            ret = string.match(path, \"(.*)\\\\\")\n         end\n         return ret\n      end\n   end\nend\n\nlocal function include(file, depth)\n   paths.dofile(file, 3 + (depth or 0))\nend\nrawset(_G, 'include', include)\n\nfunction torch.include(package, file)\n   dofile(torch.packageLuaPath(package) .. '/' .. file)\nend\n\nfunction torch.class(...)\n   local tname, parenttname, module\n   if select('#', ...) == 3\n      and type(select(1, ...)) == 'string'\n      and type(select(2, ...)) == 'string'\n      and type(select(3, ...)) == 'table'\n   then\n      tname = select(1, ...)\n      parenttname = select(2, ...)\n      module = select(3, ...)\n   elseif select('#', ...) 
== 2\n      and type(select(1, ...)) == 'string'\n      and type(select(2, ...)) == 'string'\n   then\n      tname = select(1, ...)\n      parenttname = select(2, ...)\n   elseif select('#', ...) == 2\n      and type(select(1, ...)) == 'string'\n      and type(select(2, ...)) == 'table'\n   then\n      tname = select(1, ...)\n      module = select(2, ...)\n   elseif select('#', ...) == 1\n      and type(select(1, ...)) == 'string'\n   then\n      tname = select(1, ...)\n   else\n      error('<class name> [<parent class name>] [<module table>] expected')\n   end\n\n   local function constructor(...)\n      local self = {}\n      torch.setmetatable(self, tname)\n      if self.__init then\n         self:__init(...)\n      end\n      return self\n   end\n\n   local function factory()\n      local self = {}\n      torch.setmetatable(self, tname)\n      return self\n   end\n\n   local mt = torch.newmetatable(tname, parenttname, constructor, nil, factory, module)\n   local mpt\n   if parenttname then\n      mpt = torch.getmetatable(parenttname)\n   end\n   return mt, mpt\nend\n\nfunction torch.setdefaulttensortype(typename)\n   assert(type(typename) == 'string', 'string expected')\n   if torch.getconstructortable(typename) then\n      torch.Tensor = torch.getconstructortable(typename)\n      torch.Storage = torch.getconstructortable(torch.typename(torch.Tensor(1):storage()))\n   else\n      error(string.format(\"<%s> is not a string describing a torch object\", typename))\n   end\nend\n\nfunction torch.type(obj)\n   local class = torch.typename(obj)\n   if not class then\n      class = type(obj)\n   end\n   return class\nend\n\n--[[ See if a given object is an instance of the provided torch class. 
]]\nfunction torch.isTypeOf(obj, typeSpec)\n   -- typeSpec can be provided as either a string, pattern, or the constructor.\n   -- If the constructor is used, we look in the __typename field of the\n   -- metatable to find a string to compare to.\n   if type(typeSpec) ~= 'string' then\n      typeSpec = getmetatable(typeSpec).__typename\n\t  assert(type(typeSpec) == 'string',\n             \"type must be provided as [regexp] string, or factory\")\n   end\n\n   local mt = getmetatable(obj)\n   while mt do\n      if type(mt) == 'table' and mt.__typename then\n         local match = mt.__typename:match(typeSpec)\n         -- Require full match for non-pattern specs\n         if match and (match ~= typeSpec or match == mt.__typename) then\n            return true\n         end\n      end\n      mt = getmetatable(mt)\n   end\n   return false\nend\n\ntorch.setdefaulttensortype('torch.DoubleTensor')\n\nrequire('torch.Tensor')\nrequire('torch.File')\nrequire('torch.CmdLine')\nrequire('torch.FFInterface')\nrequire('torch.Tester')\nrequire('torch.TestSuite')\nrequire('torch.test')\nfunction torch.totable(obj)\n   if torch.isTensor(obj) or torch.isStorage(obj) then\n      return obj:totable()\n   else\n      error(\"obj must be a Storage or a Tensor\")\n   end\nend\n\nfunction torch.isTensor(obj)\n   local typename = torch.typename(obj)\n   if typename and typename:find('torch.*Tensor') then\n      return true\n   end\n   return false\nend\n\nfunction torch.isStorage(obj)\n   local typename = torch.typename(obj)\n   if typename and typename:find('torch.*Storage') then\n      return true\n   end\n   return false\nend\n-- alias for convenience\ntorch.Tensor.isTensor = torch.isTensor\n\n-- remove this line to disable automatic heap-tracking for garbage collection\ntorch.setheaptracking(true)\n\nfunction torch.multinomialAliasSetup(probs, state)\n   if torch.type(state) == 'table' then \n      state[1], state[2] = torch.multinomialAliasSetup_(probs, state[1], state[2])\n   else\n  
    state = {}\n      state[1], state[2] = torch.multinomialAliasSetup_(probs)\n   end\n   return state\nend\n\nfunction torch.multinomialAlias(output, state)\n   torch.DoubleTensor.multinomialAlias_(output, state[1], state[2])\n   return output\nend\n\nreturn torch\n"
  },
  {
    "path": "lib/CMakeLists.txt",
    "content": "SET(TH_INSTALL_BIN_SUBDIR \"${Torch_INSTALL_BIN_SUBDIR}\")\nSET(TH_INSTALL_LIB_SUBDIR \"${Torch_INSTALL_LIB_SUBDIR}\")\nSET(TH_INSTALL_INCLUDE_SUBDIR \"${Torch_INSTALL_INCLUDE_SUBDIR}\")\nSET(TH_INSTALL_CMAKE_SUBDIR \"${Torch_INSTALL_CMAKE_SUBDIR}\")\n\nADD_SUBDIRECTORY(TH)\nADD_SUBDIRECTORY(luaT)\n"
  },
  {
    "path": "lib/TH/CMakeLists.txt",
    "content": "cmake_minimum_required(VERSION 2.6)\n\n# avoid some cmake warnings\nIF(POLICY CMP0026)\n CMAKE_POLICY(SET CMP0026 OLD)\nENDIF()\n\nSET(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH})\nSET(CMAKE_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/ ${CMAKE_LIBRARY_PATH})\n\n# Can be compiled standalone\nIF(NOT TH_INSTALL_BIN_SUBDIR\n    OR NOT TH_INSTALL_LIB_SUBDIR\n    OR NOT TH_INSTALL_INCLUDE_SUBDIR\n    OR NOT TH_INSTALL_CMAKE_SUBDIR)\n\n  SET(TH_INSTALL_BIN_SUBDIR \"bin\" CACHE PATH \"TH install binary subdirectory\")\n  SET(TH_INSTALL_LIB_SUBDIR \"lib\" CACHE PATH \"TH install library subdirectory\")\n  SET(TH_INSTALL_INCLUDE_SUBDIR \"include\" CACHE PATH \"TH install include subdirectory\")\n  SET(TH_INSTALL_CMAKE_SUBDIR \"share/cmake/TH\" CACHE PATH \"TH install cmake subdirectory\")\nENDIF()\n\n######################################################################\n###### macros section\n#####################################################################\nIF(NOT ADD_TORCH_LIBRARY)\nMACRO(ADD_TORCH_LIBRARY package type src)\n  IF (\"${type}\" STREQUAL \"STATIC\")\n    if (\"${src}\" MATCHES \"cu$\" OR \"${src}\" MATCHES \"cu;\")\n      CUDA_ADD_LIBRARY(${package} STATIC ${src})\n    else()\n      ADD_LIBRARY(${package} STATIC ${src})\n    endif()\n  ELSE()\n    if (\"${src}\" MATCHES \"cu$\" OR \"${src}\" MATCHES \"cu;\")\n      CUDA_ADD_LIBRARY(${package} ${type} ${src})\n    else()\n      ADD_LIBRARY(${package} ${type} ${src})\n    endif()\n  ENDIF()\nENDMACRO()\nENDIF()\n\n#######################################################################\n##### flags section\n######################################################################\n\nIF(MSVC)\n  # MSVC now supports C99 since VS2013/VS2015, however the standard version switch is not provided yet\n  # SET(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} /std:c99\")\nELSE(MSVC)\n  # enable gnu99 and not c99 because we use\n  # gnu extensions like posix_memalign\n  SET(CMAKE_C_FLAGS 
\"${CMAKE_C_FLAGS} -std=gnu99\")\nENDIF(MSVC)\n\nIF(MSVC)\n  ADD_DEFINITIONS(-D_CRT_SECURE_NO_DEPRECATE=1)  # respect the standard\nENDIF(MSVC)\n\nIF(UNIX)\n  # prevent Unknown CMake command \"check_function_exists\".\n  INCLUDE(CheckFunctionExists)\nENDIF(UNIX)\n\n# OpenMP support?\nSET(WITH_OPENMP ON CACHE BOOL \"OpenMP support if available?\")\nIF (APPLE AND CMAKE_COMPILER_IS_GNUCC)\n  EXEC_PROGRAM (uname ARGS -v  OUTPUT_VARIABLE DARWIN_VERSION)\n  STRING (REGEX MATCH \"[0-9]+\" DARWIN_VERSION ${DARWIN_VERSION})\n  MESSAGE (STATUS \"MAC OS Darwin Version: ${DARWIN_VERSION}\")\n  IF (DARWIN_VERSION GREATER 9)\n    SET(APPLE_OPENMP_SUCKS 1)\n  ENDIF (DARWIN_VERSION GREATER 9)\n  EXECUTE_PROCESS (COMMAND ${CMAKE_C_COMPILER} -dumpversion\n    OUTPUT_VARIABLE GCC_VERSION)\n  IF (APPLE_OPENMP_SUCKS AND GCC_VERSION VERSION_LESS 4.6.2)\n    MESSAGE(STATUS \"Warning: Disabling OpenMP (unstable with this version of GCC)\")\n    MESSAGE(STATUS \" Install GCC >= 4.6.2 or change your OS to enable OpenMP\")\n    SET(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} -Wno-unknown-pragmas\")\n    SET(WITH_OPENMP OFF CACHE BOOL \"OpenMP support if available?\" FORCE)\n  ENDIF ()\nENDIF ()\n\nIF (WITH_OPENMP)\n  FIND_PACKAGE(OpenMP)\n  IF(OPENMP_FOUND)\n    MESSAGE(STATUS \"Compiling with OpenMP support\")\n    SET(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}\")\n    SET(CMAKE_CXX_FLAGS \"${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}\")\n    SET(CMAKE_EXE_LINKER_FLAGS \"${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}\")\n  ENDIF(OPENMP_FOUND)\nENDIF (WITH_OPENMP)\n\n# ARM specific flags\nFIND_PACKAGE(ARM)\nIF (ASIMD_FOUND)\n  MESSAGE(STATUS \"asimd/Neon found with compiler flag : -D__NEON__\")\n  SET(CMAKE_C_FLAGS \"-D__NEON__ ${CMAKE_C_FLAGS}\")\nELSEIF (NEON_FOUND)\n  MESSAGE(STATUS \"Neon found with compiler flag : -mfpu=neon -D__NEON__\")\n  SET(CMAKE_C_FLAGS \"-mfpu=neon -D__NEON__ ${CMAKE_C_FLAGS}\")\nENDIF (ASIMD_FOUND)\nIF (CORTEXA8_FOUND)\n  MESSAGE(STATUS \"Cortex-A8 Found with 
compiler flag : -mcpu=cortex-a8\")\n  SET(CMAKE_C_FLAGS \"-mcpu=cortex-a8 -fprefetch-loop-arrays ${CMAKE_C_FLAGS}\")\nENDIF (CORTEXA8_FOUND)\nIF (CORTEXA9_FOUND)\n  MESSAGE(STATUS \"Cortex-A9 Found with compiler flag : -mcpu=cortex-a9\")\n  SET(CMAKE_C_FLAGS \"-mcpu=cortex-a9 ${CMAKE_C_FLAGS}\")\nENDIF (CORTEXA9_FOUND)\n\nINCLUDE (CheckIncludeFile)\nINCLUDE (CheckCSourceCompiles)\nCHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H)\n# Check for a cpuid intrinsic\nIF(HAVE_CPUID_H)\n    CHECK_C_SOURCE_COMPILES(\"#include <cpuid.h>\n        int main()\n        {\n            unsigned int eax, ebx, ecx, edx;\n            return __get_cpuid(0, &eax, &ebx, &ecx, &edx);\n        }\" HAVE_GCC_GET_CPUID)\nENDIF()\nIF(HAVE_GCC_GET_CPUID)\n  SET(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} -DHAVE_GCC_GET_CPUID\")\nENDIF(HAVE_GCC_GET_CPUID)\n\nCHECK_C_SOURCE_COMPILES(\"#include <stdint.h>\n    static inline void cpuid(uint32_t *eax, uint32_t *ebx,\n    \t\t\t uint32_t *ecx, uint32_t *edx)\n    {\n      uint32_t a = *eax, b, c = *ecx, d;\n      asm volatile ( \\\"cpuid\\\" : \\\"+a\\\"(a), \\\"=b\\\"(b), \\\"+c\\\"(c), \\\"=d\\\"(d) );\n      *eax = a; *ebx = b; *ecx = c; *edx = d;\n    }\n    int main() {\n      uint32_t a,b,c,d;\n      cpuid(&a, &b, &c, &d);\n      return 0;\n    }\" NO_GCC_EBX_FPIC_BUG)\n\nIF(NOT NO_GCC_EBX_FPIC_BUG)\n  SET(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} -DUSE_GCC_GET_CPUID\")\nENDIF(NOT NO_GCC_EBX_FPIC_BUG)\n\n\nFIND_PACKAGE(SSE) # checks SSE, AVX and AVX2\nIF(C_SSE2_FOUND)\n  MESSAGE(STATUS \"SSE2 Found\")\n  SET(CMAKE_C_FLAGS \"${C_SSE2_FLAGS} -DUSE_SSE2 ${CMAKE_C_FLAGS}\")\nENDIF(C_SSE2_FOUND)\nIF(C_SSE3_FOUND)\n  MESSAGE(STATUS \"SSE3 Found\")\n  SET(CMAKE_C_FLAGS \"${C_SSE3_FLAGS} -DUSE_SSE3 ${CMAKE_C_FLAGS}\")\nENDIF(C_SSE3_FOUND)\n# we don't set -mavx and -mavx2 flags globally, but only for specific files\n# however, we want to enable the AVX codepaths, so we still need to\n# add USE_AVX and USE_AVX2 macro defines\nIF(C_AVX_FOUND)\n  MESSAGE(STATUS \"AVX Found\")\n  
SET(CMAKE_C_FLAGS \"-DUSE_AVX ${CMAKE_C_FLAGS}\")\nENDIF(C_AVX_FOUND)\nIF(C_AVX2_FOUND)\n  MESSAGE(STATUS \"AVX2 Found\")\n  SET(CMAKE_C_FLAGS \"-DUSE_AVX2 ${CMAKE_C_FLAGS}\")\nENDIF(C_AVX2_FOUND)\n\nCHECK_C_SOURCE_RUNS(\"\n#include <stdatomic.h>\nint main()\n{\n  int a;\n  int oa;\n  atomic_store(&a, 1);\n  atomic_fetch_add(&a, 1);\n  oa = atomic_load(&a);\n  if(!atomic_compare_exchange_strong(&a, &oa, 3))\n    return -1;\n  return 0;\n}\n\" HAS_C11_ATOMICS)\n\nIF(NOT HAS_C11_ATOMICS)\n  CHECK_C_SOURCE_RUNS(\"\n#include <intrin.h>\nint main()\n{\n  long a;\n  _InterlockedExchange(&a, 1);\n  _InterlockedExchangeAdd(&a, 1);\n  if(_InterlockedCompareExchange(&a, 3, 2) != 2)\n    return -1;\n  return 0;\n}\n\" HAS_MSC_ATOMICS)\n\n  CHECK_C_SOURCE_RUNS(\"\nint main()\n{\n  int a;\n  __sync_lock_test_and_set(&a, 1);\n  __sync_fetch_and_add(&a, 1);\n  if(!__sync_bool_compare_and_swap(&a, 2, 3))\n    return -1;\n  return 0;\n}\n\" HAS_GCC_ATOMICS)\nENDIF()\n\n#######################################################################\n##### sources section\n######################################################################\n\n# IF ANY SIMD FOUND\nIF(C_AVX2_FOUND OR C_AVX_FOUND OR C_SSE4_2_FOUND OR C_SSE4_1_FOUND)\n  SET(simd generic/simd/convolve.c)\nENDIF(C_AVX2_FOUND OR C_AVX_FOUND OR C_SSE4_2_FOUND OR C_SSE4_1_FOUND)\n\n# IF SSE4 FOUND\nIF(C_SSE4_1_FOUND AND C_SSE4_2_FOUND)\n  SET(CMAKE_C_FLAGS \"${C_SSE4_1_FLAGS} -DUSE_SSE4_1 ${C_SSE4_2_FLAGS} -DUSE_SSE4_2 ${CMAKE_C_FLAGS}\")\n  IF(MSVC)\n    SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_sse.c PROPERTIES COMPILE_FLAGS \"/Ox /fp:fast\")\n  ELSE(MSVC)\n    SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_sse.c PROPERTIES COMPILE_FLAGS \"-O3 -ffast-math\")\n  ENDIF(MSVC)\n  SET(simd ${simd} generic/simd/convolve5x5_sse.c)\nENDIF(C_SSE4_1_FOUND AND C_SSE4_2_FOUND)\n\n# IF AVX FOUND\nIF(C_AVX_FOUND)\n  IF(MSVC)\n    SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS \"/Ox 
/fp:fast ${C_AVX_FLAGS}\")\n    SET_SOURCE_FILES_PROPERTIES(vector/AVX.c PROPERTIES COMPILE_FLAGS \"/Ox /arch:AVX ${C_AVX_FLAGS}\")\n  ELSE(MSVC)\n    SET_SOURCE_FILES_PROPERTIES(generic/simd/convolve5x5_avx.c PROPERTIES COMPILE_FLAGS \"-O3 -ffast-math ${C_AVX_FLAGS}\")\n    SET_SOURCE_FILES_PROPERTIES(vector/AVX.c PROPERTIES COMPILE_FLAGS \"-O3 ${C_AVX_FLAGS}\")\n  ENDIF(MSVC)\n  SET(simd ${simd} vector/AVX.c generic/simd/convolve5x5_avx.c)\nENDIF(C_AVX_FOUND)\n\nIF(C_AVX2_FOUND)\n  IF(MSVC)\n    SET_SOURCE_FILES_PROPERTIES(vector/AVX2.c PROPERTIES COMPILE_FLAGS \"/Ox /arch:AVX2 ${C_AVX2_FLAGS}\")\n  ELSE(MSVC)\n    SET_SOURCE_FILES_PROPERTIES(vector/AVX2.c PROPERTIES COMPILE_FLAGS \"-O3 ${C_AVX2_FLAGS}\")\n  ENDIF(MSVC)\n  SET(simd ${simd} vector/AVX2.c)\nENDIF(C_AVX2_FOUND)\n\nSET(hdr\n  THGeneral.h THHalf.h THAllocator.h THSize.h THStorage.h THTensor.h THTensorApply.h THBlas.h THMath.h\n  THLapack.h THLogAdd.h THRandom.h THVector.h THAtomic.h )\n\nSET(src\n  THGeneral.c THHalf.c THAllocator.c THSize.c THStorage.c THTensor.c THBlas.c THLapack.c\n  THLogAdd.c THRandom.c THFile.c THDiskFile.c THMemoryFile.c THAtomic.c THVector.c)\n\nSET(src ${src} ${hdr} ${simd})\n\n#######################################################################\n##### build section\n######################################################################\n\nADD_TORCH_LIBRARY(TH SHARED \"${src}\")\n\nIF (BUILD_STATIC OR \"$ENV{STATIC_TH}\" STREQUAL \"YES\")\n  ADD_TORCH_LIBRARY(TH_static STATIC \"${src}\")\n  SET_TARGET_PROPERTIES(TH_static PROPERTIES\n    COMPILE_FLAGS \"-fPIC\")\n  SET_TARGET_PROPERTIES(TH_static PROPERTIES\n    PREFIX \"lib\" IMPORT_PREFIX \"lib\" OUTPUT_NAME \"TH\")\nENDIF()\n\nIF(NOT TH_SO_VERSION)\n  SET(TH_SO_VERSION 0)\nENDIF(NOT TH_SO_VERSION)\nMESSAGE(STATUS \"TH_SO_VERSION: ${TH_SO_VERSION}\")\nSET_TARGET_PROPERTIES(TH PROPERTIES\n  VERSION   ${TH_SO_VERSION}\n  SOVERSION ${TH_SO_VERSION})\n\nIF(HAS_C11_ATOMICS)\n  ADD_DEFINITIONS(-DUSE_C11_ATOMICS=1)\n  
MESSAGE(STATUS \"Atomics: using C11 intrinsics\")\nELSEIF(HAS_MSC_ATOMICS)\n  ADD_DEFINITIONS(-DUSE_MSC_ATOMICS=1)\n  MESSAGE(STATUS \"Atomics: using MSVC intrinsics\")\nELSEIF(HAS_GCC_ATOMICS)\n  ADD_DEFINITIONS(-DUSE_GCC_ATOMICS=1)\n    MESSAGE(STATUS \"Atomics: using GCC intrinsics\")\nELSE()\n  SET(CMAKE_THREAD_PREFER_PTHREAD TRUE)\n  FIND_PACKAGE(Threads)\n  IF(THREADS_FOUND)\n    ADD_DEFINITIONS(-DUSE_PTHREAD_ATOMICS=1)\n    TARGET_LINK_LIBRARIES(TH ${CMAKE_THREAD_LIBS_INIT})\n    MESSAGE(STATUS \"Atomics: using pthread\")\n  ENDIF()\nENDIF()\n\nFIND_PACKAGE(BLAS)\nIF(BLAS_FOUND)\n  SET(USE_BLAS 1)\n  IF ($ENV{TH_BINARY_BUILD})\n    MESSAGE(STATUS \"TH_BINARY_BUILD detected. Enabling special linkage.\")\n    TARGET_LINK_LIBRARIES(TH \"${BLAS_LIBRARIES};${BLAS_LIBRARIES};${BLAS_LIBRARIES}\")\n  ELSE ($ENV{TH_BINARY_BUILD})\n    TARGET_LINK_LIBRARIES(TH ${BLAS_LIBRARIES})\n  ENDIF ($ENV{TH_BINARY_BUILD})\n  \n  IF(BLAS_INFO STREQUAL \"mkl\")\n    ADD_DEFINITIONS(-DTH_BLAS_MKL)\n  ENDIF()\nENDIF(BLAS_FOUND)\n\nFIND_PACKAGE(LAPACK)\nIF(LAPACK_FOUND)\n  SET(USE_LAPACK 1)\n  TARGET_LINK_LIBRARIES(TH ${LAPACK_LIBRARIES})\nENDIF(LAPACK_FOUND)\n\nIF (UNIX AND NOT APPLE)\n   INCLUDE(CheckLibraryExists)\n   # https://github.com/libgit2/libgit2/issues/2128#issuecomment-35649830\n   CHECK_LIBRARY_EXISTS(rt clock_gettime \"time.h\" NEED_LIBRT)\n   IF(NEED_LIBRT)\n     TARGET_LINK_LIBRARIES(TH rt)\n     SET(CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES} rt)\n   ENDIF(NEED_LIBRT)\nENDIF(UNIX AND NOT APPLE)\n\nIF(UNIX)\n  SET(CMAKE_EXTRA_INCLUDE_FILES \"sys/mman.h\")\n  CHECK_FUNCTION_EXISTS(mmap HAVE_MMAP)\n  IF(HAVE_MMAP)\n    ADD_DEFINITIONS(-DHAVE_MMAP=1)\n  ENDIF(HAVE_MMAP)\n  # done for lseek: https://www.gnu.org/software/libc/manual/html_node/File-Position-Primitive.html\n  ADD_DEFINITIONS(-D_FILE_OFFSET_BITS=64)\n  CHECK_FUNCTION_EXISTS(shm_open HAVE_SHM_OPEN)\n  IF(HAVE_SHM_OPEN)\n    ADD_DEFINITIONS(-DHAVE_SHM_OPEN=1)\n  ENDIF(HAVE_SHM_OPEN)\n  
CHECK_FUNCTION_EXISTS(shm_unlink HAVE_SHM_UNLINK)\n  IF(HAVE_SHM_UNLINK)\n    ADD_DEFINITIONS(-DHAVE_SHM_UNLINK=1)\n  ENDIF(HAVE_SHM_UNLINK)\n  CHECK_FUNCTION_EXISTS(malloc_usable_size HAVE_MALLOC_USABLE_SIZE)\n  IF(HAVE_MALLOC_USABLE_SIZE)\n    ADD_DEFINITIONS(-DHAVE_MALLOC_USABLE_SIZE=1)\n  ENDIF(HAVE_MALLOC_USABLE_SIZE)\nENDIF(UNIX)\n\nIF(NOT MSVC)\n  TARGET_LINK_LIBRARIES(TH m)\nENDIF(NOT MSVC)\n\n# Is __thread supported?\nIF(NOT MSVC)\n  CHECK_C_SOURCE_COMPILES(\"static __thread int x = 1; int main() { return x; }\" C_HAS_THREAD)\nELSE(NOT MSVC)\n  CHECK_C_SOURCE_COMPILES(\"static __declspec( thread ) int x = 1; int main() { return x; }\" C_HAS_THREAD)\nENDIF(NOT MSVC)\nIF(NOT C_HAS_THREAD)\n  MESSAGE(STATUS \"Warning: __thread is not supported, generating thread-unsafe code\")\nELSE(NOT C_HAS_THREAD)\n  SET(CMAKE_C_FLAGS \"${CMAKE_C_FLAGS} -DTH_HAVE_THREAD\")\nENDIF(NOT C_HAS_THREAD)\n\nINCLUDE_DIRECTORIES(\"${CMAKE_CURRENT_BINARY_DIR}\")\nCONFIGURE_FILE(THGeneral.h.in \"${CMAKE_CURRENT_BINARY_DIR}/THGeneral.h\")\n\n\n#######################################################################\n##### install section\n######################################################################\n\nINSTALL(TARGETS TH\n  EXPORT TH-exports\n  RUNTIME DESTINATION \"${TH_INSTALL_BIN_SUBDIR}\"\n  LIBRARY DESTINATION \"${TH_INSTALL_LIB_SUBDIR}\"\n  ARCHIVE DESTINATION \"${TH_INSTALL_LIB_SUBDIR}\")\n\nINSTALL(FILES\n  TH.h\n  THAllocator.h\n  THMath.h\n  THBlas.h\n  THDiskFile.h\n  THFile.h\n  THFilePrivate.h\n  ${CMAKE_CURRENT_BINARY_DIR}/THGeneral.h\n  THGenerateAllTypes.h\n  THGenerateDoubleType.h\n  THGenerateFloatType.h\n  THGenerateHalfType.h\n  THGenerateLongType.h\n  THGenerateIntType.h\n  THGenerateShortType.h\n  THGenerateCharType.h\n  THGenerateByteType.h\n  THGenerateFloatTypes.h\n  THGenerateIntTypes.h\n  THLapack.h\n  THLogAdd.h\n  THMemoryFile.h\n  THRandom.h\n  THSize.h\n  THStorage.h\n  THTensor.h\n  THTensorApply.h\n  THTensorDimApply.h\n  THTensorMacros.h\n  
THVector.h\n  THAtomic.h\n  THHalf.h\n  DESTINATION \"${TH_INSTALL_INCLUDE_SUBDIR}/TH\")\n\nINSTALL(FILES\n  vector/AVX.h\n  vector/AVX2.h\n  DESTINATION \"${TH_INSTALL_INCLUDE_SUBDIR}/TH/vector\")\n\nINSTALL(FILES\n  generic/THBlas.c\n  generic/THBlas.h\n  generic/THLapack.c\n  generic/THLapack.h\n  generic/THStorage.c\n  generic/THStorage.h\n  generic/THStorageCopy.c\n  generic/THStorageCopy.h\n  generic/THTensor.c\n  generic/THTensor.h\n  generic/THTensorConv.c\n  generic/THTensorConv.h\n  generic/THTensorCopy.c\n  generic/THTensorCopy.h\n  generic/THTensorLapack.c\n  generic/THTensorLapack.h\n  generic/THTensorMath.c\n  generic/THTensorMath.h\n  generic/THTensorRandom.c\n  generic/THTensorRandom.h\n  generic/THVectorDispatch.c\n  generic/THVector.h\n  DESTINATION \"${TH_INSTALL_INCLUDE_SUBDIR}/TH/generic\")\n\n\nIF (WIN32 AND NOT CYGWIN)\n  SET(BLAS_INSTALL_LIBRARIES \"OFF\"\n    CACHE BOOL \"Copy the required BLAS DLLs into the TH install dirs\")\nENDIF (WIN32 AND NOT CYGWIN)\n\nMACRO(Install_Required_Library ln)\n    get_filename_component(libpath ${ln} PATH)\n    get_filename_component(libname ${ln} NAME_WE)\n    file(GLOB libdlls \"${libpath}/${libname}*.dll\")\n    install(PROGRAMS ${libdlls}\n      DESTINATION \"${TH_INSTALL_BIN_SUBDIR}\")\nENDMACRO(Install_Required_Library libname)\n\nIF (BLAS_FOUND AND BLAS_INSTALL_LIBRARIES)\n  IF (BLAS_goto2_LIBRARY)\n    Install_Required_Library(${BLAS_goto2_LIBRARY})\n    Install_Required_Library(\"${libpath}/libgfortran\")\n    Install_Required_Library(\"${libpath}/libquadmath\")\n    Install_Required_Library(\"${libpath}/libgcc\")\n  ENDIF()\n  IF (BLAS_openblas_LIBRARY)\n    Install_Required_Library(${BLAS_openblas_LIBRARY})\n    Install_Required_Library(\"${libpath}/libquadmath\")\n    Install_Required_Library(\"${libpath}/libgfortran\")\n    Install_Required_Library(\"${libpath}/libgcc\")\n  ENDIF()\nENDIF()\n\n# Create THConfig.cmake\nGET_TARGET_PROPERTY(TH_OUTPUT_NAME TH 
LOCATION)\nGET_FILENAME_COMPONENT(TH_OUTPUT_NAME ${TH_OUTPUT_NAME} NAME)\nSET(TH_LIBRARIES \"${CMAKE_INSTALL_PREFIX}/${TH_INSTALL_LIB_SUBDIR}/${TH_OUTPUT_NAME}\")\nSET(TH_INCLUDE_DIR \"${CMAKE_INSTALL_PREFIX}/${TH_INSTALL_INCLUDE_SUBDIR}/TH\")\nCONFIGURE_FILE(THConfig.cmake.in \"${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/THConfig.cmake\")\nINSTALL(FILES \"${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/THConfig.cmake\"\n  DESTINATION \"${TH_INSTALL_CMAKE_SUBDIR}\")\n"
  },
  {
    "path": "lib/TH/README.md",
    "content": "Environment variables control the disabling of certain explicit SIMD optimizations.\n\n```\nx64 options:\nTH_NO_AVX2=1 # disable AVX2 codepaths\nTH_NO_AVX=1  # disable AVX codepaths\nTH_NO_SSE=1  # disable SSE codepaths\n\nppc64le options:\nTH_NO_VSX=1  # disable VSX codepaths\n```\n"
  },
  {
    "path": "lib/TH/TH.h",
    "content": "#ifndef TH_INC\n#define TH_INC\n\n#include \"THGeneral.h\"\n\n#include \"THBlas.h\"\n#ifdef USE_LAPACK\n#include \"THLapack.h\"\n#endif\n\n#include \"THAtomic.h\"\n#include \"THVector.h\"\n#include \"THLogAdd.h\"\n#include \"THRandom.h\"\n#include \"THSize.h\"\n#include \"THStorage.h\"\n#include \"THTensor.h\"\n#include \"THTensorApply.h\"\n#include \"THTensorDimApply.h\"\n\n#include \"THFile.h\"\n#include \"THDiskFile.h\"\n#include \"THMemoryFile.h\"\n\n#endif\n"
  },
  {
    "path": "lib/TH/THAllocator.c",
    "content": "#include \"THAllocator.h\"\n#include \"THAtomic.h\"\n\n/* stuff for mapped files */\n#ifdef _WIN32\n#include <windows.h>\n#endif\n\n#if HAVE_MMAP\n#include <sys/types.h>\n#include <sys/mman.h>\n#include <sys/stat.h>\n#include <fcntl.h>\n#include <unistd.h>\n#endif\n/* end of stuff for mapped files */\n\nstatic void *THDefaultAllocator_alloc(void* ctx, ptrdiff_t size) {\n  return THAlloc(size);\n}\n\nstatic void *THDefaultAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) {\n  return THRealloc(ptr, size);\n}\n\nstatic void THDefaultAllocator_free(void* ctx, void* ptr) {\n  THFree(ptr);\n}\n\nTHAllocator THDefaultAllocator = {\n  &THDefaultAllocator_alloc,\n  &THDefaultAllocator_realloc,\n  &THDefaultAllocator_free\n};\n\n#if defined(_WIN32) || defined(HAVE_MMAP)\n\nstruct THMapAllocatorContext_ {\n  char *filename; /* file name */\n  int flags;\n  ptrdiff_t size; /* mapped size */\n  int fd;\n};\n\n#define TH_ALLOC_ALIGNMENT 64\n\ntypedef struct {\n  int refcount;\n} THMapInfo;\n\nchar * unknown_filename = \"filename not specified\";\n\nTHMapAllocatorContext *THMapAllocatorContext_new(const char *filename, int flags)\n{\n  THMapAllocatorContext *ctx = THAlloc(sizeof(THMapAllocatorContext));\n\n  if (!(flags & TH_ALLOCATOR_MAPPED_SHARED) && !(flags & TH_ALLOCATOR_MAPPED_SHAREDMEM))\n    flags &= ~TH_ALLOCATOR_MAPPED_NOCREATE;\n  if ((flags ^ TH_ALLOCATOR_MAPPED_EXCLUSIVE) == 0)\n    THError(\"TH_ALLOCATOR_MAPPED_EXCLUSIVE flag requires opening the file \"\n        \"in shared mode\");\n\n  if (filename) {\n    ctx->filename = THAlloc(strlen(filename)+1);\n    strcpy(ctx->filename, filename);\n  } else {\n    ctx->filename = unknown_filename;\n  }\n  ctx->flags = flags;\n  ctx->size = 0;\n  ctx->fd = -1;\n\n  return ctx;\n}\n\nTHMapAllocatorContext *THMapAllocatorContext_newWithFd(const char *filename, int fd, int flags)\n{\n  THMapAllocatorContext *ctx = THMapAllocatorContext_new(filename, flags);\n  ctx->fd = fd;\n\n  return ctx;\n}\n\nchar * 
THMapAllocatorContext_filename(THMapAllocatorContext *ctx)\n{\n  return ctx->filename;\n}\n\nint THMapAllocatorContext_fd(THMapAllocatorContext *ctx)\n{\n  return ctx->fd;\n}\n\nptrdiff_t THMapAllocatorContext_size(THMapAllocatorContext *ctx)\n{\n  return ctx->size;\n}\n\nvoid THMapAllocatorContext_free(THMapAllocatorContext *ctx)\n{\n  if (ctx->filename != unknown_filename)\n    THFree(ctx->filename);\n  THFree(ctx);\n}\n\nstatic void *_map_alloc(void* ctx_, ptrdiff_t size)\n{\n  THMapAllocatorContext *ctx = ctx_;\n  void *data = NULL;\n\n#ifdef _WIN32\n  {\n    HANDLE hfile;\n    HANDLE hmfile;\n    LARGE_INTEGER hfilesz;\n\n    if (ctx->flags & TH_ALLOCATOR_MAPPED_EXCLUSIVE)\n      THError(\"exclusive file mapping is not supported on Windows\");\n    if (ctx->flags & TH_ALLOCATOR_MAPPED_NOCREATE)\n      THError(\"file mapping without creation is not supported on Windows\");\n    if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD)\n      THError(\"TH_ALLOCATOR_MAPPED_KEEPFD not supported on Windows\");\n    if (ctx->flags & TH_ALLOCATOR_MAPPED_FROMFD)\n      THError(\"TH_ALLOCATOR_MAPPED_FROMFD not supported on Windows\");\n\n    /* open file */\n    /* FILE_FLAG_RANDOM_ACCESS ? 
*/\n    if(ctx->flags)\n    {\n      hfile = CreateFileA(ctx->filename, GENERIC_READ|GENERIC_WRITE, FILE_SHARE_WRITE|FILE_SHARE_READ, 0, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0);\n      if (hfile == INVALID_HANDLE_VALUE)\n        THError(\"could not open file <%s> in read-write mode; error code: <%d>\", ctx->filename, GetLastError());\n    }\n    else\n    {\n      hfile = CreateFileA(ctx->filename, GENERIC_READ, FILE_SHARE_WRITE|FILE_SHARE_READ, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);\n      if (hfile == INVALID_HANDLE_VALUE)\n        THError(\"could not open file <%s> in read-only mode; error code: <%d>\", ctx->filename, GetLastError());\n    }\n\n    if (GetFileSizeEx(hfile, &hfilesz) == 0)\n    {\n      THError(\"could not get file size: <%s>; error code: <%d>\", ctx->filename, GetLastError());\n    }\n\n    if(size > 0)\n    {\n      if(size > hfilesz.QuadPart)\n      {\n        if(ctx->flags)\n        {\n          hfilesz.QuadPart = size;\n          if(SetFilePointerEx(hfile, hfilesz, NULL, FILE_BEGIN) == 0)\n          {\n            CloseHandle(hfile);\n            THError(\"unable to stretch file <%s> to the right size; error code: <%d>\", ctx->filename, GetLastError());\n          }\n          if(SetEndOfFile(hfile) == 0)\n          {\n            CloseHandle(hfile);\n            THError(\"unable to write to file <%s>; error code: <%d>\", ctx->filename, GetLastError());\n          }\n        }\n        else\n        {\n          CloseHandle(hfile);\n          THError(\"file <%s> size is smaller than the required mapping size <%ld>; error code: <%d>\", ctx->filename, size, GetLastError());\n        }\n      }\n    }\n    else\n      size = hfilesz.QuadPart;\n\n    ctx->size = size; /* if we are here, it must be the right size */\n\n    hfilesz.QuadPart = ctx->size;\n\n    /* get map handle */\n    if(ctx->flags)\n    {\n      if( (hmfile = CreateFileMapping(hfile, NULL, PAGE_READWRITE, hfilesz.HighPart, hfilesz.LowPart, NULL)) == NULL )\n        
THError(\"could not create a map on file <%s>; error code: <%d>\", ctx->filename, GetLastError());\n    }\n    else\n    {\n      if( (hmfile = CreateFileMapping(hfile, NULL, PAGE_WRITECOPY, hfilesz.HighPart, hfilesz.LowPart, NULL)) == NULL )\n        THError(\"could not create a map on file <%s>; error code: <%d>\", ctx->filename, GetLastError());\n    }\n\n    /* map the stuff */\n    if(ctx->flags)\n      data = MapViewOfFile(hmfile, FILE_MAP_ALL_ACCESS, 0, 0, 0);\n    else\n      data = MapViewOfFile(hmfile, FILE_MAP_COPY, 0, 0, 0);\n\n    CloseHandle(hfile);\n    CloseHandle(hmfile);\n  }\n#else /* _WIN32 */\n  {\n    /* open file */\n    int fd;\n    int flags;\n    struct stat file_stat;\n\n    if (ctx->flags & (TH_ALLOCATOR_MAPPED_SHARED | TH_ALLOCATOR_MAPPED_SHAREDMEM))\n      flags = O_RDWR | O_CREAT;\n    else\n      flags = O_RDONLY;\n\n    if (ctx->flags & TH_ALLOCATOR_MAPPED_EXCLUSIVE)\n      flags |= O_EXCL;\n    if (ctx->flags & TH_ALLOCATOR_MAPPED_NOCREATE)\n      flags &= ~O_CREAT;\n\n    if (!(ctx->flags & TH_ALLOCATOR_MAPPED_FROMFD)) {\n      if(ctx->flags & TH_ALLOCATOR_MAPPED_SHARED)\n      {\n        if((fd = open(ctx->filename, flags, (mode_t)0600)) == -1)\n          THError(\"unable to open file <%s> in read-write mode\", ctx->filename);\n      }\n      else if (ctx->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM)\n      {\n#ifdef HAVE_SHM_OPEN\n        if((fd = shm_open(ctx->filename, flags, (mode_t)0600)) == -1)\n          THError(\"unable to open shared memory object <%s> in read-write mode\", ctx->filename);\n#else\n        THError(\"unable to open file <%s> in sharedmem mode, shm_open unavailable on this platform\", ctx->filename);\n#endif\n      }\n      else\n      {\n        if((fd = open(ctx->filename, O_RDONLY)) == -1)\n          THError(\"unable to open file <%s> in read-only mode\", ctx->filename);\n      }\n    } else {\n      fd = ctx->fd;\n    }\n\n    if(fstat(fd, &file_stat) == -1)\n    {\n      if (!(ctx->flags & 
TH_ALLOCATOR_MAPPED_FROMFD))\n        close(fd);\n      THError(\"unable to stat the file <%s>\", ctx->filename);\n    }\n\n    if(size > 0)\n    {\n      if(size > file_stat.st_size)\n      {\n        if(ctx->flags)\n        {\n          if(ftruncate(fd, size) == -1)\n            THError(\"unable to resize file <%s> to the right size\", ctx->filename);\n          if(fstat(fd, &file_stat) == -1 || file_stat.st_size < size)\n          {\n            close(fd);\n            THError(\"unable to stretch file <%s> to the right size\", ctx->filename);\n          }\n/* on OS X write returns with errno 45 (Opperation not supported) when used\n * with a file descriptor obtained via shm_open\n */\n#ifndef __APPLE__\n          if((write(fd, \"\", 1)) != 1) /* note that the string \"\" contains the '\\0' byte ... */\n          {\n            close(fd);\n            THError(\"unable to write to file <%s>\", ctx->filename);\n          }\n#endif\n        }\n        else\n        {\n          close(fd);\n          THError(\"file <%s> size is smaller than the required mapping size <%ld>\", ctx->filename, size);\n        }\n      }\n    }\n    else\n      size = file_stat.st_size;\n\n    ctx->size = size; /* if we are here, it must be the right size */\n\n    /* map it */\n    if (ctx->flags & (TH_ALLOCATOR_MAPPED_SHARED | TH_ALLOCATOR_MAPPED_SHAREDMEM))\n      data = mmap(NULL, ctx->size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);\n    else\n      data = mmap(NULL, ctx->size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);\n\n    if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD) {\n      ctx->fd = fd;\n    } else {\n      if(close(fd) == -1)\n        THError(\"Error closing file <%s>\", ctx->filename);\n      ctx->fd = -1;\n    }\n\n    if (ctx->flags & TH_ALLOCATOR_MAPPED_UNLINK) {\n      if (ctx->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM)\n      {\n#ifdef HAVE_SHM_UNLINK\n        if (shm_unlink(ctx->filename) == -1)\n          THError(\"could not unlink the shared memory file %s\", 
ctx->filename);\n#else\n        THError(\"could not unlink the shared memory file %s, shm_unlink not available on platform\", ctx->filename);\n#endif\n      }\n      else\n      {\n        if (unlink(ctx->filename) == -1)\n          THError(\"could not unlink file %s\", ctx->filename);\n      }\n    }\n\n    if(data == MAP_FAILED)\n    {\n      data = NULL; /* let's be sure it is NULL */\n      THError(\"$ Torch: unable to mmap memory: you tried to mmap %dGB.\", ctx->size/1073741824);\n    }\n  }\n#endif\n\n  return data;\n}\n\nstatic void * THMapAllocator_alloc(void *ctx, ptrdiff_t size) {\n  return _map_alloc(ctx, size);\n}\n\nstatic void *THMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) {\n  THError(\"cannot realloc mapped data\");\n  return NULL;\n}\n\nstatic void THMapAllocator_free(void* ctx_, void* data) {\n  THMapAllocatorContext *ctx = ctx_;\n\n#ifdef _WIN32\n  if(UnmapViewOfFile(data) == 0)\n    THError(\"could not unmap the shared memory file\");\n#else /* _WIN32 */\n  if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD) {\n    if (close(ctx->fd) == -1)\n      THError(\"could not close file descriptor %d\", ctx->fd);\n  }\n\n  if (munmap(data, ctx->size))\n    THError(\"could not unmap the shared memory file\");\n\n  if (!(ctx->flags & (TH_ALLOCATOR_MAPPED_FROMFD | TH_ALLOCATOR_MAPPED_UNLINK)))\n  {\n    if (ctx->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM)\n    {\n#ifdef HAVE_SHM_UNLINK\n      if (shm_unlink(ctx->filename) == -1)\n        THError(\"could not unlink the shared memory file %s\", ctx->filename);\n#else\n      THError(\"could not unlink the shared memory file %s, shm_unlink not available on platform\", ctx->filename);\n#endif\n    }\n  }\n#endif /* _WIN32 */\n\n  THMapAllocatorContext_free(ctx);\n}\n\n#else\n\nTHMapAllocatorContext *THMapAllocatorContext_new(const char *filename, int flags) {\n  THError(\"file mapping not supported on your system\");\n  return NULL;\n}\n\nvoid THMapAllocatorContext_free(THMapAllocatorContext *ctx) {\n  
THError(\"file mapping not supported on your system\");\n}\n\nstatic void *THMapAllocator_alloc(void* ctx_, ptrdiff_t size) {\n  THError(\"file mapping not supported on your system\");\n  return NULL;\n}\n\nstatic void *THMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) {\n  THError(\"file mapping not supported on your system\");\n  return NULL;\n}\n\nstatic void THMapAllocator_free(void* ctx, void* data) {\n  THError(\"file mapping not supported on your system\");\n}\n\n#endif\n\n#if (defined(_WIN32) || defined(HAVE_MMAP)) && defined(TH_ATOMIC_IPC_REFCOUNT)\n\nstatic void * THRefcountedMapAllocator_alloc(void *_ctx, ptrdiff_t size) {\n  THMapAllocatorContext *ctx = _ctx;\n\n  if (ctx->flags & TH_ALLOCATOR_MAPPED_FROMFD)\n    THError(\"THRefcountedMapAllocator doesn't support TH_ALLOCATOR_MAPPED_FROMFD flag\");\n  if (ctx->flags & TH_ALLOCATOR_MAPPED_KEEPFD)\n    THError(\"THRefcountedMapAllocator doesn't support TH_ALLOCATOR_MAPPED_KEEPFD flag\");\n  if (ctx->flags & TH_ALLOCATOR_MAPPED_UNLINK)\n    THError(\"THRefcountedMapAllocator doesn't support TH_ALLOCATOR_MAPPED_UNLINK flag\");\n  if (!(ctx->flags & TH_ALLOCATOR_MAPPED_SHAREDMEM))\n    THError(\"THRefcountedMapAllocator requires TH_ALLOCATOR_MAPPED_SHAREDMEM flag\");\n\n  size = size + TH_ALLOC_ALIGNMENT;\n  void *ptr = _map_alloc(ctx, size);\n  char *data = ((char*)ptr) + TH_ALLOC_ALIGNMENT;\n  THMapInfo *map_info = (THMapInfo*)ptr;\n\n  if (ctx->flags & TH_ALLOCATOR_MAPPED_EXCLUSIVE)\n    map_info->refcount = 1;\n  else\n    THAtomicIncrementRef(&map_info->refcount);\n\n  return (void*)data;\n}\n\nstatic void *THRefcountedMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) {\n  THError(\"cannot realloc mapped data\");\n  return NULL;\n}\n\nstatic void THRefcountedMapAllocator_free(void* ctx_, void* data) {\n  THMapAllocatorContext *ctx = ctx_;\n\n#ifdef _WIN32\n  if(UnmapViewOfFile(data) == 0)\n    THError(\"could not unmap the shared memory file\");\n#else /* _WIN32 */\n\n  THMapInfo 
*info = (THMapInfo*)(((char*)data) - TH_ALLOC_ALIGNMENT);\n  if (THAtomicDecrementRef(&info->refcount)) {\n#ifdef HAVE_SHM_UNLINK\n    if (shm_unlink(ctx->filename) == -1)\n      THError(\"could not unlink the shared memory file %s\", ctx->filename);\n#else\n    THError(\"could not unlink the shared memory file %s, shm_unlink not available on platform\", ctx->filename);\n#endif /* HAVE_SHM_UNLINK */\n  }\n  if (munmap(info, ctx->size))\n    THError(\"could not unmap the shared memory file %s\", ctx->filename);\n#endif /* _WIN32 */\n\n  THMapAllocatorContext_free(ctx);\n}\n\nvoid THRefcountedMapAllocator_incref(THMapAllocatorContext *ctx, void *data)\n{\n  THMapInfo *map_info = (THMapInfo*)(((char*)data) - TH_ALLOC_ALIGNMENT);\n  THAtomicIncrementRef(&map_info->refcount);\n}\n\nint THRefcountedMapAllocator_decref(THMapAllocatorContext *ctx, void *data)\n{\n  THMapInfo *map_info = (THMapInfo*)(((char*)data) - TH_ALLOC_ALIGNMENT);\n  return THAtomicDecrementRef(&map_info->refcount);\n}\n\n#else\n\nstatic void * THRefcountedMapAllocator_alloc(void *ctx, ptrdiff_t size) {\n  THError(\"refcounted file mapping not supported on your system\");\n  return NULL;\n}\n\nstatic void *THRefcountedMapAllocator_realloc(void* ctx, void* ptr, ptrdiff_t size) {\n  THError(\"refcounted file mapping not supported on your system\");\n  return NULL;\n}\n\nstatic void THRefcountedMapAllocator_free(void* ctx_, void* data) {\n  THError(\"refcounted file mapping not supported on your system\");\n}\n\nvoid THRefcountedMapAllocator_incref(THMapAllocatorContext *ctx, void *data)\n{\n  THError(\"refcounted file mapping not supported on your system\");\n}\n\nint THRefcountedMapAllocator_decref(THMapAllocatorContext *ctx, void *data)\n{\n  THError(\"refcounted file mapping not supported on your system\");\n  return 0;\n}\n\n#endif\n\nTHAllocator THMapAllocator = {\n  &THMapAllocator_alloc,\n  &THMapAllocator_realloc,\n  &THMapAllocator_free\n};\n\nTHAllocator THRefcountedMapAllocator = {\n  
&THRefcountedMapAllocator_alloc,\n  &THRefcountedMapAllocator_realloc,\n  &THRefcountedMapAllocator_free\n};\n"
  },
  {
    "path": "lib/TH/THAllocator.h",
    "content": "#ifndef TH_ALLOCATOR_INC\n#define TH_ALLOCATOR_INC\n\n#include \"THGeneral.h\"\n\n#define TH_ALLOCATOR_MAPPED_SHARED 1\n#define TH_ALLOCATOR_MAPPED_SHAREDMEM 2\n#define TH_ALLOCATOR_MAPPED_EXCLUSIVE 4\n#define TH_ALLOCATOR_MAPPED_NOCREATE 8\n#define TH_ALLOCATOR_MAPPED_KEEPFD 16\n#define TH_ALLOCATOR_MAPPED_FROMFD 32\n#define TH_ALLOCATOR_MAPPED_UNLINK 64\n\n/* Custom allocator\n */\ntypedef struct THAllocator {\n  void* (*malloc)(void*, ptrdiff_t);\n  void* (*realloc)(void*, void*, ptrdiff_t);\n  void (*free)(void*, void*);\n} THAllocator;\n\n/* default malloc/free allocator. malloc and realloc raise an error (using\n * THError) on allocation failure.\n */\nextern THAllocator THDefaultAllocator;\n\n/* file map allocator\n */\ntypedef struct THMapAllocatorContext_  THMapAllocatorContext;\nTH_API THMapAllocatorContext *THMapAllocatorContext_new(const char *filename, int flags);\nTH_API THMapAllocatorContext *THMapAllocatorContext_newWithFd(const char *filename,\n    int fd, int flags);\nTH_API char * THMapAllocatorContext_filename(THMapAllocatorContext *ctx);\nTH_API int THMapAllocatorContext_fd(THMapAllocatorContext *ctx);\nTH_API ptrdiff_t THMapAllocatorContext_size(THMapAllocatorContext *ctx);\nTH_API void THMapAllocatorContext_free(THMapAllocatorContext *ctx);\nTH_API void THRefcountedMapAllocator_incref(THMapAllocatorContext *ctx, void *data);\nTH_API int THRefcountedMapAllocator_decref(THMapAllocatorContext *ctx, void *data);\n\nextern THAllocator THMapAllocator;\nextern THAllocator THRefcountedMapAllocator;\n\n#endif\n"
  },
  {
    "path": "lib/TH/THAtomic.c",
    "content": "#include \"THAtomic.h\"\n\n/*\n  Note: I thank Leon Bottou for his useful comments.\n  Ronan.\n*/\n\n#if defined(USE_C11_ATOMICS)\n#include <stdatomic.h>\n#endif\n\n#if defined(USE_MSC_ATOMICS)\n#include <intrin.h>\n#include <assert.h>\n#endif\n\n#if !defined(USE_MSC_ATOMICS) && !defined(USE_GCC_ATOMICS) && defined(USE_PTHREAD_ATOMICS)\n#include <pthread.h>\nstatic pthread_mutex_t ptm = PTHREAD_MUTEX_INITIALIZER;\n#endif\n\nvoid THAtomicSet(int volatile *a, int newvalue)\n{\n#if defined(USE_C11_ATOMICS)\n  atomic_store(a, newvalue);\n#elif defined(USE_MSC_ATOMICS)\n  assert(sizeof(int) == sizeof(long));\n  _InterlockedExchange((long*)a, newvalue);\n#elif defined(USE_GCC_ATOMICS)\n  __sync_lock_test_and_set(a, newvalue);\n#else\n  int oldvalue;\n  do {\n    oldvalue = *a;\n  } while (!THAtomicCompareAndSwap(a, oldvalue, newvalue));\n#endif\n}\n\nint THAtomicGet(int volatile *a)\n{\n#if defined(USE_C11_ATOMICS)\n  return atomic_load(a);\n#else\n  int value;\n  do {\n    value = *a;\n  } while (!THAtomicCompareAndSwap(a, value, value));\n  return value;\n#endif\n}\n\nint THAtomicAdd(int volatile *a, int value)\n{\n#if defined(USE_C11_ATOMICS)\n  return atomic_fetch_add(a, value);\n#elif defined(USE_MSC_ATOMICS)\n  assert(sizeof(int) == sizeof(long));\n  return _InterlockedExchangeAdd((long*)a, value);\n#elif defined(USE_GCC_ATOMICS)\n  return __sync_fetch_and_add(a, value);\n#else\n  int oldvalue;\n  do {\n    oldvalue = *a;\n  } while (!THAtomicCompareAndSwap(a, oldvalue, (oldvalue + value)));\n  return oldvalue;\n#endif\n}\n\nvoid THAtomicIncrementRef(int volatile *a)\n{\n  THAtomicAdd(a, 1);\n}\n\nint THAtomicDecrementRef(int volatile *a)\n{\n  return (THAtomicAdd(a, -1) == 1);\n}\n\nint THAtomicCompareAndSwap(int volatile *a, int oldvalue, int newvalue)\n{\n#if defined(USE_C11_ATOMICS)\n  return atomic_compare_exchange_strong(a, &oldvalue, newvalue);\n#elif defined(USE_MSC_ATOMICS)\n  assert(sizeof(int) == sizeof(long));\n  return 
(_InterlockedCompareExchange((long*)a, (long)newvalue, (long)oldvalue) == (long)oldvalue);\n#elif defined(USE_GCC_ATOMICS)\n  return __sync_bool_compare_and_swap(a, oldvalue, newvalue);\n#elif defined(USE_PTHREAD_ATOMICS)\n  int ret = 0;\n  pthread_mutex_lock(&ptm);\n  if(*a == oldvalue) {\n    *a = newvalue;\n    ret = 1;\n  }\n  pthread_mutex_unlock(&ptm);\n  return ret;\n#else\n#warning THAtomic is not thread safe\n  if(*a == oldvalue) {\n    *a = newvalue;\n    return 1;\n  }\n  else\n    return 0;\n#endif\n}\n\nvoid THAtomicSetLong(long volatile *a, long newvalue)\n{\n#if defined(USE_C11_ATOMICS)\n  atomic_store(a, newvalue);\n#elif defined(USE_MSC_ATOMICS)\n  _InterlockedExchange(a, newvalue);\n#elif defined(USE_GCC_ATOMICS)\n  __sync_lock_test_and_set(a, newvalue);\n#else\n  long oldvalue;\n  do {\n    oldvalue = *a;\n  } while (!THAtomicCompareAndSwapLong(a, oldvalue, newvalue));\n#endif\n}\n\nlong THAtomicGetLong(long volatile *a)\n{\n#if defined(USE_C11_ATOMICS)\n  return atomic_load(a);\n#else\n  long value;\n  do {\n    value = *a;\n  } while (!THAtomicCompareAndSwapLong(a, value, value));\n  return value;\n#endif\n}\n\nlong THAtomicAddLong(long volatile *a, long value)\n{\n#if defined(USE_C11_ATOMICS)\n  return atomic_fetch_add(a, value);\n#elif defined(USE_MSC_ATOMICS)\n  return _InterlockedExchangeAdd(a, value);\n#elif defined(USE_GCC_ATOMICS)\n  return __sync_fetch_and_add(a, value);\n#else\n  long oldvalue;\n  do {\n    oldvalue = *a;\n  } while (!THAtomicCompareAndSwapLong(a, oldvalue, (oldvalue + value)));\n  return oldvalue;\n#endif\n}\n\nlong THAtomicCompareAndSwapLong(long volatile *a, long oldvalue, long newvalue)\n{\n#if defined(USE_C11_ATOMICS)\n  return atomic_compare_exchange_strong(a, &oldvalue, newvalue);\n#elif defined(USE_MSC_ATOMICS)\n  return (_InterlockedCompareExchange(a, newvalue, oldvalue) == oldvalue);\n#elif defined(USE_GCC_ATOMICS)\n  return __sync_bool_compare_and_swap(a, oldvalue, newvalue);\n#elif 
defined(USE_PTHREAD_ATOMICS)\n  long ret = 0;\n  pthread_mutex_lock(&ptm);\n  if(*a == oldvalue) {\n    *a = newvalue;\n    ret = 1;\n  }\n  pthread_mutex_unlock(&ptm);\n  return ret;\n#else\n#warning THAtomic is not thread safe\n  if(*a == oldvalue) {\n    *a = newvalue;\n    return 1;\n  }\n  else\n    return 0;\n#endif\n}\n\nvoid THAtomicSetPtrdiff(ptrdiff_t volatile *a, ptrdiff_t newvalue)\n{\n#if defined(USE_C11_ATOMICS)\n  atomic_store(a, newvalue);\n#elif defined(USE_MSC_ATOMICS)\n#ifdef _WIN64\n  _InterlockedExchange64(a, newvalue);\n#else\n  _InterlockedExchange(a, newvalue);\n#endif\n#elif defined(USE_GCC_ATOMICS)\n  __sync_lock_test_and_set(a, newvalue);\n#else\n  ptrdiff_t oldvalue;\n  do {\n    oldvalue = *a;\n  } while (!THAtomicCompareAndSwapPtrdiff(a, oldvalue, newvalue));\n#endif\n}\n\nptrdiff_t THAtomicGetPtrdiff(ptrdiff_t volatile *a)\n{\n#if defined(USE_C11_ATOMICS)\n  return atomic_load(a);\n#else\n  ptrdiff_t value;\n  do {\n    value = *a;\n  } while (!THAtomicCompareAndSwapPtrdiff(a, value, value));\n  return value;\n#endif\n}\n\nptrdiff_t THAtomicAddPtrdiff(ptrdiff_t volatile *a, ptrdiff_t value)\n{\n#if defined(USE_C11_ATOMICS)\n  return atomic_fetch_add(a, value);\n#elif defined(USE_MSC_ATOMICS)\n#ifdef _WIN64\n  return _InterlockedExchangeAdd64(a, value);\n#else\n  return _InterlockedExchangeAdd(a, value);\n#endif\n#elif defined(USE_GCC_ATOMICS)\n  return __sync_fetch_and_add(a, value);\n#else\n  ptrdiff_t oldvalue;\n  do {\n    oldvalue = *a;\n  } while (!THAtomicCompareAndSwapPtrdiff(a, oldvalue, (oldvalue + value)));\n  return oldvalue;\n#endif\n}\n\nptrdiff_t THAtomicCompareAndSwapPtrdiff(ptrdiff_t volatile *a, ptrdiff_t oldvalue, ptrdiff_t newvalue)\n{\n#if defined(USE_C11_ATOMICS)\n  return atomic_compare_exchange_strong(a, &oldvalue, newvalue);\n#elif defined(USE_MSC_ATOMICS)\n#ifdef _WIN64\n  return (_InterlockedCompareExchange64(a, newvalue, oldvalue) == oldvalue);\n#else\n  return (_InterlockedCompareExchange(a, newvalue, 
oldvalue) == oldvalue);\n#endif\n#elif defined(USE_GCC_ATOMICS)\n  return __sync_bool_compare_and_swap(a, oldvalue, newvalue);\n#elif defined(USE_PTHREAD_ATOMICS)\n  ptrdiff_t ret = 0;\n  pthread_mutex_lock(&ptm);\n  if(*a == oldvalue) {\n    *a = newvalue;\n    ret = 1;\n  }\n  pthread_mutex_unlock(&ptm);\n  return ret;\n#else\n#warning THAtomic is not thread safe\n  if(*a == oldvalue) {\n    *a = newvalue;\n    return 1;\n  }\n  else\n    return 0;\n#endif\n}\n"
  },
  {
    "path": "lib/TH/THAtomic.h",
    "content": "#ifndef TH_ATOMIC_INC\n#define TH_ATOMIC_INC\n\n#include \"THGeneral.h\"\n\n/******************************************************************************\n * Atomic operations for TH\n *  Five backends are integrated:\n *  - C11 atomic operations\n *  - MSVC intrinsics\n *  - GCC intrinsics\n *  - Pthread if none of the above is available\n *  - Unsafe mode in none of the above is available\n ******************************************************************************/\n\n\n/******************************************************************************\n * all-purpose functions\n ******************************************************************************/\n\n/*\n * *a = newvalue\n*/\nTH_API void THAtomicSet(int volatile *a, int newvalue);\n\n/*\n * return *a\n*/\nTH_API int THAtomicGet(int volatile *a);\n\n/*\n * *a += value,\n * return previous *a\n*/\nTH_API int THAtomicAdd(int volatile *a, int value);\n\n/*\n * check if (*a == oldvalue)\n * if true: set *a to newvalue, return 1\n * if false: return 0\n*/\nTH_API int THAtomicCompareAndSwap(int volatile *a, int oldvalue, int newvalue);\n\n\n/******************************************************************************\n * refcounting functions\n ******************************************************************************/\n\n/*\n * *a++\n*/\nTH_API void THAtomicIncrementRef(int volatile *a);\n\n/*\n * *a--,\n * return 1 if *a == 0 after the operation, 0 otherwise\n*/\nTH_API int THAtomicDecrementRef(int volatile *a);\n\n\n\n/******************************************************************************\n * functions for long type\n ******************************************************************************/\n\n/*\n * *a = newvalue\n*/\nTH_API void THAtomicSetLong(long volatile *a, long newvalue);\n\n/*\n * return *a\n*/\nTH_API long THAtomicGetLong(long volatile *a);\n\n/*\n * *a += value,\n * return previous *a\n*/\nTH_API long THAtomicAddLong(long volatile *a, long value);\n\n/*\n * 
check if (*a == oldvalue)\n * if true: set *a to newvalue, return 1\n * if false: return 0\n*/\nTH_API long THAtomicCompareAndSwapLong(long volatile *a, long oldvalue, long newvalue);\n\n\n\n/******************************************************************************\n * functions for ptrdiff_t type\n ******************************************************************************/\n\n/*\n * *a = newvalue\n*/\nTH_API void THAtomicSetPtrdiff(ptrdiff_t volatile *a, ptrdiff_t newvalue);\n\n/*\n * return *a\n*/\nTH_API ptrdiff_t THAtomicGetPtrdiff(ptrdiff_t volatile *a);\n\n/*\n * *a += value,\n * return previous *a\n*/\nTH_API ptrdiff_t THAtomicAddPtrdiff(ptrdiff_t volatile *a, ptrdiff_t value);\n\n/*\n * check if (*a == oldvalue)\n * if true: set *a to newvalue, return 1\n * if false: return 0\n*/\nTH_API ptrdiff_t THAtomicCompareAndSwapPtrdiff(ptrdiff_t volatile *a, ptrdiff_t oldvalue, ptrdiff_t newvalue);\n\n#if defined(USE_C11_ATOMICS) && defined(ATOMIC_INT_LOCK_FREE) && \\\n  ATOMIC_INT_LOCK_FREE == 2\n#define TH_ATOMIC_IPC_REFCOUNT 1\n#elif defined(USE_MSC_ATOMICS) || defined(USE_GCC_ATOMICS)\n#define TH_ATOMIC_IPC_REFCOUNT 1\n#endif\n\n#endif\n"
  },
  {
    "path": "lib/TH/THBlas.c",
    "content": "#include \"THBlas.h\"\n\n#include \"generic/THBlas.c\"\n#include \"THGenerateAllTypes.h\"\n"
  },
  {
    "path": "lib/TH/THBlas.h",
    "content": "#ifndef TH_BLAS_INC\n#define TH_BLAS_INC\n\n#include \"THGeneral.h\"\n\n#define THBlas_(NAME) TH_CONCAT_4(TH,Real,Blas_,NAME)\n\n#include \"generic/THBlas.h\"\n#include \"THGenerateAllTypes.h\"\n\n#endif\n"
  },
  {
    "path": "lib/TH/THConfig.cmake.in",
    "content": "# Find the TH includes and library\n#\n# TH_INCLUDE_DIR -- where to find the includes\n# TH_LIBRARIES -- list of libraries to link against\n# TH_FOUND -- set to 1 if found\n\nSET(TH_FOUND 1)\nSET(TH_INCLUDE_DIR \"@TH_INCLUDE_DIR@\")\nSET(TH_LIBRARIES \"@TH_LIBRARIES@\")\n"
  },
  {
    "path": "lib/TH/THDiskFile.c",
    "content": "#include \"THGeneral.h\"\n#include \"THDiskFile.h\"\n#include \"THFilePrivate.h\"\n\n#include <stdint.h>\n#ifndef LLONG_MAX\n#define LLONG_MAX 9223372036854775807LL\n#endif\n\ntypedef struct THDiskFile__\n{\n    THFile file;\n\n    FILE *handle;\n    char *name;\n    int isNativeEncoding;\n    int longSize;\n\n} THDiskFile;\n\nstatic int THDiskFile_isOpened(THFile *self)\n{\n  THDiskFile *dfself = (THDiskFile*)self;\n  return (dfself->handle != NULL);\n}\n\nconst char *THDiskFile_name(THFile *self)\n{\n  THDiskFile *dfself = (THDiskFile*)self;\n  return dfself->name;\n}\n\n/* workaround mac osx lion ***insane*** fread bug */\n#ifdef __APPLE__\nsize_t fread__(void *ptr, size_t size, size_t nitems, FILE *stream)\n{\n  size_t nread = 0;\n  while(!feof(stream) && !ferror(stream) && (nread < nitems))\n    nread += fread((char*)ptr+nread*size, size, THMin(2147483648/size, nitems-nread), stream);\n  return nread;\n}\n#else\n#define fread__ fread\n#endif\n\n#define READ_WRITE_METHODS(TYPE, TYPEC, ASCII_READ_ELEM, ASCII_WRITE_ELEM) \\\n  static size_t THDiskFile_read##TYPEC(THFile *self, TYPE *data, size_t n)  \\\n  {                                                                     \\\n    THDiskFile *dfself = (THDiskFile*)(self);                           \\\n    size_t nread = 0L;                                                    \\\n                                                                        \\\n    THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\"); \\\n    THArgCheck(dfself->file.isReadable, 1, \"attempt to read in a write-only file\"); \\\n                                                                        \\\n    if(dfself->file.isBinary)                                           \\\n    {                                                                   \\\n      nread = fread__(data, sizeof(TYPE), n, dfself->handle);           \\\n      if(!dfself->isNativeEncoding && (sizeof(TYPE) > 1) && (nread > 0)) \\\n   
     THDiskFile_reverseMemory(data, data, sizeof(TYPE), nread);      \\\n    }                                                                   \\\n    else                                                                \\\n    {                                                                   \\\n      size_t i;                                                           \\\n      for(i = 0; i < n; i++)                                            \\\n      {                                                                 \\\n        ASCII_READ_ELEM; /* increment here result and break if wrong */ \\\n      }                                                                 \\\n      if(dfself->file.isAutoSpacing && (n > 0))                         \\\n      {                                                                 \\\n        int c = fgetc(dfself->handle);                                  \\\n        if( (c != '\\n') && (c != EOF) )                                 \\\n          ungetc(c, dfself->handle);                                    \\\n      }                                                                 \\\n    }                                                                   \\\n                                                                        \\\n    if(nread != n)                                                      \\\n    {                                                                   \\\n      dfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? 
*/ \\\n      if(!dfself->file.isQuiet)                                         \\\n        THError(\"read error: read %d blocks instead of %d\", nread, n);  \\\n    }                                                                   \\\n                                                                        \\\n    return nread;                                                       \\\n  }                                                                     \\\n                                                                        \\\n  static size_t THDiskFile_write##TYPEC(THFile *self, TYPE *data, size_t n) \\\n  {                                                                     \\\n    THDiskFile *dfself = (THDiskFile*)(self);                           \\\n    size_t nwrite = 0L;                                                   \\\n                                                                        \\\n    THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\"); \\\n    THArgCheck(dfself->file.isWritable, 1, \"attempt to write in a read-only file\"); \\\n                                                                        \\\n    if(dfself->file.isBinary)                                           \\\n    {                                                                   \\\n      if(dfself->isNativeEncoding)                                      \\\n      {                                                                 \\\n        nwrite = fwrite(data, sizeof(TYPE), n, dfself->handle);         \\\n      }                                                                 \\\n      else                                                              \\\n      {                                                                 \\\n        if(sizeof(TYPE) > 1)                                            \\\n        {                                                               \\\n          char *buffer = THAlloc(sizeof(TYPE)*n);                 
      \\\n          THDiskFile_reverseMemory(buffer, data, sizeof(TYPE), n);      \\\n          nwrite = fwrite(buffer, sizeof(TYPE), n, dfself->handle);     \\\n          THFree(buffer);                                               \\\n        }                                                               \\\n        else                                                            \\\n          nwrite = fwrite(data, sizeof(TYPE), n, dfself->handle);       \\\n      }                                                                 \\\n    }                                                                   \\\n    else                                                                \\\n    {                                                                   \\\n      size_t i;                                                           \\\n      for(i = 0; i < n; i++)                                            \\\n      {                                                                 \\\n        ASCII_WRITE_ELEM;                                               \\\n        if( dfself->file.isAutoSpacing && (i < n-1) )                   \\\n          fprintf(dfself->handle, \" \");                                 \\\n      }                                                                 \\\n      if(dfself->file.isAutoSpacing && (n > 0))                         \\\n        fprintf(dfself->handle, \"\\n\");                                  \\\n    }                                                                   \\\n                                                                        \\\n    if(nwrite != n)                                                     \\\n    {                                                                   \\\n      dfself->file.hasError = 1;                                        \\\n      if(!dfself->file.isQuiet)                                         \\\n        THError(\"write error: wrote %d blocks instead of %d\", nwrite, n); \\\n   
 }                                                                   \\\n                                                                        \\\n    return nwrite;                                                      \\\n}\n\nstatic int THDiskFile_mode(const char *mode, int *isReadable, int *isWritable)\n{\n  *isReadable = 0;\n  *isWritable = 0;\n  if(strlen(mode) == 1)\n  {\n    if(*mode == 'r')\n    {\n      *isReadable = 1;\n      return 1;\n    }\n    else if(*mode == 'w')\n    {\n      *isWritable = 1;\n      return 1;\n    }\n  }\n  else if(strlen(mode) == 2)\n  {\n    if(mode[0] == 'r' && mode[1] == 'w')\n    {\n      *isReadable = 1;\n      *isWritable = 1;\n      return 1;\n    }\n  }\n  return 0;\n}\n\nstatic void THDiskFile_synchronize(THFile *self)\n{\n  THDiskFile *dfself = (THDiskFile*)(self);\n  THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\");\n  fflush(dfself->handle);\n}\n\nstatic void THDiskFile_seek(THFile *self, size_t position)\n{\n  THDiskFile *dfself = (THDiskFile*)(self);\n\n  THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\");\n\n#if defined(_WIN64)\n  THArgCheck(position <= (size_t)INT64_MAX, 2, \"position must be smaller than INT64_MAX\");\n  if(_fseeki64(dfself->handle, (__int64)position, SEEK_SET) < 0)\n#elif defined(_WIN32)\n  THArgCheck(position <= (size_t)LONG_MAX, 2, \"position must be smaller than LONG_MAX\");\n  if(fseek(dfself->handle, (long)position, SEEK_SET) < 0)\n#else\n  THArgCheck(position <= (size_t)LLONG_MAX, 2, \"position must be smaller than LLONG_MAX\");\n  if(fseeko(dfself->handle, (off_t)position, SEEK_SET) < 0)\n#endif\n  {\n    dfself->file.hasError = 1;\n    if(!dfself->file.isQuiet)\n      THError(\"unable to seek to position %zu\", position);\n  }\n}\n\nstatic void THDiskFile_seekEnd(THFile *self)\n{\n  THDiskFile *dfself = (THDiskFile*)(self);\n\n  THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\");\n\n#if defined(_WIN64)\n  
if(_fseeki64(dfself->handle, 0, SEEK_END) < 0)\n#elif defined(_WIN32)\n  if(fseek(dfself->handle, 0, SEEK_END) < 0)\n#else\n  if(fseeko(dfself->handle, 0, SEEK_END) < 0)\n#endif\n  {\n    dfself->file.hasError = 1;\n    if(!dfself->file.isQuiet)\n      THError(\"unable to seek at end of file\");\n  }\n}\n\nstatic size_t THDiskFile_position(THFile *self)\n{\n  THDiskFile *dfself = (THDiskFile*)(self);\n  THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\");\n\n#if defined(_WIN64)\n  __int64 offset = _ftelli64(dfself->handle);\n#elif defined(_WIN32)\n  long offset = ftell(dfself->handle);\n#else\n  off_t offset = ftello(dfself->handle);\n#endif\n  if (offset > -1)\n      return (size_t)offset;\n  else if(!dfself->file.isQuiet)\n      THError(\"unable to obtain disk file offset (maybe a long overflow occurred)\");\n\n  return 0;\n}\n\nstatic void THDiskFile_close(THFile *self)\n{\n  THDiskFile *dfself = (THDiskFile*)(self);\n  THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\");\n  fclose(dfself->handle);\n  dfself->handle = NULL;\n}\n\n/* Little and Big Endian */\n\nstatic void THDiskFile_reverseMemory(void *dst, const void *src, size_t blockSize, size_t numBlocks)\n{\n  if(blockSize > 1)\n  {\n    size_t halfBlockSize = blockSize/2;\n    char *charSrc = (char*)src;\n    char *charDst = (char*)dst;\n    size_t b, i;\n    for(b = 0; b < numBlocks; b++)\n    {\n      for(i = 0; i < halfBlockSize; i++)\n      {\n        char z = charSrc[i];\n        charDst[i] = charSrc[blockSize-1-i];\n        charDst[blockSize-1-i] = z;\n      }\n      charSrc += blockSize;\n      charDst += blockSize;\n    }\n  }\n}\n\nint THDiskFile_isLittleEndianCPU(void)\n{\n  int x = 7;\n  char *ptr = (char *)&x;\n\n  if(ptr[0] == 0)\n    return 0;\n  else\n    return 1;\n}\n\nint THDiskFile_isBigEndianCPU(void)\n{\n  return(!THDiskFile_isLittleEndianCPU());\n}\n\nvoid THDiskFile_nativeEndianEncoding(THFile *self)\n{\n  THDiskFile *dfself = 
(THDiskFile*)(self);\n  THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\");\n  dfself->isNativeEncoding = 1;\n}\n\nvoid THDiskFile_littleEndianEncoding(THFile *self)\n{\n  THDiskFile *dfself = (THDiskFile*)(self);\n  THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\");\n  dfself->isNativeEncoding = THDiskFile_isLittleEndianCPU();\n}\n\nvoid THDiskFile_bigEndianEncoding(THFile *self)\n{\n  THDiskFile *dfself = (THDiskFile*)(self);\n  THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\");\n  dfself->isNativeEncoding = !THDiskFile_isLittleEndianCPU();\n}\n\n/* End of Little and Big Endian Stuff */\n\nvoid THDiskFile_longSize(THFile *self, int size)\n{\n  THDiskFile *dfself = (THDiskFile*)(self);\n  THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\");\n  THArgCheck(size == 0 || size == 4 || size == 8, 1, \"Invalid long size specified\");\n  dfself->longSize = size;\n}\n\nvoid THDiskFile_noBuffer(THFile *self)\n{\n  THDiskFile *dfself = (THDiskFile*)(self);\n  THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\");\n  if (setvbuf(dfself->handle, NULL, _IONBF, 0)) {\n    THError(\"error: cannot disable buffer\");\n  }\n}\n\nstatic void THDiskFile_free(THFile *self)\n{\n  THDiskFile *dfself = (THDiskFile*)(self);\n  if(dfself->handle)\n    fclose(dfself->handle);\n  THFree(dfself->name);\n  THFree(dfself);\n}\n\n/* READ_WRITE_METHODS(int, Bool, */\n/*                    int value = 0; int ret = fscanf(file->handle, \"%d\", &value); array[i] = (value ? 1 : 0); if(ret <= 0) break; else result++, */\n/*                    int value = (array[i] ? 
1 : 0); nElemWritten = fprintf(file->handle, \"%d\", value), */\n/*                    true) */\n\n/* Note that we do a trick: in ASCII mode Byte and Char data are transferred raw, all n elements in a single fread/fwrite call, and the break then leaves the per-element loop at once */\nREAD_WRITE_METHODS(unsigned char, Byte,\n                   nread = fread(data, 1, n, dfself->handle); break,\n                   nwrite = fwrite(data, 1, n, dfself->handle); break)\n\nREAD_WRITE_METHODS(char, Char,\n                   nread = fread(data, 1, n, dfself->handle); break,\n                   nwrite = fwrite(data, 1, n, dfself->handle); break)\n\nREAD_WRITE_METHODS(short, Short,\n                   int ret = fscanf(dfself->handle, \"%hd\", &data[i]); if(ret <= 0) break; else nread++,\n                   int ret = fprintf(dfself->handle, \"%hd\", data[i]); if(ret <= 0) break; else nwrite++)\n\nREAD_WRITE_METHODS(int, Int,\n                   int ret = fscanf(dfself->handle, \"%d\", &data[i]); if(ret <= 0) break; else nread++,\n                   int ret = fprintf(dfself->handle, \"%d\", data[i]); if(ret <= 0) break; else nwrite++)\n\nREAD_WRITE_METHODS(float, Float,\n                   int ret = fscanf(dfself->handle, \"%g\", &data[i]); if(ret <= 0) break; else nread++,\n                   int ret = fprintf(dfself->handle, \"%.9g\", data[i]); if(ret <= 0) break; else nwrite++)\n\nREAD_WRITE_METHODS(THHalf, Half,\n                   float buf; int ret = fscanf(dfself->handle, \"%g\", &buf); if(ret <= 0) break; else { data[i]= TH_float2half(buf); nread++; },\n                   int ret = fprintf(dfself->handle, \"%.9g\", TH_half2float(data[i])); if(ret <= 0) break; else nwrite++)\n\nREAD_WRITE_METHODS(double, Double,\n                   int ret = fscanf(dfself->handle, \"%lg\", &data[i]); if(ret <= 0) break; else nread++,\n                   int ret = fprintf(dfself->handle, \"%.17g\", data[i]); if(ret <= 0) break; else nwrite++)\n\n\n/* For Long we need to rewrite everything, because of the special management of longSize */\nstatic size_t THDiskFile_readLong(THFile *self, long *data, size_t n)\n{\n  THDiskFile *dfself = 
(THDiskFile*)(self);\n  size_t nread = 0L;\n\n  THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\");\n  THArgCheck(dfself->file.isReadable, 1, \"attempt to read in a write-only file\");\n\n  if(dfself->file.isBinary)\n  {\n    if(dfself->longSize == 0 || dfself->longSize == sizeof(long))\n    {\n      nread = fread__(data, sizeof(long), n, dfself->handle);\n      if(!dfself->isNativeEncoding && (sizeof(long) > 1) && (nread > 0))\n        THDiskFile_reverseMemory(data, data, sizeof(long), nread);\n    } else if(dfself->longSize == 4)\n    {\n      nread = fread__(data, 4, n, dfself->handle);\n      if(!dfself->isNativeEncoding && (nread > 0))\n        THDiskFile_reverseMemory(data, data, 4, nread);\n      size_t i;\n      for(i = nread; i > 0; i--)\n        data[i-1] = ((int *)data)[i-1];\n    }\n    else /* if(dfself->longSize == 8) */\n    {\n      int big_endian = !THDiskFile_isLittleEndianCPU();\n      int32_t *buffer = THAlloc(8*n);\n      nread = fread__(buffer, 8, n, dfself->handle);\n      size_t i;\n      for(i = nread; i > 0; i--)\n        data[i-1] = buffer[2*(i-1) + big_endian];\n      THFree(buffer);\n      if(!dfself->isNativeEncoding && (nread > 0))\n        THDiskFile_reverseMemory(data, data, 4, nread);\n     }\n  }\n  else\n  {\n    size_t i;\n    for(i = 0; i < n; i++)\n    {\n      int ret = fscanf(dfself->handle, \"%ld\", &data[i]); if(ret <= 0) break; else nread++;\n    }\n    if(dfself->file.isAutoSpacing && (n > 0))\n    {\n      int c = fgetc(dfself->handle);\n      if( (c != '\\n') && (c != EOF) )\n        ungetc(c, dfself->handle);\n    }\n  }\n\n  if(nread != n)\n  {\n    dfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? 
*/\n    if(!dfself->file.isQuiet)\n      THError(\"read error: read %d blocks instead of %d\", nread, n);\n  }\n\n  return nread;\n}\n\nstatic size_t THDiskFile_writeLong(THFile *self, long *data, size_t n)\n{\n  THDiskFile *dfself = (THDiskFile*)(self);\n  size_t nwrite = 0L;\n\n  THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\");\n  THArgCheck(dfself->file.isWritable, 1, \"attempt to write in a read-only file\");\n\n  if(dfself->file.isBinary)\n  {\n    if(dfself->longSize == 0 || dfself->longSize == sizeof(long))\n    {\n      if(dfself->isNativeEncoding)\n      {\n        nwrite = fwrite(data, sizeof(long), n, dfself->handle);\n      }\n      else\n      {\n        char *buffer = THAlloc(sizeof(long)*n);\n        THDiskFile_reverseMemory(buffer, data, sizeof(long), n);\n        nwrite = fwrite(buffer, sizeof(long), n, dfself->handle);\n        THFree(buffer);\n      }\n    } else if(dfself->longSize == 4)\n    {\n      int32_t *buffer = THAlloc(4*n);\n      size_t i;\n      for(i = 0; i < n; i++)\n        buffer[i] = data[i];\n      if(!dfself->isNativeEncoding)\n        THDiskFile_reverseMemory(buffer, buffer, 4, n);\n      nwrite = fwrite(buffer, 4, n, dfself->handle);\n      THFree(buffer);\n    }\n    else /* if(dfself->longSize == 8) */\n    {\n      int big_endian = !THDiskFile_isLittleEndianCPU();\n      int32_t *buffer = THAlloc(8*n);\n      size_t i;\n      for(i = 0; i < n; i++)\n      {\n        buffer[2*i + !big_endian] = 0;\n        buffer[2*i + big_endian] = data[i];\n      }\n      if(!dfself->isNativeEncoding)\n        THDiskFile_reverseMemory(buffer, buffer, 8, n);\n      nwrite = fwrite(buffer, 8, n, dfself->handle);\n      THFree(buffer);\n    }\n  }\n  else\n  {\n    size_t i;\n    for(i = 0; i < n; i++)\n    {\n      int ret = fprintf(dfself->handle, \"%ld\", data[i]); if(ret <= 0) break; else nwrite++;\n      if( dfself->file.isAutoSpacing && (i < n-1) )\n        fprintf(dfself->handle, \" \");\n    }\n    
if(dfself->file.isAutoSpacing && (n > 0))\n      fprintf(dfself->handle, \"\\n\");\n  }\n\n  if(nwrite != n)\n  {\n    dfself->file.hasError = 1;\n    if(!dfself->file.isQuiet)\n      THError(\"write error: wrote %d blocks instead of %d\", nwrite, n);\n  }\n\n  return nwrite;\n}\n\nstatic size_t THDiskFile_readString(THFile *self, const char *format, char **str_)\n{\n  THDiskFile *dfself = (THDiskFile*)(self);\n  THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\");\n  THArgCheck(dfself->file.isReadable, 1, \"attempt to read in a write-only file\");\n  THArgCheck((strlen(format) >= 2 ? (format[0] == '*') && (format[1] == 'a' || format[1] == 'l') : 0), 2, \"format must be '*a' or '*l'\");\n\n/* note: the string won't survive long, as it is copied into lua */\n/* so 1024 is not that big... */\n#define TBRS_BSZ 1024L\n\n  if(format[1] == 'a')\n  {\n    char *p = THAlloc(TBRS_BSZ);\n    size_t total = TBRS_BSZ;\n    size_t pos = 0;\n\n    for (;;)\n    {\n      if(total-pos == 0) /* we need more space! */\n      {\n        total += TBRS_BSZ;\n        p = THRealloc(p, total);\n      }\n      pos += fread(p+pos, 1, total-pos, dfself->handle);\n      if (pos < total) /* eof? */\n      {\n        if(pos == 0)\n        {\n          THFree(p);\n          dfself->file.hasError = 1;\n          if(!dfself->file.isQuiet)\n            THError(\"read error: read 0 blocks instead of 1\");\n\n          *str_ = NULL;\n          return 0;\n        }\n        *str_ = p;\n        return pos;\n      }\n    }\n  }\n  else\n  {\n    char *p = THAlloc(TBRS_BSZ);\n    size_t total = TBRS_BSZ;\n    size_t pos = 0;\n    size_t size;\n\n    for (;;)\n    {\n      if(total-pos <= 1) /* we can only write '\\0' in there! */\n      {\n        total += TBRS_BSZ;\n        p = THRealloc(p, total);\n      }\n      if (fgets(p+pos, total-pos, dfself->handle) == NULL) /* eof? 
*/\n      {\n        if(pos == 0)\n        {\n          THFree(p);\n          dfself->file.hasError = 1;\n          if(!dfself->file.isQuiet)\n            THError(\"read error: read 0 blocks instead of 1\");\n\n          *str_ = NULL;\n          return 0;\n        }\n        *str_ = p;\n        return pos;\n      }\n      size = strlen(p+pos);\n      if (size == 0 || (p+pos)[size-1] != '\\n')\n      {\n        pos += size;\n      }\n      else\n      {\n        pos += size-1; /* do not include `eol' */\n        *str_ = p;\n        return pos;\n      }\n    }\n  }\n\n  *str_ = NULL;\n  return 0;\n}\n\n\nstatic size_t THDiskFile_writeString(THFile *self, const char *str, size_t size)\n{\n  THDiskFile *dfself = (THDiskFile*)(self);\n  size_t nwrite;\n\n  THArgCheck(dfself->handle != NULL, 1, \"attempt to use a closed file\");\n  THArgCheck(dfself->file.isWritable, 1, \"attempt to write in a read-only file\");\n\n  nwrite = fwrite(str, 1, size, dfself->handle);\n  if(nwrite != size)\n  {\n    dfself->file.hasError = 1;\n    if(!dfself->file.isQuiet)\n      THError(\"write error: wrote %zu blocks instead of %zu\", nwrite, size);\n  }\n\n  return nwrite;\n}\n\nTHFile *THDiskFile_new(const char *name, const char *mode, int isQuiet)\n{\n  static struct THFileVTable vtable = {\n    THDiskFile_isOpened,\n\n    THDiskFile_readByte,\n    THDiskFile_readChar,\n    THDiskFile_readShort,\n    THDiskFile_readInt,\n    THDiskFile_readLong,\n    THDiskFile_readFloat,\n    THDiskFile_readDouble,\n    THDiskFile_readHalf,\n    THDiskFile_readString,\n\n    THDiskFile_writeByte,\n    THDiskFile_writeChar,\n    THDiskFile_writeShort,\n    THDiskFile_writeInt,\n    THDiskFile_writeLong,\n    THDiskFile_writeFloat,\n    THDiskFile_writeDouble,\n    THDiskFile_writeHalf,\n    THDiskFile_writeString,\n\n    THDiskFile_synchronize,\n    THDiskFile_seek,\n    THDiskFile_seekEnd,\n    THDiskFile_position,\n    THDiskFile_close,\n    THDiskFile_free\n  };\n\n  int isReadable;\n  int 
isWritable;\n  FILE *handle;\n  THDiskFile *self;\n\n  THArgCheck(THDiskFile_mode(mode, &isReadable, &isWritable), 2, \"file mode should be 'r','w' or 'rw'\");\n\n  if( isReadable && isWritable )\n  {\n    handle = fopen(name, \"r+b\");\n    if(!handle)\n    {\n      handle = fopen(name, \"wb\");\n      if(handle)\n      {\n        fclose(handle);\n        handle = fopen(name, \"r+b\");\n      }\n    }\n  }\n  else\n    handle = fopen(name, (isReadable ? \"rb\" : \"wb\"));\n\n  if(!handle)\n  {\n    if(isQuiet)\n      return 0;\n    else\n      THError(\"cannot open <%s> in mode %c%c\", name, (isReadable ? 'r' : ' '), (isWritable ? 'w' : ' '));\n  }\n\n  self = THAlloc(sizeof(THDiskFile));\n\n  self->handle = handle;\n  self->name = THAlloc(strlen(name)+1);\n  strcpy(self->name, name);\n  self->isNativeEncoding = 1;\n  self->longSize = 0;\n\n  self->file.vtable = &vtable;\n  self->file.isQuiet = isQuiet;\n  self->file.isReadable = isReadable;\n  self->file.isWritable = isWritable;\n  self->file.isBinary = 0;\n  self->file.isAutoSpacing = 1;\n  self->file.hasError = 0;\n\n  return (THFile*)self;\n}\n\n/* PipeFile */\n\nstatic int THPipeFile_mode(const char *mode, int *isReadable, int *isWritable)\n{\n  *isReadable = 0;\n  *isWritable = 0;\n  if(strlen(mode) == 1)\n  {\n    if(*mode == 'r')\n    {\n      *isReadable = 1;\n      return 1;\n    }\n    else if(*mode == 'w')\n    {\n      *isWritable = 1;\n      return 1;\n    }\n  }\n  return 0;\n}\n\nstatic void THPipeFile_free(THFile *self)\n{\n  THDiskFile *dfself = (THDiskFile*)(self);\n  if(dfself->handle)\n    pclose(dfself->handle);\n  THFree(dfself->name);\n  THFree(dfself);\n}\n\nTHFile *THPipeFile_new(const char *name, const char *mode, int isQuiet)\n{\n  static struct THFileVTable vtable = {\n    THDiskFile_isOpened,\n\n    THDiskFile_readByte,\n    THDiskFile_readChar,\n    THDiskFile_readShort,\n    THDiskFile_readInt,\n    THDiskFile_readLong,\n    THDiskFile_readFloat,\n    THDiskFile_readDouble,\n    
THDiskFile_readHalf,\n    THDiskFile_readString,\n\n    THDiskFile_writeByte,\n    THDiskFile_writeChar,\n    THDiskFile_writeShort,\n    THDiskFile_writeInt,\n    THDiskFile_writeLong,\n    THDiskFile_writeFloat,\n    THDiskFile_writeDouble,\n    THDiskFile_writeHalf,\n    THDiskFile_writeString,\n\n    THDiskFile_synchronize,\n    THDiskFile_seek,\n    THDiskFile_seekEnd,\n    THDiskFile_position,\n    THDiskFile_close,\n    THPipeFile_free\n  };\n\n  int isReadable;\n  int isWritable;\n  FILE *handle;\n  THDiskFile *self;\n\n  THArgCheck(THPipeFile_mode(mode, &isReadable, &isWritable), 2, \"file mode should be 'r','w'\");\n\n#ifdef _WIN32\n  handle = _popen(name, (isReadable ? \"rb\" : \"wb\"));\n#else\n  handle = popen(name, (isReadable ? \"r\" : \"w\"));\n#endif\n\n  if(!handle)\n  {\n    if(isQuiet)\n      return 0;\n    else\n      THError(\"cannot open <%s> in mode %c%c.  This might be because eg the executable doesn't exist, but it could also be because you are out of memory.\", name, (isReadable ? 'r' : ' '), (isWritable ? 'w' : ' '));\n  }\n\n  self = THAlloc(sizeof(THDiskFile));\n\n  self->handle = handle;\n  self->name = THAlloc(strlen(name)+1);\n  strcpy(self->name, name);\n  self->isNativeEncoding = 1;\n  self->longSize = 0;\n\n  self->file.vtable = &vtable;\n  self->file.isQuiet = isQuiet;\n  self->file.isReadable = isReadable;\n  self->file.isWritable = isWritable;\n  self->file.isBinary = 0;\n  self->file.isAutoSpacing = 1;\n  self->file.hasError = 0;\n\n  return (THFile*)self;\n}\n"
  },
  {
    "path": "lib/TH/THDiskFile.h",
    "content": "#ifndef TH_DISK_FILE_INC\n#define TH_DISK_FILE_INC\n\n#include \"THFile.h\"\n\nTH_API THFile *THDiskFile_new(const char *name, const char *mode, int isQuiet);\nTH_API THFile *THPipeFile_new(const char *name, const char *mode, int isQuiet);\n\nTH_API const char *THDiskFile_name(THFile *self);\n\nTH_API int THDiskFile_isLittleEndianCPU(void);\nTH_API int THDiskFile_isBigEndianCPU(void);\nTH_API void THDiskFile_nativeEndianEncoding(THFile *self);\nTH_API void THDiskFile_littleEndianEncoding(THFile *self);\nTH_API void THDiskFile_bigEndianEncoding(THFile *self);\nTH_API void THDiskFile_longSize(THFile *self, int size);\nTH_API void THDiskFile_noBuffer(THFile *self);\n\n#endif\n"
  },
  {
    "path": "lib/TH/THFile.c",
    "content": "#include \"THFile.h\"\n#include \"THFilePrivate.h\"\n\n#define IMPLEMENT_THFILE_RW(TYPEC, TYPE)                          \\\n  size_t THFile_read##TYPEC##Raw(THFile *self, TYPE *data, size_t n)  \\\n  {                                                               \\\n    return (*self->vtable->read##TYPEC)(self, data, n);           \\\n  }                                                               \\\n                                                                  \\\n  size_t THFile_write##TYPEC##Raw(THFile *self, TYPE *data, size_t n) \\\n  {                                                               \\\n    return (*self->vtable->write##TYPEC)(self, data, n);          \\\n  }\n\nIMPLEMENT_THFILE_RW(Byte, unsigned char)\nIMPLEMENT_THFILE_RW(Char, char)\nIMPLEMENT_THFILE_RW(Short, short)\nIMPLEMENT_THFILE_RW(Int, int)\nIMPLEMENT_THFILE_RW(Long, long)\nIMPLEMENT_THFILE_RW(Float, float)\nIMPLEMENT_THFILE_RW(Double, double)\nIMPLEMENT_THFILE_RW(Half, THHalf)\n\nsize_t THFile_readStringRaw(THFile *self, const char *format, char **str_)\n{\n  return self->vtable->readString(self, format, str_);\n}\n\nsize_t THFile_writeStringRaw(THFile *self, const char *str, size_t size)\n{\n  return self->vtable->writeString(self, str, size);\n}\n\nvoid THFile_synchronize(THFile *self)\n{\n  self->vtable->synchronize(self);\n}\n\nvoid THFile_seek(THFile *self, size_t position)\n{\n  self->vtable->seek(self, position);\n}\n\nvoid THFile_seekEnd(THFile *self)\n{\n  self->vtable->seekEnd(self);\n}\n\nsize_t THFile_position(THFile *self)\n{\n  return self->vtable->position(self);\n}\n\nvoid THFile_close(THFile *self)\n{\n  self->vtable->close(self);\n}\n\nvoid THFile_free(THFile *self)\n{\n  self->vtable->free(self);\n}\n\nint THFile_isOpened(THFile *self)\n{\n  return self->vtable->isOpened(self);\n}\n\n#define IMPLEMENT_THFILE_FLAGS(FLAG) \\\n  int THFile_##FLAG(THFile *self)    \\\n  {                                  \\\n    return self->FLAG;               
\\\n  }\n\nIMPLEMENT_THFILE_FLAGS(isQuiet)\nIMPLEMENT_THFILE_FLAGS(isReadable)\nIMPLEMENT_THFILE_FLAGS(isWritable)\nIMPLEMENT_THFILE_FLAGS(isBinary)\nIMPLEMENT_THFILE_FLAGS(isAutoSpacing)\nIMPLEMENT_THFILE_FLAGS(hasError)\n\nvoid THFile_binary(THFile *self)\n{\n  self->isBinary = 1;\n}\n\nvoid THFile_ascii(THFile *self)\n{\n  self->isBinary = 0;\n}\n\nvoid THFile_autoSpacing(THFile *self)\n{\n  self->isAutoSpacing = 1;\n}\n\nvoid THFile_noAutoSpacing(THFile *self)\n{\n  self->isAutoSpacing = 0;\n}\n\nvoid THFile_quiet(THFile *self)\n{\n  self->isQuiet = 1;\n}\n\nvoid THFile_pedantic(THFile *self)\n{\n  self->isQuiet = 0;\n}\n\nvoid THFile_clearError(THFile *self)\n{\n  self->hasError = 0;\n}\n\n#define IMPLEMENT_THFILE_SCALAR(TYPEC, TYPE)                  \\\n  TYPE THFile_read##TYPEC##Scalar(THFile *self)               \\\n  {                                                           \\\n    TYPE scalar;                                              \\\n    THFile_read##TYPEC##Raw(self, &scalar, 1);                \\\n    return scalar;                                            \\\n  }                                                           \\\n                                                              \\\n  void THFile_write##TYPEC##Scalar(THFile *self, TYPE scalar) \\\n  {                                                           \\\n    THFile_write##TYPEC##Raw(self, &scalar, 1);               \\\n  }\n\nIMPLEMENT_THFILE_SCALAR(Byte, unsigned char)\nIMPLEMENT_THFILE_SCALAR(Char, char)\nIMPLEMENT_THFILE_SCALAR(Short, short)\nIMPLEMENT_THFILE_SCALAR(Int, int)\nIMPLEMENT_THFILE_SCALAR(Long, long)\nIMPLEMENT_THFILE_SCALAR(Float, float)\nIMPLEMENT_THFILE_SCALAR(Double, double)\nIMPLEMENT_THFILE_SCALAR(Half, THHalf)\n\n#define IMPLEMENT_THFILE_STORAGE(TYPEC, TYPE)                           \\\n  size_t THFile_read##TYPEC(THFile *self, TH##TYPEC##Storage *storage)    \\\n  {                                                                     \\\n    return 
THFile_read##TYPEC##Raw(self, storage->data, storage->size); \\\n  }                                                                     \\\n                                                                        \\\n  size_t THFile_write##TYPEC(THFile *self, TH##TYPEC##Storage *storage)   \\\n  {                                                                     \\\n    return THFile_write##TYPEC##Raw(self, storage->data, storage->size); \\\n  }\n\nIMPLEMENT_THFILE_STORAGE(Byte, unsigned char)\nIMPLEMENT_THFILE_STORAGE(Char, char)\nIMPLEMENT_THFILE_STORAGE(Short, short)\nIMPLEMENT_THFILE_STORAGE(Int, int)\nIMPLEMENT_THFILE_STORAGE(Long, long)\nIMPLEMENT_THFILE_STORAGE(Float, float)\nIMPLEMENT_THFILE_STORAGE(Double, double)\nIMPLEMENT_THFILE_STORAGE(Half, THHalf)\n"
  },
  {
    "path": "lib/TH/THFile.h",
    "content": "#ifndef TH_FILE_INC\n#define TH_FILE_INC\n\n#include \"THStorage.h\"\n\ntypedef struct THFile__ THFile;\n\nTH_API int THFile_isOpened(THFile *self);\nTH_API int THFile_isQuiet(THFile *self);\nTH_API int THFile_isReadable(THFile *self);\nTH_API int THFile_isWritable(THFile *self);\nTH_API int THFile_isBinary(THFile *self);\nTH_API int THFile_isAutoSpacing(THFile *self);\nTH_API int THFile_hasError(THFile *self);\n\nTH_API void THFile_binary(THFile *self);\nTH_API void THFile_ascii(THFile *self);\nTH_API void THFile_autoSpacing(THFile *self);\nTH_API void THFile_noAutoSpacing(THFile *self);\nTH_API void THFile_quiet(THFile *self);\nTH_API void THFile_pedantic(THFile *self);\nTH_API void THFile_clearError(THFile *self);\n\n/* scalar */\nTH_API unsigned char THFile_readByteScalar(THFile *self);\nTH_API char THFile_readCharScalar(THFile *self);\nTH_API short THFile_readShortScalar(THFile *self);\nTH_API int THFile_readIntScalar(THFile *self);\nTH_API long THFile_readLongScalar(THFile *self);\nTH_API float THFile_readFloatScalar(THFile *self);\nTH_API double THFile_readDoubleScalar(THFile *self);\n\nTH_API void THFile_writeByteScalar(THFile *self, unsigned char scalar);\nTH_API void THFile_writeCharScalar(THFile *self, char scalar);\nTH_API void THFile_writeShortScalar(THFile *self, short scalar);\nTH_API void THFile_writeIntScalar(THFile *self, int scalar);\nTH_API void THFile_writeLongScalar(THFile *self, long scalar);\nTH_API void THFile_writeFloatScalar(THFile *self, float scalar);\nTH_API void THFile_writeDoubleScalar(THFile *self, double scalar);\n\n/* storage */\nTH_API size_t THFile_readByte(THFile *self, THByteStorage *storage);\nTH_API size_t THFile_readChar(THFile *self, THCharStorage *storage);\nTH_API size_t THFile_readShort(THFile *self, THShortStorage *storage);\nTH_API size_t THFile_readInt(THFile *self, THIntStorage *storage);\nTH_API size_t THFile_readLong(THFile *self, THLongStorage *storage);\nTH_API size_t THFile_readFloat(THFile 
*self, THFloatStorage *storage);\nTH_API size_t THFile_readDouble(THFile *self, THDoubleStorage *storage);\n\nTH_API size_t THFile_writeByte(THFile *self, THByteStorage *storage);\nTH_API size_t THFile_writeChar(THFile *self, THCharStorage *storage);\nTH_API size_t THFile_writeShort(THFile *self, THShortStorage *storage);\nTH_API size_t THFile_writeInt(THFile *self, THIntStorage *storage);\nTH_API size_t THFile_writeLong(THFile *self, THLongStorage *storage);\nTH_API size_t THFile_writeFloat(THFile *self, THFloatStorage *storage);\nTH_API size_t THFile_writeDouble(THFile *self, THDoubleStorage *storage);\n\n/* raw */\nTH_API size_t THFile_readByteRaw(THFile *self, unsigned char *data, size_t n);\nTH_API size_t THFile_readCharRaw(THFile *self, char *data, size_t n);\nTH_API size_t THFile_readShortRaw(THFile *self, short *data, size_t n);\nTH_API size_t THFile_readIntRaw(THFile *self, int *data, size_t n);\nTH_API size_t THFile_readLongRaw(THFile *self, long *data, size_t n);\nTH_API size_t THFile_readFloatRaw(THFile *self, float *data, size_t n);\nTH_API size_t THFile_readDoubleRaw(THFile *self, double *data, size_t n);\nTH_API size_t THFile_readStringRaw(THFile *self, const char *format, char **str_); /* you must deallocate str_ */\n\nTH_API size_t THFile_writeByteRaw(THFile *self, unsigned char *data, size_t n);\nTH_API size_t THFile_writeCharRaw(THFile *self, char *data, size_t n);\nTH_API size_t THFile_writeShortRaw(THFile *self, short *data, size_t n);\nTH_API size_t THFile_writeIntRaw(THFile *self, int *data, size_t n);\nTH_API size_t THFile_writeLongRaw(THFile *self, long *data, size_t n);\nTH_API size_t THFile_writeFloatRaw(THFile *self, float *data, size_t n);\nTH_API size_t THFile_writeDoubleRaw(THFile *self, double *data, size_t n);\nTH_API size_t THFile_writeStringRaw(THFile *self, const char *str, size_t size);\n\nTH_API THHalf THFile_readHalfScalar(THFile *self);\nTH_API void THFile_writeHalfScalar(THFile *self, THHalf scalar);\nTH_API size_t 
THFile_readHalf(THFile *self, THHalfStorage *storage);\nTH_API size_t THFile_writeHalf(THFile *self, THHalfStorage *storage);\nTH_API size_t THFile_readHalfRaw(THFile *self, THHalf* data, size_t size);\nTH_API size_t THFile_writeHalfRaw(THFile *self, THHalf* data, size_t size);\n\nTH_API void THFile_synchronize(THFile *self);\nTH_API void THFile_seek(THFile *self, size_t position);\nTH_API void THFile_seekEnd(THFile *self);\nTH_API size_t THFile_position(THFile *self);\nTH_API void THFile_close(THFile *self);\nTH_API void THFile_free(THFile *self);\n\n#endif\n"
  },
  {
    "path": "lib/TH/THFilePrivate.h",
    "content": "#include \"THGeneral.h\"\n\n#include \"THHalf.h\"\n\n\nstruct THFile__\n{\n    struct THFileVTable *vtable;\n\n    int isQuiet;\n    int isReadable;\n    int isWritable;\n    int isBinary;\n    int isAutoSpacing;\n    int hasError;\n};\n\n/* virtual table definition */\n\nstruct THFileVTable\n{\n    int (*isOpened)(THFile *self);\n\n    size_t (*readByte)(THFile *self, unsigned char *data, size_t n);\n    size_t (*readChar)(THFile *self, char *data, size_t n);\n    size_t (*readShort)(THFile *self, short *data, size_t n);\n    size_t (*readInt)(THFile *self, int *data, size_t n);\n    size_t (*readLong)(THFile *self, long *data, size_t n);\n    size_t (*readFloat)(THFile *self, float *data, size_t n);\n    size_t (*readDouble)(THFile *self, double *data, size_t n);\n    size_t (*readHalf)(THFile *self, THHalf *data, size_t n);\n    size_t (*readString)(THFile *self, const char *format, char **str_);\n\n    size_t (*writeByte)(THFile *self, unsigned char *data, size_t n);\n    size_t (*writeChar)(THFile *self, char *data, size_t n);\n    size_t (*writeShort)(THFile *self, short *data, size_t n);\n    size_t (*writeInt)(THFile *self, int *data, size_t n);\n    size_t (*writeLong)(THFile *self, long *data, size_t n);\n    size_t (*writeFloat)(THFile *self, float *data, size_t n);\n    size_t (*writeDouble)(THFile *self, double *data, size_t n);\n    size_t (*writeHalf)(THFile *self, THHalf *data, size_t n);\n    size_t (*writeString)(THFile *self, const char *str, size_t size);\n\n    void (*synchronize)(THFile *self);\n    void (*seek)(THFile *self, size_t position);\n    void (*seekEnd)(THFile *self);\n    size_t (*position)(THFile *self);\n    void (*close)(THFile *self);\n    void (*free)(THFile *self);\n};\n"
  },
  {
    "path": "lib/TH/THGeneral.c",
    "content": "#include \"THGeneral.h\"\n#include \"THAtomic.h\"\n\n#ifdef _OPENMP\n#include <omp.h>\n#endif\n\n#ifndef TH_HAVE_THREAD\n#define __thread\n#elif _MSC_VER\n#define __thread __declspec( thread )\n#endif\n\n#if (defined(__unix) || defined(_WIN32))\n  #if defined(__FreeBSD__)\n    #include <malloc_np.h>\n  #else\n    #include <malloc.h>\n  #endif\n#elif defined(__APPLE__)\n#include <malloc/malloc.h>\n#endif\n\n/* Torch Error Handling */\nstatic void defaultErrorHandlerFunction(const char *msg, void *data)\n{\n  printf(\"$ Error: %s\\n\", msg);\n  exit(-1);\n}\n\nstatic THErrorHandlerFunction defaultErrorHandler = defaultErrorHandlerFunction;\nstatic void *defaultErrorHandlerData;\nstatic __thread THErrorHandlerFunction threadErrorHandler = NULL;\nstatic __thread void *threadErrorHandlerData;\n\nvoid _THError(const char *file, const int line, const char *fmt, ...)\n{\n  char msg[2048];\n  va_list args;\n\n  /* vasprintf not standard */\n  /* vsnprintf: how to handle if does not exists? */\n  va_start(args, fmt);\n  int n = vsnprintf(msg, 2048, fmt, args);\n  va_end(args);\n\n  if(n < 2048) {\n    snprintf(msg + n, 2048 - n, \" at %s:%d\", file, line);\n  }\n\n  if (threadErrorHandler)\n    (*threadErrorHandler)(msg, threadErrorHandlerData);\n  else\n    (*defaultErrorHandler)(msg, defaultErrorHandlerData);\n}\n\nvoid _THAssertionFailed(const char *file, const int line, const char *exp, const char *fmt, ...) {\n  char msg[1024];\n  va_list args;\n  va_start(args, fmt);\n  vsnprintf(msg, 1024, fmt, args);\n  va_end(args);\n  _THError(file, line, \"Assertion `%s' failed. 
%s\", exp, msg);\n}\n\nvoid THSetErrorHandler(THErrorHandlerFunction new_handler, void *data)\n{\n  threadErrorHandler = new_handler;\n  threadErrorHandlerData = data;\n}\n\nvoid THSetDefaultErrorHandler(THErrorHandlerFunction new_handler, void *data)\n{\n  if (new_handler)\n    defaultErrorHandler = new_handler;\n  else\n    defaultErrorHandler = defaultErrorHandlerFunction;\n  defaultErrorHandlerData = data;\n}\n\n/* Torch Arg Checking Handling */\nstatic void defaultArgErrorHandlerFunction(int argNumber, const char *msg, void *data)\n{\n  if(msg)\n    printf(\"$ Invalid argument %d: %s\\n\", argNumber, msg);\n  else\n    printf(\"$ Invalid argument %d\\n\", argNumber);\n  exit(-1);\n}\n\nstatic THArgErrorHandlerFunction defaultArgErrorHandler = defaultArgErrorHandlerFunction;\nstatic void *defaultArgErrorHandlerData;\nstatic __thread THArgErrorHandlerFunction threadArgErrorHandler = NULL;\nstatic __thread void *threadArgErrorHandlerData;\n\nvoid _THArgCheck(const char *file, int line, int condition, int argNumber, const char *fmt, ...)\n{\n  if(!condition) {\n    char msg[2048];\n    va_list args;\n\n    /* vasprintf not standard */\n    /* vsnprintf: how to handle if does not exists? 
*/\n    va_start(args, fmt);\n    int n = vsnprintf(msg, 2048, fmt, args);\n    va_end(args);\n\n    if(n < 2048) {\n      snprintf(msg + n, 2048 - n, \" at %s:%d\", file, line);\n    }\n\n    if (threadArgErrorHandler)\n      (*threadArgErrorHandler)(argNumber, msg, threadArgErrorHandlerData);\n    else\n      (*defaultArgErrorHandler)(argNumber, msg, defaultArgErrorHandlerData);\n  }\n}\n\nvoid THSetArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data)\n{\n  threadArgErrorHandler = new_handler;\n  threadArgErrorHandlerData = data;\n}\n\nvoid THSetDefaultArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data)\n{\n  if (new_handler)\n    defaultArgErrorHandler = new_handler;\n  else\n    defaultArgErrorHandler = defaultArgErrorHandlerFunction;\n  defaultArgErrorHandlerData = data;\n}\n\nstatic __thread void (*torchGCFunction)(void *data) = NULL;\nstatic __thread void *torchGCData;\nstatic ptrdiff_t heapSize = 0;\nstatic __thread ptrdiff_t heapDelta = 0;\nstatic const ptrdiff_t heapMaxDelta = (ptrdiff_t)1e6; // limit to +/- 1MB before updating heapSize\nstatic const ptrdiff_t heapMinDelta = (ptrdiff_t)-1e6;\nstatic __thread ptrdiff_t heapSoftmax = (ptrdiff_t)3e8; // 300MB, adjusted upward dynamically\nstatic const double heapSoftmaxGrowthThresh = 0.8; // grow softmax if >80% max after GC\nstatic const double heapSoftmaxGrowthFactor = 1.4; // grow softmax by 40%\n\n/* Optional hook for integrating with a garbage-collected frontend.\n *\n * If torch is running with a garbage-collected frontend (e.g. Lua),\n * the GC isn't aware of TH-allocated memory so may not know when it\n * needs to run. These hooks trigger the GC to run in two cases:\n *\n * (1) When a memory allocation (malloc, realloc, ...) 
fails\n * (2) When the total TH-allocated memory hits a dynamically-adjusted\n *     soft maximum.\n */\nvoid THSetGCHandler( void (*torchGCFunction_)(void *data), void *data )\n{\n  torchGCFunction = torchGCFunction_;\n  torchGCData = data;\n}\n\n/* it is guaranteed the allocated size is not bigger than PTRDIFF_MAX */\nstatic ptrdiff_t getAllocSize(void *ptr) {\n#if defined(__unix) && defined(HAVE_MALLOC_USABLE_SIZE)\n  return malloc_usable_size(ptr);\n#elif defined(__APPLE__)\n  return malloc_size(ptr);\n#elif defined(_WIN32)\n  if(ptr) { return _msize(ptr); } else { return 0; }\n#else\n  return 0;\n#endif\n}\n\nstatic ptrdiff_t applyHeapDelta() {\n  ptrdiff_t oldHeapSize = THAtomicAddPtrdiff(&heapSize, heapDelta);\n#ifdef DEBUG\n  if (heapDelta > 0 && oldHeapSize > PTRDIFF_MAX - heapDelta)\n    THError(\"applyHeapDelta: heapSize(%td) + increased(%td) > PTRDIFF_MAX, heapSize overflow!\", oldHeapSize, heapDelta);\n  if (heapDelta < 0 && oldHeapSize < PTRDIFF_MIN - heapDelta)\n    THError(\"applyHeapDelta: heapSize(%td) + decreased(%td) < PTRDIFF_MIN, heapSize underflow!\", oldHeapSize, heapDelta);\n#endif\n  ptrdiff_t newHeapSize = oldHeapSize + heapDelta;\n  heapDelta = 0;\n  return newHeapSize;\n}\n\n/* (1) if the torch-allocated heap size exceeds the soft max, run GC\n * (2) if post-GC heap size exceeds 80% of the soft max, increase the\n *     soft max by 40%\n */\nstatic void maybeTriggerGC(ptrdiff_t curHeapSize) {\n  if (torchGCFunction && curHeapSize > heapSoftmax) {\n    torchGCFunction(torchGCData);\n\n    // ensure heapSize is accurate before updating heapSoftmax\n    ptrdiff_t newHeapSize = applyHeapDelta();\n\n    if (newHeapSize > heapSoftmax * heapSoftmaxGrowthThresh) {\n      heapSoftmax = (ptrdiff_t)(heapSoftmax * heapSoftmaxGrowthFactor);\n    }\n  }\n}\n\n// hooks into the TH heap tracking\nvoid THHeapUpdate(ptrdiff_t size) {\n#ifdef DEBUG\n  if (size > 0 && heapDelta > PTRDIFF_MAX - size)\n    THError(\"THHeapUpdate: heapDelta(%td) + 
increased(%td) > PTRDIFF_MAX, heapDelta overflow!\", heapDelta, size);\n  if (size < 0 && heapDelta < PTRDIFF_MIN - size)\n    THError(\"THHeapUpdate: heapDelta(%td) + decreased(%td) < PTRDIFF_MIN, heapDelta underflow!\", heapDelta, size);\n#endif\n\n  heapDelta += size;\n\n  // batch updates to global heapSize to minimize thread contention\n  if (heapDelta < heapMaxDelta && heapDelta > heapMinDelta) {\n    return;\n  }\n\n  ptrdiff_t newHeapSize = applyHeapDelta();\n\n  if (size > 0) {\n    maybeTriggerGC(newHeapSize);\n  }\n}\n\nstatic void* THAllocInternal(ptrdiff_t size)\n{\n  void *ptr;\n\n  if (size > 5120)\n  {\n#if (defined(__unix) || defined(__APPLE__)) && (!defined(DISABLE_POSIX_MEMALIGN))\n    if (posix_memalign(&ptr, 64, size) != 0)\n      ptr = NULL;\n/*\n#elif defined(_WIN32)\n    ptr = _aligned_malloc(size, 64);\n*/\n#else\n    ptr = malloc(size);\n#endif\n  }\n  else\n  {\n    ptr = malloc(size);\n  }\n\n  THHeapUpdate(getAllocSize(ptr));\n  return ptr;\n}\n\nvoid* THAlloc(ptrdiff_t size)\n{\n  void *ptr;\n\n  if(size < 0)\n    THError(\"$ Torch: invalid memory size -- maybe an overflow?\");\n\n  if(size == 0)\n    return NULL;\n\n  ptr = THAllocInternal(size);\n\n  if(!ptr && torchGCFunction) {\n    torchGCFunction(torchGCData);\n    ptr = THAllocInternal(size);\n  }\n\n  if(!ptr)\n    THError(\"$ Torch: not enough memory: you tried to allocate %dGB. Buy new RAM!\", size/1073741824);\n\n  return ptr;\n}\n\nvoid* THRealloc(void *ptr, ptrdiff_t size)\n{\n  if(!ptr)\n    return(THAlloc(size));\n\n  if(size == 0)\n  {\n    THFree(ptr);\n    return NULL;\n  }\n\n  if(size < 0)\n    THError(\"$ Torch: invalid memory size -- maybe an overflow?\");\n\n  ptrdiff_t oldSize = -getAllocSize(ptr);\n  void *newptr = realloc(ptr, size);\n\n  if(!newptr && torchGCFunction) {\n    torchGCFunction(torchGCData);\n    newptr = realloc(ptr, size);\n  }\n\n  if(!newptr)\n    THError(\"$ Torch: not enough memory: you tried to reallocate %dGB. 
Buy new RAM!\", size/1073741824);\n\n  // update heapSize only after successfully reallocated\n  THHeapUpdate(oldSize + getAllocSize(newptr));\n\n  return newptr;\n}\n\nvoid THFree(void *ptr)\n{\n  THHeapUpdate(-getAllocSize(ptr));\n  free(ptr);\n}\n\ndouble THLog1p(const double x)\n{\n#if (defined(_MSC_VER) || defined(__MINGW32__))\n  volatile double y = 1 + x;\n  return log(y) - ((y-1)-x)/y ;  /* cancels errors with IEEE arithmetic */\n#else\n  return log1p(x);\n#endif\n}\n\nvoid THSetNumThreads(int num_threads)\n{\n#ifdef _OPENMP\n  omp_set_num_threads(num_threads);\n#endif\n}\n\nint THGetNumThreads(void)\n{\n#ifdef _OPENMP\n  return omp_get_max_threads();\n#else\n  return 1;\n#endif\n}\n\nint THGetNumCores(void)\n{\n#ifdef _OPENMP\n  return omp_get_num_procs();\n#else\n  return 1;\n#endif\n}\n\n#ifdef TH_BLAS_MKL\nextern int mkl_get_max_threads(void);\n#endif\n\nTH_API void THInferNumThreads(void)\n{\n#if defined(_OPENMP) && defined(TH_BLAS_MKL)\n  // If we are using MKL an OpenMP make sure the number of threads match.\n  // Otherwise, MKL and our OpenMP-enabled functions will keep changing the\n  // size of the OpenMP thread pool, resulting in worse performance (and memory\n  // leaks in GCC 5.4)\n  omp_set_num_threads(mkl_get_max_threads());\n#endif\n}\n\nTH_API THDescBuff _THSizeDesc(const long *size, const long ndim) {\n  const int L = TH_DESC_BUFF_LEN;\n  THDescBuff buf;\n  char *str = buf.str;\n  int n = 0;\n  n += snprintf(str, L-n, \"[\");\n  int i;\n  for(i = 0; i < ndim; i++) {\n    if(n >= L) break;\n    n += snprintf(str+n, L-n, \"%ld\", size[i]);\n    if(i < ndim-1) {\n      n += snprintf(str+n, L-n, \" x \");\n    }\n  }\n  if(n < L - 2) {\n    snprintf(str+n, L-n, \"]\");\n  } else {\n    snprintf(str+L-5, 5, \"...]\");\n  }\n  return buf;\n}\n\n"
  },
  {
    "path": "lib/TH/THGeneral.h.in",
    "content": "#ifndef TH_GENERAL_INC\n#define TH_GENERAL_INC\n\n#include <stdlib.h>\n#include <stdio.h>\n#include <stdarg.h>\n#include <math.h>\n#include <limits.h>\n#include <float.h>\n#include <time.h>\n#include <string.h>\n#include <stddef.h>\n\n#cmakedefine USE_BLAS\n#cmakedefine USE_LAPACK\n#cmakedefine BLAS_F2C\n#cmakedefine BLAS_USE_CBLAS_DOT\n\n#ifdef __cplusplus\n# define TH_EXTERNC extern \"C\"\n#else\n# define TH_EXTERNC extern\n#endif\n\n#ifdef _WIN32\n# ifdef TH_EXPORTS\n#  define TH_API TH_EXTERNC __declspec(dllexport)\n# else\n#  define TH_API TH_EXTERNC __declspec(dllimport)\n# endif\n#else\n# define TH_API TH_EXTERNC\n#endif\n\n#ifndef M_PI\n# define M_PI 3.14159265358979323846\n#endif\n\n#ifndef TH_INDEX_BASE\n#define TH_INDEX_BASE 1\n#endif\n\ntypedef void (*THErrorHandlerFunction)(const char *msg, void *data);\ntypedef void (*THArgErrorHandlerFunction)(int argNumber, const char *msg, void *data);\n\n#define TH_DESC_BUFF_LEN 64\ntypedef struct {\n    char str[TH_DESC_BUFF_LEN];\n} THDescBuff;\n\n\nTH_API double THLog1p(const double x);\nTH_API THDescBuff _THSizeDesc(const long *size, const long ndim);\nTH_API void _THError(const char *file, const int line, const char *fmt, ...);\nTH_API void _THAssertionFailed(const char *file, const int line, const char *exp, const char *fmt, ...);\nTH_API void THSetErrorHandler(THErrorHandlerFunction new_handler, void *data);\nTH_API void THSetDefaultErrorHandler(THErrorHandlerFunction new_handler, void *data);\nTH_API void _THArgCheck(const char *file, int line, int condition, int argNumber, const char *fmt, ...);\nTH_API void THSetArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data);\nTH_API void THSetDefaultArgErrorHandler(THArgErrorHandlerFunction new_handler, void *data);\nTH_API void* THAlloc(ptrdiff_t size);\nTH_API void* THRealloc(void *ptr, ptrdiff_t size);\nTH_API void THFree(void *ptr);\nTH_API void THSetGCHandler( void (*torchGCHandlerFunction)(void *data), void *data );\n// this 
hook should only be called by custom allocator functions\nTH_API void THHeapUpdate(ptrdiff_t size);\nTH_API void THSetNumThreads(int num_threads);\nTH_API int THGetNumThreads(void);\nTH_API int THGetNumCores(void);\nTH_API void THInferNumThreads(void);\n\n#define THError(...) _THError(__FILE__, __LINE__, __VA_ARGS__)\n\n#define THCleanup(...) __VA_ARGS__\n\n#define THArgCheck(...)                                               \\\ndo {                                                                  \\\n  _THArgCheck(__FILE__, __LINE__, __VA_ARGS__);                       \\\n} while(0)\n\n#define THArgCheckWithCleanup(condition, cleanup, ...)                \\\ndo if (!(condition)) {                                                \\\n  cleanup                                                             \\\n  _THArgCheck(__FILE__, __LINE__, 0, __VA_ARGS__);                    \\\n} while(0)\n\n#define THAssert(exp)                                                 \\\ndo {                                                                  \\\n  if (!(exp)) {                                                       \\\n    _THAssertionFailed(__FILE__, __LINE__, #exp, \"\");                 \\\n  }                                                                   \\\n} while(0)\n\n#define THAssertMsg(exp, ...)                                         
\\\ndo {                                                                  \\\n  if (!(exp)) {                                                       \\\n    _THAssertionFailed(__FILE__, __LINE__, #exp, __VA_ARGS__);        \\\n  }                                                                   \\\n} while(0)\n\n#define TH_CONCAT_STRING_2(x,y) TH_CONCAT_STRING_2_EXPAND(x,y)\n#define TH_CONCAT_STRING_2_EXPAND(x,y) #x #y\n\n#define TH_CONCAT_STRING_3(x,y,z) TH_CONCAT_STRING_3_EXPAND(x,y,z)\n#define TH_CONCAT_STRING_3_EXPAND(x,y,z) #x #y #z\n\n#define TH_CONCAT_STRING_4(x,y,z,w) TH_CONCAT_STRING_4_EXPAND(x,y,z,w)\n#define TH_CONCAT_STRING_4_EXPAND(x,y,z,w) #x #y #z #w\n\n#define TH_CONCAT_2(x,y) TH_CONCAT_2_EXPAND(x,y)\n#define TH_CONCAT_2_EXPAND(x,y) x ## y\n\n#define TH_CONCAT_3(x,y,z) TH_CONCAT_3_EXPAND(x,y,z)\n#define TH_CONCAT_3_EXPAND(x,y,z) x ## y ## z\n\n#define TH_CONCAT_4_EXPAND(x,y,z,w) x ## y ## z ## w\n#define TH_CONCAT_4(x,y,z,w) TH_CONCAT_4_EXPAND(x,y,z,w)\n\n#define THMin(X, Y)  ((X) < (Y) ? (X) : (Y))\n#define THMax(X, Y)  ((X) > (Y) ? (X) : (Y))\n\n#if (defined(_MSC_VER) || defined(__MINGW32__))\n# define log1p(x) THLog1p(x)\n#define snprintf _snprintf\n#define popen _popen\n#define pclose _pclose\n#include <BaseTsd.h>\ntypedef SSIZE_T ssize_t;\n#endif\n\n#endif\n"
  },
  {
    "path": "lib/TH/THGenerateAllTypes.h",
    "content": "#ifndef TH_GENERIC_FILE\n#error \"You must define TH_GENERIC_FILE before including THGenerateAllTypes.h\"\n#endif\n\n#ifndef THGenerateManyTypes\n#define THAllLocalGenerateManyTypes\n#define THGenerateManyTypes\n#endif\n\n#include \"THGenerateFloatTypes.h\"\n#include \"THGenerateIntTypes.h\"\n\n#ifdef THAllLocalGenerateManyTypes\n#undef THAllLocalGenerateManyTypes\n#undef THGenerateManyTypes\n#undef TH_GENERIC_FILE\n#endif\n"
  },
  {
    "path": "lib/TH/THGenerateByteType.h",
    "content": "#ifndef TH_GENERIC_FILE\n#error \"You must define TH_GENERIC_FILE before including THGenerateByteType.h\"\n#endif\n\n#define real unsigned char\n#define accreal long\n#define Real Byte\n#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val)\n#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val)\n#define THInf UCHAR_MAX\n#define TH_REAL_IS_BYTE\n#line 1 TH_GENERIC_FILE\n#include TH_GENERIC_FILE\n#undef real\n#undef accreal\n#undef Real\n#undef THInf\n#undef TH_REAL_IS_BYTE\n#undef TH_CONVERT_REAL_TO_ACCREAL\n#undef TH_CONVERT_ACCREAL_TO_REAL\n\n#ifndef THGenerateManyTypes\n#undef TH_GENERIC_FILE\n#endif\n"
  },
  {
    "path": "lib/TH/THGenerateCharType.h",
    "content": "#ifndef TH_GENERIC_FILE\n#error \"You must define TH_GENERIC_FILE before including THGenerateCharType.h\"\n#endif\n\n#define real char\n#define accreal long\n#define Real Char\n#define THInf CHAR_MAX\n#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val)\n#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val)\n#define TH_REAL_IS_CHAR\n#line 1 TH_GENERIC_FILE\n#include TH_GENERIC_FILE\n#undef real\n#undef accreal\n#undef Real\n#undef THInf\n#undef TH_REAL_IS_CHAR\n#undef TH_CONVERT_REAL_TO_ACCREAL\n#undef TH_CONVERT_ACCREAL_TO_REAL\n\n#ifndef THGenerateManyTypes\n#undef TH_GENERIC_FILE\n#endif\n"
  },
  {
    "path": "lib/TH/THGenerateDoubleType.h",
    "content": "#ifndef TH_GENERIC_FILE\n#error \"You must define TH_GENERIC_FILE before including THGenerateDoubleType.h\"\n#endif\n\n#define real double\n#define accreal double\n#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val)\n#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val)\n#define Real Double\n#define THInf DBL_MAX\n#define TH_REAL_IS_DOUBLE\n#line 1 TH_GENERIC_FILE\n#include TH_GENERIC_FILE\n#undef accreal\n#undef real\n#undef Real\n#undef THInf\n#undef TH_REAL_IS_DOUBLE\n#undef TH_CONVERT_REAL_TO_ACCREAL\n#undef TH_CONVERT_ACCREAL_TO_REAL\n\n#ifndef THGenerateManyTypes\n#undef TH_GENERIC_FILE\n#endif\n"
  },
  {
    "path": "lib/TH/THGenerateFloatType.h",
    "content": "#ifndef TH_GENERIC_FILE\n#error \"You must define TH_GENERIC_FILE before including THGenerateFloatType.h\"\n#endif\n\n#define real float\n#define accreal double\n#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val)\n#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val)\n#define Real Float\n#define THInf FLT_MAX\n#define TH_REAL_IS_FLOAT\n#line 1 TH_GENERIC_FILE\n#include TH_GENERIC_FILE\n#undef accreal\n#undef real\n#undef Real\n#undef THInf\n#undef TH_REAL_IS_FLOAT\n#undef TH_CONVERT_REAL_TO_ACCREAL\n#undef TH_CONVERT_ACCREAL_TO_REAL\n\n#ifndef THGenerateManyTypes\n#undef TH_GENERIC_FILE\n#endif\n"
  },
  {
    "path": "lib/TH/THGenerateFloatTypes.h",
    "content": "#ifndef TH_GENERIC_FILE\n#error \"You must define TH_GENERIC_FILE before including THGenerateFloatTypes.h\"\n#endif\n\n#ifndef THGenerateManyTypes\n#define THFloatLocalGenerateManyTypes\n#define THGenerateManyTypes\n#endif\n\n#include \"THGenerateFloatType.h\"\n#include \"THGenerateDoubleType.h\"\n\n#ifdef THFloatLocalGenerateManyTypes\n#undef THFloatLocalGenerateManyTypes\n#undef THGenerateManyTypes\n#undef TH_GENERIC_FILE\n#endif\n"
  },
  {
    "path": "lib/TH/THGenerateHalfType.h",
    "content": "#ifndef TH_GENERIC_FILE\n#error \"You must define TH_GENERIC_FILE before including THGenerateHalfType.h\"\n#endif\n\n#include \"THHalf.h\"\n#define real THHalf\n#define accreal float\n#define TH_CONVERT_REAL_TO_ACCREAL(_val) TH_half2float(_val)\n#define TH_CONVERT_ACCREAL_TO_REAL(_val) TH_float2half(_val)\n#define Real Half\n#define THInf TH_HALF_BITS_TO_LITERAL(TH_HALF_INF)\n#define TH_REAL_IS_HALF\n#line 1 TH_GENERIC_FILE\n#include TH_GENERIC_FILE\n#undef real\n#undef accreal\n#undef Real\n#undef THInf\n#undef TH_REAL_IS_HALF\n#undef TH_CONVERT_REAL_TO_ACCREAL\n#undef TH_CONVERT_ACCREAL_TO_REAL\n\n#ifndef THGenerateManyTypes\n#undef TH_GENERIC_FILE\n#endif\n"
  },
  {
    "path": "lib/TH/THGenerateIntType.h",
    "content": "#ifndef TH_GENERIC_FILE\n#error \"You must define TH_GENERIC_FILE before including THGenerateIntType.h\"\n#endif\n\n#define real int\n#define accreal long\n#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val)\n#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val)\n#define Real Int\n#define THInf INT_MAX\n#define TH_REAL_IS_INT\n#line 1 TH_GENERIC_FILE\n#include TH_GENERIC_FILE\n#undef real\n#undef accreal\n#undef Real\n#undef THInf\n#undef TH_REAL_IS_INT\n#undef TH_CONVERT_REAL_TO_ACCREAL\n#undef TH_CONVERT_ACCREAL_TO_REAL\n\n#ifndef THGenerateManyTypes\n#undef TH_GENERIC_FILE\n#endif\n"
  },
  {
    "path": "lib/TH/THGenerateIntTypes.h",
    "content": "#ifndef TH_GENERIC_FILE\n#error \"You must define TH_GENERIC_FILE before including THGenerateIntTypes.h\"\n#endif\n\n#ifndef THGenerateManyTypes\n#define THIntLocalGenerateManyTypes\n#define THGenerateManyTypes\n#endif\n\n#include \"THGenerateByteType.h\"\n#include \"THGenerateCharType.h\"\n#include \"THGenerateShortType.h\"\n#include \"THGenerateIntType.h\"\n#include \"THGenerateLongType.h\"\n\n#ifdef THIntLocalGenerateManyTypes\n#undef THIntLocalGenerateManyTypes\n#undef THGenerateManyTypes\n#undef TH_GENERIC_FILE\n#endif\n"
  },
  {
    "path": "lib/TH/THGenerateLongType.h",
    "content": "#ifndef TH_GENERIC_FILE\n#error \"You must define TH_GENERIC_FILE before including THGenerateLongType.h\"\n#endif\n\n#define real long\n#define accreal long\n#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val)\n#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val)\n#define Real Long\n#define THInf LONG_MAX\n#define TH_REAL_IS_LONG\n#line 1 TH_GENERIC_FILE\n#include TH_GENERIC_FILE\n#undef real\n#undef accreal\n#undef Real\n#undef THInf\n#undef TH_REAL_IS_LONG\n#undef TH_CONVERT_REAL_TO_ACCREAL\n#undef TH_CONVERT_ACCREAL_TO_REAL\n\n#ifndef THGenerateManyTypes\n#undef TH_GENERIC_FILE\n#endif\n"
  },
  {
    "path": "lib/TH/THGenerateShortType.h",
    "content": "#ifndef TH_GENERIC_FILE\n#error \"You must define TH_GENERIC_FILE before including THGenerateShortType.h\"\n#endif\n\n#define real short\n#define accreal long\n#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val)\n#define TH_CONVERT_ACCREAL_TO_REAL(_val) (real)(_val)\n#define Real Short\n#define THInf SHRT_MAX\n#define TH_REAL_IS_SHORT\n#line 1 TH_GENERIC_FILE\n#include TH_GENERIC_FILE\n#undef real\n#undef accreal\n#undef Real\n#undef THInf\n#undef TH_REAL_IS_SHORT\n#undef TH_CONVERT_REAL_TO_ACCREAL\n#undef TH_CONVERT_ACCREAL_TO_REAL\n\n#ifndef THGenerateManyTypes\n#undef TH_GENERIC_FILE\n#endif\n"
  },
  {
    "path": "lib/TH/THHalf.c",
    "content": "#include \"THHalf.h\"\n\n#include <string.h>\n\n/* Copyright 1993-2014 NVIDIA Corporation.  All rights reserved. */\n\n/* Converts a float to a THHalf by filling h.x through TH_float2halfbits. */\nTHHalf TH_float2half(float f)\n{\n  THHalf h;\n  TH_float2halfbits(&f, &h.x);\n  return h;\n}\n\n/* Converts a THHalf to a float by expanding h.x through TH_halfbits2float. */\nTH_API float TH_half2float(THHalf h)\n{\n  float f;\n  TH_halfbits2float(&h.x, &f);\n  return f;\n}\n\n// Host functions for converting between FP32 and FP16 formats\n\n/* Expands the 16-bit half pattern at *src into the float at *res.\n   Fields read from *src: bit 15 sign, bits 14-10 exponent, bits 9-0 mantissa. */\nvoid TH_halfbits2float(unsigned short* src, float* res)\n{\n    unsigned h = *src;\n    unsigned sign = ((h >> 15) & 1);\n    unsigned exponent = ((h >> 10) & 0x1f);\n    unsigned mantissa = ((h & 0x3ff) << 13);\n    unsigned bits;\n\n    if (exponent == 0x1f) {  /* NaN or Inf */\n        mantissa = (mantissa ? (sign = 0, 0x7fffff) : 0);\n        exponent = 0xff;\n    } else if (!exponent) {  /* Denorm or Zero */\n        if (mantissa) {\n            unsigned int msb;\n            exponent = 0x71;\n            do {\n                msb = (mantissa & 0x400000);\n                mantissa <<= 1;  /* normalize */\n                --exponent;\n            } while (!msb);\n            mantissa &= 0x7fffff;  /* 1.mantissa is implicit */\n        }\n    } else {\n        exponent += 0x70;\n    }\n\n    bits = ((sign << 31) | (exponent << 23) | mantissa);\n    /* Store with memcpy: writing through *(unsigned*)res would access a float\n       object through an incompatible lvalue type (strict-aliasing violation). */\n    memcpy(res, &bits, sizeof(*res));\n}\n\n/* Packs the float at *src into a 16-bit half pattern stored at *dest,\n   rounding to nearest even. */\nvoid TH_float2halfbits(float* src, unsigned short* dest)\n{\n    unsigned x = 0;\n    unsigned u, remainder, shift, lsb, lsb_s1, lsb_m1;\n    unsigned sign, exponent, mantissa;\n\n    /* Load the float bits with memcpy instead of *(unsigned*)src, which\n       would violate strict aliasing. */\n    memcpy(&x, src, sizeof(*src));\n    u = (x & 0x7fffffff);\n\n    // Get rid of +NaN/-NaN case first.\n    if (u > 0x7f800000) {\n      *dest = 0x7fffU;\n      return;\n    }\n\n    sign = ((x >> 16) & 0x8000);\n\n    // Get rid of +Inf/-Inf, +0/-0.\n    if (u > 0x477fefff) {\n      *dest = sign | 0x7c00U;\n      return;\n    }\n    if (u < 0x33000001) {\n      *dest = (sign | 0x0000);\n      return;\n    }\n\n    exponent = ((u >> 23) & 0xff);\n    mantissa = (u & 0x7fffff);\n\n    if (exponent > 0x70) {\n        shift = 13;\n        exponent -= 0x70;\n    } else {\n        /* Result is a half denormal: shift further and make the implicit\n           leading 1 of the float mantissa explicit. */\n        shift = 0x7e - exponent;\n        exponent = 0;\n        mantissa |= 0x800000;\n    }\n    lsb = (1U << shift);\n    lsb_s1 = (lsb >> 1);\n    lsb_m1 = (lsb - 1);\n\n    // Round to nearest even.\n    remainder = (mantissa & lsb_m1);\n    mantissa >>= shift;\n    if (remainder > lsb_s1 || (remainder == lsb_s1 && (mantissa & 0x1))) {\n        ++mantissa;\n        if (!(mantissa & 0x3ff)) {\n            ++exponent;\n            mantissa = 0;\n        }\n    }\n\n    *dest = (sign | (exponent << 10) | mantissa);\n}\n"
  },
  {
    "path": "lib/TH/THHalf.h",
    "content": "#ifndef TH_HALF_H\n#define TH_HALF_H\n\n#include \"THGeneral.h\"\n#include <stdint.h>\n\n/* Neither built-in nor included from Cutorch, use our definition lifted from CUDA */\n#if defined(__GNUC__)\n#define __thalign__(n) __attribute__((aligned(n)))\n#elif defined(_WIN32)\n#define __thalign__(n) __declspec(align(n))\n#else\n#define __thalign__(n)\n#endif\n\ntypedef struct __thalign__(2){\n  unsigned short x;\n} __THHalf;\n\ntypedef struct __thalign__(4) {\n  unsigned int x;\n} __THHalf2;\n\ntypedef __THHalf THHalf;\ntypedef __THHalf2 THHalf2;\n\nTH_API void TH_float2halfbits(float*, unsigned short*);\nTH_API void TH_halfbits2float(unsigned short*, float*);\n\nTH_API THHalf TH_float2half(float);\nTH_API float  TH_half2float(THHalf);\n\n#ifndef TH_HALF_BITS_TO_LITERAL\n# define TH_HALF_BITS_TO_LITERAL(n) { n }\n#endif\n\n#define TH_HALF_ZERO 0x0U\n#define TH_HALF_INF  0x7C00U\n\n#undef __thalign__\n#endif\n"
  },
  {
    "path": "lib/TH/THLapack.c",
    "content": "#include \"THLapack.h\"\n\n#include \"generic/THLapack.c\"\n#include \"THGenerateFloatTypes.h\"\n"
  },
  {
    "path": "lib/TH/THLapack.h",
    "content": "#ifndef TH_LAPACK_INC\n#define TH_LAPACK_INC\n\n#include \"THGeneral.h\"\n\n#define THLapack_(NAME) TH_CONCAT_4(TH,Real,Lapack_,NAME)\n\n#define THLapackCheck(fmt, func, info , ...)\t\t\t\t\t\t\\\nif (info < 0) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n  THError(\"Lapack Error in %s : Illegal Argument %d\", func, -info); \\\n} else if(info > 0) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n  THError(fmt, func, info, ##__VA_ARGS__);\t\t\t\t\t\t\t\\\n}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n\n#define THLapackCheckWithCleanup(fmt, cleanup, func, info , ...)    \\\nif (info < 0) {                                                     \\\n  cleanup                                                           \\\n  THError(\"Lapack Error in %s : Illegal Argument %d\", func, -info); \\\n} else if(info > 0) {                                               \\\n  cleanup                                                           \\\n  THError(fmt, func, info, ##__VA_ARGS__);                          \\\n}\n\n#include \"generic/THLapack.h\"\n#include \"THGenerateAllTypes.h\"\n\n#endif\n"
  },
  {
    "path": "lib/TH/THLogAdd.c",
    "content": "#include \"THLogAdd.h\"\n\n#include <float.h>\n\n#ifdef USE_DOUBLE\n#define MINUS_LOG_THRESHOLD -39.14\n#else\n#define MINUS_LOG_THRESHOLD -18.42\n#endif\n\nconst double THLog2Pi=1.83787706640934548355;\nconst double THLogZero=-DBL_MAX;\nconst double THLogOne=0;\n\ndouble THLogAdd(double log_a, double log_b)\n{\n  double minusdif;\n\n  if (log_a < log_b)\n  {\n    double tmp = log_a;\n    log_a = log_b;\n    log_b = tmp;\n  }\n\n  minusdif = log_b - log_a;\n#ifdef DEBUG\n  if (isnan(minusdif))\n    THError(\"THLogAdd: minusdif (%f) log_b (%f) or log_a (%f) is nan\", minusdif, log_b, log_a);\n#endif\n  if (minusdif < MINUS_LOG_THRESHOLD)\n    return log_a;\n  else\n    return log_a + log1p(exp(minusdif));\n}\n\ndouble THLogSub(double log_a, double log_b)\n{\n  double minusdif;\n\n  if (log_a < log_b)\n    THError(\"LogSub: log_a (%f) should be greater than log_b (%f)\", log_a, log_b);\n\n  minusdif = log_b - log_a;\n#ifdef DEBUG\n  if (isnan(minusdif))\n    THError(\"LogSub: minusdif (%f) log_b (%f) or log_a (%f) is nan\", minusdif, log_b, log_a);\n#endif\n  if (log_a == log_b)\n    return THLogZero;\n  else if (minusdif < MINUS_LOG_THRESHOLD)\n    return log_a;\n  else\n    return log_a + log1p(-exp(minusdif));\n}\n\n/* Credits to Leon Bottou */\ndouble THExpMinusApprox(const double x)\n{\n#define EXACT_EXPONENTIAL 0\n#if EXACT_EXPONENTIAL\n  return exp(-x);\n#else\n  /* fast approximation of exp(-x) for x positive */\n# define A0   (1.0)\n# define A1   (0.125)\n# define A2   (0.0078125)\n# define A3   (0.00032552083)\n# define A4   (1.0172526e-5)\n  if (x < 13.0)\n  {\n/*    assert(x>=0); */\n    double y;\n    y = A0+x*(A1+x*(A2+x*(A3+x*A4)));\n    y *= y;\n    y *= y;\n    y *= y;\n    y = 1/y;\n    return y;\n  }\n  return 0;\n# undef A0\n# undef A1\n# undef A2\n# undef A3\n# undef A4\n#endif\n}\n"
  },
  {
    "path": "lib/TH/THLogAdd.h",
    "content": "#ifndef TH_LOG_ADD_INC\n#define TH_LOG_ADD_INC\n\n#include \"THGeneral.h\"\n\nTH_API const double THLog2Pi;\nTH_API const double THLogZero;\nTH_API const double THLogOne;\n\nTH_API double THLogAdd(double log_a, double log_b);\nTH_API double THLogSub(double log_a, double log_b);\nTH_API double THExpMinusApprox(const double x);\n\n#endif\n"
  },
  {
    "path": "lib/TH/THMath.h",
    "content": "#ifndef TH_MATH_INC\n#define TH_MATH_INC\n\n/* Scalar math helpers, each provided for double and (with an f suffix) for float. */\n\n/* exp, sqrt, trunc and their float variants are used below; include <math.h>\n   here so the header does not depend on what the includer pulled in first. */\n#include <math.h>\n\n/* Logistic sigmoid: 1 / (1 + exp(-value)). */\nstatic inline double TH_sigmoid(double value) {\n  return 1.0 / (1.0 + exp(-value));\n}\n\n/* Fractional part of x, computed as x - trunc(x), so it keeps the sign of x. */\nstatic inline double TH_frac(double x) {\n  return x - trunc(x);\n}\n\n/* Reciprocal square root: 1 / sqrt(x). */\nstatic inline double TH_rsqrt(double x) {\n  return 1.0 / sqrt(x);\n}\n\n/* Linear interpolation: returns a for weight 0 and b for weight 1. */\nstatic inline double TH_lerp(double a, double b, double weight) {\n  return a + weight * (b-a);\n}\n\n/* Single-precision counterparts of the helpers above. */\n\nstatic inline float TH_sigmoidf(float value) {\n  return 1.0f / (1.0f + expf(-value));\n}\n\nstatic inline float TH_fracf(float x) {\n  return x - truncf(x);\n}\n\nstatic inline float TH_rsqrtf(float x) {\n  return 1.0f / sqrtf(x);\n}\n\nstatic inline float TH_lerpf(float a, float b, float weight) {\n  return a + weight * (b-a);\n}\n\n#endif /* TH_MATH_INC */\n"
  },
  {
    "path": "lib/TH/THMemoryFile.c",
    "content": "#include \"THMemoryFile.h\"\n#include \"THFilePrivate.h\"\n#include \"stdint.h\"\n\ntypedef struct THMemoryFile__\n{\n    THFile file;\n    THCharStorage *storage;\n    size_t size;\n    size_t position;\n\tint longSize;\n\n} THMemoryFile;\n\nstatic int THMemoryFile_isOpened(THFile *self)\n{\n  THMemoryFile *mfself = (THMemoryFile*)self;\n  return (mfself->storage != NULL);\n}\n\nstatic char *THMemoryFile_strnextspace(char *str_, char *c_)\n{\n  char c;\n\n  while( (c = *str_) )\n  {\n    if( (c != ' ') && (c != '\\n') && (c != ':') && (c != ';') )\n      break;\n    str_++;\n  }\n\n  while( (c = *str_) )\n  {\n    if( (c == ' ') || (c == '\\n') || (c == ':') || (c == ';') )\n    {\n      *c_ = c;\n      *str_ = '\\0';\n      return(str_);\n    }\n    str_++;\n  }\n  return NULL;\n}\n\nstatic void THMemoryFile_grow(THMemoryFile *self, size_t size)\n{\n  size_t missingSpace;\n\n  if(size <= self->size)\n    return;\n  else\n  {\n    if(size < self->storage->size) /* note the \"<\" and not \"<=\" */\n    {\n      self->size = size;\n      self->storage->data[self->size] = '\\0';\n      return;\n    }\n  }\n\n  missingSpace = size-self->storage->size+1; /* +1 for the '\\0' */\n  THCharStorage_resize(self->storage, (self->storage->size/2 > missingSpace ?\n                                       self->storage->size + (self->storage->size/2)\n                                       : self->storage->size + missingSpace));\n}\n\nstatic int THMemoryFile_mode(const char *mode, int *isReadable, int *isWritable)\n{\n  *isReadable = 0;\n  *isWritable = 0;\n  if(strlen(mode) == 1)\n  {\n    if(*mode == 'r')\n    {\n      *isReadable = 1;\n      return 1;\n    }\n    else if(*mode == 'w')\n    {\n      *isWritable = 1;\n      return 1;\n    }\n  }\n  else if(strlen(mode) == 2)\n  {\n    if(mode[0] == 'r' && mode[1] == 'w')\n    {\n      *isReadable = 1;\n      *isWritable = 1;\n      return 1;\n    }\n  }\n  return 
0;\n}\n\n/********************************************************/\n\n#define READ_WRITE_METHODS(TYPE, TYPEC, ASCII_READ_ELEM, ASCII_WRITE_ELEM, INSIDE_SPACING) \\\n  static size_t THMemoryFile_read##TYPEC(THFile *self, TYPE *data, size_t n) \\\n  {                                                                     \\\n    THMemoryFile *mfself = (THMemoryFile*)self;                         \\\n    size_t nread = 0;                                                    \\\n                                                                        \\\n    THArgCheck(mfself->storage != NULL, 1, \"attempt to use a closed file\");     \\\n    THArgCheck(mfself->file.isReadable, 1, \"attempt to read in a write-only file\"); \\\n                                                                        \\\n    if (n == 0)                                                         \\\n        return 0;                                                       \\\n                                                                        \\\n    if(mfself->file.isBinary)                                           \\\n    {                                                                   \\\n      size_t nByte = sizeof(TYPE)*n;                                      \\\n      size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? 
nByte : mfself->size-mfself->position); \\\n      nread = nByteRemaining/sizeof(TYPE);                              \\\n      memmove(data, mfself->storage->data+mfself->position, nread*sizeof(TYPE)); \\\n      mfself->position += nread*sizeof(TYPE);                           \\\n    }                                                                   \\\n    else                                                                \\\n    {                                                                   \\\n      size_t i;                                                           \\\n      for(i = 0; i < n; i++)                                            \\\n      {                                                                 \\\n        size_t nByteRead = 0;                                             \\\n        char spaceChar = 0;                                             \\\n        char *spacePtr = THMemoryFile_strnextspace(mfself->storage->data+mfself->position, &spaceChar); \\\n        ASCII_READ_ELEM;                                                \\\n        if(ret == EOF)                                                  \\\n        {                                                               \\\n          while(mfself->storage->data[mfself->position])                \\\n            mfself->position++;                                         \\\n        }                                                               \\\n        else                                                            \\\n          mfself->position += nByteRead;                                \\\n        if(spacePtr)                                                    \\\n          *spacePtr = spaceChar;                                        \\\n      }                                                                 \\\n      if(mfself->file.isAutoSpacing && (n > 0))                         \\\n      {                                                                 \\\n        
if( (mfself->position < mfself->size) && (mfself->storage->data[mfself->position] == '\\n') ) \\\n          mfself->position++;                                           \\\n      }                                                                 \\\n    }                                                                   \\\n                                                                        \\\n    if(nread != n)                                                      \\\n    {                                                                   \\\n      mfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? */ \\\n      if(!mfself->file.isQuiet)                                         \\\n        THError(\"read error: read %d blocks instead of %d\", nread, n);  \\\n    }                                                                   \\\n                                                                        \\\n    return nread;                                                       \\\n  }                                                                     \\\n                                                                        \\\n  static size_t THMemoryFile_write##TYPEC(THFile *self, TYPE *data, size_t n) \\\n  {                                                                     \\\n    THMemoryFile *mfself = (THMemoryFile*)self;                         \\\n                                                                        \\\n    THArgCheck(mfself->storage != NULL, 1, \"attempt to use a closed file\");     \\\n    THArgCheck(mfself->file.isWritable, 1, \"attempt to write in a read-only file\"); \\\n                                                                        \\\n    if (n == 0)                                                         \\\n        return 0;                                                       \\\n                                                                        \\\n    if(mfself->file.isBinary)    
                                       \\\n    {                                                                   \\\n      size_t nByte = sizeof(TYPE)*n;                                      \\\n      THMemoryFile_grow(mfself, mfself->position+nByte);                \\\n      memmove(mfself->storage->data+mfself->position, data, nByte);     \\\n      mfself->position += nByte;                                        \\\n      if(mfself->position > mfself->size)                               \\\n      {                                                                 \\\n        mfself->size = mfself->position;                                \\\n        mfself->storage->data[mfself->size] = '\\0';                     \\\n      }                                                                 \\\n    }                                                                   \\\n    else                                                                \\\n    {                                                                   \\\n      size_t i;                                                           \\\n      for(i = 0; i < n; i++)                                            \\\n      {                                                                 \\\n        ssize_t nByteWritten;                                           \\\n        while (1)                                                       \\\n        {                                                               \\\n          ASCII_WRITE_ELEM;                                             \\\n          if( (nByteWritten > -1) && (nByteWritten < mfself->storage->size-mfself->position) ) \\\n          {                                                             \\\n            mfself->position += nByteWritten;                           \\\n            break;                                                      \\\n          }                                                             \\\n          
THMemoryFile_grow(mfself, mfself->storage->size + (mfself->storage->size/2) + 2); \\\n        }                                                               \\\n        if(mfself->file.isAutoSpacing)                                  \\\n        {                                                               \\\n          if(i < n-1)                                                   \\\n          {                                                             \\\n            THMemoryFile_grow(mfself, mfself->position+1);              \\\n            sprintf(mfself->storage->data+mfself->position, \" \");       \\\n            mfself->position++;                                         \\\n          }                                                             \\\n          if(i == n-1)                                                  \\\n          {                                                             \\\n            THMemoryFile_grow(mfself, mfself->position+1);              \\\n            sprintf(mfself->storage->data+mfself->position, \"\\n\");      \\\n            mfself->position++;                                         \\\n          }                                                             \\\n        }                                                               \\\n      }                                                                 \\\n      if(mfself->position > mfself->size)                               \\\n      {                                                                 \\\n        mfself->size = mfself->position;                                \\\n        mfself->storage->data[mfself->size] = '\\0';                     \\\n      }                                                                 \\\n    }                                                                   \\\n                                                                        \\\n    return n;                                                           \\\n  
}\n\n\nvoid THMemoryFile_longSize(THFile *self, int size)\n{\n  THMemoryFile *dfself = (THMemoryFile*)(self);\n  THArgCheck(size == 0 || size == 4 || size == 8, 1, \"Invalid long size specified\");\n  dfself->longSize = size;\n}\n\nTHCharStorage *THMemoryFile_storage(THFile *self)\n{\n  THMemoryFile *mfself = (THMemoryFile*)self;\n  THArgCheck(mfself->storage != NULL, 1, \"attempt to use a closed file\");\n\n  THCharStorage_resize(mfself->storage, mfself->size+1);\n\n  return mfself->storage;\n}\n\nstatic void THMemoryFile_synchronize(THFile *self)\n{\n  THMemoryFile *mfself = (THMemoryFile*)self;\n  THArgCheck(mfself->storage != NULL, 1, \"attempt to use a closed file\");\n}\n\nstatic void THMemoryFile_seek(THFile *self, size_t position)\n{\n  THMemoryFile *mfself = (THMemoryFile*)self;\n\n  THArgCheck(mfself->storage != NULL, 1, \"attempt to use a closed file\");\n  THArgCheck(position >= 0, 2, \"position must be positive\");\n\n  if(position <= mfself->size)\n    mfself->position = position;\n  else\n  {\n    mfself->file.hasError = 1;\n    if(!mfself->file.isQuiet)\n      THError(\"unable to seek at position %zu\", position);\n  }\n}\n\nstatic void THMemoryFile_seekEnd(THFile *self)\n{\n  THMemoryFile *mfself = (THMemoryFile*)self;\n  THArgCheck(mfself->storage != NULL, 1, \"attempt to use a closed file\");\n\n  mfself->position = mfself->size;\n}\n\nstatic size_t THMemoryFile_position(THFile *self)\n{\n  THMemoryFile *mfself = (THMemoryFile*)self;\n  THArgCheck(mfself->storage != NULL, 1, \"attempt to use a closed file\");\n  return mfself->position;\n}\n\nstatic void THMemoryFile_close(THFile *self)\n{\n  THMemoryFile *mfself = (THMemoryFile*)self;\n  THArgCheck(mfself->storage != NULL, 1, \"attempt to use a closed file\");\n  THCharStorage_free(mfself->storage);\n  mfself->storage = NULL;\n}\n\nstatic void THMemoryFile_free(THFile *self)\n{\n  THMemoryFile *mfself = (THMemoryFile*)self;\n\n  if(mfself->storage)\n    THCharStorage_free(mfself->storage);\n\n  
THFree(mfself);\n}\n\n/* READ_WRITE_METHODS(bool, Bool, */\n/*                    int value = 0; int ret = sscanf(mfself->storage->data+mfself->position, \"%d%n\", &value, &nByteRead); data[i] = (value ? 1 : 0), */\n/*                    int value = (data[i] ? 1 : 0); nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, \"%d\", value), */\n/*                    1) */\n\nREAD_WRITE_METHODS(unsigned char, Byte,\n                   size_t ret = (mfself->position + n <= mfself->size ? n : mfself->size-mfself->position);  \\\n                   if(spacePtr) *spacePtr = spaceChar; \\\n                   nByteRead = ret; \\\n                   nread = ret; \\\n                   i = n-1; \\\n                   memmove(data, mfself->storage->data+mfself->position, nByteRead),\n                   nByteWritten = (n < mfself->storage->size-mfself->position ? n : -1); \\\n                   i = n-1; \\\n                   if(nByteWritten > -1)\n                     memmove(mfself->storage->data+mfself->position, data, nByteWritten),\n                   0)\n\n/* DEBUG: we should check if %n is count or not as a element (so ret might need to be ret-- on some systems) */\n/* Note that we do a trick for char */\nREAD_WRITE_METHODS(char, Char,\n                   size_t ret = (mfself->position + n <= mfself->size ? n : mfself->size-mfself->position);  \\\n                   if(spacePtr) *spacePtr = spaceChar; \\\n                   nByteRead = ret; \\\n                   nread = ret; \\\n                   i = n-1; \\\n                   memmove(data, mfself->storage->data+mfself->position, nByteRead),\n                   nByteWritten = (n < mfself->storage->size-mfself->position ? 
n : -1); \\\n                   i = n-1; \\\n                   if(nByteWritten > -1)\n                     memmove(mfself->storage->data+mfself->position, data, nByteWritten),\n                   0)\n\nREAD_WRITE_METHODS(short, Short,\n                   int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, \"%hd%n\", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++,\n                   nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, \"%hd\", data[i]),\n                   1)\n\nREAD_WRITE_METHODS(int, Int,\n                   int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, \"%d%n\", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++,\n                   nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, \"%d\", data[i]),\n                   1)\n\nREAD_WRITE_METHODS(float, Float,\n                   int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, \"%g%n\", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++,\n                   nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, \"%.9g\", data[i]),\n                   1)\n\nREAD_WRITE_METHODS(THHalf, Half,\n                   int nByteRead_; float buf; \\\n                   int ret = sscanf(mfself->storage->data+mfself->position, \"%g%n\", &buf, &nByteRead_); \\\n                   data[i] = TH_float2half(buf); nByteRead = nByteRead_; if(ret <= 0) break; else nread++,\n                   nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, \"%.9g\", TH_half2float(data[i])),\n                   1)\n\nREAD_WRITE_METHODS(double, Double,\n                   int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, \"%lg%n\", &data[i], 
&nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++,\n                   nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, \"%.17g\", data[i]),\n                   1)\n\nint THDiskFile_isLittleEndianCPU(void);\n\nstatic size_t THMemoryFile_readLong(THFile *self, long *data, size_t n)\n{\n  THMemoryFile *mfself = (THMemoryFile*)self;\n  size_t nread = 0L;\n\n  THArgCheck(mfself->storage != NULL, 1, \"attempt to use a closed file\");\n  THArgCheck(mfself->file.isReadable, 1, \"attempt to read in a write-only file\");\n\n  if (n == 0)\n    return 0;\n\n  if(mfself->file.isBinary)\n  {\n    if(mfself->longSize == 0 || mfself->longSize == sizeof(long))\n    {\n      size_t nByte = sizeof(long)*n;\n      size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? nByte : mfself->size-mfself->position);\n      nread = nByteRemaining/sizeof(long);\n      memmove(data, mfself->storage->data+mfself->position, nread*sizeof(long));\n      mfself->position += nread*sizeof(long);\n    } else if(mfself->longSize == 4)\n    {\n      size_t nByte = 4*n;\n      size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? nByte : mfself->size-mfself->position);\n      int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position);\n      nread = nByteRemaining/4;\n      size_t i;\n      for(i = 0; i < nread; i++)\n        data[i] = storage[i];\n      mfself->position += nread*4;\n    }\n    else /* if(mfself->longSize == 8) */\n    {\n      int big_endian = !THDiskFile_isLittleEndianCPU();\n      size_t nByte = 8*n;\n      int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position);\n      size_t nByteRemaining = (mfself->position + nByte <= mfself->size ? 
nByte : mfself->size-mfself->position);\n      nread = nByteRemaining/8;\n      size_t i;\n      for(i = 0; i < nread; i++)\n        data[i] = storage[2*i + big_endian];\n      mfself->position += nread*8;\n    }\n  }\n  else\n  {\n    size_t i;\n    for(i = 0; i < n; i++)\n    {\n      size_t nByteRead = 0;\n      char spaceChar = 0;\n      char *spacePtr = THMemoryFile_strnextspace(mfself->storage->data+mfself->position, &spaceChar);\n      int nByteRead_; int ret = sscanf(mfself->storage->data+mfself->position, \"%ld%n\", &data[i], &nByteRead_); nByteRead = nByteRead_; if(ret <= 0) break; else nread++;\n      if(ret == EOF)\n      {\n        while(mfself->storage->data[mfself->position])\n          mfself->position++;\n      }\n      else\n        mfself->position += nByteRead;\n      if(spacePtr)\n        *spacePtr = spaceChar;\n    }\n    if(mfself->file.isAutoSpacing && (n > 0))\n    {\n      if( (mfself->position < mfself->size) && (mfself->storage->data[mfself->position] == '\\n') )\n        mfself->position++;\n    }\n  }\n\n  if(nread != n)\n  {\n    mfself->file.hasError = 1; /* shouldn't we put hasError to 0 all the time ? 
*/\n    if(!mfself->file.isQuiet)\n      THError(\"read error: read %d blocks instead of %d\", nread, n);\n  }\n\n  return nread;\n}\n\nstatic size_t THMemoryFile_writeLong(THFile *self, long *data, size_t n)\n{\n  THMemoryFile *mfself = (THMemoryFile*)self;\n\n  THArgCheck(mfself->storage != NULL, 1, \"attempt to use a closed file\");\n  THArgCheck(mfself->file.isWritable, 1, \"attempt to write in a read-only file\");\n\n  if (n == 0)\n    return 0;\n\n  if(mfself->file.isBinary)\n  {\n    if(mfself->longSize == 0 || mfself->longSize == sizeof(long))\n    {\n      size_t nByte = sizeof(long)*n;\n      THMemoryFile_grow(mfself, mfself->position+nByte);\n      memmove(mfself->storage->data+mfself->position, data, nByte);\n      mfself->position += nByte;\n    } else if(mfself->longSize == 4)\n    {\n      size_t nByte = 4*n;\n      THMemoryFile_grow(mfself, mfself->position+nByte);\n      int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position);\n      size_t i;\n      for(i = 0; i < n; i++)\n        storage[i] = data[i];\n      mfself->position += nByte;\n    }\n    else /* if(mfself->longSize == 8) */\n    {\n      int big_endian = !THDiskFile_isLittleEndianCPU();\n      size_t nByte = 8*n;\n      THMemoryFile_grow(mfself, mfself->position+nByte);\n      int32_t *storage = (int32_t *)(mfself->storage->data + mfself->position);\n      size_t i;\n      for(i = 0; i < n; i++)\n      {\n        storage[2*i + !big_endian] = 0;\n        storage[2*i + big_endian] = data[i];\n      }\n      mfself->position += nByte;\n    }\n    if(mfself->position > mfself->size)\n    {\n      mfself->size = mfself->position;\n      mfself->storage->data[mfself->size] = '\\0';\n    }\n  }\n  else\n  {\n    size_t i;\n    for(i = 0; i < n; i++)\n    {\n      ssize_t nByteWritten;\n      while (1)\n      {\n        nByteWritten = snprintf(mfself->storage->data+mfself->position, mfself->storage->size-mfself->position, \"%ld\", data[i]);\n        if( (nByteWritten > -1) && 
(nByteWritten < mfself->storage->size-mfself->position) )\n        {\n          mfself->position += nByteWritten;\n          break;\n        }\n        THMemoryFile_grow(mfself, mfself->storage->size + (mfself->storage->size/2) + 2);\n      }\n      if(mfself->file.isAutoSpacing)\n      {\n        if(i < n-1)\n        {\n          THMemoryFile_grow(mfself, mfself->position+1);\n          sprintf(mfself->storage->data+mfself->position, \" \");\n          mfself->position++;\n        }\n        if(i == n-1)\n        {\n          THMemoryFile_grow(mfself, mfself->position+1);\n          sprintf(mfself->storage->data+mfself->position, \"\\n\");\n          mfself->position++;\n        }\n      }\n    }\n    if(mfself->position > mfself->size)\n    {\n      mfself->size = mfself->position;\n      mfself->storage->data[mfself->size] = '\\0';\n    }\n  }\n\n  return n;\n}\n\nstatic char* THMemoryFile_cloneString(const char *str, ptrdiff_t size)\n{\n  char *cstr = THAlloc(size);\n  memcpy(cstr, str, size);\n  return cstr;\n}\n\nstatic size_t THMemoryFile_readString(THFile *self, const char *format, char **str_)\n{\n  THMemoryFile *mfself = (THMemoryFile*)self;\n\n  THArgCheck(mfself->storage != NULL, 1, \"attempt to use a closed file\");\n  THArgCheck(mfself->file.isReadable, 1, \"attempt to read in a write-only file\");\n  THArgCheck((strlen(format) >= 2 ? (format[0] == '*') && (format[1] == 'a' || format[1] == 'l') : 0), 2, \"format must be '*a' or '*l'\");\n\n  if(mfself->position == mfself->size) /* eof ? 
*/\n  {\n    mfself->file.hasError = 1;\n    if(!mfself->file.isQuiet)\n      THError(\"read error: read 0 blocks instead of 1\");\n\n    *str_ = NULL;\n    return 0;\n  }\n\n  if(format[1] == 'a')\n  {\n    size_t str_size = mfself->size-mfself->position;\n\n    *str_ = THMemoryFile_cloneString(mfself->storage->data+mfself->position, str_size);\n    mfself->position = mfself->size;\n\n    return str_size;\n  }\n  else\n  {\n    char *p = mfself->storage->data+mfself->position;\n    int eolFound = 0;\n    size_t posEol;\n    size_t i;\n    for(i = 0; i < mfself->size-mfself->position; i++)\n    {\n      if(p[i] == '\\n')\n      {\n        posEol = i;\n        eolFound = 1;\n        break;\n      }\n    }\n\n    if(eolFound)\n    {\n      *str_ = THMemoryFile_cloneString(mfself->storage->data+mfself->position, posEol);\n      mfself->position += posEol+1;\n      return posEol;\n    }\n    else /* well, we read all! */\n    {\n      size_t str_size = mfself->size-mfself->position;\n\n      *str_ = THMemoryFile_cloneString(mfself->storage->data+mfself->position, str_size);\n      mfself->position = mfself->size;\n\n      return str_size;\n    }\n  }\n\n  *str_ = NULL;\n  return 0;\n}\n\nstatic size_t THMemoryFile_writeString(THFile *self, const char *str, size_t size)\n{\n  THMemoryFile *mfself = (THMemoryFile*)self;\n\n  THArgCheck(mfself->storage != NULL, 1, \"attempt to use a closed file\");\n  THArgCheck(mfself->file.isWritable, 1, \"attempt to write in a read-only file\");\n\n  THMemoryFile_grow(mfself, mfself->position+size);\n  memmove(mfself->storage->data+mfself->position, str, size);\n  mfself->position += size;\n  if(mfself->position > mfself->size)\n  {\n    mfself->size = mfself->position;\n    mfself->storage->data[mfself->size] = '\\0';\n  }\n\n  return size;\n}\n\nTHFile *THMemoryFile_newWithStorage(THCharStorage *storage, const char *mode)\n{\n  static struct THFileVTable vtable = {\n    THMemoryFile_isOpened,\n\n    THMemoryFile_readByte,\n    
THMemoryFile_readChar,\n    THMemoryFile_readShort,\n    THMemoryFile_readInt,\n    THMemoryFile_readLong,\n    THMemoryFile_readFloat,\n    THMemoryFile_readDouble,\n    THMemoryFile_readHalf,\n    THMemoryFile_readString,\n\n    THMemoryFile_writeByte,\n    THMemoryFile_writeChar,\n    THMemoryFile_writeShort,\n    THMemoryFile_writeInt,\n    THMemoryFile_writeLong,\n    THMemoryFile_writeFloat,\n    THMemoryFile_writeDouble,\n    THMemoryFile_writeHalf,\n    THMemoryFile_writeString,\n\n    THMemoryFile_synchronize,\n    THMemoryFile_seek,\n    THMemoryFile_seekEnd,\n    THMemoryFile_position,\n    THMemoryFile_close,\n    THMemoryFile_free\n  };\n\n  THMemoryFile *mfself;\n  int isReadable;\n  int isWritable;\n\n  if(storage)\n  {\n    THArgCheck(storage->data[storage->size-1] == '\\0', 1, \"provided CharStorage must be terminated by 0\");\n    THArgCheck(THMemoryFile_mode(mode, &isReadable, &isWritable), 2, \"file mode should be 'r','w' or 'rw'\");\n    THCharStorage_retain(storage);\n  }\n  else\n  {\n    THArgCheck(THMemoryFile_mode(mode, &isReadable, &isWritable), 2, \"file mode should be 'r','w' or 'rw'\");\n    storage = THCharStorage_newWithSize(1);\n    storage->data[0] = '\\0';\n  }\n\n  mfself = THAlloc(sizeof(THMemoryFile));\n\n  mfself->storage = storage;\n  mfself->size = (storage ? storage->size-1 : 0);\n  mfself->position = 0;\n  mfself->longSize = 0;\n\n  mfself->file.vtable = &vtable;\n  mfself->file.isQuiet = 0;\n  mfself->file.isReadable = isReadable;\n  mfself->file.isWritable = isWritable;\n  mfself->file.isBinary = 0;\n  mfself->file.isAutoSpacing = 1;\n  mfself->file.hasError = 0;\n\n  return (THFile*)mfself;\n}\n\nTHFile *THMemoryFile_new(const char *mode)\n{\n  return THMemoryFile_newWithStorage(NULL, mode);\n}\n"
  },
  {
    "path": "lib/TH/THMemoryFile.h",
    "content": "#ifndef TH_MEMORY_FILE_INC\n#define TH_MEMORY_FILE_INC\n\n#include \"THFile.h\"\n#include \"THStorage.h\"\n\nTH_API THFile *THMemoryFile_newWithStorage(THCharStorage *storage, const char *mode);\nTH_API THFile *THMemoryFile_new(const char *mode);\n\nTH_API THCharStorage *THMemoryFile_storage(THFile *self);\nTH_API void THMemoryFile_longSize(THFile *self, int size);\n\n#endif\n"
  },
  {
    "path": "lib/TH/THRandom.c",
    "content": "#include \"THGeneral.h\"\n#include \"THRandom.h\"\n\n#ifndef _WIN32\n#include <fcntl.h>\n#include <unistd.h>\n#endif\n\n/* Code for the Mersenne Twister random generator.... */\n#define n _MERSENNE_STATE_N\n#define m _MERSENNE_STATE_M\n\n/* Creates (unseeded) new generator*/\nstatic THGenerator* THGenerator_newUnseeded()\n{\n  THGenerator *self = THAlloc(sizeof(THGenerator));\n  memset(self, 0, sizeof(THGenerator));\n  self->left = 1;\n  self->seeded = 0;\n  self->normal_is_valid = 0;\n  return self;\n}\n\n/* Creates new generator and makes sure it is seeded*/\nTHGenerator* THGenerator_new()\n{\n  THGenerator *self = THGenerator_newUnseeded();\n  THRandom_seed(self);\n  return self;\n}\n\nTHGenerator* THGenerator_copy(THGenerator *self, THGenerator *from)\n{\n    memcpy(self, from, sizeof(THGenerator));\n    return self;\n}\n\nvoid THGenerator_free(THGenerator *self)\n{\n  THFree(self);\n}\n\nint THGenerator_isValid(THGenerator *_generator)\n{\n  if ((_generator->seeded == 1) &&\n    (_generator->left > 0 && _generator->left <= n) && (_generator->next <= n))\n    return 1;\n\n  return 0;\n}\n\n#ifndef _WIN32\nstatic unsigned long readURandomLong()\n{\n  int randDev = open(\"/dev/urandom\", O_RDONLY);\n  unsigned long randValue;\n  if (randDev < 0) {\n    THError(\"Unable to open /dev/urandom\");\n  }\n  ssize_t readBytes = read(randDev, &randValue, sizeof(randValue));\n  if (readBytes < sizeof(randValue)) {\n    THError(\"Unable to read from /dev/urandom\");\n  }\n  close(randDev);\n  return randValue;\n}\n#endif // _WIN32\n\nunsigned long THRandom_seed(THGenerator *_generator)\n{\n#ifdef _WIN32\n  unsigned long s = (unsigned long)time(0);\n#else\n  unsigned long s = readURandomLong();\n#endif\n  THRandom_manualSeed(_generator, s);\n  return s;\n}\n\n/* The next 4 methods are taken from http:www.math.keio.ac.jpmatumotoemt.html\n   Here is the copyright:\n   Some minor modifications have been made to adapt to \"my\" C... 
*/\n\n/*\n   A C-program for MT19937, with initialization improved 2002/2/10.\n   Coded by Takuji Nishimura and Makoto Matsumoto.\n   This is a faster version by taking Shawn Cokus's optimization,\n   Matthe Bellew's simplification, Isaku Wada's double version.\n\n   Before using, initialize the state by using init_genrand(seed)\n   or init_by_array(init_key, key_length).\n\n   Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,\n   All rights reserved.\n\n   Redistribution and use in source and binary forms, with or without\n   modification, are permitted provided that the following conditions\n   are met:\n\n     1. Redistributions of source code must retain the above copyright\n        notice, this list of conditions and the following disclaimer.\n\n     2. Redistributions in binary form must reproduce the above copyright\n        notice, this list of conditions and the following disclaimer in the\n        documentation and/or other materials provided with the distribution.\n\n     3. The names of its contributors may not be used to endorse or promote\n        products derived from this software without specific prior written\n        permission.\n\n   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n   A PARTICULAR PURPOSE ARE DISCLAIMED.  
IN NO EVENT SHALL THE COPYRIGHT OWNER OR\n   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,\n   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\n   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR\n   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF\n   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING\n   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\n   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\n\n   Any feedback is very welcome.\n   http://www.math.keio.ac.jp/matumoto/emt.html\n   email: matumoto@math.keio.ac.jp\n*/\n\n/* Macros for the Mersenne Twister random generator... */\n/* Period parameters */\n/* #define n 624 */\n/* #define m 397 */\n#define MATRIX_A 0x9908b0dfUL   /* constant vector a */\n#define UMASK 0x80000000UL /* most significant w-r bits */\n#define LMASK 0x7fffffffUL /* least significant r bits */\n#define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) )\n#define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1UL ? MATRIX_A : 0UL))\n/*********************************************************** That's it. */\n\nvoid THRandom_manualSeed(THGenerator *_generator, unsigned long the_seed_)\n{\n  int j;\n\n  /* This ensures reseeding resets all of the state (i.e. state for Gaussian numbers) */\n  THGenerator *blank = THGenerator_newUnseeded();\n  THGenerator_copy(_generator, blank);\n  THGenerator_free(blank);\n\n  _generator->the_initial_seed = the_seed_;\n  _generator->state[0] = _generator->the_initial_seed & 0xffffffffUL;\n  for(j = 1; j < n; j++)\n  {\n    _generator->state[j] = (1812433253UL * (_generator->state[j-1] ^ (_generator->state[j-1] >> 30)) + j);\n    /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */\n    /* In the previous versions, mSBs of the seed affect   */\n    /* only mSBs of the array state[].                        
*/\n    /* 2002/01/09 modified by makoto matsumoto             */\n    _generator->state[j] &= 0xffffffffUL;  /* for >32 bit machines */\n  }\n  _generator->left = 1;\n  _generator->seeded = 1;\n}\n\nunsigned long THRandom_initialSeed(THGenerator *_generator)\n{\n  return _generator->the_initial_seed;\n}\n\nvoid THRandom_nextState(THGenerator *_generator)\n{\n  unsigned long *p = _generator->state;\n  int j;\n\n  _generator->left = n;\n  _generator->next = 0;\n\n  for(j = n-m+1; --j; p++)\n    *p = p[m] ^ TWIST(p[0], p[1]);\n\n  for(j = m; --j; p++)\n    *p = p[m-n] ^ TWIST(p[0], p[1]);\n\n  *p = p[m-n] ^ TWIST(p[0], _generator->state[0]);\n}\n\nunsigned long THRandom_random(THGenerator *_generator)\n{\n  unsigned long y;\n\n  if (--(_generator->left) == 0)\n    THRandom_nextState(_generator);\n  y = *(_generator->state + (_generator->next)++);\n\n  /* Tempering */\n  y ^= (y >> 11);\n  y ^= (y << 7) & 0x9d2c5680UL;\n  y ^= (y << 15) & 0xefc60000UL;\n  y ^= (y >> 18);\n\n  return y;\n}\n\n/* generates a random number on [0,1)-double-interval */\nstatic double __uniform__(THGenerator *_generator)\n{\n  /* divided by 2^32 */\n  return (double)THRandom_random(_generator) * (1.0/4294967296.0);\n}\n\n/*********************************************************\n\n Thanks *a lot* Takuji Nishimura and Makoto Matsumoto!\n\n Now my own code...\n\n*********************************************************/\n\ndouble THRandom_uniform(THGenerator *_generator, double a, double b)\n{\n  return(__uniform__(_generator) * (b - a) + a);\n}\n\ndouble THRandom_normal(THGenerator *_generator, double mean, double stdv)\n{\n  THArgCheck(stdv > 0, 2, \"standard deviation must be strictly positive\");\n\n  /* This is known as the Box-Muller method */\n  if(!_generator->normal_is_valid)\n  {\n    _generator->normal_x = __uniform__(_generator);\n    _generator->normal_y = __uniform__(_generator);\n    _generator->normal_rho = sqrt(-2. 
* log(1.0-_generator->normal_y));\n    _generator->normal_is_valid = 1;\n  }\n  else\n    _generator->normal_is_valid = 0;\n\n  if(_generator->normal_is_valid)\n    return _generator->normal_rho*cos(2.*M_PI*_generator->normal_x)*stdv+mean;\n  else\n    return _generator->normal_rho*sin(2.*M_PI*_generator->normal_x)*stdv+mean;\n}\n\ndouble THRandom_exponential(THGenerator *_generator, double lambda)\n{\n  return(-1. / lambda * log(1-__uniform__(_generator)));\n}\n\ndouble THRandom_cauchy(THGenerator *_generator, double median, double sigma)\n{\n  return(median + sigma * tan(M_PI*(__uniform__(_generator)-0.5)));\n}\n\n/* Faut etre malade pour utiliser ca.\n   M'enfin. */\ndouble THRandom_logNormal(THGenerator *_generator, double mean, double stdv)\n{\n  THArgCheck(stdv > 0, 2, \"standard deviation must be strictly positive\");\n  return(exp(THRandom_normal(_generator, mean, stdv)));\n}\n\nint THRandom_geometric(THGenerator *_generator, double p)\n{\n  THArgCheck(p > 0 && p < 1, 1, \"must be > 0 and < 1\");\n  return((int)(log(1-__uniform__(_generator)) / log(p)) + 1);\n}\n\nint THRandom_bernoulli(THGenerator *_generator, double p)\n{\n  THArgCheck(p >= 0 && p <= 1, 1, \"must be >= 0 and <= 1\");\n  return(__uniform__(_generator) <= p);\n}\n"
  },
  {
    "path": "lib/TH/THRandom.h",
    "content": "#ifndef TH_RANDOM_INC\n#define TH_RANDOM_INC\n\n#include \"THGeneral.h\"\n\n#define _MERSENNE_STATE_N 624\n#define _MERSENNE_STATE_M 397\n/* A THGenerator contains all the state required for a single random number stream */\ntypedef struct THGenerator {\n  /* The initial seed. */\n  unsigned long the_initial_seed;\n  int left;  /* = 1; */\n  int seeded; /* = 0; */\n  unsigned long next;\n  unsigned long state[_MERSENNE_STATE_N]; /* the array for the state vector  */\n  /********************************/\n\n  /* For normal distribution */\n  double normal_x;\n  double normal_y;\n  double normal_rho;\n  int normal_is_valid; /* = 0; */\n} THGenerator;\n\n#define torch_Generator \"torch.Generator\"\n\n/* Manipulate THGenerator objects */\nTH_API THGenerator * THGenerator_new(void);\nTH_API THGenerator * THGenerator_copy(THGenerator *self, THGenerator *from);\nTH_API void THGenerator_free(THGenerator *gen);\n\n/* Checks if given generator is valid */\nTH_API int THGenerator_isValid(THGenerator *_generator);\n\n/* Initializes the random number generator from /dev/urandom (or on Windows\nplatforms with the current time (granularity: seconds)) and returns the seed. */\nTH_API unsigned long THRandom_seed(THGenerator *_generator);\n\n/* Initializes the random number generator with the given long \"the_seed_\". */\nTH_API void THRandom_manualSeed(THGenerator *_generator, unsigned long the_seed_);\n\n/* Returns the starting seed used. */\nTH_API unsigned long THRandom_initialSeed(THGenerator *_generator);\n\n/* Generates a uniform 32 bits integer. */\nTH_API unsigned long THRandom_random(THGenerator *_generator);\n\n/* Generates a uniform random number on [0,1[. 
*/\nTH_API double THRandom_uniform(THGenerator *_generator, double a, double b);\n\n/** Generates a random number from a normal distribution.\n    (With mean #mean# and standard deviation #stdv > 0#).\n*/\nTH_API double THRandom_normal(THGenerator *_generator, double mean, double stdv);\n\n/** Generates a random number from an exponential distribution.\n    The density is $p(x) = lambda * exp(-lambda * x)$, where\n    lambda is a positive number.\n*/\nTH_API double THRandom_exponential(THGenerator *_generator, double lambda);\n\n/** Returns a random number from a Cauchy distribution.\n    The Cauchy density is $p(x) = sigma/(pi*(sigma^2 + (x-median)^2))$\n*/\nTH_API double THRandom_cauchy(THGenerator *_generator, double median, double sigma);\n\n/** Generates a random number from a log-normal distribution.\n    (#mean# and #stdv > 0# are the mean and standard deviation of the\n    underlying normal distribution, i.e. of the logarithm of the result).\n*/\nTH_API double THRandom_logNormal(THGenerator *_generator, double mean, double stdv);\n\n/** Generates a random number from a geometric distribution.\n    It returns an integer #i#, where $p(i) = (1-p) * p^(i-1)$.\n    p must satisfy $0 < p < 1$.\n*/\nTH_API int THRandom_geometric(THGenerator *_generator, double p);\n\n/* Returns true with probability $p$ and false with probability $1-p$ ($0 <= p <= 1$). */\nTH_API int THRandom_bernoulli(THGenerator *_generator, double p);\n#endif\n"
  },
  {
    "path": "lib/TH/THSize.c",
    "content": "#include \"THSize.h\"\n\nint THSize_isSameSizeAs(const long *sizeA, long dimsA, const long *sizeB, long dimsB) {\n  int d;\n  if (dimsA != dimsB)\n    return 0;\n  for(d = 0; d < dimsA; ++d)\n  {\n    if(sizeA[d] != sizeB[d])\n      return 0;\n  }\n  return 1;\n}\n\nptrdiff_t THSize_nElement(long dims, long *size) {\n  if(dims == 0)\n    return 0;\n  else\n  {\n    ptrdiff_t nElement = 1;\n    int d;\n    for(d = 0; d < dims; d++)\n      nElement *= size[d];\n    return nElement;\n  }\n}\n"
  },
  {
    "path": "lib/TH/THSize.h",
    "content": "#ifndef TH_SIZE_INC\n#define TH_SIZE_INC\n\n#include \"THGeneral.h\"\n#include <stddef.h>\n\n// THTensor functions that would work on a THSize if we had such a class in C++,\n// i.e. THTensor functions that depend only on the shape of the tensor, not the type.\n\nTH_API int THSize_isSameSizeAs(const long *sizeA, long dimsA, const long *sizeB, long dimsB);\nTH_API ptrdiff_t THSize_nElement(long dims, long *size);\n\n#endif\n"
  },
  {
    "path": "lib/TH/THStorage.c",
    "content": "#include \"THAtomic.h\"\n#include \"THStorage.h\"\n\n#include \"generic/THStorage.c\"\n#include \"THGenerateAllTypes.h\"\n\n#include \"generic/THStorage.c\"\n#include \"THGenerateHalfType.h\"\n\n#include \"generic/THStorageCopy.c\"\n#include \"THGenerateAllTypes.h\"\n\n#include \"generic/THStorageCopy.c\"\n#include \"THGenerateHalfType.h\"\n\n\nTHDescBuff THLongStorage_sizeDesc(const THLongStorage *size) {\n  return _THSizeDesc(size->data, size->size);\n}\n\nTHLongStorage *THLongStorage_newInferSize(THLongStorage *size, ptrdiff_t nElement)\n{\n  ptrdiff_t total_size = (size->size > 0 ? 1 : 0);\n  ptrdiff_t dim_infer = -1;\n  ptrdiff_t i;\n  for (i = 0; i < size->size; i++) {\n    if (size->data[i] == -1) {\n      THArgCheck(dim_infer == -1, 1, \"only one dimension can be inferred\");\n      dim_infer = i;\n    } else {\n      total_size *= size->data[i];\n    }\n  }\n  if (dim_infer != -1) {\n    THDescBuff buf = THLongStorage_sizeDesc(size);\n    THArgCheck(total_size > 0 && nElement % total_size == 0, 2,\n        \"size '%s' is invalid for input with %td elements\", buf.str, nElement);\n  } else {\n    THDescBuff buf = THLongStorage_sizeDesc(size);\n    THArgCheck(nElement == total_size, 2,\n        \"size '%s' is invalid for input with %td elements\", buf.str, nElement);\n  }\n  THLongStorage* copy = THLongStorage_newWithSize(size->size);\n  THLongStorage_copy(copy, size);\n  if (dim_infer != -1) {\n    copy->data[dim_infer] = nElement / total_size;\n  }\n  return copy;\n}\n\nint THLongStorage_inferSize2(THLongStorage *output, long *sizesA, long dimsA, long *sizesB, long dimsB,\n                             char *error_buffer, int buffer_len) {\n  THArgCheck(sizesA != NULL, 1, \"sizesA must not be null\");\n  THArgCheck(sizesB != NULL, 2, \"sizesB must not be null\");\n  THArgCheck(dimsA, 1, \"Can't expand empty tensor a\");\n  THArgCheck(dimsB, 1, \"Can't expand empty tensor b\");\n  ptrdiff_t ndim = dimsA > dimsB ? 
dimsA : dimsB;\n\n  long *expandedSizes = THAlloc(sizeof(long)*ndim);\n\n  for (long i = ndim - 1; i >= 0; --i) {\n    long offset = ndim - 1 - i;\n    long dimA = dimsA - 1 - offset;\n    long dimB = dimsB - 1 - offset;\n    long sizeA = (dimA >= 0) ? sizesA[dimA] : 1;\n    long sizeB = (dimB >= 0) ? sizesB[dimB] : 1;\n    if (sizeA == sizeB || sizeA == 1 || sizeB == 1) {\n      expandedSizes[i] = THMax(sizeA, sizeB);\n    } else {\n      THFree(expandedSizes);\n      snprintf(error_buffer, buffer_len, \"The size of tensor a (%ld) must match the size of tensor b (%ld) at \"\n               \"non-singleton dimension %ld.\", sizeA, sizeB, i);\n      return -1;\n    }\n  }\n  THLongStorage_resize(output, ndim);\n  memcpy(THLongStorage_data(output), expandedSizes, sizeof(long)*ndim);\n  THFree(expandedSizes);\n  return 0;\n}\n\nint THLongStorage_inferSizeN(THLongStorage *output, int n, long **sizes, long *dims,\n                             char *error_buffer, int buffer_len) {\n  THArgCheck(n > 0, 2, \"n must be greater than 0\");\n  THArgCheck(sizes != NULL, 1, \"sizes must not be null\");\n  THArgCheck(dims != NULL, 1, \"dims must not be null\");\n\n  ptrdiff_t ndim = 0;\n  for (int j = 0; j < n; ++j) {\n    THArgCheck(sizes[ j ] != NULL, 1, \"size %d must not be null\", j);\n    THArgCheck(dims[ j ], 1, \"Can't expand empty tensor %d\", j);\n    ndim = dims[ j ] > ndim ? dims[ j ] : ndim;\n  }\n\n  long *expandedSizes = THAlloc(sizeof(long)*ndim);\n\n  for (long i = ndim - 1; i >= 0; --i) {\n    expandedSizes[ i ] = 1;\n    long offset = ndim - 1 - i;\n    for (int j  = 0; j < n; ++j) {\n      long dim = dims[ j ] - 1 - offset;\n      long size = (dim >= 0) ? 
sizes[ j ][ dim ] : 1;\n      if (size == expandedSizes[ i ] || size == 1 || expandedSizes[ i ] == 1) {\n        expandedSizes[ i ] =  THMax(expandedSizes[ i ], size);\n      } else {\n        THFree(expandedSizes);\n        snprintf(error_buffer, buffer_len, \"The size of tensor %i (%ld) must match the expanded size \"\n                 \"of tensor (%ld) at non-singleton dimension %ld.\", j, size, expandedSizes[ i ], i);\n        return -1;\n      }\n    }\n  }\n  THLongStorage_resize(output, ndim);\n  memcpy(THLongStorage_data(output), expandedSizes, sizeof(long)*ndim);\n  THFree(expandedSizes);\n  return 0;\n}\n\nint THLongStorage_inferExpandGeometry(long *tensorSizes, long *tensorStrides, long tensorDim,\n                                        THLongStorage *sizes, long **expandedSizes, long **expandedStrides,\n                                        char *error_buffer, int buffer_len) {\n  ptrdiff_t ndim = THLongStorage_size(sizes);\n\n  long *expandedSizesCalc = THAlloc(sizeof(long)*ndim);\n  long *expandedStridesCalc = THAlloc(sizeof(long)*ndim);\n\n  // create a new geometry for the tensors\n  for (long i = ndim - 1; i >= 0; --i) {\n    long offset = ndim - 1 - i;\n    long dim = tensorDim - 1 - offset;\n    long size = (dim >= 0) ? 
tensorSizes[dim] : 1;\n    long stride = (dim >= 0) ?\n        tensorStrides[dim] : expandedSizesCalc[i + 1] * expandedStridesCalc[i+1];\n    long targetSize = THLongStorage_data(sizes)[i];\n    if (targetSize == -1) {\n      if (dim < 0) {\n        THFree(expandedSizesCalc);\n        THFree(expandedStridesCalc);\n        snprintf(error_buffer, buffer_len, \"The expanded size of the tensor (%ld) isn't allowed in a leading, non-existing dimension %ld.\", targetSize, i);\n        return -1;\n      } else {\n        targetSize = size;\n      }\n    }\n    if (size != targetSize) {\n      if (size == 1) {\n        size = targetSize;\n        stride = 0;\n      } else {\n        THFree(expandedSizesCalc);\n        THFree(expandedStridesCalc);\n        snprintf(error_buffer, buffer_len, \"The expanded size of the tensor (%ld) must match the existing size (%ld) at \"\n                 \"non-singleton dimension %ld.\", targetSize, size, i);\n        return -1;\n      }\n    }\n    expandedSizesCalc[i] = size;\n    expandedStridesCalc[i] = stride;\n  }\n  *expandedSizes = expandedSizesCalc;\n  *expandedStrides = expandedStridesCalc;\n  return 0;\n}\n"
  },
  {
    "path": "lib/TH/THStorage.h",
    "content": "#ifndef TH_STORAGE_INC\n#define TH_STORAGE_INC\n\n#include \"THGeneral.h\"\n#include \"THAllocator.h\"\n\n#define THStorage        TH_CONCAT_3(TH,Real,Storage)\n#define THStorage_(NAME) TH_CONCAT_4(TH,Real,Storage_,NAME)\n\n/* fast access methods */\n#define TH_STORAGE_GET(storage, idx) ((storage)->data[(idx)])\n#define TH_STORAGE_SET(storage, idx, value) ((storage)->data[(idx)] = (value))\n\n#include \"generic/THStorage.h\"\n#include \"THGenerateAllTypes.h\"\n\n#include \"generic/THStorage.h\"\n#include \"THGenerateHalfType.h\"\n\n#include \"generic/THStorageCopy.h\"\n#include \"THGenerateAllTypes.h\"\n\n#include \"generic/THStorageCopy.h\"\n#include \"THGenerateHalfType.h\"\n\nTH_API THDescBuff THLongStorage_sizeDesc(const THLongStorage *size);\nTH_API THLongStorage *THLongStorage_newInferSize(THLongStorage *size, ptrdiff_t nElement);\n\n// Given the sizes of {2,N} tensors, write out the size when the tensors are expanded together.\nTH_API int THLongStorage_inferSize2(THLongStorage *output, long *sizesA, long dimsA,\n                                    long *sizesB, long dimsB, char *error_buffer, int buffer_len);\nTH_API int THLongStorage_inferSizeN(THLongStorage *output, int n, long **sizes, long *dims,\n                                    char *error_buffer, int buffer_len);\n\nTH_API int THLongStorage_inferExpandGeometry(long *tensorSizes, long *tensorStrides, long tensorDim,\n                                             THLongStorage *sizes, long **expandedSizes, long **expandedStrides,\n                                             char *error_buffer, int buffer_len);\n\n#endif\n"
  },
  {
    "path": "lib/TH/THTensor.c",
    "content": "#include \"THAtomic.h\"\n#include \"THTensor.h\"\n#include \"THVector.h\"\n#include \"generic/simd/simd.h\"\n\n#include \"THBlas.h\"\n#include \"THLapack.h\"\n#include \"THRandom.h\"\n#include \"THTensorDimApply.h\"\n#include \"THMath.h\"\n\n#include \"generic/THTensor.c\"\n#include \"THGenerateAllTypes.h\"\n\n#include \"generic/THTensor.c\"\n#include \"THGenerateHalfType.h\"\n\n#include \"generic/THTensorCopy.c\"\n#include \"THGenerateAllTypes.h\"\n\n#include \"generic/THTensorCopy.c\"\n#include \"THGenerateHalfType.h\"\n\n#include \"generic/THTensorRandom.c\"\n#include \"THGenerateAllTypes.h\"\n\n#include \"generic/THTensorMath.c\"\n#include \"THGenerateAllTypes.h\"\n\n#include \"generic/THTensorConv.c\"\n#include \"THGenerateAllTypes.h\"\n\n#include \"generic/THTensorLapack.c\"\n#include \"THGenerateFloatTypes.h\"\n"
  },
  {
    "path": "lib/TH/THTensor.h",
    "content": "#ifndef TH_TENSOR_INC\n#define TH_TENSOR_INC\n\n#include \"THStorage.h\"\n#include \"THTensorApply.h\"\n\n#define THTensor          TH_CONCAT_3(TH,Real,Tensor)\n#define THTensor_(NAME)   TH_CONCAT_4(TH,Real,Tensor_,NAME)\n\n/* basics */\n#include \"generic/THTensor.h\"\n#include \"THGenerateAllTypes.h\"\n\n#include \"generic/THTensor.h\"\n#include \"THGenerateHalfType.h\"\n\n#include \"generic/THTensorCopy.h\"\n#include \"THGenerateAllTypes.h\"\n\n#include \"generic/THTensorCopy.h\"\n#include \"THGenerateHalfType.h\"\n\n#include \"THTensorMacros.h\"\n\n/* random numbers */\n#include \"THRandom.h\"\n#include \"generic/THTensorRandom.h\"\n#include \"THGenerateAllTypes.h\"\n\n/* maths */\n#include \"generic/THTensorMath.h\"\n#include \"THGenerateAllTypes.h\"\n\n/* convolutions */\n#include \"generic/THTensorConv.h\"\n#include \"THGenerateAllTypes.h\"\n\n/* lapack support */\n#include \"generic/THTensorLapack.h\"\n#include \"THGenerateFloatTypes.h\"\n\n#endif\n"
  },
  {
    "path": "lib/TH/THTensorApply.h",
    "content": "#ifndef TH_TENSOR_APPLY_INC\n#define TH_TENSOR_APPLY_INC\n\n/*\n * The basic strategy for apply is as follows:\n *\n * 1. Starting with the outermost index, loop until we reach a dimension where the\n * data is no longer contiguous, i.e. the stride at that dimension is not equal to\n * the size of the tensor defined by the outer dimensions. Let's call this outer\n * (contiguous) tensor A. Note that if the Tensor is contiguous, then A is equal\n * to the entire Tensor. Let's call the inner tensor B.\n *\n * 2. We loop through the indices in B, starting at its outermost dimension. For\n * example, if B is a 2x2 matrix, then we do:\n *\n * B[0][0]\n * B[0][1]\n * B[1][0]\n * B[1][1]\n *\n * We set the offset into the underlying storage as (storageOffset + stride_B * index_B),\n * i.e. basically we compute the offset into the storage as we would normally for a\n * Tensor. But because we are guaranteed the subsequent data is contiguous in memory, we\n * can simply loop for sizeof(A) iterations and perform the operation, without having to\n * follow the order described by the strides of A.\n *\n * 3. As an optimization, we merge dimensions of A that are contiguous in memory. For\n * example, if A is a 3x3x3x3 tensor narrowed from a 3x3x4x3 tensor, then the first two\n * dimensions can be merged for the purposes of APPLY, reducing the number of nested\n * loops.\n */\n\n#define __TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, DIM, ALLOW_CONTIGUOUS) \\\n  TYPE *TENSOR##_data = NULL; \\\n  long *TENSOR##_counter = NULL, *TENSOR##_sizes = NULL, *TENSOR##_strides = NULL, *TENSOR##_dimOffset = NULL; \\\n  long TENSOR##_stride = 0, TENSOR##_size = 0, TENSOR##_dim = 0, TENSOR##_i, TENSOR##_n; \\\n  int TENSOR##_contiguous = ALLOW_CONTIGUOUS && DIM < 0; \\\n  TENSOR##_n = (TENSOR->nDimension ? 
1 : 0); \\\n  for(TENSOR##_i = 0; TENSOR##_i < TENSOR->nDimension; TENSOR##_i++) \\\n    TENSOR##_n *= TENSOR->size[TENSOR##_i]; \\\n\\\n  if(TENSOR->nDimension == 0) \\\n    TH_TENSOR_APPLY_hasFinished = 1; \\\n  else \\\n  { \\\n    TENSOR##_data = TENSOR->storage->data+TENSOR->storageOffset; \\\n    TENSOR##_size = 1; \\\n    TENSOR##_stride = 1; \\\n    for(TENSOR##_i = TENSOR->nDimension-1; TENSOR##_i >= 0; TENSOR##_i--) { \\\n      if(TENSOR->size[TENSOR##_i] != 1) { \\\n        if(TENSOR->stride[TENSOR##_i] == TENSOR##_size && TENSOR##_i != DIM) \\\n          TENSOR##_size *= TENSOR->size[TENSOR##_i]; \\\n        else{ \\\n          TENSOR##_contiguous = 0; \\\n          break; \\\n        } \\\n      } \\\n    } \\\n    if (!TENSOR##_contiguous) { \\\n      /* Find the dimension of contiguous sections */ \\\n      TENSOR##_dim = 1; \\\n      for(TENSOR##_i = TENSOR->nDimension-2; TENSOR##_i >= 0; TENSOR##_i--) \\\n      { \\\n        if(TENSOR->stride[TENSOR##_i] != TENSOR->stride[TENSOR##_i+1] * TENSOR->size[TENSOR##_i+1] || TENSOR##_i == DIM || TENSOR##_i+1 == DIM) \\\n          TENSOR##_dim++; \\\n      } \\\n      /* Allocate an array of 3*dim elements, where dim is the number of contiguous sections */ \\\n      TENSOR##_counter = (long*)THAlloc(sizeof(long)*(3*TENSOR##_dim)); \\\n      TENSOR##_sizes = TENSOR##_counter + TENSOR##_dim; \\\n      TENSOR##_strides = TENSOR##_counter + 2*TENSOR##_dim; \\\n      TH_TENSOR_dim_index = TENSOR##_dim-1; \\\n      TENSOR##_dimOffset = (DIM == TENSOR->nDimension-1) ? &TENSOR##_i : &TENSOR##_counter[DIM]; \\\n      TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size[TENSOR->nDimension-1]; \\\n      TENSOR##_strides[TH_TENSOR_dim_index] = TENSOR->stride[TENSOR->nDimension-1]; \\\n      /* TENSOR##_counter tracks where we are in the storage. The offset into the */ \\\n      /* storage is given by storage_offset + (i * j), where i is the stride */ \\\n      /* vector and j is tensor_counter vector. 
This sets the starting position for the loop. */ \\\n      for(TENSOR##_i = TENSOR##_dim-1; TENSOR##_i >= 0; --TENSOR##_i) { \\\n        TENSOR##_counter[TENSOR##_i] = 0; \\\n      } \\\n      for(TENSOR##_i = TENSOR->nDimension-2; TENSOR##_i >= 0; --TENSOR##_i) { \\\n        if (TENSOR->stride[TENSOR##_i] == TENSOR->stride[TENSOR##_i+1] * TENSOR->size[TENSOR##_i+1] && TENSOR##_i != DIM && TENSOR##_i+1 != DIM) { \\\n          TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size[TENSOR##_i] * TENSOR##_sizes[TH_TENSOR_dim_index]; \\\n          if (DIM != TENSOR->nDimension-1 && TENSOR##_i < DIM) \\\n            TENSOR##_dimOffset--; \\\n        } else { \\\n          --TH_TENSOR_dim_index; \\\n          TENSOR##_sizes[TH_TENSOR_dim_index] = TENSOR->size[TENSOR##_i]; \\\n          TENSOR##_strides[TH_TENSOR_dim_index] = TENSOR->stride[TENSOR##_i]; \\\n        } \\\n      } \\\n      /* Size of the inner most section */ \\\n      TENSOR##_size = TENSOR##_sizes[TENSOR##_dim-1]; \\\n      /* Stride of the inner most section */ \\\n      TENSOR##_stride = TENSOR##_strides[TENSOR##_dim-1]; \\\n    } \\\n  } \\\n  TENSOR##_i = 0;\n\n#define  __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR, ALWAYS_UPDATE) \\\n  if(TENSOR##_i == TENSOR##_size || ALWAYS_UPDATE) \\\n  { \\\n    if(TENSOR##_contiguous) \\\n      break; \\\n\\\n    if(TENSOR##_dim == 1) \\\n       break; \\\n\\\n    /* Reset pointer to beginning of loop */ \\\n    TENSOR##_data -= TENSOR##_size*TENSOR##_stride; \\\n    for(TENSOR##_i = TENSOR##_dim-2; TENSOR##_i >= 0; TENSOR##_i--) \\\n    { \\\n      TENSOR##_counter[TENSOR##_i]++; \\\n      /* Jump ahead by the stride of this dimension */ \\\n      TENSOR##_data += TENSOR##_strides[TENSOR##_i]; \\\n\\\n      if(TENSOR##_counter[TENSOR##_i]  == TENSOR##_sizes[TENSOR##_i]) \\\n      { \\\n        if(TENSOR##_i == 0) \\\n        { \\\n          TH_TENSOR_APPLY_hasFinished = 1; \\\n          break; \\\n        } \\\n          else \\\n        { \\\n          /* Reset the 
pointer to the beginning of the chunk defined by this dimension */ \\\n          TENSOR##_data -= TENSOR##_counter[TENSOR##_i]*TENSOR##_strides[TENSOR##_i]; \\\n          TENSOR##_counter[TENSOR##_i] = 0; \\\n        } \\\n      } \\\n      else \\\n        break; \\\n    } \\\n    TENSOR##_i = 0; \\\n  } \\\n\n#define TH_TENSOR_APPLY3_D(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, DIM, CODE) \\\n{ \\\n  int TH_TENSOR_APPLY_hasFinished = 0; \\\n  long TH_TENSOR_dim_index = 0; \\\n  __TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, DIM, 1) \\\n  __TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, DIM, 1) \\\n  __TH_TENSOR_APPLYX_PREAMBLE(TYPE3, TENSOR3, DIM, 1) \\\n                                                                        \\\n  int elements_equal = 1;                                               \\\n  if(TENSOR1##_n != TENSOR2##_n) {                                      \\\n    elements_equal = 0;                                                 \\\n  }                                                                     \\\n  else if(TENSOR1##_n != TENSOR3##_n) {                                 \\\n    elements_equal = 0;                                                 \\\n  }                                                                     \\\n  if (elements_equal == 0) {                                            \\\n    THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \\\n    THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \\\n    THDescBuff T3buff = _THSizeDesc(TENSOR3->size, TENSOR3->nDimension); \\\n    THError(\"inconsistent tensor size, expected %s %s, %s %s and %s %s to have the same \" \\\n            \"number of elements, but got %d, %d and %d elements respectively\", \\\n            #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, #TENSOR3, T3buff.str, \\\n            TENSOR1##_n, TENSOR2##_n, TENSOR3##_n);                     \\\n  }                                                                     \\\n        
                                                                \\\n  while(!TH_TENSOR_APPLY_hasFinished) \\\n  { \\\n    /* Loop through the inner most region of the Tensor */ \\\n    for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size && TENSOR3##_i < TENSOR3##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR3##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += TENSOR2##_stride, TENSOR3##_data += TENSOR3##_stride) /* 0 et pas TENSOR##_dim! */ \\\n    { \\\n      CODE \\\n    } \\\n    __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR1, 0) \\\n    __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR2, 0) \\\n    __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR3, 0) \\\n  } \\\n  if(TENSOR1##_counter != NULL) \\\n    THFree(TENSOR1##_counter); \\\n  if(TENSOR2##_counter != NULL) \\\n    THFree(TENSOR2##_counter); \\\n  if(TENSOR3##_counter != NULL) \\\n    THFree(TENSOR3##_counter); \\\n}\n\n#define TH_TENSOR_APPLY3(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \\\n  TH_TENSOR_APPLY3_D(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, -1, CODE)\n\n#define TH_TENSOR_APPLY2_D(TYPE1, TENSOR1, TYPE2, TENSOR2, DIM, CODE) \\\n{ \\\n  int TH_TENSOR_APPLY_hasFinished = 0; \\\n  long TH_TENSOR_dim_index = 0; \\\n  __TH_TENSOR_APPLYX_PREAMBLE(TYPE1, TENSOR1, DIM, 1) \\\n  __TH_TENSOR_APPLYX_PREAMBLE(TYPE2, TENSOR2, DIM, 1) \\\n\\\n    if(TENSOR1##_n != TENSOR2##_n) {                                    \\\n      THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \\\n      THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \\\n      THError(\"inconsistent tensor size, expected %s %s and %s %s to have the same \" \\\n              \"number of elements, but got %d and %d elements respectively\", \\\n              #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, TENSOR1##_n, TENSOR2##_n); \\\n    }                                                                   \\\n  while(!TH_TENSOR_APPLY_hasFinished) \\\n  { \\\n    /* Loop through the inner most 
region of the Tensor */ \\\n    for(; TENSOR1##_i < TENSOR1##_size && TENSOR2##_i < TENSOR2##_size; TENSOR1##_i++, TENSOR2##_i++, TENSOR1##_data += TENSOR1##_stride, TENSOR2##_data += TENSOR2##_stride) /* 0 et pas TENSOR##_dim! */ \\\n    { \\\n      CODE \\\n    } \\\n    __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR1, 0) \\\n    __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR2, 0) \\\n  } \\\n  if(TENSOR1##_counter != NULL) \\\n    THFree(TENSOR1##_counter); \\\n  if(TENSOR2##_counter != NULL) \\\n    THFree(TENSOR2##_counter); \\\n}\n\n#define TH_TENSOR_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \\\n  TH_TENSOR_APPLY2_D(TYPE1, TENSOR1, TYPE2, TENSOR2, -1, CODE)\n\n#define TH_TENSOR_APPLY_D(TYPE, TENSOR, DIM, CODE) \\\n{ \\\n  int TH_TENSOR_APPLY_hasFinished = 0; \\\n  long TH_TENSOR_dim_index = 0; \\\n  __TH_TENSOR_APPLYX_PREAMBLE(TYPE, TENSOR, DIM, 0) \\\n\\\n  while(!TH_TENSOR_APPLY_hasFinished) \\\n  { \\\n    /* Loop through the inner most region of the Tensor */ \\\n    for(; TENSOR##_i < TENSOR##_size; TENSOR##_i++, TENSOR##_data += TENSOR##_stride) /* 0 et pas TENSOR##_dim! */ \\\n    { \\\n      CODE \\\n    } \\\n    __TH_TENSOR_APPLYX_UPDATE_COUNTERS(TENSOR, 1) \\\n  } \\\n  THFree(TENSOR##_counter); \\\n}\n\n#define TH_TENSOR_APPLY(TYPE, TENSOR, CODE) \\\n  TH_TENSOR_APPLY_D(TYPE, TENSOR, -1, CODE)\n\n#endif\n"
  },
  {
    "path": "lib/TH/THTensorDimApply.h",
    "content": "#ifndef TH_TENSOR_DIM_APPLY_INC\n#define TH_TENSOR_DIM_APPLY_INC\n\n#define TH_TENSOR_DIM_APPLY3(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, DIMENSION, CODE) \\\n{ \\\n  TYPE1 *TENSOR1##_data = NULL; \\\n  long TENSOR1##_stride = 0, TENSOR1##_size = 0; \\\n  TYPE2 *TENSOR2##_data = NULL; \\\n  long TENSOR2##_stride = 0, TENSOR2##_size = 0; \\\n  TYPE3 *TENSOR3##_data = NULL; \\\n  long TENSOR3##_stride = 0, TENSOR3##_size = 0; \\\n  long *TH_TENSOR_DIM_APPLY_counter = NULL; \\\n  int TH_TENSOR_DIM_APPLY_hasFinished = 0; \\\n  int TH_TENSOR_DIM_APPLY_i; \\\n\\\n  if( (DIMENSION < 0) || (DIMENSION >= TENSOR1->nDimension) ) \\\n    THError(\"invalid dimension %d (expected to be 0 <= dim < %d)\", DIMENSION, TENSOR1->nDimension); \\\n  int same_dims = 1;                                                    \\\n  if( TENSOR1->nDimension != TENSOR2->nDimension ) {                    \\\n    same_dims = 0;                                                      \\\n  } \\\n  if( TENSOR1->nDimension != TENSOR3->nDimension ) { \\\n    same_dims = 0;                                   \\\n  } \\\n  if (same_dims == 0) { \\\n    THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \\\n    THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \\\n    THDescBuff T3buff = _THSizeDesc(TENSOR3->size, TENSOR3->nDimension); \\\n    THError(\"inconsistent tensor size, expected %s %s, %s %s and %s %s to have the same \" \\\n            \"number of dimensions\", #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, #TENSOR3, T3buff.str); \\\n  }                                                                     \\\n  int shape_check_flag = 0;                                             \\\n  for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \\\n  { \\\n    if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \\\n      continue; \\\n    if(TENSOR1->size[TH_TENSOR_DIM_APPLY_i] != TENSOR2->size[TH_TENSOR_DIM_APPLY_i]) 
\\\n      shape_check_flag = 1;                                             \\\n    if(TENSOR1->size[TH_TENSOR_DIM_APPLY_i] != TENSOR3->size[TH_TENSOR_DIM_APPLY_i]) \\\n      shape_check_flag = 1;                                             \\\n  } \\\n    \\\n  if (shape_check_flag == 1) { \\\n    THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \\\n    THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \\\n    THDescBuff T3buff = _THSizeDesc(TENSOR3->size, TENSOR3->nDimension); \\\n    THError(\"Expected %s %s, %s %s and %s %s to have the same size in dimension %d\", \\\n            #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, #TENSOR3, T3buff.str, DIMENSION); \\\n  } \\\n\\\n  TH_TENSOR_DIM_APPLY_counter = (long*)THAlloc(sizeof(long)*(TENSOR1->nDimension)); \\\n  for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \\\n    TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \\\n\\\n  TENSOR1##_data = (TENSOR1)->storage->data+(TENSOR1)->storageOffset; \\\n  TENSOR1##_stride = (TENSOR1)->stride[DIMENSION]; \\\n  TENSOR1##_size = TENSOR1->size[DIMENSION]; \\\n\\\n  TENSOR2##_data = (TENSOR2)->storage->data+(TENSOR2)->storageOffset; \\\n  TENSOR2##_stride = (TENSOR2)->stride[DIMENSION]; \\\n  TENSOR2##_size = TENSOR2->size[DIMENSION]; \\\n\\\n  TENSOR3##_data = (TENSOR3)->storage->data+(TENSOR3)->storageOffset; \\\n  TENSOR3##_stride = (TENSOR3)->stride[DIMENSION]; \\\n  TENSOR3##_size = TENSOR3->size[DIMENSION]; \\\n\\\n  while(!TH_TENSOR_DIM_APPLY_hasFinished) \\\n  { \\\n    CODE \\\n\\\n    if(TENSOR1->nDimension == 1) \\\n       break; \\\n \\\n    for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \\\n    { \\\n      if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \\\n      { \\\n        if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \\\n        { \\\n          TH_TENSOR_DIM_APPLY_hasFinished = 1; \\\n          break; \\\n       
 } \\\n        continue; \\\n      } \\\n\\\n      TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \\\n      TENSOR1##_data += TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \\\n      TENSOR2##_data += TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \\\n      TENSOR3##_data += TENSOR3->stride[TH_TENSOR_DIM_APPLY_i]; \\\n\\\n      if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == TENSOR1->size[TH_TENSOR_DIM_APPLY_i]) \\\n      { \\\n        if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \\\n        { \\\n          TH_TENSOR_DIM_APPLY_hasFinished = 1; \\\n          break; \\\n        } \\\n        else \\\n        { \\\n          TENSOR1##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \\\n          TENSOR2##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \\\n          TENSOR3##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR3->stride[TH_TENSOR_DIM_APPLY_i]; \\\n          TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \\\n        } \\\n      } \\\n      else \\\n        break; \\\n    } \\\n  } \\\n  THFree(TH_TENSOR_DIM_APPLY_counter); \\\n}\n\n/**\n * Similar to DIM_APPLY(...) but we maintain two sets of pointers: one for the first tensor\n * and one for the second. The two tensors must have the same shape, other than at the\n * specified DIMENSION. This function makes it easy to store the output from reducing the\n * TENSOR at index. 
For example, in the sum example described below, we could instead do:\n *\n * long i = 0;\n * TYPE1 sum;\n *\n * for (i = 0; i < TENSOR1##_size; ++i) {\n *   sum += TENSOR1##_data[i * TENSOR1##_stride]\n * }\n * *TENSOR2##_data = (TYPE2) sum;\n *\n * In particular, we guarantee that the offset into TENSOR2 will be what you would get if\n * you applied all of the index values used to generate the offset into TENSOR1.\n */\n#define TH_TENSOR_DIM_APPLY2(TYPE1, TENSOR1, TYPE2, TENSOR2, DIMENSION, CODE) \\\n{ \\\n  TYPE1 *TENSOR1##_data = NULL; \\\n  long TENSOR1##_stride = 0, TENSOR1##_size = 0; \\\n  TYPE2 *TENSOR2##_data = NULL; \\\n  long TENSOR2##_stride = 0, TENSOR2##_size = 0; \\\n  long *TH_TENSOR_DIM_APPLY_counter = NULL; \\\n  int TH_TENSOR_DIM_APPLY_hasFinished = 0; \\\n  int TH_TENSOR_DIM_APPLY_i; \\\n\\\n  if( (DIMENSION < 0) || (DIMENSION >= TENSOR1->nDimension) ) \\\n    THError(\"invalid dimension %d (expected to be 0 <= dim < %d)\", DIMENSION, TENSOR1->nDimension); \\\n  if( TENSOR1->nDimension != TENSOR2->nDimension ) {                    \\\n    THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \\\n    THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \\\n    THError(\"inconsistent tensor size, expected %s %s and %s %s to have the same \" \\\n            \"number of dimensions\", #TENSOR1, T1buff.str, #TENSOR2, T2buff.str);        \\\n  }                                                                     \\\n  int shape_check_flag = 0;                                             \\\n  for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \\\n  { \\\n    if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \\\n      continue; \\\n    if(TENSOR1->size[TH_TENSOR_DIM_APPLY_i] != TENSOR2->size[TH_TENSOR_DIM_APPLY_i]) { \\\n      THDescBuff T1buff = _THSizeDesc(TENSOR1->size, TENSOR1->nDimension); \\\n      THDescBuff T2buff = _THSizeDesc(TENSOR2->size, TENSOR2->nDimension); \\\n      
THError(\"Expected %s %s and %s %s to have the same size in dimension %d\", \\\n              #TENSOR1, T1buff.str, #TENSOR2, T2buff.str, DIMENSION);   \\\n    }                                                                   \\\n  } \\\n\\\n  TH_TENSOR_DIM_APPLY_counter = (long*)THAlloc(sizeof(long)*(TENSOR1->nDimension)); \\\n  for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \\\n    TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \\\n\\\n  TENSOR1##_data = (TENSOR1)->storage->data+(TENSOR1)->storageOffset; \\\n  TENSOR1##_stride = (TENSOR1)->stride[DIMENSION]; \\\n  TENSOR1##_size = TENSOR1->size[DIMENSION]; \\\n\\\n  TENSOR2##_data = (TENSOR2)->storage->data+(TENSOR2)->storageOffset; \\\n  TENSOR2##_stride = (TENSOR2)->stride[DIMENSION]; \\\n  TENSOR2##_size = TENSOR2->size[DIMENSION]; \\\n\\\n  while(!TH_TENSOR_DIM_APPLY_hasFinished) \\\n  { \\\n    CODE \\\n\\\n    if(TENSOR1->nDimension == 1) \\\n       break; \\\n \\\n    for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR1->nDimension; TH_TENSOR_DIM_APPLY_i++) \\\n    { \\\n      if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \\\n      { \\\n        if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \\\n        { \\\n          TH_TENSOR_DIM_APPLY_hasFinished = 1; \\\n          break; \\\n        } \\\n        continue; \\\n      } \\\n\\\n      TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \\\n      TENSOR1##_data += TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \\\n      TENSOR2##_data += TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \\\n\\\n      if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == TENSOR1->size[TH_TENSOR_DIM_APPLY_i]) \\\n      { \\\n        if(TH_TENSOR_DIM_APPLY_i == TENSOR1->nDimension-1) \\\n        { \\\n          TH_TENSOR_DIM_APPLY_hasFinished = 1; \\\n          break; \\\n        } \\\n        else \\\n        { \\\n          TENSOR1##_data -= 
TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR1->stride[TH_TENSOR_DIM_APPLY_i]; \\\n          TENSOR2##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR2->stride[TH_TENSOR_DIM_APPLY_i]; \\\n          TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \\\n        } \\\n      } \\\n      else \\\n        break; \\\n    } \\\n  } \\\n  THFree(TH_TENSOR_DIM_APPLY_counter); \\\n}\n\n/**\n * The basic idea for DIM_APPLY: Given a TENSOR and a DIMENSION, provide access to the data stored\n * at all sets of dimension values other than DIMENSION, such that we can get all the values at those\n * fixed indices for the various values at DIMENSION.\n *\n * Suppose we have a 2x3x4 Tensor A, and we have DIMENSION=2. Then we will hit CODE (2x3) times, and the\n * pointer into storage will be at:\n *\n * A[0][0]\n * A[0][1]\n * A[0][2]\n * A[1][0]\n * A[1][1]\n * A[1][2]\n *\n * And at each point, we can access the data for each of the four elements of the Tensor via\n * TENSOR##_stride. So for example, if we wanted to sum the elements there, we could do:\n *\n * long i = 0;\n * TYPE sum;\n * for (i = 0; i < TENSOR##_size; i++) {\n *  sum += TENSOR##_data[i * TENSOR##_stride]\n * }\n *\n * Note that we don't have to have DIMENSION be the last tensor. 
If we have DIMENSION=1, then we will hit the\n * code (2x4) times, with pointer into the storage at:\n *\n * offset +\n *   stride_0 * 0 + stride_2 * 0\n *   stride_0 * 1 + stride_2 * 0\n *   stride_0 * 0 + stride_2 * 1\n *   stride_0 * 1 + stride_2 * 1\n *   stride_0 * 0 + stride_2 * 2\n *   stride_0 * 1 + stride_2 * 2\n *   stride_0 * 0 + stride_2 * 3\n *   stride_0 * 1 + stride_2 * 3\n *\n * So we can again sum over the values at DIMENSION with the other indices fixed.\n */\n#define TH_TENSOR_DIM_APPLY(TYPE, TENSOR, DIMENSION, CODE) \\\n{ \\\n  TYPE *TENSOR##_data = NULL; \\\n  long TENSOR##_stride = 0, TENSOR##_size = 0; \\\n  long *TH_TENSOR_DIM_APPLY_counter = NULL; \\\n  int TH_TENSOR_DIM_APPLY_hasFinished = 0; \\\n  int TH_TENSOR_DIM_APPLY_i; \\\n\\\n  if( (DIMENSION < 0) || (DIMENSION >= TENSOR->nDimension) ) \\\n    THError(\"invalid dimension\"); \\\n\\\n  TENSOR##_data = (TENSOR)->storage->data+(TENSOR)->storageOffset; \\\n  TENSOR##_stride = (TENSOR)->stride[DIMENSION]; \\\n  TENSOR##_size = TENSOR->size[DIMENSION]; \\\n  /* Counter stores the indices into the Tensor at any time */ \\\n  TH_TENSOR_DIM_APPLY_counter = (long*)THAlloc(sizeof(long)*(TENSOR->nDimension)); \\\n  for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR->nDimension; TH_TENSOR_DIM_APPLY_i++) \\\n    TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \\\n\\\n  while(!TH_TENSOR_DIM_APPLY_hasFinished) \\\n  { \\\n    CODE \\\n\\\n    if(TENSOR->nDimension == 1) \\\n       break; \\\n \\\n    for(TH_TENSOR_DIM_APPLY_i = 0; TH_TENSOR_DIM_APPLY_i < TENSOR->nDimension; TH_TENSOR_DIM_APPLY_i++) \\\n    { \\\n       /* Check if the index is equal to DIMENSION. We don't need to update the */ \\\n       /* offset if this is the case, and can consider the next index. 
However, */ \\\n       /* in the case that the DIMENSION is the last index in the Tensor, then */ \\\n       /* we have parsed the entire tensor and can exit */ \\\n      if(TH_TENSOR_DIM_APPLY_i == DIMENSION) \\\n      { \\\n        if(TH_TENSOR_DIM_APPLY_i == TENSOR->nDimension-1) \\\n        { \\\n          TH_TENSOR_DIM_APPLY_hasFinished = 1; \\\n          break; \\\n        } \\\n        continue; \\\n      } \\\n\\\n      /* Bump the counter at this index, update the pointer */ \\\n      TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]++; \\\n      TENSOR##_data += TENSOR->stride[TH_TENSOR_DIM_APPLY_i]; \\\n\\\n      if(TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] == TENSOR->size[TH_TENSOR_DIM_APPLY_i]) \\\n      { \\\n        /* Handled TENSOR_size(dim) iterations for DIM_APPLY_i. If this is the last dimension, exit */ \\\n        if(TH_TENSOR_DIM_APPLY_i == TENSOR->nDimension-1) \\\n        { \\\n          TH_TENSOR_DIM_APPLY_hasFinished = 1; \\\n          break; \\\n        } \\\n        else \\\n        { \\\n          /* Reset the counter, and the pointer to the beginning of the storage for this combination of indices */ \\\n          TENSOR##_data -= TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i]*TENSOR->stride[TH_TENSOR_DIM_APPLY_i]; \\\n          TH_TENSOR_DIM_APPLY_counter[TH_TENSOR_DIM_APPLY_i] = 0; \\\n        } \\\n      } \\\n      else \\\n        break; \\\n    } \\\n  } \\\n  THFree(TH_TENSOR_DIM_APPLY_counter); \\\n}\n\n#endif\n"
  },
  {
    "path": "lib/TH/THTensorMacros.h",
    "content": "#ifndef TH_TENSOR_MACROS_INC\n#define TH_TENSOR_MACROS_INC\n\n/* fast methods to access tensor data */\n\n#define THTensor_fastGet1d(self, x0)                                    \\\n  (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]])\n\n#define THTensor_fastGet2d(self, x0, x1)                                \\\n  (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]])\n\n#define THTensor_fastGet3d(self, x0, x1, x2)                            \\\n  (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]])\n\n#define THTensor_fastGet4d(self, x0, x1, x2, x3)                        \\\n  (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]+(x3)*(self)->stride[3]])\n\n#define THTensor_fastSet1d(self, x0, value)                             \\\n  (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]] = value)\n\n#define THTensor_fastSet2d(self, x0, x1, value)                         \\\n  (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]] = value)\n\n#define THTensor_fastSet3d(self, x0, x1, x2, value)                     \\\n  (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]] = value)\n\n#define THTensor_fastSet4d(self, x0, x1, x2, x3, value)                 \\\n  (((self)->storage->data+(self)->storageOffset)[(x0)*(self)->stride[0]+(x1)*(self)->stride[1]+(x2)*(self)->stride[2]+(x3)*(self)->stride[3]] = value)\n\n#endif\n"
  },
  {
    "path": "lib/TH/THVector.c",
    "content": "#include \"THVector.h\"\n\n#include \"generic/simd/simd.h\"\n\n#ifdef __NEON__\n#include \"vector/NEON.c\"\n#endif\n\n#ifdef __PPC64__\n#include \"vector/VSX.c\"\n#endif\n\n#if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \\\n        || defined(USE_SSE4_1) || defined(USE_SSE4_2)\n#include \"vector/SSE.c\"\n#endif\n\n#if defined(USE_AVX)\n#include \"vector/AVX.h\"\n#endif\n\n#if defined(USE_AVX2)\n#include \"vector/AVX2.h\"\n#endif\n\n#include \"generic/THVectorDefault.c\"\n#include \"THGenerateAllTypes.h\"\n\n#include \"generic/THVectorDispatch.c\"\n#include \"THGenerateAllTypes.h\"\n"
  },
  {
    "path": "lib/TH/THVector.h",
    "content": "#ifndef TH_VECTOR_INC\n#define TH_VECTOR_INC\n\n#include \"THGeneral.h\"\n\n#define THVector_(NAME) TH_CONCAT_4(TH,Real,Vector_,NAME)\n\n/* We are going to use dynamic dispatch, and want only to generate declarations\n * of the vector functions */\n#include \"generic/THVector.h\"\n#include \"THGenerateAllTypes.h\"\n\n#endif // TH_VECTOR_INC\n"
  },
  {
    "path": "lib/TH/cmake/FindARM.cmake",
    "content": "# Check if the processor is an ARM and if NEON instructions are available on the machine where\n# the project is compiled.\n\nIF(CMAKE_SYSTEM_NAME MATCHES \"Linux\")\n   EXEC_PROGRAM(cat ARGS \"/proc/cpuinfo\" OUTPUT_VARIABLE CPUINFO)\n\n   # NEON instructions can be found on the majority of modern ARM processors\n   STRING(REGEX REPLACE \"^.*(neon).*$\" \"\\\\1\" NEON_THERE \"${CPUINFO}\")\n   STRING(COMPARE EQUAL \"neon\" \"${NEON_THERE}\" NEON_TRUE)\n   IF (NEON_TRUE)\n      set(NEON_FOUND true CACHE BOOL \"NEON available on host\")\n   ELSE (NEON_TRUE)\n      set(NEON_FOUND false CACHE BOOL \"NEON available on host\")\n   ENDIF (NEON_TRUE)\n\n   # on ARMv8, NEON is inherent and is instead listed as 'asimd' in /proc/cpuinfo\n   STRING(REGEX REPLACE \"^.*(asimd).*$\" \"\\\\1\" ASIMD_THERE \"${CPUINFO}\")\n   STRING(COMPARE EQUAL \"asimd\" \"${ASIMD_THERE}\" ASIMD_TRUE)\n   IF (ASIMD_TRUE)\n      set(ASIMD_FOUND true CACHE BOOL \"ASIMD/NEON available on host\")\n   ELSE (ASIMD_TRUE)\n      set(ASIMD_FOUND false CACHE BOOL \"ASIMD/NEON available on host\")\n   ENDIF (ASIMD_TRUE)\n\n   #Find the processor type (for now OMAP3 or OMAP4)\n   STRING(REGEX REPLACE \"^.*(OMAP3).*$\" \"\\\\1\" OMAP3_THERE \"${CPUINFO}\")\n   STRING(COMPARE EQUAL \"OMAP3\" \"${OMAP3_THERE}\" OMAP3_TRUE)\n   IF (OMAP3_TRUE)\n      set(CORTEXA8_FOUND true CACHE BOOL \"OMAP3 available on host\")\n   ELSE (OMAP3_TRUE)\n      set(CORTEXA8_FOUND false CACHE BOOL \"OMAP3 available on host\")\n   ENDIF (OMAP3_TRUE)\n\n   #Find the processor type (for now OMAP3 or OMAP4)\n   STRING(REGEX REPLACE \"^.*(OMAP4).*$\" \"\\\\1\" OMAP4_THERE \"${CPUINFO}\")\n   STRING(COMPARE EQUAL \"OMAP4\" \"${OMAP4_THERE}\" OMAP4_TRUE)\n   IF (OMAP4_TRUE)\n      set(CORTEXA9_FOUND true CACHE BOOL \"OMAP4 available on host\")\n   ELSE (OMAP4_TRUE)\n      set(CORTEXA9_FOUND false CACHE BOOL \"OMAP4 available on host\")\n   ENDIF (OMAP4_TRUE)\n\nELSEIF(CMAKE_SYSTEM_NAME MATCHES \"Darwin\")\n   
EXEC_PROGRAM(\"/usr/sbin/sysctl -n machdep.cpu.features\" OUTPUT_VARIABLE\n      CPUINFO)\n\n   # NEON instructions can be found on the majority of modern ARM processors\n   STRING(REGEX REPLACE \"^.*(neon).*$\" \"\\\\1\" NEON_THERE \"${CPUINFO}\")\n   STRING(COMPARE EQUAL \"neon\" \"${NEON_THERE}\" NEON_TRUE)\n   IF (NEON_TRUE)\n      set(NEON_FOUND true CACHE BOOL \"NEON available on host\")\n   ELSE (NEON_TRUE)\n      set(NEON_FOUND false CACHE BOOL \"NEON available on host\")\n   ENDIF (NEON_TRUE)\n\nELSEIF(CMAKE_SYSTEM_NAME MATCHES \"Windows\")\n   # TODO\n   set(CORTEXA8_FOUND   false CACHE BOOL \"OMAP3 not available on host\")\n   set(CORTEXA9_FOUND   false CACHE BOOL \"OMAP4 not available on host\")\n   set(NEON_FOUND   false CACHE BOOL \"NEON not available on host\")\nELSE(CMAKE_SYSTEM_NAME MATCHES \"Linux\")\n   set(CORTEXA8_FOUND   false CACHE BOOL \"OMAP3 not available on host\")\n   set(CORTEXA9_FOUND   false CACHE BOOL \"OMAP4 not available on host\")\n   set(NEON_FOUND   false CACHE BOOL \"NEON not available on host\")\nENDIF(CMAKE_SYSTEM_NAME MATCHES \"Linux\")\n\nif(NOT NEON_FOUND)\n      MESSAGE(STATUS \"Could not find hardware support for NEON on this machine.\")\nendif(NOT NEON_FOUND)\nif(NOT CORTEXA8_FOUND)\n      MESSAGE(STATUS \"No OMAP3 processor on this machine.\")\nendif(NOT CORTEXA8_FOUND)\nif(NOT CORTEXA9_FOUND)\n      MESSAGE(STATUS \"No OMAP4 processor on this machine.\")\nendif(NOT CORTEXA9_FOUND)\nmark_as_advanced(NEON_FOUND)\n"
  },
  {
    "path": "lib/TH/cmake/FindBLAS.cmake",
    "content": "# - Find BLAS library\n# This module finds an installed fortran library that implements the BLAS \n# linear-algebra interface (see http://www.netlib.org/blas/).  \n# The list of libraries searched for is taken\n# from the autoconf macro file, acx_blas.m4 (distributed at\n# http://ac-archive.sourceforge.net/ac-archive/acx_blas.html).\n#\n# This module sets the following variables:\n#  BLAS_FOUND - set to true if a library implementing the BLAS interface is found.\n#  BLAS_INFO - name of the detected BLAS library.\n#  BLAS_F2C - set to true if following the f2c return convention\n#  BLAS_LIBRARIES - list of libraries to link against to use BLAS\n#  BLAS_INCLUDE_DIR - include directory\n\n# Do nothing is BLAS was found before\nIF(NOT BLAS_FOUND)\n\nSET(BLAS_LIBRARIES)\nSET(BLAS_INCLUDE_DIR)\nSET(BLAS_INFO)\nSET(BLAS_F2C)\n\nSET(WITH_BLAS \"\" CACHE STRING \"Blas type [mkl/open/goto/acml/atlas/accelerate/veclib/generic]\")\n\n# Old FindBlas\nINCLUDE(CheckCSourceRuns)\nINCLUDE(CheckFortranFunctionExists)\n\nMACRO(Check_Fortran_Libraries LIBRARIES _prefix _name _flags _list)\n  # This macro checks for the existence of the combination of fortran libraries\n  # given by _list.  If the combination is found, this macro checks (using the \n  # Check_Fortran_Function_Exists macro) whether can link against that library\n  # combination using the name of a routine given by _name using the linker\n  # flags given by _flags.  If the combination of libraries is found and passes\n  # the link test, LIBRARIES is set to the list of complete library paths that\n  # have been found.  Otherwise, LIBRARIES is set to NOTFOUND.\n  # N.B. 
_prefix is the prefix applied to the names of all cached variables that\n  # are generated internally and marked advanced by this macro.\n  \n  set(__list)\n  foreach(_elem ${_list})\n    if(__list)\n      set(__list \"${__list} - ${_elem}\")\n    else(__list)\n      set(__list \"${_elem}\")\n    endif(__list)\n  endforeach(_elem)\n  message(STATUS \"Checking for [${__list}]\")\n\n  set(_libraries_work TRUE)\n  set(${LIBRARIES})\n  set(_combined_name)\n  foreach(_library ${_list})\n    set(_combined_name ${_combined_name}_${_library})\n    if(_libraries_work)\n      if ( WIN32 )\n        find_library(${_prefix}_${_library}_LIBRARY\n          NAMES ${_library}\n          PATHS ENV LIB \n          PATHS ENV PATH )\n      endif ( WIN32 )\n      if ( APPLE ) \n        find_library(${_prefix}_${_library}_LIBRARY\n          NAMES ${_library}\n          PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 \n          ENV DYLD_LIBRARY_PATH )\n      else ( APPLE )\n        find_library(${_prefix}_${_library}_LIBRARY\n          NAMES ${_library}\n          PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64 \n          ENV LD_LIBRARY_PATH )\n      endif( APPLE )\n      mark_as_advanced(${_prefix}_${_library}_LIBRARY)\n      set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY})\n      set(_libraries_work ${${_prefix}_${_library}_LIBRARY})\n      MESSAGE(STATUS \"  Library ${_library}: ${${_prefix}_${_library}_LIBRARY}\")\n    endif(_libraries_work)\n  endforeach(_library ${_list})\n  if(_libraries_work)\n    # Test this combination of libraries.\n    set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}})\n    if (CMAKE_Fortran_COMPILER_WORKS)\n      check_fortran_function_exists(${_name} ${_prefix}${_combined_name}_WORKS)\n    else (CMAKE_Fortran_COMPILER_WORKS)\n      check_function_exists(\"${_name}_\" ${_prefix}${_combined_name}_WORKS)\n    endif (CMAKE_Fortran_COMPILER_WORKS)\n    set(CMAKE_REQUIRED_LIBRARIES)\n    
mark_as_advanced(${_prefix}${_combined_name}_WORKS)\n    set(_libraries_work ${${_prefix}${_combined_name}_WORKS})\n  endif(_libraries_work)\n  if(NOT _libraries_work)\n    set(${LIBRARIES} NOTFOUND)\n  endif(NOT _libraries_work)\nendmacro(Check_Fortran_Libraries)\n\n# Intel MKL?\nif((NOT BLAS_LIBRARIES)\n    AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL \"mkl\")))\n  FIND_PACKAGE(MKL)\n  IF(MKL_FOUND)\n    SET(BLAS_INFO \"mkl\")\n    SET(BLAS_LIBRARIES ${MKL_LIBRARIES})\n    SET(BLAS_INCLUDE_DIR ${MKL_INCLUDE_DIR})\n    SET(BLAS_VERSION ${MKL_VERSION})\n  ENDIF(MKL_FOUND)\nendif()\n\nif((NOT BLAS_LIBRARIES)\n    AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL \"open\")))\n  check_fortran_libraries(\n  BLAS_LIBRARIES\n  BLAS\n  sgemm\n  \"\"\n  \"openblas\")\n  if(BLAS_LIBRARIES)\n    set(BLAS_INFO \"open\")\n  endif(BLAS_LIBRARIES)\nendif()\n\nif((NOT BLAS_LIBRARIES)\n    AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL \"open\")))\n  check_fortran_libraries(\n  BLAS_LIBRARIES\n  BLAS\n  sgemm\n  \"\"\n  \"openblas;pthread\")\n  if(BLAS_LIBRARIES)\n    set(BLAS_INFO \"open\")\n  endif(BLAS_LIBRARIES)\nendif()\n\nif((NOT BLAS_LIBRARIES) AND (WIN32)\n    AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL \"open\")))\n  check_fortran_libraries(\n  BLAS_LIBRARIES\n  BLAS\n  sgemm\n  \"\"\n  \"libopenblas\")\n  if(BLAS_LIBRARIES)\n    set(BLAS_INFO \"open\")\n  endif(BLAS_LIBRARIES)\nendif()\n\nif((NOT BLAS_LIBRARIES)\n    AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL \"goto\")))\n  check_fortran_libraries(\n  BLAS_LIBRARIES\n  BLAS\n  sgemm\n  \"\"\n  \"goto2;gfortran\")\n  if (BLAS_LIBRARIES)\n    set(BLAS_INFO \"goto\")\n  endif (BLAS_LIBRARIES)\nendif()\n\nif((NOT BLAS_LIBRARIES)\n    AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL \"goto\")))\n  check_fortran_libraries(\n  BLAS_LIBRARIES\n  BLAS\n  sgemm\n  \"\"\n  \"goto2;gfortran;pthread\")\n  if (BLAS_LIBRARIES)\n    set(BLAS_INFO \"goto\")\n  endif (BLAS_LIBRARIES)\nendif()\n\nif((NOT BLAS_LIBRARIES)\n    AND ((NOT WITH_BLAS) OR 
(WITH_BLAS STREQUAL \"acml\")))\n  check_fortran_libraries(\n  BLAS_LIBRARIES\n  BLAS\n  sgemm\n  \"\"\n  \"acml;gfortran\")\n  if (BLAS_LIBRARIES)\n    set(BLAS_INFO \"acml\")\n  endif (BLAS_LIBRARIES)\nendif()\n\n# Apple BLAS library?\nif((NOT BLAS_LIBRARIES)\n    AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL \"accelerate\")))\n  check_fortran_libraries(\n  BLAS_LIBRARIES\n  BLAS\n  sgemm\n  \"\"\n  \"Accelerate\")\n  if (BLAS_LIBRARIES)\n    set(BLAS_INFO \"accelerate\")\n    set(BLAS_IS_ACCELERATE 1)\n  endif (BLAS_LIBRARIES)\nendif()\n\nif((NOT BLAS_LIBRARIES)\n    AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL \"veclib\")))\n  check_fortran_libraries(\n    BLAS_LIBRARIES\n    BLAS\n    sgemm\n    \"\"\n    \"vecLib\")\n  if (BLAS_LIBRARIES)\n    set(BLAS_INFO \"veclib\")\n  endif (BLAS_LIBRARIES)\nendif()\n\n# BLAS in ATLAS library? (http://math-atlas.sourceforge.net/)\nif((NOT BLAS_LIBRARIES)\n    AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL \"atlas\")))\n  check_fortran_libraries(\n  BLAS_LIBRARIES\n  BLAS\n  sgemm\n  \"\"\n  \"ptf77blas;atlas;gfortran\")\n  if (BLAS_LIBRARIES)\n    set(BLAS_INFO \"atlas\")\n  endif (BLAS_LIBRARIES)\nendif()\n\n# Generic BLAS library?\nif((NOT BLAS_LIBRARIES)\n    AND ((NOT WITH_BLAS) OR (WITH_BLAS STREQUAL \"generic\")))\n  check_fortran_libraries(\n  BLAS_LIBRARIES\n  BLAS\n  sgemm\n  \"\"\n  \"blas\")\n  if (BLAS_LIBRARIES)\n    set(BLAS_INFO \"generic\")\n  endif (BLAS_LIBRARIES)\nendif()\n\n# Determine if blas was compiled with the f2c conventions\nIF (BLAS_LIBRARIES)\n  SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})\n  CHECK_C_SOURCE_RUNS(\"\n#include <stdlib.h>\n#include <stdio.h>\nfloat x[4] = { 1, 2, 3, 4 };\nfloat y[4] = { .1, .01, .001, .0001 };\nint four = 4;\nint one = 1;\nextern double sdot_();\nint main() {\n  int i;\n  double r = sdot_(&four, x, &one, y, &one);\n  exit((float)r != (float).1234);\n}\" BLAS_F2C_DOUBLE_WORKS )\n  CHECK_C_SOURCE_RUNS(\"\n#include <stdlib.h>\n#include <stdio.h>\nfloat x[4] = { 1, 2, 
3, 4 };\nfloat y[4] = { .1, .01, .001, .0001 };\nint four = 4;\nint one = 1;\nextern float sdot_();\nint main() {\n  int i;\n  double r = sdot_(&four, x, &one, y, &one);\n  exit((float)r != (float).1234);\n}\" BLAS_F2C_FLOAT_WORKS )\n  IF (BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS)\n    MESSAGE(STATUS \"This BLAS uses the F2C return conventions\")\n    SET(BLAS_F2C TRUE)\n  ELSE (BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS)\n    SET(BLAS_F2C FALSE)\n  ENDIF (BLAS_F2C_DOUBLE_WORKS AND NOT BLAS_F2C_FLOAT_WORKS)\n  CHECK_C_SOURCE_RUNS(\"\n#include <stdlib.h>\n#include <stdio.h>\nfloat x[4] = { 1, 2, 3, 4 };\nfloat y[4] = { .1, .01, .001, .0001 };\nextern float cblas_sdot();\nint main() {\n  int i;\n  double r = cblas_sdot(4, x, 1, y, 1);\n  exit((float)r != (float).1234);\n}\" BLAS_USE_CBLAS_DOT )\n  IF (BLAS_USE_CBLAS_DOT)\n    SET(BLAS_USE_CBLAS_DOT TRUE)\n  ELSE (BLAS_USE_CBLAS_DOT)\n    SET(BLAS_USE_CBLAS_DOT FALSE)\n  ENDIF (BLAS_USE_CBLAS_DOT)\nENDIF(BLAS_LIBRARIES)\n\n# epilogue\n\nif(BLAS_LIBRARIES)\n  set(BLAS_FOUND TRUE)\nelse(BLAS_LIBRARIES)\n  set(BLAS_FOUND FALSE)\nendif(BLAS_LIBRARIES)\n\nIF (NOT BLAS_FOUND AND BLAS_FIND_REQUIRED)\n  message(FATAL_ERROR \"Cannot find a library with BLAS API. Please specify library location.\")\nENDIF (NOT BLAS_FOUND AND BLAS_FIND_REQUIRED)\nIF(NOT BLAS_FIND_QUIETLY)\n  IF(BLAS_FOUND)\n    MESSAGE(STATUS \"Found a library with BLAS API (${BLAS_INFO}).\")\n  ELSE(BLAS_FOUND)\n    MESSAGE(STATUS \"Cannot find a library with BLAS API. Not using BLAS.\")\n  ENDIF(BLAS_FOUND)\nENDIF(NOT BLAS_FIND_QUIETLY)\n\n# Do nothing if BLAS was found before\nENDIF(NOT BLAS_FOUND)\n"
  },
  {
    "path": "lib/TH/cmake/FindLAPACK.cmake",
    "content": "# - Find LAPACK library\n# This module finds an installed fortran library that implements the LAPACK\n# linear-algebra interface (see http://www.netlib.org/lapack/).\n#\n# The approach follows that taken for the autoconf macro file, acx_lapack.m4\n# (distributed at http://ac-archive.sourceforge.net/ac-archive/acx_lapack.html).\n#\n# This module sets the following variables:\n#  LAPACK_FOUND - set to true if a library implementing the LAPACK interface is found\n#  LAPACK_LIBRARIES - list of libraries (using full path name) for LAPACK\n\n# Note: I do not think it is a good idea to mixup different BLAS/LAPACK versions\n# Hence, this script wants to find a Lapack library matching your Blas library\n\n# Do nothing if LAPACK was found before\nIF(NOT LAPACK_FOUND)\n\nSET(LAPACK_LIBRARIES)\nSET(LAPACK_INFO)\n\nIF(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED)\n  FIND_PACKAGE(BLAS)\nELSE(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED)\n  FIND_PACKAGE(BLAS REQUIRED)\nENDIF(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED)\n\n# Old search lapack script\ninclude(CheckFortranFunctionExists)\n\nmacro(Check_Lapack_Libraries LIBRARIES _prefix _name _flags _list _blas)\n  # This macro checks for the existence of the combination of fortran libraries\n  # given by _list.  If the combination is found, this macro checks (using the\n  # Check_Fortran_Function_Exists macro) whether can link against that library\n  # combination using the name of a routine given by _name using the linker\n  # flags given by _flags.  If the combination of libraries is found and passes\n  # the link test, LIBRARIES is set to the list of complete library paths that\n  # have been found.  Otherwise, LIBRARIES is set to FALSE.\n  # N.B. 
_prefix is the prefix applied to the names of all cached variables that\n  # are generated internally and marked advanced by this macro.\n  set(_libraries_work TRUE)\n  set(${LIBRARIES})\n  set(_combined_name)\n  foreach(_library ${_list})\n    set(_combined_name ${_combined_name}_${_library})\n    if(_libraries_work)\n      if (WIN32)\n        find_library(${_prefix}_${_library}_LIBRARY\n          NAMES ${_library} PATHS ENV LIB PATHS ENV PATH)\n      else (WIN32)\n        if(APPLE)\n          find_library(${_prefix}_${_library}_LIBRARY\n            NAMES ${_library}\n            PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64\n            ENV DYLD_LIBRARY_PATH)\n        else(APPLE)\n          find_library(${_prefix}_${_library}_LIBRARY\n            NAMES ${_library}\n            PATHS /usr/local/lib /usr/lib /usr/local/lib64 /usr/lib64\n            ENV LD_LIBRARY_PATH)\n        endif(APPLE)\n      endif(WIN32)\n      mark_as_advanced(${_prefix}_${_library}_LIBRARY)\n      set(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY})\n      set(_libraries_work ${${_prefix}_${_library}_LIBRARY})\n    endif(_libraries_work)\n  endforeach(_library ${_list})\n  if(_libraries_work)\n    # Test this combination of libraries.\n    set(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}} ${_blas})\n    if (CMAKE_Fortran_COMPILER_WORKS)\n      check_fortran_function_exists(${_name} ${_prefix}${_combined_name}_WORKS)\n    else (CMAKE_Fortran_COMPILER_WORKS)\n      check_function_exists(\"${_name}_\" ${_prefix}${_combined_name}_WORKS)\n    endif (CMAKE_Fortran_COMPILER_WORKS)\n    set(CMAKE_REQUIRED_LIBRARIES)\n    mark_as_advanced(${_prefix}${_combined_name}_WORKS)\n    set(_libraries_work ${${_prefix}${_combined_name}_WORKS})\n  endif(_libraries_work)\n  if(NOT _libraries_work)\n    set(${LIBRARIES} FALSE)\n  endif(NOT _libraries_work)\nendmacro(Check_Lapack_Libraries)\n\n\nif(BLAS_FOUND)\n\n  # Intel MKL\n  IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL 
\"mkl\"))\n    IF(MKL_LAPACK_LIBRARIES)\n      SET(LAPACK_LIBRARIES ${MKL_LAPACK_LIBRARIES} ${MKL_LIBRARIES})\n    ELSE(MKL_LAPACK_LIBRARIES)\n      SET(LAPACK_LIBRARIES ${MKL_LIBRARIES})\n    ENDIF(MKL_LAPACK_LIBRARIES)\n    SET(LAPACK_INCLUDE_DIR ${MKL_INCLUDE_DIR})\n    SET(LAPACK_INFO \"mkl\")\n  ENDIF()\n\n  # OpenBlas\n  IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL \"open\"))\n    SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})\n    check_function_exists(\"cheev_\" OPEN_LAPACK_WORKS)\n    if(OPEN_LAPACK_WORKS)\n      SET(LAPACK_INFO \"open\")\n    else()\n      message(STATUS \"It seems OpenBlas has not been compiled with Lapack support\")\n    endif()\n  endif()\n\n  # GotoBlas\n  IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL \"goto\"))\n    SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})\n    check_function_exists(\"cheev_\" GOTO_LAPACK_WORKS)\n    if(GOTO_LAPACK_WORKS)\n      SET(LAPACK_INFO \"goto\")\n    else()\n      message(STATUS \"It seems GotoBlas has not been compiled with Lapack support\")\n    endif()\n  endif()\n\n  # ACML\n  IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL \"acml\"))\n    SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})\n    check_function_exists(\"cheev_\" ACML_LAPACK_WORKS)\n    if(ACML_LAPACK_WORKS)\n      SET(LAPACK_INFO \"acml\")\n    else()\n      message(STATUS \"Strangely, this ACML library does not support Lapack?!\")\n    endif()\n  endif()\n\n  # Accelerate\n  IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL \"accelerate\"))\n    SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})\n    check_function_exists(\"cheev_\" ACCELERATE_LAPACK_WORKS)\n    if(ACCELERATE_LAPACK_WORKS)\n      SET(LAPACK_INFO \"accelerate\")\n    else()\n      message(STATUS \"Strangely, this Accelerate library does not support Lapack?!\")\n    endif()\n  endif()\n\n  # vecLib\n  IF((NOT LAPACK_INFO) AND (BLAS_INFO STREQUAL \"veclib\"))\n    SET(CMAKE_REQUIRED_LIBRARIES ${BLAS_LIBRARIES})\n    check_function_exists(\"cheev_\" VECLIB_LAPACK_WORKS)\n    
if(VECLIB_LAPACK_WORKS)\n      SET(LAPACK_INFO \"veclib\")\n    else()\n      message(STATUS \"Strangely, this vecLib library does not support Lapack?!\")\n    endif()\n  endif()\n\n  # Generic LAPACK library?\n  IF((NOT LAPACK_INFO) AND ((BLAS_INFO STREQUAL \"generic\") OR (BLAS_INFO STREQUAL \"open\")))\n    check_lapack_libraries(\n      LAPACK_LIBRARIES\n      LAPACK\n      cheev\n      \"\"\n      \"lapack\"\n      \"${BLAS_LIBRARIES}\"\n      )\n    if(LAPACK_LIBRARIES)\n      SET(LAPACK_INFO \"generic\")\n    endif(LAPACK_LIBRARIES)\n  endif()\n\nelse(BLAS_FOUND)\n  message(STATUS \"LAPACK requires BLAS\")\nendif(BLAS_FOUND)\n\nif(LAPACK_INFO)\n  set(LAPACK_FOUND TRUE)\nelse(LAPACK_INFO)\n  set(LAPACK_FOUND FALSE)\nendif(LAPACK_INFO)\n\nIF (NOT LAPACK_FOUND AND LAPACK_FIND_REQUIRED)\n  message(FATAL_ERROR \"Cannot find a library with LAPACK API. Please specify library location.\")\nENDIF (NOT LAPACK_FOUND AND LAPACK_FIND_REQUIRED)\nIF(NOT LAPACK_FIND_QUIETLY)\n  IF(LAPACK_FOUND)\n    MESSAGE(STATUS \"Found a library with LAPACK API. (${LAPACK_INFO})\")\n  ELSE(LAPACK_FOUND)\n    MESSAGE(STATUS \"Cannot find a library with LAPACK API. Not using LAPACK.\")\n  ENDIF(LAPACK_FOUND)\nENDIF(NOT LAPACK_FIND_QUIETLY)\n\n# Do nothing if LAPACK was found before\nENDIF(NOT LAPACK_FOUND)\n"
  },
  {
    "path": "lib/TH/cmake/FindMKL.cmake",
    "content": "# - Find INTEL MKL library\n#\n# This module finds the Intel Mkl libraries.\n#\n# This module sets the following variables:\n#  MKL_FOUND - set to true if a library implementing the CBLAS interface is found\n#  MKL_VERSION - best guess\n#  MKL_INCLUDE_DIR - path to include dir.\n#  MKL_LIBRARIES - list of libraries for base mkl\n#  MKL_LAPACK_LIBRARIES - list of libraries to add for lapack\n#  MKL_SCALAPACK_LIBRARIES - list of libraries to add for scalapack\n#  MKL_SOLVER_LIBRARIES - list of libraries to add for the solvers\n#  MKL_CDFT_LIBRARIES - list of libraries to add for the solvers\n\n\n# Do nothing if MKL_FOUND was set before!\nIF (NOT MKL_FOUND)\n\nSET(MKL_VERSION)\nSET(MKL_INCLUDE_DIR)\nSET(MKL_LIBRARIES)\nSET(MKL_LAPACK_LIBRARIES)\nSET(MKL_SCALAPACK_LIBRARIES)\nSET(MKL_SOLVER_LIBRARIES)\nSET(MKL_CDFT_LIBRARIES)\n\n# Includes\nINCLUDE(CheckTypeSize)\nINCLUDE(CheckFunctionExists)\n\n# Intel Compiler Suite\nSET(INTEL_COMPILER_DIR CACHE STRING\n  \"Root directory of the Intel Compiler Suite (contains ipp, mkl, etc.)\")\nSET(INTEL_MKL_DIR CACHE STRING\n  \"Root directory of the Intel MKL (standalone)\")\nSET(INTEL_MKL_SEQUENTIAL OFF CACHE BOOL\n  \"Force using the sequential (non threaded) libraries\")\n\n# Checks\nCHECK_TYPE_SIZE(\"void*\" SIZE_OF_VOIDP)\nIF (\"${SIZE_OF_VOIDP}\" EQUAL 8)\n  SET(mklvers \"em64t\")\n  SET(iccvers \"intel64\")\n  SET(mkl64s \"_lp64\")\nELSE (\"${SIZE_OF_VOIDP}\" EQUAL 8)\n  SET(mklvers \"32\")\n  SET(iccvers \"ia32\")\n  SET(mkl64s)\nENDIF (\"${SIZE_OF_VOIDP}\" EQUAL 8)\nIF(CMAKE_COMPILER_IS_GNUCC)\n  SET(mklthreads \"mkl_gnu_thread\" \"mkl_intel_thread\")\n  SET(mklifaces  \"gf\" \"intel\")\n  SET(mklrtls \"gomp\" \"iomp5\")\nELSE(CMAKE_COMPILER_IS_GNUCC)\n  SET(mklthreads \"mkl_intel_thread\")\n  SET(mklifaces  \"intel\")\n  SET(mklrtls \"iomp5\" \"guide\")\n  IF (MSVC)\n    SET(mklrtls \"libiomp5md\")\n  ENDIF (MSVC)\nENDIF (CMAKE_COMPILER_IS_GNUCC)\n\n# Kernel libraries dynamically loaded\nSET(mklkerlibs 
\"mc\" \"mc3\" \"nc\" \"p4n\" \"p4m\" \"p4m3\" \"p4p\" \"def\")\nSET(mklseq)\n\n\n\n# Paths\nSET(saved_CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH})\nSET(saved_CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH})\nIF (INTEL_COMPILER_DIR)\n  # TODO: diagnostic if dir does not exist\n  SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH}\n    \"${INTEL_COMPILER_DIR}/lib/${iccvers}\")\n  IF (NOT INTEL_MKL_DIR)\n    SET(INTEL_MKL_DIR \"${INTEL_COMPILER_DIR}/mkl\")\n  ENDIF (NOT INTEL_MKL_DIR)\nENDIF (INTEL_COMPILER_DIR)\nIF (INTEL_MKL_DIR)\n  # TODO: diagnostic if dir does not exist\n  SET(CMAKE_INCLUDE_PATH ${CMAKE_INCLUDE_PATH}\n    \"${INTEL_MKL_DIR}/include\")\n  SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH}\n    \"${INTEL_MKL_DIR}/lib/${mklvers}\")\n  IF (MSVC)\n    SET(CMAKE_LIBRARY_PATH ${CMAKE_LIBRARY_PATH}\n      \"${INTEL_MKL_DIR}/lib/${iccvers}\")\n  ENDIF (MSVC)\nENDIF (INTEL_MKL_DIR)\n\n# Try linking multiple libs\nMACRO(CHECK_ALL_LIBRARIES LIBRARIES _name _list _flags)\n  # This macro checks for the existence of the combination of libraries given by _list.\n  # If the combination is found, this macro checks whether we can link against that library\n  # combination using the name of a routine given by _name using the linker\n  # flags given by _flags.  If the combination of libraries is found and passes\n  # the link test, LIBRARIES is set to the list of complete library paths that\n  # have been found.  Otherwise, LIBRARIES is set to FALSE.\n  # N.B. 
_prefix is the prefix applied to the names of all cached variables that\n  # are generated internally and marked advanced by this macro.\n  SET(_prefix \"${LIBRARIES}\")\n  # start checking\n  SET(_libraries_work TRUE)\n  SET(${LIBRARIES})\n  SET(_combined_name)\n  SET(_paths)\n  set(__list)\n  foreach(_elem ${_list})\n    if(__list)\n      set(__list \"${__list} - ${_elem}\")\n    else(__list)\n      set(__list \"${_elem}\")\n    endif(__list)\n  endforeach(_elem)\n  message(STATUS \"Checking for [${__list}]\")\n  FOREACH(_library ${_list})\n    SET(_combined_name ${_combined_name}_${_library})\n    IF(_libraries_work)\n      IF(${_library} STREQUAL \"gomp\")\n          FIND_PACKAGE(OpenMP)\n          IF(OPENMP_FOUND)\n\t      SET(${_prefix}_${_library}_LIBRARY ${OpenMP_C_FLAGS})\n          ENDIF(OPENMP_FOUND)\n      ELSE(${_library} STREQUAL \"gomp\")\n          FIND_LIBRARY(${_prefix}_${_library}_LIBRARY NAMES ${_library})\n      ENDIF(${_library} STREQUAL \"gomp\")\n      MARK_AS_ADVANCED(${_prefix}_${_library}_LIBRARY)\n      SET(${LIBRARIES} ${${LIBRARIES}} ${${_prefix}_${_library}_LIBRARY})\n      SET(_libraries_work ${${_prefix}_${_library}_LIBRARY})\n      IF(${_prefix}_${_library}_LIBRARY)\n        MESSAGE(STATUS \"  Library ${_library}: ${${_prefix}_${_library}_LIBRARY}\")\n      ELSE(${_prefix}_${_library}_LIBRARY)\n        MESSAGE(STATUS \"  Library ${_library}: not found\")\n      ENDIF(${_prefix}_${_library}_LIBRARY)\n    ENDIF(_libraries_work)\n  ENDFOREACH(_library ${_list})\n  # Test this combination of libraries.\n  IF(_libraries_work)\n    SET(CMAKE_REQUIRED_LIBRARIES ${_flags} ${${LIBRARIES}})\n    SET(CMAKE_REQUIRED_LIBRARIES \"${CMAKE_REQUIRED_LIBRARIES};${CMAKE_REQUIRED_LIBRARIES}\")\n    CHECK_FUNCTION_EXISTS(${_name} ${_prefix}${_combined_name}_WORKS)\n    SET(CMAKE_REQUIRED_LIBRARIES)\n    MARK_AS_ADVANCED(${_prefix}${_combined_name}_WORKS)\n    SET(_libraries_work ${${_prefix}${_combined_name}_WORKS})\n  ENDIF(_libraries_work)\n  # Fin\n 
 IF(_libraries_work)\n  ELSE (_libraries_work)\n    SET(${LIBRARIES})\n    MARK_AS_ADVANCED(${LIBRARIES})\n  ENDIF(_libraries_work)\nENDMACRO(CHECK_ALL_LIBRARIES)\n\nif(WIN32)\n  set(mkl_m \"\")\nelse(WIN32)\n  set(mkl_m \"m\")\nendif(WIN32)\n\nif(UNIX AND NOT APPLE)\n  set(mkl_dl \"${CMAKE_DL_LIBS}\")\nelse(UNIX AND NOT APPLE)\n  set(mkl_dl \"\")\nendif(UNIX AND NOT APPLE)\n\n# Check for version 10/11\nIF (NOT MKL_LIBRARIES)\n  SET(MKL_VERSION 1011)\nENDIF (NOT MKL_LIBRARIES)\nFOREACH(mklrtl ${mklrtls} \"\")\n  FOREACH(mkliface ${mklifaces})\n    FOREACH(mkl64 ${mkl64s} \"\")\n      FOREACH(mklthread ${mklthreads})\n        IF (NOT MKL_LIBRARIES AND NOT INTEL_MKL_SEQUENTIAL)\n          CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm\n            \"mkl_${mkliface}${mkl64};${mklthread};mkl_core;${mklrtl};pthread;${mkl_m};${mkl_dl}\" \"\")\n        ENDIF (NOT MKL_LIBRARIES AND NOT INTEL_MKL_SEQUENTIAL)          \n      ENDFOREACH(mklthread)\n    ENDFOREACH(mkl64)\n  ENDFOREACH(mkliface)\nENDFOREACH(mklrtl)\nFOREACH(mklrtl ${mklrtls} \"\")\n  FOREACH(mkliface ${mklifaces})\n    FOREACH(mkl64 ${mkl64s} \"\")\n      IF (NOT MKL_LIBRARIES)\n        CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm\n          \"mkl_${mkliface}${mkl64};mkl_sequential;mkl_core;${mkl_m};${mkl_dl}\" \"\")\n        IF (MKL_LIBRARIES)\n          SET(mklseq \"_sequential\")\n        ENDIF (MKL_LIBRARIES)\n      ENDIF (NOT MKL_LIBRARIES)\n    ENDFOREACH(mkl64)\n  ENDFOREACH(mkliface)\nENDFOREACH(mklrtl)\nFOREACH(mklrtl ${mklrtls} \"\")\n  FOREACH(mkliface ${mklifaces})\n    FOREACH(mkl64 ${mkl64s} \"\")\n      FOREACH(mklthread ${mklthreads})\n        IF (NOT MKL_LIBRARIES)\n          CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm\n            \"mkl_${mkliface}${mkl64};${mklthread};mkl_core;${mklrtl};pthread;${mkl_m};${mkl_dl}\" \"\")\n        ENDIF (NOT MKL_LIBRARIES)          \n      ENDFOREACH(mklthread)\n    ENDFOREACH(mkl64)\n  ENDFOREACH(mkliface)\nENDFOREACH(mklrtl)\n\n# Check for older 
versions\nIF (NOT MKL_LIBRARIES)\n  SET(MKL_VERSION 900)\n  CHECK_ALL_LIBRARIES(MKL_LIBRARIES cblas_sgemm\n    \"mkl;guide;pthread;m\" \"\")\nENDIF (NOT MKL_LIBRARIES)          \n\n# Include files\nIF (MKL_LIBRARIES)\n  FIND_PATH(MKL_INCLUDE_DIR \"mkl_cblas.h\")\n  MARK_AS_ADVANCED(MKL_INCLUDE_DIR)\nENDIF (MKL_LIBRARIES)\n\n# Other libraries\nIF (MKL_LIBRARIES)\n  FOREACH(mkl64 ${mkl64s} \"_core\" \"\")\n    FOREACH(mkls ${mklseq} \"\")\n      IF (NOT MKL_LAPACK_LIBRARIES)\n        FIND_LIBRARY(MKL_LAPACK_LIBRARIES NAMES \"mkl_lapack${mkl64}${mkls}\")\n        MARK_AS_ADVANCED(MKL_LAPACK_LIBRARIES)\n      ENDIF (NOT MKL_LAPACK_LIBRARIES)\n      IF (NOT MKL_SCALAPACK_LIBRARIES)\n        FIND_LIBRARY(MKL_SCALAPACK_LIBRARIES NAMES \"mkl_scalapack${mkl64}${mkls}\") \n        MARK_AS_ADVANCED(MKL_SCALAPACK_LIBRARIES)\n      ENDIF (NOT MKL_SCALAPACK_LIBRARIES)\n      IF (NOT MKL_SOLVER_LIBRARIES)\n        FIND_LIBRARY(MKL_SOLVER_LIBRARIES NAMES \"mkl_solver${mkl64}${mkls}\")\n        MARK_AS_ADVANCED(MKL_SOLVER_LIBRARIES)\n      ENDIF (NOT MKL_SOLVER_LIBRARIES)\n      IF (NOT MKL_CDFT_LIBRARIES)\n        FIND_LIBRARY(MKL_CDFT_LIBRARIES NAMES \"mkl_cdft${mkl64}${mkls}\")\n        MARK_AS_ADVANCED(MKL_CDFT_LIBRARIES)\n      ENDIF (NOT MKL_CDFT_LIBRARIES)\n    ENDFOREACH(mkls)\n  ENDFOREACH(mkl64)\nENDIF (MKL_LIBRARIES)\n\n# LibIRC: intel compiler always links this; \n# gcc does not; but mkl kernels sometimes need it.\nIF (MKL_LIBRARIES)\n  IF (CMAKE_COMPILER_IS_GNUCC)\n    FIND_LIBRARY(MKL_KERNEL_libirc \"irc\")\n  ELSEIF (CMAKE_C_COMPILER_ID AND NOT CMAKE_C_COMPILER_ID STREQUAL \"Intel\")\n    FIND_LIBRARY(MKL_KERNEL_libirc \"irc\")\n  ENDIF (CMAKE_COMPILER_IS_GNUCC)\n  MARK_AS_ADVANCED(MKL_KERNEL_libirc)\n  IF (MKL_KERNEL_libirc)\n    SET(MKL_LIBRARIES ${MKL_LIBRARIES} ${MKL_KERNEL_libirc})\n  ENDIF (MKL_KERNEL_libirc)\nENDIF (MKL_LIBRARIES)\n\n# Final\nSET(CMAKE_LIBRARY_PATH ${saved_CMAKE_LIBRARY_PATH})\nSET(CMAKE_INCLUDE_PATH ${saved_CMAKE_INCLUDE_PATH})\nIF 
(MKL_LIBRARIES)\n  SET(MKL_FOUND TRUE)\nELSE (MKL_LIBRARIES)\n  SET(MKL_FOUND FALSE)\n  SET(MKL_VERSION)\nENDIF (MKL_LIBRARIES)\n\n# Standard termination\nIF(NOT MKL_FOUND AND MKL_FIND_REQUIRED)\n  MESSAGE(FATAL_ERROR \"MKL library not found. Please specify library  location\")\nENDIF(NOT MKL_FOUND AND MKL_FIND_REQUIRED)\nIF(NOT MKL_FIND_QUIETLY)\n  IF(MKL_FOUND)\n    MESSAGE(STATUS \"MKL library found\")\n  ELSE(MKL_FOUND)\n    MESSAGE(STATUS \"MKL library not found\")\n  ENDIF(MKL_FOUND)\nENDIF(NOT MKL_FIND_QUIETLY)\n\n# Do nothing if MKL_FOUND was set before!\nENDIF (NOT MKL_FOUND)\n\n\n"
  },
  {
    "path": "lib/TH/cmake/FindSSE.cmake",
    "content": "INCLUDE(CheckCSourceRuns)\nINCLUDE(CheckCXXSourceRuns)\n\nSET(SSE1_CODE \"\n  #include <xmmintrin.h>\n\n  int main()\n  {\n    __m128 a;\n    float vals[4] = {0,0,0,0};\n    a = _mm_loadu_ps(vals);\n    return 0;\n  }\")\n\nSET(SSE2_CODE \"\n  #include <emmintrin.h>\n\n  int main()\n  {\n    __m128d a;\n    double vals[2] = {0,0};\n    a = _mm_loadu_pd(vals);\n    return 0;\n  }\")\n\nSET(SSE3_CODE \"\n  #include <pmmintrin.h>\n\n  int main( )\n  {\n    const int vals[4] = {0,0,0,0};\n    __m128i a;\n    a = _mm_lddqu_si128( (const __m128i*)vals );\n    return 0;\n  }\")\n\nSET(SSE4_1_CODE \"\n  #include <smmintrin.h>\n\n  int main ()\n  {\n    __m128i a = {0,0,0,0}, b = {0,0,0,0};\n    __m128i res = _mm_max_epi8(a, b);\n\n    return 0;\n  }\n\")\n\nSET(SSE4_2_CODE \"\n  #include <nmmintrin.h>\n\n  int main()\n  {\n    __m128i a = {0,0,0,0}, b = {0,0,0,0}, c = {0,0,0,0};\n    c = _mm_cmpgt_epi64(a, b);\n    return 0;\n  }\n\")\n\nSET(AVX_CODE \"\n  #include <immintrin.h>\n\n  int main()\n  {\n    __m256 a;\n    a = _mm256_set1_ps(0);\n    return 0;\n  }\n\")\n\nSET(AVX2_CODE \"\n  #include <immintrin.h>\n\n  int main()\n  {\n    __m256i a = {0};\n    a = _mm256_abs_epi16(a);\n    return 0;\n  }\n\")\n\nMACRO(CHECK_SSE lang type flags)\n  SET(__FLAG_I 1)\n  SET(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})\n  FOREACH(__FLAG ${flags})\n    IF(NOT ${lang}_${type}_FOUND)\n      SET(CMAKE_REQUIRED_FLAGS ${__FLAG})\n      IF(lang STREQUAL \"CXX\")\n        CHECK_CXX_SOURCE_RUNS(\"${${type}_CODE}\" ${lang}_HAS_${type}_${__FLAG_I})\n      ELSE()\n        CHECK_C_SOURCE_RUNS(\"${${type}_CODE}\" ${lang}_HAS_${type}_${__FLAG_I})\n      ENDIF()\n      IF(${lang}_HAS_${type}_${__FLAG_I})\n        SET(${lang}_${type}_FOUND TRUE CACHE BOOL \"${lang} ${type} support\")\n        SET(${lang}_${type}_FLAGS \"${__FLAG}\" CACHE STRING \"${lang} ${type} flags\")\n      ENDIF()\n      MATH(EXPR __FLAG_I \"${__FLAG_I}+1\")\n    ENDIF()\n  ENDFOREACH()\n  
SET(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})\n\n  IF(NOT ${lang}_${type}_FOUND)\n    SET(${lang}_${type}_FOUND FALSE CACHE BOOL \"${lang} ${type} support\")\n    SET(${lang}_${type}_FLAGS \"\" CACHE STRING \"${lang} ${type} flags\")\n  ENDIF()\n\n  MARK_AS_ADVANCED(${lang}_${type}_FOUND ${lang}_${type}_FLAGS)\n\nENDMACRO()\n\nCHECK_SSE(C \"SSE1\" \" ;-msse;/arch:SSE\")\nCHECK_SSE(C \"SSE2\" \" ;-msse2;/arch:SSE2\")\nCHECK_SSE(C \"SSE3\" \" ;-msse3;/arch:SSE3\")\nCHECK_SSE(C \"SSE4_1\" \" ;-msse4.1;-msse4;/arch:SSE4\")\nCHECK_SSE(C \"SSE4_2\" \" ;-msse4.2;-msse4;/arch:SSE4\")\nCHECK_SSE(C \"AVX\" \" ;-mavx;/arch:AVX\")\nCHECK_SSE(C \"AVX2\" \" ;-mavx2 -mfma;/arch:AVX2\")\n\nCHECK_SSE(CXX \"SSE1\" \" ;-msse;/arch:SSE\")\nCHECK_SSE(CXX \"SSE2\" \" ;-msse2;/arch:SSE2\")\nCHECK_SSE(CXX \"SSE3\" \" ;-msse3;/arch:SSE3\")\nCHECK_SSE(CXX \"SSE4_1\" \" ;-msse4.1;-msse4;/arch:SSE4\")\nCHECK_SSE(CXX \"SSE4_2\" \" ;-msse4.2;-msse4;/arch:SSE4\")\nCHECK_SSE(CXX \"AVX\" \" ;-mavx;/arch:AVX\")\nCHECK_SSE(CXX \"AVX2\" \" ;-mavx2 -mfma;/arch:AVX2\")\n"
  },
  {
    "path": "lib/TH/generic/THBlas.c",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THBlas.c\"\n#else\n\n\n#ifdef BLAS_F2C\n# define ffloat double\n#else\n# define ffloat float\n#endif\n\nTH_EXTERNC void dswap_(int *n, double *x, int *incx, double *y, int *incy);\nTH_EXTERNC void sswap_(int *n, float *x, int *incx, float *y, int *incy);\nTH_EXTERNC void dscal_(int *n, double *a, double *x, int *incx);\nTH_EXTERNC void sscal_(int *n, float *a, float *x, int *incx);\nTH_EXTERNC void dcopy_(int *n, double *x, int *incx, double *y, int *incy);\nTH_EXTERNC void scopy_(int *n, float *x, int *incx, float *y, int *incy);\nTH_EXTERNC void daxpy_(int *n, double *a, double *x, int *incx, double *y, int *incy);\nTH_EXTERNC void saxpy_(int *n, float *a, float *x, int *incx, float *y, int *incy);\nTH_EXTERNC double ddot_(int *n, double *x, int *incx, double *y, int *incy);\n#ifdef BLAS_USE_CBLAS_DOT\nTH_EXTERNC float cblas_sdot(const int n, const float *x, const int incx, const float *y, const int incy);\n#ifndef THBlas_C_sdot_\n#define THBlas_C_sdot_\ninline ffloat sdot_(const int *n, const float *x, const int *incx, const float *y, const int *incy)\n{\n  return cblas_sdot(*n, x, *incx, y, *incy);\n}\n#endif\n#else\nTH_EXTERNC ffloat sdot_(int *n, float *x, int *incx, float *y, int *incy);\n#endif\nTH_EXTERNC void dgemv_(char *trans, int *m, int *n, double *alpha, double *a, int *lda, double *x, int *incx, double *beta, double *y, int *incy);\nTH_EXTERNC void sgemv_(char *trans, int *m, int *n, float *alpha, float *a, int *lda, float *x, int *incx, float *beta, float *y, int *incy);\nTH_EXTERNC void dger_(int *m, int *n, double *alpha, double *x, int *incx, double *y, int *incy, double *a, int *lda);\nTH_EXTERNC void sger_(int *m, int *n, float *alpha, float *x, int *incx, float *y, int *incy, float *a, int *lda);\nTH_EXTERNC void dgemm_(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int 
*ldc);\nTH_EXTERNC void sgemm_(char *transa, char *transb, int *m, int *n, int *k, float *alpha, float *a, int *lda, float *b, int *ldb, float *beta, float *c, int *ldc);\n\n\n\nvoid THBlas_(swap)(long n, real *x, long incx, real *y, long incy)\n{\n  if(n == 1)\n  {\n    incx = 1;\n    incy = 1;\n  }\n\n#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))\n  if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )\n  {\n    int i_n = (int)n;\n    int i_incx = (int)incx;\n    int i_incy = (int)incy;\n\n#if defined(TH_REAL_IS_DOUBLE)\n    dswap_(&i_n, x, &i_incx, y, &i_incy);\n#else\n    sswap_(&i_n, x, &i_incx, y, &i_incy);\n#endif\n    return;\n  }\n#endif\n  {\n    long i;\n    for(i = 0; i < n; i++)\n    {\n      real z = x[i*incx];\n      x[i*incx] = y[i*incy];\n      y[i*incy] = z;\n    }\n  }\n}\n\nvoid THBlas_(scal)(long n, real a, real *x, long incx)\n{\n  if(n == 1)\n    incx = 1;\n\n#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))\n  if( (n <= INT_MAX) && (incx <= INT_MAX) )\n  {\n    int i_n = (int)n;\n    int i_incx = (int)incx;\n\n#if defined(TH_REAL_IS_DOUBLE)\n    dscal_(&i_n, &a, x, &i_incx);\n#else\n    sscal_(&i_n, &a, x, &i_incx);\n#endif\n    return;\n  }\n#endif\n  {\n    long i;\n    for(i = 0; i < n; i++) {\n      if (a == 0) {\n        x[i*incx] = 0;\n      } else {\n        x[i*incx] *= a;\n      }\n    }\n  }\n}\n\nvoid THBlas_(copy)(long n, real *x, long incx, real *y, long incy)\n{\n  if(n == 1)\n  {\n    incx = 1;\n    incy = 1;\n  }\n\n#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))\n  if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )\n  {\n    int i_n = (int)n;\n    int i_incx = (int)incx;\n    int i_incy = (int)incy;\n\n#if defined(TH_REAL_IS_DOUBLE)\n    dcopy_(&i_n, x, &i_incx, y, &i_incy);\n#else\n    scopy_(&i_n, x, &i_incx, y, &i_incy);\n#endif\n    return;\n  }\n#endif\n  {\n    long i;\n    for(i = 0; i < n; 
i++)\n      y[i*incy] = x[i*incx];\n  }\n}\n\nvoid THBlas_(axpy)(long n, real a, real *x, long incx, real *y, long incy)\n{\n  if(n == 1)\n  {\n    incx = 1;\n    incy = 1;\n  }\n\n#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))\n  if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )\n  {\n    int i_n = (int)n;\n    int i_incx = (int)incx;\n    int i_incy = (int)incy;\n\n#if defined(TH_REAL_IS_DOUBLE)\n    daxpy_(&i_n, &a, x, &i_incx, y, &i_incy);\n#else\n    saxpy_(&i_n, &a, x, &i_incx, y, &i_incy);\n#endif\n    return;\n  }\n#endif\n  {\n    long i;\n    for(i = 0; i < n; i++)\n      y[i*incy] += a*x[i*incx];\n  }\n}\n\nreal THBlas_(dot)(long n, real *x, long incx, real *y, long incy)\n{\n  if(n == 1)\n  {\n    incx = 1;\n    incy = 1;\n  }\n\n#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))\n  if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )\n  {\n    int i_n = (int)n;\n    int i_incx = (int)incx;\n    int i_incy = (int)incy;\n\n#if defined(TH_REAL_IS_DOUBLE)\n    return (real) ddot_(&i_n, x, &i_incx, y, &i_incy);\n#else\n    return (real) sdot_(&i_n, x, &i_incx, y, &i_incy);\n#endif\n  }\n#endif\n  {\n    long i;\n    real sum = 0;\n    for(i = 0; i < n; i++)\n    sum += x[i*incx]*y[i*incy];\n    return sum;\n  }\n}\n\nvoid THBlas_(gemv)(char trans, long m, long n, real alpha, real *a, long lda, real *x, long incx, real beta, real *y, long incy)\n{\n  if(n == 1)\n    lda = m;\n\n#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))\n  if( (m <= INT_MAX) && (n <= INT_MAX) &&\n      (lda > 0) && (lda <= INT_MAX) &&\n      (incx > 0) && (incx <= INT_MAX) &&\n      (incy > 0) && (incy <= INT_MAX) )\n  {\n    int i_m = (int)m;\n    int i_n = (int)n;\n    int i_lda = (int)lda;\n    int i_incx = (int)incx;\n    int i_incy = (int)incy;\n\n#if defined(TH_REAL_IS_DOUBLE)\n    dgemv_(&trans, &i_m, &i_n, &alpha, a, &i_lda, x, &i_incx, &beta, y, 
&i_incy);\n#else\n    sgemv_(&trans, &i_m, &i_n, &alpha, a, &i_lda, x, &i_incx, &beta, y, &i_incy);\n#endif\n    return;\n  }\n#endif\n  {\n    long i, j;\n\n    if( (trans == 'T') || (trans == 't') )\n    {\n      for(i = 0; i < n; i++)\n      {\n        real sum = 0;\n        real *row_ = a+lda*i;\n        for(j = 0; j < m; j++)\n          sum += x[j*incx]*row_[j];\n\tif (beta == 0)\n\t  y[i*incy] = alpha*sum;\n\telse\n\t  y[i*incy] = beta*y[i*incy] + alpha*sum;\n      }\n    }\n    else\n    {\n      if(beta != 1)\n        THBlas_(scal)(m, beta, y, incy);\n\n      for(j = 0; j < n; j++)\n      {\n        real *column_ = a+lda*j;\n        real z = alpha*x[j*incx];\n        for(i = 0; i < m; i++)\n          y[i*incy] += z*column_[i];\n      }\n    }\n  }\n}\n\nvoid THBlas_(ger)(long m, long n, real alpha, real *x, long incx, real *y, long incy, real *a, long lda)\n{\n  if(n == 1)\n    lda = m;\n\n#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))\n  if( (m <= INT_MAX) && (n <= INT_MAX) && (lda <= INT_MAX)  && (incx <= INT_MAX) && (incy <= INT_MAX) )\n  {\n    int i_m = (int)m;\n    int i_n = (int)n;\n    int i_lda = (int)lda;\n    int i_incx = (int)incx;\n    int i_incy = (int)incy;\n\n#if defined(TH_REAL_IS_DOUBLE)\n    dger_(&i_m, &i_n, &alpha, x, &i_incx, y, &i_incy, a, &i_lda);\n#else\n    sger_(&i_m, &i_n, &alpha, x, &i_incx, y, &i_incy, a, &i_lda);\n#endif\n    return;\n  }\n#endif\n  {\n    long i, j;\n    for(j = 0; j < n; j++)\n    {\n      real *column_ = a+j*lda;\n      real z = alpha*y[j*incy];\n      for(i = 0; i < m; i++)\n        column_[i] += z*x[i*incx] ;\n    }\n  }\n}\n\nvoid THBlas_(gemm)(char transa, char transb, long m, long n, long k, real alpha, real *a, long lda, real *b, long ldb, real beta, real *c, long ldc)\n{\n  int transa_ = ((transa == 't') || (transa == 'T'));\n  int transb_ = ((transb == 't') || (transb == 'T'));\n\n  if(n == 1)\n    ldc = m;\n\n  if(transa_)\n  {\n    if(m == 1)\n      lda = k;\n  
}\n  else\n  {\n    if(k == 1)\n      lda = m;\n  }\n\n  if(transb_)\n  {\n    if(k == 1)\n      ldb = n;\n  }\n  else\n  {\n    if(n == 1)\n      ldb = k;\n  }\n\n#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))\n  if( (m <= INT_MAX) && (n <= INT_MAX) && (k <= INT_MAX) && (lda <= INT_MAX)  && (ldb <= INT_MAX) && (ldc <= INT_MAX) )\n  {\n    int i_m = (int)m;\n    int i_n = (int)n;\n    int i_k = (int)k;\n    int i_lda = (int)lda;\n    int i_ldb = (int)ldb;\n    int i_ldc = (int)ldc;\n\n#if defined(TH_REAL_IS_DOUBLE)\n    dgemm_(&transa, &transb, &i_m, &i_n, &i_k, &alpha, a, &i_lda, b, &i_ldb, &beta, c, &i_ldc);\n#else\n    sgemm_(&transa, &transb, &i_m, &i_n, &i_k, &alpha, a, &i_lda, b, &i_ldb, &beta, c, &i_ldc);\n#endif\n    return;\n  }\n#endif\n  {\n    long i, j, l;\n    if(!transa_ && !transb_)\n    {\n      real *a_ = a;\n      for(i = 0; i < m; i++)\n      {\n        real *b_ = b;\n        for(j = 0; j < n; j++)\n        {\n          real sum = 0;\n          for(l = 0; l < k; l++)\n            sum += a_[l*lda]*b_[l];\n          b_ += ldb;\n\t  if (beta == 0)\n\t    c[j*ldc+i] = alpha*sum;\n\t  else\n\t    c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum;\n        }\n        a_++;\n      }\n    }\n    else if(transa_ && !transb_)\n    {\n      real *a_ = a;\n      for(i = 0; i < m; i++)\n      {\n        real *b_ = b;\n        for(j = 0; j < n; j++)\n        {\n          real sum = 0;\n          for(l = 0; l < k; l++)\n            sum += a_[l]*b_[l];\n          b_ += ldb;\n\t  if (beta == 0)\n\t    c[j*ldc+i] = alpha*sum;\n\t  else\n\t    c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum;\n        }\n        a_ += lda;\n      }\n    }\n    else if(!transa_ && transb_)\n    {\n      real *a_ = a;\n      for(i = 0; i < m; i++)\n      {\n        real *b_ = b;\n        for(j = 0; j < n; j++)\n        {\n          real sum = 0;\n          for(l = 0; l < k; l++)\n            sum += a_[l*lda]*b_[l*ldb];\n          b_++;\n\t  if (beta == 0)\n\t    
c[j*ldc+i] = alpha*sum;\n\t  else\n\t    c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum;\n        }\n        a_++;\n      }\n    }\n    else\n    {\n      real *a_ = a;\n      for(i = 0; i < m; i++)\n      {\n        real *b_ = b;\n        for(j = 0; j < n; j++)\n        {\n          real sum = 0;\n          for(l = 0; l < k; l++)\n            sum += a_[l]*b_[l*ldb];\n          b_++;\n\t  if (beta == 0)\n\t    c[j*ldc+i] = alpha*sum;\n\t  else\n\t    c[j*ldc+i] = beta*c[j*ldc+i]+alpha*sum;\n        }\n        a_ += lda;\n      }\n    }\n  }\n}\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THBlas.h",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THBlas.h\"\n#else\n\n/* Level 1 */\nTH_API void THBlas_(swap)(long n, real *x, long incx, real *y, long incy);\nTH_API void THBlas_(scal)(long n, real a, real *x, long incx);\nTH_API void THBlas_(copy)(long n, real *x, long incx, real *y, long incy);\nTH_API void THBlas_(axpy)(long n, real a, real *x, long incx, real *y, long incy);\nTH_API real THBlas_(dot)(long n, real *x, long incx, real *y, long incy);\n\n/* Level 2 */\nTH_API void THBlas_(gemv)(char trans, long m, long n, real alpha, real *a, long lda, real *x, long incx, real beta, real *y, long incy);\nTH_API void THBlas_(ger)(long m, long n, real alpha, real *x, long incx, real *y, long incy, real *a, long lda);\n\n/* Level 3 */\nTH_API void THBlas_(gemm)(char transa, char transb, long m, long n, long k, real alpha, real *a, long lda, real *b, long ldb, real beta, real *c, long ldc);\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THLapack.c",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THLapack.c\"\n#else\n\n\nTH_EXTERNC void dgesv_(int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, int *info);\nTH_EXTERNC void sgesv_(int *n, int *nrhs, float *a, int *lda, int *ipiv, float *b, int *ldb, int *info);\nTH_EXTERNC void dtrtrs_(char *uplo, char *trans, char *diag, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, int *info);\nTH_EXTERNC void strtrs_(char *uplo, char *trans, char *diag, int *n, int *nrhs, float *a, int *lda, float *b, int *ldb, int *info);\nTH_EXTERNC void dgels_(char *trans, int *m, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, double *work, int *lwork, int *info);\nTH_EXTERNC void sgels_(char *trans, int *m, int *n, int *nrhs, float *a, int *lda, float *b, int *ldb, float *work, int *lwork, int *info);\nTH_EXTERNC void dsyev_(char *jobz, char *uplo, int *n, double *a, int *lda, double *w, double *work, int *lwork, int *info);\nTH_EXTERNC void ssyev_(char *jobz, char *uplo, int *n, float *a, int *lda, float *w, float *work, int *lwork, int *info);\nTH_EXTERNC void dgeev_(char *jobvl, char *jobvr, int *n, double *a, int *lda, double *wr, double *wi, double* vl, int *ldvl, double *vr, int *ldvr, double *work, int *lwork, int *info);\nTH_EXTERNC void sgeev_(char *jobvl, char *jobvr, int *n, float *a, int *lda, float *wr, float *wi, float* vl, int *ldvl, float *vr, int *ldvr, float *work, int *lwork, int *info);\nTH_EXTERNC void dgesvd_(char *jobu, char *jobvt, int *m, int *n, double *a, int *lda, double *s, double *u, int *ldu, double *vt, int *ldvt, double *work, int *lwork, int *info);\nTH_EXTERNC void sgesvd_(char *jobu, char *jobvt, int *m, int *n, float *a, int *lda, float *s, float *u, int *ldu, float *vt, int *ldvt, float *work, int *lwork, int *info);\nTH_EXTERNC void dgetrf_(int *m, int *n, double *a, int *lda, int *ipiv, int *info);\nTH_EXTERNC void sgetrf_(int *m, int *n, float *a, int *lda, int *ipiv, int 
*info);\nTH_EXTERNC void dgetrs_(char *trans, int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, int *info);\nTH_EXTERNC void sgetrs_(char *trans, int *n, int *nrhs, float *a, int *lda, int *ipiv, float *b, int *ldb, int *info);\nTH_EXTERNC void dgetri_(int *n, double *a, int *lda, int *ipiv, double *work, int *lwork, int *info);\nTH_EXTERNC void sgetri_(int *n, float *a, int *lda, int *ipiv, float *work, int *lwork, int *info);\nTH_EXTERNC void dpotrf_(char *uplo, int *n, double *a, int *lda, int *info);\nTH_EXTERNC void spotrf_(char *uplo, int *n, float *a, int *lda, int *info);\nTH_EXTERNC void dpotri_(char *uplo, int *n, double *a, int *lda, int *info);\nTH_EXTERNC void spotri_(char *uplo, int *n, float *a, int *lda, int *info);\nTH_EXTERNC void dpotrs_(char *uplo, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, int *info);\nTH_EXTERNC void spotrs_(char *uplo, int *n, int *nrhs, float *a, int *lda, float *b, int *ldb, int *info);\nTH_EXTERNC void sgeqrf_(int *m, int *n, float *a, int *lda, float *tau, float *work, int *lwork, int *info);\nTH_EXTERNC void dgeqrf_(int *m, int *n, double *a, int *lda, double *tau, double *work, int *lwork, int *info);\nTH_EXTERNC void sorgqr_(int *m, int *n, int *k, float *a, int *lda, float *tau, float *work, int *lwork, int *info);\nTH_EXTERNC void dorgqr_(int *m, int *n, int *k, double *a, int *lda, double *tau, double *work, int *lwork, int *info);\nTH_EXTERNC void sormqr_(char *side, char *trans, int *m, int *n, int *k, float *a, int *lda, float *tau, float *c, int *ldc, float *work, int *lwork, int *info);\nTH_EXTERNC void dormqr_(char *side, char *trans, int *m, int *n, int *k, double *a, int *lda, double *tau, double *c, int *ldc, double *work, int *lwork, int *info);\nTH_EXTERNC void spstrf_(char *uplo, int *n, float *a, int *lda, int *piv, int *rank, float *tol, float *work, int *info);\nTH_EXTERNC void dpstrf_(char *uplo, int *n, double *a, int *lda, int *piv, int *rank, double *tol, 
double *work, int *info);\n\n\n/* Compute the solution to a real system of linear equations  A * X = B */\nvoid THLapack_(gesv)(int n, int nrhs, real *a, int lda, int *ipiv, real *b, int ldb, int* info)\n{\n#ifdef USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dgesv_(&n, &nrhs, a, &lda, ipiv, b, &ldb, info);\n#else\n  sgesv_(&n, &nrhs, a, &lda, ipiv, b, &ldb, info);\n#endif\n#else\n  THError(\"gesv : Lapack library not found in compile time\\n\");\n#endif\n  return;\n}\n\n/* Solve a triangular system of the form A * X = B  or A^T * X = B */\nvoid THLapack_(trtrs)(char uplo, char trans, char diag, int n, int nrhs, real *a, int lda, real *b, int ldb, int* info)\n{\n#ifdef USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dtrtrs_(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, info);\n#else\n  strtrs_(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, info);\n#endif\n#else\n  THError(\"trtrs : Lapack library not found in compile time\\n\");\n#endif\n  return;\n}\n\n/* Solve overdetermined or underdetermined real linear systems involving an\nM-by-N matrix A, or its transpose, using a QR or LQ factorization of A */\nvoid THLapack_(gels)(char trans, int m, int n, int nrhs, real *a, int lda, real *b, int ldb, real *work, int lwork, int *info)\n{\n#ifdef USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dgels_(&trans, &m, &n, &nrhs, a, &lda, b, &ldb, work, &lwork, info);\n#else\n  sgels_(&trans, &m, &n, &nrhs, a, &lda, b, &ldb, work, &lwork, info);\n#endif\n#else\n  THError(\"gels : Lapack library not found in compile time\\n\");\n#endif\n}\n\n/* Compute all eigenvalues and, optionally, eigenvectors of a real symmetric\nmatrix A */\nvoid THLapack_(syev)(char jobz, char uplo, int n, real *a, int lda, real *w, real *work, int lwork, int *info)\n{\n#ifdef USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dsyev_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, info);\n#else\n  ssyev_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, info);\n#endif\n#else\n  THError(\"syev : Lapack library not 
found in compile time\\n\");\n#endif\n}\n\n/* Compute for an N-by-N real nonsymmetric matrix A, the eigenvalues and,\noptionally, the left and/or right eigenvectors */\nvoid THLapack_(geev)(char jobvl, char jobvr, int n, real *a, int lda, real *wr, real *wi, real* vl, int ldvl, real *vr, int ldvr, real *work, int lwork, int *info)\n{\n#ifdef USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dgeev_(&jobvl, &jobvr, &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, work, &lwork, info);\n#else\n  sgeev_(&jobvl, &jobvr, &n, a, &lda, wr, wi, vl, &ldvl, vr, &ldvr, work, &lwork, info);\n#endif\n#else\n  THError(\"geev : Lapack library not found in compile time\\n\");\n#endif\n}\n\n/* Compute the singular value decomposition (SVD) of a real M-by-N matrix A,\noptionally computing the left and/or right singular vectors */\nvoid THLapack_(gesvd)(char jobu, char jobvt, int m, int n, real *a, int lda, real *s, real *u, int ldu, real *vt, int ldvt, real *work, int lwork, int *info)\n{\n#ifdef USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dgesvd_( &jobu,  &jobvt,  &m,  &n,  a,  &lda,  s,  u,  &ldu,  vt,  &ldvt,  work,  &lwork,  info);\n#else\n  sgesvd_( &jobu,  &jobvt,  &m,  &n,  a,  &lda,  s,  u,  &ldu,  vt,  &ldvt,  work,  &lwork,  info);\n#endif\n#else\n  THError(\"gesvd : Lapack library not found in compile time\\n\");\n#endif\n}\n\n/* LU decomposition */\nvoid THLapack_(getrf)(int m, int n, real *a, int lda, int *ipiv, int *info)\n{\n#ifdef  USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dgetrf_(&m, &n, a, &lda, ipiv, info);\n#else\n  sgetrf_(&m, &n, a, &lda, ipiv, info);\n#endif\n#else\n  THError(\"getrf : Lapack library not found in compile time\\n\");\n#endif\n}\n\nvoid THLapack_(getrs)(char trans, int n, int nrhs, real *a, int lda, int *ipiv, real *b, int ldb, int *info)\n{\n#ifdef  USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dgetrs_(&trans, &n, &nrhs, a, &lda, ipiv, b, &ldb, info);\n#else\n  sgetrs_(&trans, &n, &nrhs, a, &lda, ipiv, b, &ldb, info);\n#endif\n#else\n  
THError(\"getrs : Lapack library not found in compile time\\n\");\n#endif\n}\n\n/* Matrix Inverse */\nvoid THLapack_(getri)(int n, real *a, int lda, int *ipiv, real *work, int lwork, int* info)\n{\n#ifdef  USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dgetri_(&n, a, &lda, ipiv, work, &lwork, info);\n#else\n  sgetri_(&n, a, &lda, ipiv, work, &lwork, info);\n#endif\n#else\n  THError(\"getri : Lapack library not found in compile time\\n\");\n#endif\n}\n\n/* Cholesky factorization */\nvoid THLapack_(potrf)(char uplo, int n, real *a, int lda, int *info)\n{\n#ifdef  USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dpotrf_(&uplo, &n, a, &lda, info);\n#else\n  spotrf_(&uplo, &n, a, &lda, info);\n#endif\n#else\n  THError(\"potrf : Lapack library not found in compile time\\n\");\n#endif\n}\n\n/* Solve A*X = B with a symmetric positive definite matrix A using the Cholesky factorization */\nvoid THLapack_(potrs)(char uplo, int n, int nrhs, real *a, int lda, real *b, int ldb, int *info)\n{\n#ifdef  USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dpotrs_(&uplo, &n, &nrhs, a, &lda, b, &ldb, info);\n#else\n  spotrs_(&uplo, &n, &nrhs, a, &lda, b, &ldb, info);\n#endif\n#else\n  THError(\"potrs: Lapack library not found in compile time\\n\");\n#endif\n}\n\n/* Cholesky factorization based Matrix Inverse */\nvoid THLapack_(potri)(char uplo, int n, real *a, int lda, int *info)\n{\n#ifdef  USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dpotri_(&uplo, &n, a, &lda, info);\n#else\n  spotri_(&uplo, &n, a, &lda, info);\n#endif\n#else\n  THError(\"potri: Lapack library not found in compile time\\n\");\n#endif\n}\n\n/* Cholesky factorization with complete pivoting */\nvoid THLapack_(pstrf)(char uplo, int n, real *a, int lda, int *piv, int *rank, real tol, real *work, int *info)\n{\n#ifdef  USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dpstrf_(&uplo, &n, a, &lda, piv, rank, &tol, work, info);\n#else\n  spstrf_(&uplo, &n, a, &lda, piv, rank, &tol, work, info);\n#endif\n#else\n  THError(\"pstrf: Lapack 
library not found at compile time\\n\");\n#endif\n}\n\n/* QR decomposition */\nvoid THLapack_(geqrf)(int m, int n, real *a, int lda, real *tau, real *work, int lwork, int *info)\n{\n#ifdef  USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dgeqrf_(&m, &n, a, &lda, tau, work, &lwork, info);\n#else\n  sgeqrf_(&m, &n, a, &lda, tau, work, &lwork, info);\n#endif\n#else\n  THError(\"geqrf: Lapack library not found in compile time\\n\");\n#endif\n}\n\n/* Build Q from output of geqrf */\nvoid THLapack_(orgqr)(int m, int n, int k, real *a, int lda, real *tau, real *work, int lwork, int *info)\n{\n#ifdef  USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dorgqr_(&m, &n, &k, a, &lda, tau, work, &lwork, info);\n#else\n  sorgqr_(&m, &n, &k, a, &lda, tau, work, &lwork, info);\n#endif\n#else\n  THError(\"orgqr: Lapack library not found in compile time\\n\");\n#endif\n}\n\n/* Multiply Q with a matrix using the output of geqrf */\nvoid THLapack_(ormqr)(char side, char trans, int m, int n, int k, real *a, int lda, real *tau, real *c, int ldc, real *work, int lwork, int *info)\n{\n#ifdef  USE_LAPACK\n#if defined(TH_REAL_IS_DOUBLE)\n  dormqr_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info);\n#else\n  sormqr_(&side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, info);\n#endif\n#else\n  THError(\"ormqr: Lapack library not found in compile time\\n\");\n#endif\n}\n\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THLapack.h",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THLapack.h\"\n#else\n\n/* AX=B */\nTH_API void THLapack_(gesv)(int n, int nrhs, real *a, int lda, int *ipiv, real *b, int ldb, int* info);\n/* Solve a triangular system of the form A * X = B  or A^T * X = B */\nTH_API void THLapack_(trtrs)(char uplo, char trans, char diag, int n, int nrhs, real *a, int lda, real *b, int ldb, int* info);\n/* ||AX-B|| */\nTH_API void THLapack_(gels)(char trans, int m, int n, int nrhs, real *a, int lda, real *b, int ldb, real *work, int lwork, int *info);\n/* Eigenvals */\nTH_API void THLapack_(syev)(char jobz, char uplo, int n, real *a, int lda, real *w, real *work, int lwork, int *info);\n/* Non-sym eigenvals */\nTH_API void THLapack_(geev)(char jobvl, char jobvr, int n, real *a, int lda, real *wr, real *wi, real* vl, int ldvl, real *vr, int ldvr, real *work, int lwork, int *info);\n/* svd */\nTH_API void THLapack_(gesvd)(char jobu, char jobvt, int m, int n, real *a, int lda, real *s, real *u, int ldu, real *vt, int ldvt, real *work, int lwork, int *info);\n/* LU decomposition */\nTH_API void THLapack_(getrf)(int m, int n, real *a, int lda, int *ipiv, int *info);\nTH_API void THLapack_(getrs)(char trans, int n, int nrhs, real *a, int lda, int *ipiv, real *b, int ldb, int *info);\n/* Matrix Inverse */\nTH_API void THLapack_(getri)(int n, real *a, int lda, int *ipiv, real *work, int lwork, int* info);\n\n/* Positive Definite matrices */\n/* Cholesky factorization */\nvoid THLapack_(potrf)(char uplo, int n, real *a, int lda, int *info);\n/* Matrix inverse based on Cholesky factorization */\nvoid THLapack_(potri)(char uplo, int n, real *a, int lda, int *info);\n/* Solve A*X = B with a symmetric positive definite matrix A using the Cholesky factorization */\nvoid THLapack_(potrs)(char uplo, int n, int nrhs, real *a, int lda, real *b, int ldb, int *info);\n/* Cholesky factorization with complete pivoting. 
*/\nvoid THLapack_(pstrf)(char uplo, int n, real *a, int lda, int *piv, int *rank, real tol, real *work, int *info);\n\n/* QR decomposition */\nvoid THLapack_(geqrf)(int m, int n, real *a, int lda, real *tau, real *work, int lwork, int *info);\n/* Build Q from output of geqrf */\nvoid THLapack_(orgqr)(int m, int n, int k, real *a, int lda, real *tau, real *work, int lwork, int *info);\n/* Multiply Q with a matrix from output of geqrf */\nvoid THLapack_(ormqr)(char side, char trans, int m, int n, int k, real *a, int lda, real *tau, real *c, int ldc, real *work, int lwork, int *info);\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THStorage.c",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THStorage.c\"\n#else\n\nreal* THStorage_(data)(const THStorage *self)\n{\n  return self->data;\n}\n\nptrdiff_t THStorage_(size)(const THStorage *self)\n{\n  return self->size;\n}\n\nsize_t THStorage_(elementSize)()\n{\n  return sizeof(real);\n}\n\nTHStorage* THStorage_(new)(void)\n{\n  return THStorage_(newWithSize)(0);\n}\n\nTHStorage* THStorage_(newWithSize)(ptrdiff_t size)\n{\n  return THStorage_(newWithAllocator)(size, &THDefaultAllocator, NULL);\n}\n\nTHStorage* THStorage_(newWithAllocator)(ptrdiff_t size,\n                                        THAllocator *allocator,\n                                        void *allocatorContext)\n{\n  THStorage *storage = THAlloc(sizeof(THStorage));\n  storage->data = allocator->malloc(allocatorContext, sizeof(real)*size);\n  storage->size = size;\n  storage->refcount = 1;\n  storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM;\n  storage->allocator = allocator;\n  storage->allocatorContext = allocatorContext;\n  return storage;\n}\n\nTHStorage* THStorage_(newWithMapping)(const char *filename, ptrdiff_t size, int flags)\n{\n  THMapAllocatorContext *ctx = THMapAllocatorContext_new(filename, flags);\n\n  THStorage *storage = THStorage_(newWithAllocator)(size,\n                                                    &THMapAllocator,\n                                                    ctx);\n\n  if(size <= 0)\n    storage->size = THMapAllocatorContext_size(ctx)/sizeof(real);\n\n  THStorage_(clearFlag)(storage, TH_STORAGE_RESIZABLE);\n\n  return storage;\n}\n\nTHStorage* THStorage_(newWithSize1)(real data0)\n{\n  THStorage *self = THStorage_(newWithSize)(1);\n  self->data[0] = data0;\n  return self;\n}\n\nTHStorage* THStorage_(newWithSize2)(real data0, real data1)\n{\n  THStorage *self = THStorage_(newWithSize)(2);\n  self->data[0] = data0;\n  self->data[1] = data1;\n  return self;\n}\n\nTHStorage* 
THStorage_(newWithSize3)(real data0, real data1, real data2)\n{\n  THStorage *self = THStorage_(newWithSize)(3);\n  self->data[0] = data0;\n  self->data[1] = data1;\n  self->data[2] = data2;\n  return self;\n}\n\nTHStorage* THStorage_(newWithSize4)(real data0, real data1, real data2, real data3)\n{\n  THStorage *self = THStorage_(newWithSize)(4);\n  self->data[0] = data0;\n  self->data[1] = data1;\n  self->data[2] = data2;\n  self->data[3] = data3;\n  return self;\n}\n\nvoid THStorage_(setFlag)(THStorage *storage, const char flag)\n{\n  storage->flag |= flag;\n}\n\nvoid THStorage_(clearFlag)(THStorage *storage, const char flag)\n{\n  storage->flag &= ~flag;\n}\n\nvoid THStorage_(retain)(THStorage *storage)\n{\n  if(storage && (storage->flag & TH_STORAGE_REFCOUNTED))\n    THAtomicIncrementRef(&storage->refcount);\n}\n\nvoid THStorage_(free)(THStorage *storage)\n{\n  if(!storage)\n    return;\n\n  if((storage->flag & TH_STORAGE_REFCOUNTED) && (THAtomicGet(&storage->refcount) > 0))\n  {\n    if(THAtomicDecrementRef(&storage->refcount))\n    {\n      if(storage->flag & TH_STORAGE_FREEMEM) {\n        storage->allocator->free(storage->allocatorContext, storage->data);\n      }\n      if(storage->flag & TH_STORAGE_VIEW) {\n        THStorage_(free)(storage->view);\n      }\n      THFree(storage);\n    }\n  }\n}\n\nTHStorage* THStorage_(newWithData)(real *data, ptrdiff_t size)\n{\n  return THStorage_(newWithDataAndAllocator)(data, size,\n                                             &THDefaultAllocator, NULL);\n}\n\nTHStorage* THStorage_(newWithDataAndAllocator)(real* data, ptrdiff_t size,\n                                               THAllocator* allocator,\n                                               void* allocatorContext) {\n  THStorage *storage = THAlloc(sizeof(THStorage));\n  storage->data = data;\n  storage->size = size;\n  storage->refcount = 1;\n  storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM;\n  storage->allocator = 
allocator;\n  storage->allocatorContext = allocatorContext;\n  return storage;\n}\n\nvoid THStorage_(resize)(THStorage *storage, ptrdiff_t size)\n{\n  if(storage->flag & TH_STORAGE_RESIZABLE)\n  {\n    if(storage->allocator->realloc == NULL) {\n      /* case when the allocator does not have a realloc defined */\n      real *old_data = storage->data;\n      ptrdiff_t old_size = storage->size;\n      if (size == 0) {\n\tstorage->data = NULL;\n      } else {\n\tstorage->data = storage->allocator->malloc(\n\t\t\t\t\t\t   storage->allocatorContext,\n\t\t\t\t\t\t   sizeof(real)*size);\n      }\n      storage->size = size;\n      if (old_data != NULL) {\n\tptrdiff_t copy_size = old_size;\n\tif (storage->size < copy_size) {\n\t  copy_size = storage->size;\n\t}\n\tif (copy_size > 0) {\n\t  memcpy(storage->data, old_data, sizeof(real)*copy_size);\n\t}\n\tstorage->allocator->free(storage->allocatorContext, old_data);\n      }\n    } else {\n      storage->data = storage->allocator->realloc(\n\t\t\t\t\t\t  storage->allocatorContext,\n\t\t\t\t\t\t  storage->data,\n\t\t\t\t\t\t  sizeof(real)*size);\n      storage->size = size;\n    }\n  } else {\n    THError(\"Trying to resize storage that is not resizable\");\n  }\n}\n\nvoid THStorage_(fill)(THStorage *storage, real value)\n{\n  ptrdiff_t i;\n  for(i = 0; i < storage->size; i++)\n    storage->data[i] = value;\n}\n\nvoid THStorage_(set)(THStorage *self, ptrdiff_t idx, real value)\n{\n  THArgCheck((idx >= 0) && (idx < self->size), 2, \"out of bounds\");\n  self->data[idx] = value;\n}\n\nreal THStorage_(get)(const THStorage *self, ptrdiff_t idx)\n{\n  THArgCheck((idx >= 0) && (idx < self->size), 2, \"out of bounds\");\n  return self->data[idx];\n}\n\nvoid THStorage_(swap)(THStorage *storage1, THStorage *storage2)\n{\n#define SWAP(val) { val = storage1->val; storage1->val = storage2->val; storage2->val = val; }\n    real *data;\n    ptrdiff_t size;\n    char flag;\n    THAllocator *allocator;\n    void *allocatorContext;\n    
struct THStorage *view;\n\n    SWAP(data);\n    SWAP(size);\n    SWAP(flag);\n    // don't swap refcount!\n    SWAP(allocator);\n    SWAP(allocatorContext);\n    SWAP(view);\n#undef SWAP\n}\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THStorage.h",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THStorage.h\"\n#else\n\n/* on pourrait avoir un liste chainee\n   qui initialise math, lab structures (or more).\n   mouais -- complique.\n\n   Pb: THMapStorage is kind of a class\n   THLab_()... comment je m'en sors?\n\n   en template, faudrait que je les instancie toutes!!! oh boy!\n   Et comment je sais que c'est pour Cuda? Le type float est le meme dans les <>\n\n   au bout du compte, ca serait sur des pointeurs float/double... etc... = facile.\n   primitives??\n */\n\n#define TH_STORAGE_REFCOUNTED 1\n#define TH_STORAGE_RESIZABLE  2\n#define TH_STORAGE_FREEMEM    4\n#define TH_STORAGE_VIEW       8\n\ntypedef struct THStorage\n{\n    real *data;\n    ptrdiff_t size;\n    int refcount;\n    char flag;\n    THAllocator *allocator;\n    void *allocatorContext;\n    struct THStorage *view;\n} THStorage;\n\nTH_API real* THStorage_(data)(const THStorage*);\nTH_API ptrdiff_t THStorage_(size)(const THStorage*);\nTH_API size_t THStorage_(elementSize)(void);\n\n/* slow access -- checks everything */\nTH_API void THStorage_(set)(THStorage*, ptrdiff_t, real);\nTH_API real THStorage_(get)(const THStorage*, ptrdiff_t);\n\nTH_API THStorage* THStorage_(new)(void);\nTH_API THStorage* THStorage_(newWithSize)(ptrdiff_t size);\nTH_API THStorage* THStorage_(newWithSize1)(real);\nTH_API THStorage* THStorage_(newWithSize2)(real, real);\nTH_API THStorage* THStorage_(newWithSize3)(real, real, real);\nTH_API THStorage* THStorage_(newWithSize4)(real, real, real, real);\nTH_API THStorage* THStorage_(newWithMapping)(const char *filename, ptrdiff_t size, int flags);\n\n/* takes ownership of data */\nTH_API THStorage* THStorage_(newWithData)(real *data, ptrdiff_t size);\n\nTH_API THStorage* THStorage_(newWithAllocator)(ptrdiff_t size,\n                                               THAllocator* allocator,\n                                               void *allocatorContext);\nTH_API THStorage* 
THStorage_(newWithDataAndAllocator)(\n    real* data, ptrdiff_t size, THAllocator* allocator, void *allocatorContext);\n\n/* should not differ with API */\nTH_API void THStorage_(setFlag)(THStorage *storage, const char flag);\nTH_API void THStorage_(clearFlag)(THStorage *storage, const char flag);\nTH_API void THStorage_(retain)(THStorage *storage);\nTH_API void THStorage_(swap)(THStorage *storage1, THStorage *storage2);\n\n/* might differ with other API (like CUDA) */\nTH_API void THStorage_(free)(THStorage *storage);\nTH_API void THStorage_(resize)(THStorage *storage, ptrdiff_t size);\nTH_API void THStorage_(fill)(THStorage *storage, real value);\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THStorageCopy.c",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THStorageCopy.c\"\n#else\n\nvoid THStorage_(rawCopy)(THStorage *storage, real *src)\n{\n  ptrdiff_t i;\n  for(i = 0; i < storage->size; i++)\n    storage->data[i] = src[i];\n}\n\nvoid THStorage_(copy)(THStorage *storage, THStorage *src)\n{\n  THArgCheck(storage->size == src->size, 2, \"size mismatch\");\n  THStorage_(rawCopy)(storage, src->data);\n}\n\n/* Converting copy from a storage of another element type. The sizes must match,\n   as in THStorage_(copy) and the Half variants below, so the loop can never\n   read past the end of src. */\n#define IMPLEMENT_THStorage_COPY(TYPENAMESRC) \\\nvoid THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \\\n{ \\\n  ptrdiff_t i;                                                        \\\n  THArgCheck(storage->size == src->size, 2, \"size mismatch\");         \\\n  for(i = 0; i < storage->size; i++)                                  \\\n    storage->data[i] = (real)src->data[i];                            \\\n}\n\n#define IMPLEMENT_THStorage_COPY_FROM_HALF(TYPENAMESRC)\t\t\\\nvoid THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \\\n{ \\\n  THArgCheck(storage->size == src->size, 2, \"size mismatch\"); \\\n  ptrdiff_t i;\t\t\t\t\t\t\t\t\\\n  for(i = 0; i < storage->size; i++)\t\t\t\t\t\\\n    storage->data[i] = (real)TH_half2float(src->data[i]);\t\t\\\n}\n\n#define IMPLEMENT_THStorage_COPY_TO_HALF(TYPENAMESRC)\t\t\\\nvoid THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \\\n{ \\\n  THArgCheck(storage->size == src->size, 2, \"size mismatch\"); \\\n  ptrdiff_t i;\t\t\t\t\t\t\t\t\\\n  for(i = 0; i < storage->size; i++)\t\t\t\t\t\\\n    storage->data[i] = TH_float2half((float)(src->data[i]));\t\t\\\n}\n\n#define IMPLEMENT_THStorage_COPY_TO_FROM_HALF(TYPENAMESRC)\t\t\\\nvoid THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \\\n{ \\\n  THArgCheck(storage->size == src->size, 2, \"size mismatch\"); \\\n  ptrdiff_t i;\t\t\t\t\t\t\t\t\\\n  for(i = 0; i < storage->size; i++)\t\t\t\t\t\\\n    storage->data[i] = src->data[i];\t\t\\\n}\n\n#ifndef TH_REAL_IS_HALF\nIMPLEMENT_THStorage_COPY(Byte)\nIMPLEMENT_THStorage_COPY(Char)\nIMPLEMENT_THStorage_COPY(Short)\nIMPLEMENT_THStorage_COPY(Int)\nIMPLEMENT_THStorage_COPY(Long)\nIMPLEMENT_THStorage_COPY(Float)\nIMPLEMENT_THStorage_COPY(Double)\nIMPLEMENT_THStorage_COPY_FROM_HALF(Half)\n#else\n/* only allow pass-through for Half */\nIMPLEMENT_THStorage_COPY_TO_FROM_HALF(Half)\nIMPLEMENT_THStorage_COPY_TO_HALF(Byte)\nIMPLEMENT_THStorage_COPY_TO_HALF(Char)\nIMPLEMENT_THStorage_COPY_TO_HALF(Short)\nIMPLEMENT_THStorage_COPY_TO_HALF(Int)\nIMPLEMENT_THStorage_COPY_TO_HALF(Long)\nIMPLEMENT_THStorage_COPY_TO_HALF(Float)\nIMPLEMENT_THStorage_COPY_TO_HALF(Double)\n#endif\n\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THStorageCopy.h",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THStorageCopy.h\"\n#else\n\n/* Support for copy between different Storage types */\n\nTH_API void THStorage_(rawCopy)(THStorage *storage, real *src);\nTH_API void THStorage_(copy)(THStorage *storage, THStorage *src);\nTH_API void THStorage_(copyByte)(THStorage *storage, struct THByteStorage *src);\nTH_API void THStorage_(copyChar)(THStorage *storage, struct THCharStorage *src);\nTH_API void THStorage_(copyShort)(THStorage *storage, struct THShortStorage *src);\nTH_API void THStorage_(copyInt)(THStorage *storage, struct THIntStorage *src);\nTH_API void THStorage_(copyLong)(THStorage *storage, struct THLongStorage *src);\nTH_API void THStorage_(copyFloat)(THStorage *storage, struct THFloatStorage *src);\nTH_API void THStorage_(copyDouble)(THStorage *storage, struct THDoubleStorage *src);\nTH_API void THStorage_(copyHalf)(THStorage *storage, struct THHalfStorage *src);\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THTensor.c",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THTensor.c\"\n#else\n\n/**** access methods ****/\nTHStorage *THTensor_(storage)(const THTensor *self)\n{\n  return self->storage;\n}\n\nptrdiff_t THTensor_(storageOffset)(const THTensor *self)\n{\n  return self->storageOffset;\n}\n\nint THTensor_(nDimension)(const THTensor *self)\n{\n  return self->nDimension;\n}\n\nlong THTensor_(size)(const THTensor *self, int dim)\n{\n  THArgCheck((dim >= 0) && (dim < self->nDimension), 2, \"dimension %d out of range of %dD tensor\",\n      dim+TH_INDEX_BASE, THTensor_(nDimension)(self));\n  return self->size[dim];\n}\n\nlong THTensor_(stride)(const THTensor *self, int dim)\n{\n  THArgCheck((dim >= 0) && (dim < self->nDimension), 2, \"dimension %d out of range of %dD tensor\",\n      dim+TH_INDEX_BASE, THTensor_(nDimension)(self));\n  return self->stride[dim];\n}\n\nTHLongStorage *THTensor_(newSizeOf)(THTensor *self)\n{\n  THLongStorage *size = THLongStorage_newWithSize(self->nDimension);\n  THLongStorage_rawCopy(size, self->size);\n  return size;\n}\n\nTHLongStorage *THTensor_(newStrideOf)(THTensor *self)\n{\n  THLongStorage *stride = THLongStorage_newWithSize(self->nDimension);\n  THLongStorage_rawCopy(stride, self->stride);\n  return stride;\n}\n\nreal *THTensor_(data)(const THTensor *self)\n{\n  if(self->storage)\n    return (self->storage->data+self->storageOffset);\n  else\n    return NULL;\n}\n\nvoid THTensor_(setFlag)(THTensor *self, const char flag)\n{\n  self->flag |= flag;\n}\n\nvoid THTensor_(clearFlag)(THTensor *self, const char flag)\n{\n  self->flag &= ~flag;\n}\n\n/**** creation methods ****/\n\nstatic void THTensor_(rawInit)(THTensor *self);\n\n\n/* Empty init */\nTHTensor *THTensor_(new)(void)\n{\n  THTensor *self = THAlloc(sizeof(THTensor));\n  THTensor_(rawInit)(self);\n  return self;\n}\n\n/* Pointer-copy init */\nTHTensor *THTensor_(newWithTensor)(THTensor *tensor)\n{\n  THTensor *self = THAlloc(sizeof(THTensor));\n  THTensor_(rawInit)(self);\n  THTensor_(setStorageNd)(self,\n                          tensor->storage,\n                          tensor->storageOffset,\n                          tensor->nDimension,\n                          tensor->size,\n                          tensor->stride);\n  return self;\n}\n\n/* Storage init */\nTHTensor *THTensor_(newWithStorage)(THStorage *storage, ptrdiff_t storageOffset, THLongStorage *size, THLongStorage *stride)\n{\n  THTensor *self;\n  if(size && stride)\n    THArgCheck(size->size == stride->size, 4, \"inconsistent size\");\n\n  /* allocate only after the argument check so a failed check cannot leak the tensor */\n  self = THAlloc(sizeof(THTensor));\n  THTensor_(rawInit)(self);\n#ifdef DEBUG\n  THAssert((size ? size->size : (stride ? stride->size : 0)) <= INT_MAX);\n#endif\n  THTensor_(setStorageNd)(self,\n                          storage,\n                          storageOffset,\n                          (size ? size->size : (stride ? stride->size : 0)),\n                          (size ? size->data : NULL),\n                          (stride ? stride->data : NULL));\n\n  return self;\n}\nTHTensor *THTensor_(newWithStorage1d)(THStorage *storage, ptrdiff_t storageOffset,\n                               long size0, long stride0)\n{\n  return THTensor_(newWithStorage4d)(storage, storageOffset, size0, stride0, -1, -1,  -1, -1,  -1, -1);\n}\n\nTHTensor *THTensor_(newWithStorage2d)(THStorage *storage, ptrdiff_t storageOffset,\n                               long size0, long stride0,\n                               long size1, long stride1)\n{\n  return THTensor_(newWithStorage4d)(storage, storageOffset, size0, stride0, size1, stride1,  -1, -1,  -1, -1);\n}\n\nTHTensor *THTensor_(newWithStorage3d)(THStorage *storage, ptrdiff_t storageOffset,\n                               long size0, long stride0,\n                               long size1, long stride1,\n                               long size2, long stride2)\n{\n  return THTensor_(newWithStorage4d)(storage, storageOffset, size0, stride0, size1, stride1,  size2, stride2,  -1, -1);\n}\n\nTHTensor *THTensor_(newWithStorage4d)(THStorage *storage, ptrdiff_t storageOffset,\n                               long size0, long stride0,\n                               long size1, long stride1,\n                               long size2, long stride2,\n                               long size3, long stride3)\n{\n  long size[4] = {size0, size1, size2, size3};\n  long stride[4] = {stride0, stride1, stride2, stride3};\n\n  THTensor *self = THAlloc(sizeof(THTensor));\n  THTensor_(rawInit)(self);\n  THTensor_(setStorageNd)(self, storage, storageOffset, 4, size, stride);\n\n  return self;\n}\n\nTHTensor *THTensor_(newWithSize)(THLongStorage *size, THLongStorage *stride)\n{\n  return THTensor_(newWithStorage)(NULL, 0, size, stride);\n}\n\nTHTensor *THTensor_(newWithSize1d)(long size0)\n{\n  return THTensor_(newWithSize4d)(size0, -1, -1, -1);\n}\n\nTHTensor *THTensor_(newWithSize2d)(long size0, long size1)\n{\n  return THTensor_(newWithSize4d)(size0, size1, -1, -1);\n}\n\nTHTensor *THTensor_(newWithSize3d)(long size0, long size1, long size2)\n{\n  return THTensor_(newWithSize4d)(size0, size1, size2, -1);\n}\n\nTHTensor *THTensor_(newWithSize4d)(long size0, long size1, long size2, long size3)\n{\n  long size[4] = {size0, size1, size2, size3};\n\n  THTensor *self = THAlloc(sizeof(THTensor));\n  THTensor_(rawInit)(self);\n  THTensor_(resizeNd)(self, 4, size, NULL);\n\n  return self;\n}\n\nTHTensor *THTensor_(newClone)(THTensor *self)\n{\n  THTensor *tensor = THTensor_(new)();\n  THTensor_(resizeAs)(tensor, self);\n  THTensor_(copy)(tensor, self);\n  return tensor;\n}\n\nTHTensor *THTensor_(newContiguous)(THTensor *self)\n{\n  if(!THTensor_(isContiguous)(self))\n    return THTensor_(newClone)(self);\n  else\n  {\n    THTensor_(retain)(self);\n    return self;\n  }\n}\n\nTHTensor *THTensor_(newSelect)(THTensor *tensor, int dimension_, long sliceIndex_)\n{\n  THTensor *self = THTensor_(newWithTensor)(tensor);\n  THTensor_(select)(self, NULL, dimension_, sliceIndex_);\n  return self;\n}\n\nTHTensor *THTensor_(newNarrow)(THTensor *tensor, int dimension_, long firstIndex_, long size_)\n{\n  THTensor *self = THTensor_(newWithTensor)(tensor);\n  THTensor_(narrow)(self, NULL, dimension_, firstIndex_, size_);\n  return self;\n}\n\nTHTensor *THTensor_(newTranspose)(THTensor *tensor, int dimension1_, int dimension2_)\n{\n  THTensor *self = THTensor_(newWithTensor)(tensor);\n  THTensor_(transpose)(self, NULL, dimension1_, dimension2_);\n  return self;\n}\n\nTHTensor *THTensor_(newUnfold)(THTensor *tensor, int dimension_, long size_, long step_)\n{\n  THTensor *self = THTensor_(newWithTensor)(tensor);\n  THTensor_(unfold)(self, NULL, dimension_, size_, step_);\n  return self;\n}\n\nTHTensor *THTensor_(newView)(THTensor *tensor, THLongStorage *size)\n{\n  THArgCheck(THTensor_(isContiguous)(tensor), 1, \"input is not contiguous\");\n  ptrdiff_t numel = THTensor_(nElement)(tensor);\n  THTensor *self = THTensor_(new)();\n  THLongStorage *inferred_size = THLongStorage_newInferSize(size, numel);\n  THTensor_(setStorage)(self, tensor->storage, tensor->storageOffset, inferred_size, NULL);\n  THLongStorage_free(inferred_size);\n  return self;\n}\n\n/* Resize */\nvoid THTensor_(resize)(THTensor *self, THLongStorage *size, THLongStorage *stride)\n{\n  THArgCheck(size != NULL, 2, \"invalid size\");\n  if(stride)\n    THArgCheck(stride->size == size->size, 3, \"invalid stride\");\n\n#ifdef DEBUG\n  THAssert(size->size <= INT_MAX);\n#endif\n  THTensor_(resizeNd)(self, size->size, size->data, (stride ? stride->data : NULL));\n}\n\nvoid THTensor_(resizeAs)(THTensor *self, THTensor *src)\n{\n  if(!THTensor_(isSameSizeAs)(self, src))\n    THTensor_(resizeNd)(self, src->nDimension, src->size, NULL);\n}\n\nvoid THTensor_(resize1d)(THTensor *tensor, long size0)\n{\n  THTensor_(resize4d)(tensor, size0, -1, -1, -1);\n}\n\nvoid THTensor_(resize2d)(THTensor *tensor, long size0, long size1)\n{\n  THTensor_(resize4d)(tensor, size0, size1, -1, -1);\n}\n\nvoid THTensor_(resize3d)(THTensor *tensor, long size0, long size1, long size2)\n{\n  THTensor_(resize4d)(tensor, size0, size1, size2, -1);\n}\n\nvoid THTensor_(resize4d)(THTensor *self, long size0, long size1, long size2, long size3)\n{\n  long size[4] = {size0, size1, size2, size3};\n\n  THTensor_(resizeNd)(self, 4, size, NULL);\n}\n\nvoid THTensor_(resize5d)(THTensor *self, long size0, long size1, long size2, long size3, long size4)\n{\n    long size[5] = {size0, size1, size2, size3, size4};\n\n  THTensor_(resizeNd)(self, 5, size, NULL);\n}\n\nTHTensor* THTensor_(newExpand)(THTensor *tensor, THLongStorage *sizes) {\n  THTensor *result = THTensor_(new)();\n  THTensor_(expand)(result, tensor, sizes);\n  return result;\n}\n\nvoid THTensor_(expand)(THTensor *r, THTensor *tensor, THLongStorage *sizes) {\n  THArgCheck(THTensor_(nDimension)(tensor) > 0, 0, \"can't expand an empty tensor\");\n  THArgCheck(THLongStorage_size(sizes) >= THTensor_(nDimension)(tensor), 1,\n             \"the number of sizes provided must be greater or equal to the \"\n             \"number of dimensions in the tensor\");\n\n  long *expandedSizes;\n  long *expandedStrides;\n  char error_buffer[1024];\n  int ret =\n      THLongStorage_inferExpandGeometry(tensor->size, tensor->stride, THTensor_(nDimension)(tensor),\n                                        sizes, &expandedSizes, &expandedStrides, error_buffer, 1024);\n\n  if (ret != 0) {\n    /* pass the message as an argument, never as the format string */\n    THError(\"%s\", error_buffer);\n    return;\n  }\n\n  THTensor_(setStorageNd)(r, THTensor_(storage)(tensor), THTensor_(storageOffset)(tensor),\n                          THLongStorage_size(sizes), expandedSizes, expandedStrides);\n  THFree(expandedSizes);\n  THFree(expandedStrides);\n}\n\n\nvoid THTensor_(expandNd)(THTensor **rets, THTensor **ops, int count) {\n  for (int i = 0; i < count; ++i) {\n    THArgCheck(THTensor_(nDimension)(ops[i]) > 0, i, \"can't expand empty tensor %d\", i);\n  }\n\n  long **op_sizes = THAlloc(sizeof(long*) * count);\n  long *op_dims = THAlloc(sizeof(long) * count);\n\n  for (int i = 0; i < count; ++i) {\n    op_sizes[i] = ops[i]->size;\n    op_dims[i] = ops[i]->nDimension;\n  }\n\n  THLongStorage *sizes = THLongStorage_new();\n  char error_buffer[1024];\n  int ret = THLongStorage_inferSizeN(sizes,\n                                     count,\n                                     op_sizes,\n                                     op_dims,\n                                     error_buffer,\n                                     1024);\n\n  if(ret != 0) {\n    THFree(op_sizes);\n    THFree(op_dims);\n    THLongStorage_free(sizes);\n    /* pass the message as an argument, never as the format string */\n    THError(\"%s\", error_buffer);\n    return;\n  }\n\n  for (int i = 0; i < count; ++i) {\n    THTensor_(expand)(rets[i], ops[i], sizes);\n  }\n\n  THFree(op_sizes);\n  THFree(op_dims);\n  THLongStorage_free(sizes);\n}\n\nvoid THTensor_(set)(THTensor *self, THTensor *src)\n{\n  if(self != src)\n    THTensor_(setStorageNd)(self,\n                            src->storage,\n                            src->storageOffset,\n                            src->nDimension,\n                            src->size,\n                            src->stride);\n}\n\nvoid THTensor_(setStorage)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_)\n{\n  if(size_ && stride_)\n    THArgCheck(size_->size == stride_->size, 5, \"inconsistent size/stride sizes\");\n\n#ifdef DEBUG\n  THAssert((size_ ? size_->size : (stride_ ? stride_->size : 0)) <= INT_MAX);\n#endif\n  THTensor_(setStorageNd)(self,\n                          storage_,\n                          storageOffset_,\n                          (size_ ? size_->size : (stride_ ? stride_->size : 0)),\n                          (size_ ? size_->data : NULL),\n                          (stride_ ? stride_->data : NULL));\n}\n\nvoid THTensor_(setStorage1d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,\n                             long size0_, long stride0_)\n{\n  THTensor_(setStorage4d)(self, storage_, storageOffset_,\n                          size0_, stride0_,\n                          -1, -1,\n                          -1, -1,\n                          -1, -1);\n}\n\nvoid THTensor_(setStorage2d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,\n                             long size0_, long stride0_,\n                             long size1_, long stride1_)\n{\n  THTensor_(setStorage4d)(self, storage_, storageOffset_,\n                          size0_, stride0_,\n                          size1_, stride1_,\n                          -1, -1,\n                          -1, -1);\n}\n\nvoid THTensor_(setStorage3d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,\n                             long size0_, long stride0_,\n                             long size1_, long stride1_,\n                             long size2_, long stride2_)\n{\n  THTensor_(setStorage4d)(self, storage_, storageOffset_,\n                          size0_, stride0_,\n                          size1_, stride1_,\n                          size2_, stride2_,\n                          -1, -1);\n}\n\nvoid THTensor_(setStorage4d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,\n                             long size0_, long stride0_,\n                             long size1_, long stride1_,\n                             long size2_, long stride2_,\n                             long size3_, long stride3_)\n{\n\n  long size[4] = {size0_, size1_, size2_, size3_};\n  long stride[4] = {stride0_, stride1_, stride2_, stride3_};\n\n  THTensor_(setStorageNd)(self, storage_, storageOffset_, 4, size, stride);\n}\n\n\nvoid THTensor_(narrow)(THTensor *self, THTensor *src, int dimension, long firstIndex, long size)\n{\n  if(!src)\n    src = self;\n\n  THArgCheck( (dimension >= 0) && (dimension < src->nDimension), 2, \"out of range\");\n  THArgCheck( (firstIndex >= 0) && (firstIndex < src->size[dimension]), 3, \"out of range\");\n  THArgCheck( (size > 0) && (firstIndex <= src->size[dimension] - size), 4, \"out of range\");\n\n  THTensor_(set)(self, src);\n\n  if(firstIndex > 0)\n    self->storageOffset += firstIndex*self->stride[dimension];\n\n  self->size[dimension] = size;\n}\n\nvoid THTensor_(select)(THTensor *self, THTensor *src, int dimension, long sliceIndex)\n{\n  int d;\n\n  if(!src)\n    src = self;\n\n  THArgCheck(src->nDimension > 1, 1, \"cannot select on a vector\");\n  THArgCheck((dimension >= 0) && (dimension < src->nDimension), 2, \"out of range\");\n  THArgCheck((sliceIndex >= 0) && (sliceIndex < src->size[dimension]), 3, \"out of range\");\n\n  THTensor_(set)(self, src);\n  THTensor_(narrow)(self, NULL, dimension, sliceIndex, 1);\n  for(d = dimension; d < self->nDimension-1; d++)\n  {\n    self->size[d] = self->size[d+1];\n    self->stride[d] = self->stride[d+1];\n  }\n  self->nDimension--;\n}\n\nvoid THTensor_(transpose)(THTensor *self, THTensor *src, int dimension1, int dimension2)\n{\n  long z;\n\n  if(!src)\n    src = self;\n\n  THArgCheck( (dimension1 >= 0) && (dimension1 < src->nDimension), 1, \"out of range\");\n  THArgCheck( (dimension2 >= 0) && (dimension2 < src->nDimension), 2, \"out of range\");\n\n  THTensor_(set)(self, src);\n\n  if(dimension1 == dimension2)\n    return;\n\n  z = self->stride[dimension1];\n  self->stride[dimension1] = self->stride[dimension2];\n  self->stride[dimension2] = z;\n  z = self->size[dimension1];\n  self->size[dimension1] = self->size[dimension2];\n  self->size[dimension2] = z;\n}\n\nvoid THTensor_(unfold)(THTensor *self, THTensor *src, int dimension, long size, long step)\n{\n  long *newSize;\n  long *newStride;\n  int d;\n\n  if(!src)\n    src = self;\n\n  THArgCheck( (src->nDimension > 0), 1, \"cannot unfold an empty tensor\");\n  THArgCheck((dimension >= 0) && (dimension < src->nDimension), 2, \"out of range\");\n  THArgCheck(size <= src->size[dimension], 3, \"out of range\");\n  THArgCheck(step > 0, 4, \"invalid step\");\n\n  THTensor_(set)(self, src);\n\n  newSize = THAlloc(sizeof(long)*(self->nDimension+1));\n  newStride = THAlloc(sizeof(long)*(self->nDimension+1));\n\n  newSize[self->nDimension] = size;\n  newStride[self->nDimension] = self->stride[dimension];\n  for(d = 0; d < self->nDimension; d++)\n  {\n    if(d == dimension)\n    {\n      newSize[d] = (self->size[d] - size) / step + 1;\n      newStride[d] = step*self->stride[d];\n    }\n    else\n    {\n      newSize[d] = self->size[d];\n      newStride[d] = self->stride[d];\n    }\n  }\n\n  THFree(self->size);\n  THFree(self->stride);\n\n  self->size = newSize;\n  self->stride = newStride;\n  self->nDimension++;\n}\n\n/* we have to handle the case where the result is a number */\nvoid THTensor_(squeeze)(THTensor *self, THTensor *src)\n{\n  int ndim = 0;\n  int d;\n\n  if(!src)\n    src = self;\n\n  THTensor_(set)(self, src);\n\n  for(d = 0; d < src->nDimension; d++)\n  {\n    if(src->size[d] != 1)\n    {\n      if(d != ndim)\n      {\n        self->size[ndim] = src->size[d];\n        self->stride[ndim] = src->stride[d];\n      }\n      ndim++;\n    }\n  }\n\n  /* right now, we do not handle 0-dimension tensors */\n  if(ndim == 0 && src->nDimension > 0)\n  {\n    self->size[0] = 1;\n    self->stride[0] = 1;\n    ndim = 1;\n  }\n  self->nDimension = ndim;\n}\n\nvoid THTensor_(squeeze1d)(THTensor *self, THTensor *src, int dimension)\n{\n  int d;\n\n  if(!src)\n    src = self;\n\n  THArgCheck((dimension >= 0) && (dimension < src->nDimension), 2, \"dimension out of range\");\n\n  THTensor_(set)(self, src);\n\n  if(src->size[dimension] == 1 && src->nDimension > 1)\n  {\n    for(d = dimension; d < self->nDimension-1; d++)\n    {\n      self->size[d] = self->size[d+1];\n      self->stride[d] = self->stride[d+1];\n    }\n    self->nDimension--;\n  }\n}\n\nvoid THTensor_(unsqueeze1d)(THTensor *self, THTensor *src, int dimension)\n{\n  int d;\n\n  if(!src)\n    src = self;\n\n  THArgCheck((dimension >= 0) && (dimension <= src->nDimension), 2, \"dimension out of range\");\n  THArgCheck(src->nDimension > 0, 2, \"cannot unsqueeze empty tensor\");\n\n  THTensor_(set)(self, src);\n\n  self->size = (long*)THRealloc(self->size, sizeof(long)*(self->nDimension+1));\n  self->stride = (long*)THRealloc(self->stride, sizeof(long)*(self->nDimension+1));\n  self->nDimension++;\n  for (d = self->nDimension-1; d > dimension; d--) {\n    self->size[d] = self->size[d-1];\n    self->stride[d] = self->stride[d-1];\n  }\n  if (dimension+1 < self->nDimension) {\n    self->stride[dimension] = self->size[dimension+1] * self->stride[dimension+1];\n  } else {\n    self->stride[dimension] = 1;\n  }\n  self->size[dimension] = 1;\n}\n\nint THTensor_(isTransposed)(const THTensor *self)\n{\n  if (THTensor_(isContiguous)(self)) {\n    return 0;\n  }\n  long max_stride = 1;\n  long size_max_stride = 1;\n  long z = 1;\n  int d;\n  for (d = 0; d < self->nDimension; ++d) {\n    if (self->stride[d] == 0 && self->size[d] != 1)\n      return 0;\n    if (self->stride[d] > max_stride) {\n      max_stride = self->stride[d];\n      size_max_stride = self->size[d];\n    }\n    z *= self->size[d];\n  }\n  if (z == max_stride * size_max_stride) {\n    return 1;\n  }\n  return 0;\n}\n\nint THTensor_(isContiguous)(const THTensor *self)\n{\n  long z = 1;\n  int d;\n  for(d = self->nDimension-1; d >= 0; d--)\n  {\n    if(self->size[d] != 1)\n    {\n      if(self->stride[d] == z)\n        z *= self->size[d];\n      else\n        return 0;\n    }\n  }\n  return 1;\n}\n\nint THTensor_(isSize)(const THTensor *self, const THLongStorage *dims)\n{\n  int d;\n  if (self->nDimension != dims->size)\n    return 0;\n\n  for(d = 0; d < self->nDimension; ++d)\n  {\n    if(self->size[d] != dims->data[d])\n      return 0;\n  }\n  return 1;\n}\n\nint THTensor_(isSameSizeAs)(const THTensor *self, const THTensor* src)\n{\n  int d;\n  if (self->nDimension != src->nDimension)\n    return 0;\n  for(d = 0; d < self->nDimension; ++d)\n  {\n    if(self->size[d] != src->size[d])\n      return 0;\n  }\n  return 1;\n}\n\nint THTensor_(isSetTo)(const THTensor *self, const THTensor* src)\n{\n  if (!self->storage)\n    return 0;\n  if (self->storage == src->storage &&\n      self->storageOffset == src->storageOffset &&\n      self->nDimension == src->nDimension)\n  {\n    int d;\n    for (d = 0; d < self->nDimension; ++d)\n    {\n      if (self->size[d] != src->size[d] || self->stride[d] != src->stride[d])\n        return 0;\n    }\n    return 1;\n  }\n  return 0;\n}\n\nptrdiff_t THTensor_(nElement)(const THTensor *self)\n{\n  if(self->nDimension == 0)\n    return 0;\n  else\n  {\n    ptrdiff_t nElement = 1;\n    int d;\n    for(d = 0; d < self->nDimension; d++)\n      nElement *= self->size[d];\n    return nElement;\n  }\n}\n\nvoid THTensor_(retain)(THTensor *self)\n{\n  if(self->flag & TH_TENSOR_REFCOUNTED)\n    THAtomicIncrementRef(&self->refcount);\n}\n\nvoid THTensor_(free)(THTensor *self)\n{\n  if(!self)\n    return;\n\n  if(self->flag & TH_TENSOR_REFCOUNTED)\n  {\n    if(THAtomicDecrementRef(&self->refcount))\n    {\n      THFree(self->size);\n      THFree(self->stride);\n      if(self->storage)\n        THStorage_(free)(self->storage);\n      THFree(self);\n    }\n  }\n}\n\nvoid THTensor_(freeCopyTo)(THTensor *self, THTensor *dst)\n{\n  if(self != dst)\n    THTensor_(copy)(dst, self);\n\n  THTensor_(free)(self);\n}\n\n/*******************************************************************************/\n\nstatic void THTensor_(rawInit)(THTensor *self)\n{\n  self->refcount = 1;\n  self->storage = NULL;\n  self->storageOffset = 0;\n  self->size = NULL;\n  self->stride = NULL;\n  self->nDimension = 0;\n  self->flag = TH_TENSOR_REFCOUNTED;\n}\n\nvoid THTensor_(setStorageNd)(THTensor *self, THStorage *storage, ptrdiff_t storageOffset, int nDimension, long *size, long *stride)\n{\n  /* storage */\n  if(self->storage != storage)\n  {\n    if(self->storage)\n      THStorage_(free)(self->storage);\n\n    if(storage)\n    {\n      self->storage = storage;\n      THStorage_(retain)(self->storage);\n    }\n    else\n      self->storage = NULL;\n  }\n\n  /* storageOffset */\n  if(storageOffset < 0)\n    THError(\"Tensor: invalid storage offset\");\n  self->storageOffset = storageOffset;\n\n  /* size and stride */\n  THTensor_(resizeNd)(self, nDimension, size, stride);\n}\n\nvoid THTensor_(resizeNd)(THTensor *self, int nDimension, long *size, long *stride)\n{\n  int d;\n  int nDimension_;\n  ptrdiff_t totalSize;\n  int hascorrectsize = 1;\n\n  nDimension_ = 0;\n  for(d = 0; d < nDimension; d++)\n  {\n    if(size[d] > 0)\n    {\n      nDimension_++;\n      if((self->nDimension > d) && (size[d] != self->size[d]))\n        hascorrectsize = 0;\n\n      if((self->nDimension > d) && stride && (stride[d] >= 0) && (stride[d] != self->stride[d]))\n        hascorrectsize = 0;\n    }\n    else\n      break;\n  }\n  nDimension = nDimension_;\n\n  if(nDimension != self->nDimension)\n    hascorrectsize = 0;\n\n  if(hascorrectsize)\n    return;\n\n  if(nDimension > 0)\n  {\n    if(nDimension != self->nDimension)\n    {\n      self->size = THRealloc(self->size, sizeof(long)*nDimension);\n      self->stride = THRealloc(self->stride, sizeof(long)*nDimension);\n      self->nDimension = nDimension;\n    }\n\n    totalSize = 1;\n    for(d = self->nDimension-1; d >= 0; d--)\n    {\n      self->size[d] = size[d];\n      if(stride && (stride[d] >= 0) )\n        self->stride[d] = stride[d];\n      else\n      {\n        if(d == self->nDimension-1)\n          self->stride[d] = 1;\n        else\n          self->stride[d] = self->size[d+1]*self->stride[d+1];\n      }\n      totalSize += (self->size[d]-1)*self->stride[d];\n    }\n\n    if(totalSize+self->storageOffset > 0)\n    {\n      if(!self->storage)\n        self->storage = THStorage_(new)();\n      if(totalSize+self->storageOffset > self->storage->size)\n        THStorage_(resize)(self->storage, totalSize+self->storageOffset);\n    }\n  }\n  else\n    self->nDimension = 0;\n}\n\nvoid THTensor_(set1d)(THTensor *tensor, long x0, real value)\n{\n  THArgCheck(tensor->nDimension == 1, 1, \"tensor must have one dimension\");\n  THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]), 2, \"out of range\");\n  THStorage_(set)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0], value);\n}\n\nreal THTensor_(get1d)(const THTensor *tensor, long x0)\n{\n  THArgCheck(tensor->nDimension == 1, 1, \"tensor must have one dimension\");\n  THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]), 2, \"out of range\");\n  return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]);\n}\n\nvoid THTensor_(set2d)(THTensor *tensor, long x0, long x1, real value)\n{\n  THArgCheck(tensor->nDimension == 2, 1, \"tensor must have two dimensions\");\n  THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]), 2, \"out of range\");\n  THStorage_(set)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1], value);\n}\n\nreal THTensor_(get2d)(const THTensor *tensor, long x0, long x1)\n{\n  THArgCheck(tensor->nDimension == 2, 1, \"tensor must have two dimensions\");\n  THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]), 2, \"out of range\");\n  return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]);\n}\n\nvoid THTensor_(set3d)(THTensor *tensor, long x0, long x1, long x2, real value)\n{\n  THArgCheck(tensor->nDimension == 3, 1, \"tensor must have three dimensions\");\n  THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]), 2, \"out of range\");\n  THStorage_(set)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2], value);\n}\n\nreal THTensor_(get3d)(const THTensor *tensor, long x0, long x1, long x2)\n{\n  THArgCheck(tensor->nDimension == 3, 1, \"tensor must have three dimensions\");\n  THArgCheck( (x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]), 2, \"out of range\");\n  return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]);\n}\n\nvoid THTensor_(set4d)(THTensor *tensor, long x0, long x1, long x2, long x3, real value)\n{\n  THArgCheck(tensor->nDimension == 4, 1, \"tensor must have four dimensions\");\n  THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]) && (x3 >= 0) && (x3 < tensor->size[3]), 2, \"out of range\");\n  THStorage_(set)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3], value);\n}\n\nreal THTensor_(get4d)(const THTensor *tensor, long x0, long x1, long x2, long x3)\n{\n  THArgCheck(tensor->nDimension == 4, 1, \"tensor must have four dimensions\");\n  THArgCheck((x0 >= 0) && (x0 < tensor->size[0]) && (x1 >= 0) && (x1 < tensor->size[1]) && (x2 >= 0) && (x2 < tensor->size[2]) && (x3 >= 0) && (x3 < tensor->size[3]), 2, \"out of range\");\n  return THStorage_(get)(tensor->storage, tensor->storageOffset+x0*tensor->stride[0]+x1*tensor->stride[1]+x2*tensor->stride[2]+x3*tensor->stride[3]);\n}\n\nTHDescBuff THTensor_(desc)(const THTensor *tensor) {\n  const int L = TH_DESC_BUFF_LEN;\n  THDescBuff buf;\n  char *str = buf.str;\n  int n = 0;\n  /* two-level expansion: Real (type-name macro expected from the TH type-generation\n     headers) must be macro-expanded before it is stringified, otherwise the literal\n     letter x is printed */\n#define _stringify(x) #x\n#define _xstringify(x) _stringify(x)\n  n += snprintf(str, L-n, \"torch.\" _xstringify(Real) \"Tensor of size \");\n#undef _xstringify\n#undef _stringify\n  int i;\n  for(i = 0; i < tensor->nDimension; i++) {\n    if(n >= L) break;\n    n += snprintf(str+n, L-n, \"%ld\", tensor->size[i]);\n    /* snprintf returns the untruncated length, so n may now exceed L; a negative\n       L-n would be converted to a huge size_t and write past the buffer */\n    if(n < L && i < tensor->nDimension-1) {\n      n += snprintf(str+n, L-n, \"x\");\n    }\n  }\n  if(n >= L) {\n    snprintf(str+L-4, 4, \"...\");\n  }\n  return buf;\n}\n\nTHDescBuff THTensor_(sizeDesc)(const THTensor *tensor) {\n  THLongStorage *size = THTensor_(newSizeOf)((THTensor*)tensor);\n  THDescBuff buf = THLongStorage_sizeDesc(size);\n  THLongStorage_free(size);\n  return buf;\n}\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THTensor.h",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THTensor.h\"\n#else\n\n/* a la lua? dim, storageoffset, ...  et les methodes ? */\n\n#define TH_TENSOR_REFCOUNTED 1\n\ntypedef struct THTensor\n{\n    long *size;\n    long *stride;\n    int nDimension;\n\n    THStorage *storage;\n    ptrdiff_t storageOffset;\n    int refcount;\n\n    char flag;\n\n} THTensor;\n\n\n/**** access methods ****/\nTH_API THStorage* THTensor_(storage)(const THTensor *self);\nTH_API ptrdiff_t THTensor_(storageOffset)(const THTensor *self);\nTH_API int THTensor_(nDimension)(const THTensor *self);\nTH_API long THTensor_(size)(const THTensor *self, int dim);\nTH_API long THTensor_(stride)(const THTensor *self, int dim);\nTH_API THLongStorage *THTensor_(newSizeOf)(THTensor *self);\nTH_API THLongStorage *THTensor_(newStrideOf)(THTensor *self);\nTH_API real *THTensor_(data)(const THTensor *self);\n\nTH_API void THTensor_(setFlag)(THTensor *self, const char flag);\nTH_API void THTensor_(clearFlag)(THTensor *self, const char flag);\n\n\n/**** creation methods ****/\nTH_API THTensor *THTensor_(new)(void);\nTH_API THTensor *THTensor_(newWithTensor)(THTensor *tensor);\n/* stride might be NULL */\nTH_API THTensor *THTensor_(newWithStorage)(THStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_);\nTH_API THTensor *THTensor_(newWithStorage1d)(THStorage *storage_, ptrdiff_t storageOffset_,\n                                long size0_, long stride0_);\nTH_API THTensor *THTensor_(newWithStorage2d)(THStorage *storage_, ptrdiff_t storageOffset_,\n                                long size0_, long stride0_,\n                                long size1_, long stride1_);\nTH_API THTensor *THTensor_(newWithStorage3d)(THStorage *storage_, ptrdiff_t storageOffset_,\n                                long size0_, long stride0_,\n                                long size1_, long stride1_,\n                                long size2_, long stride2_);\nTH_API 
THTensor *THTensor_(newWithStorage4d)(THStorage *storage_, ptrdiff_t storageOffset_,\n                                long size0_, long stride0_,\n                                long size1_, long stride1_,\n                                long size2_, long stride2_,\n                                long size3_, long stride3_);\n\n/* stride might be NULL */\nTH_API THTensor *THTensor_(newWithSize)(THLongStorage *size_, THLongStorage *stride_);\nTH_API THTensor *THTensor_(newWithSize1d)(long size0_);\nTH_API THTensor *THTensor_(newWithSize2d)(long size0_, long size1_);\nTH_API THTensor *THTensor_(newWithSize3d)(long size0_, long size1_, long size2_);\nTH_API THTensor *THTensor_(newWithSize4d)(long size0_, long size1_, long size2_, long size3_);\n\nTH_API THTensor *THTensor_(newClone)(THTensor *self);\nTH_API THTensor *THTensor_(newContiguous)(THTensor *tensor);\nTH_API THTensor *THTensor_(newSelect)(THTensor *tensor, int dimension_, long sliceIndex_);\nTH_API THTensor *THTensor_(newNarrow)(THTensor *tensor, int dimension_, long firstIndex_, long size_);\nTH_API THTensor *THTensor_(newTranspose)(THTensor *tensor, int dimension1_, int dimension2_);\nTH_API THTensor *THTensor_(newUnfold)(THTensor *tensor, int dimension_, long size_, long step_);\nTH_API THTensor *THTensor_(newView)(THTensor *tensor, THLongStorage *size);\nTH_API THTensor *THTensor_(newExpand)(THTensor *tensor, THLongStorage *size);\n\nTH_API void THTensor_(expand)(THTensor *r, THTensor *tensor, THLongStorage *size);\nTH_API void THTensor_(expandNd)(THTensor **rets, THTensor **ops, int count);\n\nTH_API void THTensor_(resize)(THTensor *tensor, THLongStorage *size, THLongStorage *stride);\nTH_API void THTensor_(resizeAs)(THTensor *tensor, THTensor *src);\nTH_API void THTensor_(resizeNd)(THTensor *tensor, int nDimension, long *size, long *stride);\nTH_API void THTensor_(resize1d)(THTensor *tensor, long size0_);\nTH_API void THTensor_(resize2d)(THTensor *tensor, long size0_, long size1_);\nTH_API void 
THTensor_(resize3d)(THTensor *tensor, long size0_, long size1_, long size2_);\nTH_API void THTensor_(resize4d)(THTensor *tensor, long size0_, long size1_, long size2_, long size3_);\nTH_API void THTensor_(resize5d)(THTensor *tensor, long size0_, long size1_, long size2_, long size3_, long size4_);\n\nTH_API void THTensor_(set)(THTensor *self, THTensor *src);\nTH_API void THTensor_(setStorage)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, THLongStorage *size_, THLongStorage *stride_);\nTH_API void THTensor_(setStorageNd)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_, int nDimension, long *size, long *stride);\nTH_API void THTensor_(setStorage1d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,\n                                    long size0_, long stride0_);\nTH_API void THTensor_(setStorage2d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,\n                                    long size0_, long stride0_,\n                                    long size1_, long stride1_);\nTH_API void THTensor_(setStorage3d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,\n                                    long size0_, long stride0_,\n                                    long size1_, long stride1_,\n                                    long size2_, long stride2_);\nTH_API void THTensor_(setStorage4d)(THTensor *self, THStorage *storage_, ptrdiff_t storageOffset_,\n                                    long size0_, long stride0_,\n                                    long size1_, long stride1_,\n                                    long size2_, long stride2_,\n                                    long size3_, long stride3_);\n\nTH_API void THTensor_(narrow)(THTensor *self, THTensor *src, int dimension_, long firstIndex_, long size_);\nTH_API void THTensor_(select)(THTensor *self, THTensor *src, int dimension_, long sliceIndex_);\nTH_API void THTensor_(transpose)(THTensor *self, THTensor *src, int dimension1_, int 
dimension2_);\nTH_API void THTensor_(unfold)(THTensor *self, THTensor *src, int dimension_, long size_, long step_);\n\nTH_API void THTensor_(squeeze)(THTensor *self, THTensor *src);\nTH_API void THTensor_(squeeze1d)(THTensor *self, THTensor *src, int dimension_);\nTH_API void THTensor_(unsqueeze1d)(THTensor *self, THTensor *src, int dimension_);\n\nTH_API int THTensor_(isContiguous)(const THTensor *self);\nTH_API int THTensor_(isSameSizeAs)(const THTensor *self, const THTensor *src);\nTH_API int THTensor_(isSetTo)(const THTensor *self, const THTensor *src);\nTH_API int THTensor_(isSize)(const THTensor *self, const THLongStorage *dims);\nTH_API ptrdiff_t THTensor_(nElement)(const THTensor *self);\n\nTH_API void THTensor_(retain)(THTensor *self);\nTH_API void THTensor_(free)(THTensor *self);\nTH_API void THTensor_(freeCopyTo)(THTensor *self, THTensor *dst);\n\n/* Slow access methods [check everything] */\nTH_API void THTensor_(set1d)(THTensor *tensor, long x0, real value);\nTH_API void THTensor_(set2d)(THTensor *tensor, long x0, long x1, real value);\nTH_API void THTensor_(set3d)(THTensor *tensor, long x0, long x1, long x2, real value);\nTH_API void THTensor_(set4d)(THTensor *tensor, long x0, long x1, long x2, long x3, real value);\n\nTH_API real THTensor_(get1d)(const THTensor *tensor, long x0);\nTH_API real THTensor_(get2d)(const THTensor *tensor, long x0, long x1);\nTH_API real THTensor_(get3d)(const THTensor *tensor, long x0, long x1, long x2);\nTH_API real THTensor_(get4d)(const THTensor *tensor, long x0, long x1, long x2, long x3);\n\n/* Debug methods */\nTH_API THDescBuff THTensor_(desc)(const THTensor *tensor);\nTH_API THDescBuff THTensor_(sizeDesc)(const THTensor *tensor);\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THTensorConv.c",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THTensorConv.c\"\n#else\n\n/*\n  2D Input, 2D kernel  : convolve given image with the given kernel.\n*/\nvoid THTensor_(validXCorr2Dptr)(real *r_,\n                                       real alpha,\n                                       real *t_, long ir, long ic,\n                                       real *k_, long kr, long kc,\n                                       long sr, long sc)\n{\n  long or = (ir - kr) / sr + 1;\n  long oc = (ic - kc) / sc + 1;\n\n  long xx, yy, kx, ky;\n\n  if ((sc != 1) || (oc < 4))  {\n    /* regular convolution */\n    for(yy = 0; yy < or; yy++) {\n      for(xx = 0; xx < oc; xx++) {\n        /* Dot product in two dimensions... (between input image and the mask) */\n        real *pi_ = t_ + yy*sr*ic + xx*sc;\n        real *pw_ = k_;\n        real sum = 0;\n        for(ky = 0; ky < kr; ky++) {\n          for(kx = 0; kx < kc; kx++) {\n            sum += pi_[kx]*pw_[kx];\n          }\n          pi_ += ic; /* next input line */\n          pw_ += kc; /* next mask line */\n        }\n        /* Update output */\n        *r_++ += alpha*sum;\n      }\n    }\n\n  } else {\n    /* SSE-based convolution */\n    for(yy = 0; yy < or; yy++) {\n      real *pi_ = t_ + yy*sr*ic;\n      real *pw_ = k_;\n      for (ky = 0; ky < kr; ky++) {\n        real *pis_ = pi_;\n        for (kx = 0; kx < kc; kx++) {\n          THVector_(cadd)(r_, r_, pis_, alpha*pw_[kx], oc);\n          pis_++;\n        }\n        pi_ += ic; /* next input line */\n        pw_ += kc; /* next mask line */\n      }\n      r_ += oc;\n    }\n  }\n}\n\n/*\n  2D Input, 2D kernel  : convolve given image with the given kernel.\n*/\nvoid THTensor_(validConv2Dptr)(real *r_,\n                                      real alpha,\n                                      real *t_, long ir, long ic,\n                                      real *k_, long kr, long kc,\n                                      long sr, long sc)\n{\n  
long or = (ir - kr) / sr + 1;\n  long oc = (ic - kc) / sc + 1;\n\n  long xx, yy, kx, ky;\n\n  if ((sc != 1) || (oc < 4))  {\n    /* regular convolution */\n    for(yy = 0; yy < or; yy++) {\n      for(xx = 0; xx < oc; xx++) {\n        /* Dot product in two dimensions... (between input image and the mask) */\n        real *pi_ = t_ + yy*sr*ic + xx*sc;\n        real *pw_ = k_ + kr*kc - 1;\n        real sum = 0;\n        for(ky = 0; ky < kr; ky++) {\n          for(kx = 0; kx < kc; kx++) {\n            sum += pi_[kx]*pw_[-kx];\n          }\n          pi_ += ic; /* next input line */\n          pw_ -= kc; /* next mask line */\n        }\n        /* Update output */\n        *r_++ += alpha*sum;\n      }\n    }\n\n  } else {\n    /* SSE-based convolution */\n    for(yy = 0; yy < or; yy++) {\n      real *pw_ = k_ + kr*kc - 1;\n      real *pi_ = t_ + yy*sr*ic;\n      for (ky = 0; ky < kr; ky++) {\n        real *pis_ = pi_;\n        for (kx = 0; kx < kc; kx++) {\n          THVector_(cadd)(r_, r_, pis_, alpha*pw_[-kx], oc);\n          pis_++;\n        }\n        pi_ += ic; /* next input line */\n        pw_ -= kc; /* next mask line */\n      }\n      r_ += oc;\n    }\n  }\n}\n\n/*\n  2D Input, 2D kernel  : convolve given image with the given kernel, full convolution.\n*/\nvoid THTensor_(fullConv2Dptr)(real *r_,\n                                     real alpha,\n                                     real *t_, long ir, long ic,\n                                     real *k_, long kr, long kc,\n                                     long sr, long sc)\n{\n  long oc = (ic - 1) * sc + kc;\n\n  long xx, yy, kx, ky;\n\n  if ((sc != 1) || (ic < 4))  {\n    /* regular convolution */\n    for(yy = 0; yy < ir; yy++) {\n      for(xx = 0; xx < ic; xx++) {\n        /* Outer product in two dimensions... 
(between input image and the mask) */\n        real *po_ = r_ + yy*sr*oc + xx*sc;\n        real *pw_ = k_;\n        for(ky = 0; ky < kr; ky++)\n        {\n          real z = *t_ * alpha;\n          for(kx = 0; kx < kc; kx++) {\n            po_[kx] += z * pw_[kx];\n          }\n          po_ += oc; /* next input line */\n          pw_ += kc; /* next mask line */\n        }\n        t_++;\n      }\n    }\n\n  } else {\n    /* SSE-based convolution */\n    for(yy = 0; yy < ir; yy++) {\n      real *po_ = r_ + yy*sr*oc;\n      real *pw_ = k_;\n      for (ky = 0; ky < kr; ky++) {\n        real *pos_ = po_;\n        for (kx = 0; kx < kc; kx++) {\n          THVector_(cadd)(pos_, pos_, t_, alpha*pw_[kx], ic);\n          pos_++;\n        }\n        po_ += oc; /* next input line */\n        pw_ += kc; /* next mask line */\n      }\n      t_ += ic;\n    }\n  }\n}\n\n/*\n  2D Input, 2D kernel  : convolve given image with the given kernel, full convolution.\n*/\nvoid THTensor_(fullXCorr2Dptr)(real *r_,\n                                      real alpha,\n                                      real *t_, long ir, long ic,\n                                      real *k_, long kr, long kc,\n                                      long sr, long sc)\n{\n  long oc = (ic - 1) * sc + kc;\n\n  long xx, yy, kx, ky;\n\n  if ((sc != 1) || (ic < 4))  {\n    /* regular convolution */\n    for(yy = 0; yy < ir; yy++) {\n      for(xx = 0; xx < ic; xx++) {\n        /* Outer product in two dimensions... 
(between input image and the mask) */\n        real *po_ = r_ + yy*sr*oc + xx*sc;\n        real *pw_ = k_ + kr*kc -1;\n        long kx, ky;\n        for(ky = 0; ky < kr; ky++)\n        {\n          real z = *t_ * alpha;\n          for(kx = 0; kx < kc; kx++) {\n            po_[kx] += z * pw_[-kx];\n          }\n          po_ += oc; /* next input line */\n          pw_ -= kc; /* next mask line */\n        }\n        t_++;\n      }\n    }\n\n  } else {\n    /* SSE-based convolution */\n    for(yy = 0; yy < ir; yy++) {\n      real *po_ = r_ + yy*sr*oc;\n      real *pw_ = k_ + kr*kc -1;\n      for (ky = 0; ky < kr; ky++) {\n        real *pos_ = po_;\n        for (kx = 0; kx < kc; kx++) {\n          THVector_(cadd)(pos_, pos_, t_, pw_[-kx]*alpha, ic);\n          pos_++;\n        }\n        po_ += oc; /* next input line */\n        pw_ -= kc; /* next mask line */\n      }\n      t_ += ic;\n    }\n  }\n}\n\n/*\n  2D Input, 2D kernel  : convolve given image with the given kernel, valid convolution.\n  for sr,sc=1 this is equivalent to validXCorr2Dptr, but otherwise it is useful for\n  calculating derivatives wrt a kernel that is applied with stride sr,sc != 1\n*/\nvoid THTensor_(validXCorr2DRevptr)(real *r_,\n                                          real alpha,\n                                          real *t_, long ir, long ic,\n                                          real *k_, long kr, long kc,\n                                          long sr, long sc)\n{\n  long or = ir - (kr - 1) * sr;\n  long oc = ic - (kc - 1) * sc;\n\n  long xx, yy, kx, ky;\n\n  if ((sc != 1) || (kc < 4))  {\n    /* regular convolution */\n    for(yy = 0; yy < kr; yy++) {\n      for(xx = 0; xx < kc; xx++) {\n        real *po_ = r_;\n        real *pi_ = t_ + yy*sr*ic + xx*sc;\n        real z = *k_++ * alpha;\n\n        for(ky = 0; ky < or; ky++) {\n          for(kx = 0; kx < oc; kx++)\n            po_[kx] += z * pi_[kx];\n          pi_ += ic;\n          po_ += oc;\n        }\n      }\n    }\n\n 
 } else {\n    /* SSE-based convolution */\n    for(yy = 0; yy < kr; yy++) {\n      for(xx = 0; xx < kc; xx++) {\n        real *po_ = r_;\n        real *pi_ = t_ + yy*sr*ic + xx*sc;\n        real z = *k_++ * alpha;\n\n        for(ky = 0; ky < or; ky++) {\n          THVector_(cadd)(po_, po_, pi_, z, oc);\n          pi_ += ic;\n          po_ += oc;\n        }\n      }\n    }\n  }\n}\n/*\n  3D Input, 3D kernel  : convolve given volume with the given kernel.\n*/\nvoid THTensor_(validXCorr3Dptr)(real *r_,\n                                       real alpha,\n                                       real *t_, long it, long ir, long ic,\n                                       real *k_, long kt, long kr, long kc,\n                                       long st, long sr, long sc)\n{\n  long ot = (it - kt) / st + 1;\n  long or = (ir - kr) / sr + 1;\n  long oc = (ic - kc) / sc + 1;\n\n  long zz, xx, yy;\n\n  for (zz = 0; zz < ot; zz++)\n  {\n    for(yy = 0; yy < or; yy++)\n    {\n      for(xx = 0; xx < oc; xx++)\n      {\n        /* Dot product in two dimensions... 
(between input image and the mask) */\n        real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc;\n        real *pw_ = k_;\n        real sum = 0;\n        long kz, kx, ky;\n        for(kz = 0; kz < kt; kz++)\n        {\n          for(ky = 0; ky < kr; ky++)\n          {\n            for(kx = 0; kx < kc; kx++) {\n              sum += pi_[kx]*pw_[kx];\n            }\n            pi_ += ic; /* next input line */\n            pw_ += kc; /* next mask line */\n          }\n          pi_ += (ir-kr)*ic; /* next input slice */\n        }\n        /* Update output */\n        *r_++ += sum*alpha;\n      }\n    }\n  }\n}\n\n/*\n  3D Input, 3D kernel  : convolve given volume with the given kernel.\n*/\nvoid THTensor_(validConv3Dptr)(real *r_,\n                                      real alpha,\n                                      real *t_, long it, long ir, long ic,\n                                      real *k_, long kt, long kr, long kc,\n                                      long st, long sr, long sc)\n{\n  long ot = (it - kt) / st + 1;\n  long or = (ir - kr) / sr + 1;\n  long oc = (ic - kc) / sc + 1;\n\n  long zz, xx, yy;\n\n  for(zz = 0; zz < ot; zz++)\n  {\n    for(yy = 0; yy < or; yy++)\n    {\n      for(xx = 0; xx < oc; xx++)\n      {\n        /* Dot product in two dimensions... 
(between input image and the mask) */\n        real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc;\n        real *pw_ = k_ + kt*kr*kc - 1;\n        real sum = 0;\n        long kz, kx, ky;\n        for(kz = 0; kz < kt; kz++)\n        {\n          for(ky = 0; ky < kr; ky++)\n          {\n            for(kx = 0; kx < kc; kx++) {\n              sum += pi_[kx]*pw_[-kx];\n            }\n            pi_ += ic; /* next input line */\n            pw_ -= kc; /* next mask line */\n          }\n          pi_ += (ir-kr)*ic; /* next input slice */\n        }\n        /* Update output */\n        *r_++ += alpha*sum;\n      }\n    }\n  }\n}\n\n\n/*\n  3D Input, 3D kernel  : convolve given volume with the given kernel, full convolution.\n*/\nvoid THTensor_(fullConv3Dptr)(real *r_,\n                                     real alpha,\n                                     real *t_, long it, long ir, long ic,\n                                     real *k_, long kt, long kr, long kc,\n                                     long st, long sr, long sc)\n{\n  long or = (ir - 1) * sr + kr;\n  long oc = (ic - 1) * sc + kc;\n\n  long zz, xx, yy;\n\n  for(zz = 0; zz < it; zz++)\n  {\n    for(yy = 0; yy < ir; yy++)\n    {\n      for(xx = 0; xx < ic; xx++)\n      {\n        /* Outer product in two dimensions... 
(between input image and the mask) */\n        real *po_ = r_ + zz*st*or*oc + yy*sr*oc + xx*sc;\n        real *pw_ = k_;\n        long kz, kx, ky;\n        /* printf(\"Output Plane : %ld,%ld,%ld, input val=%g\\n\",zz,yy,xx,*t_); */\n        for(kz = 0; kz < kt; kz++)\n        {\n          for(ky = 0; ky < kr; ky++)\n          {\n            real z = *t_ * alpha;\n            for(kx = 0; kx < kc; kx++) {\n              /* printf(\"o=%g,k=%g,\" , po_[kx],pw_[kx]); */\n              po_[kx] += z * pw_[kx];\n              /* printf(\"o=%g \" , po_[kx]); */\n            }\n            /* printf(\"\\n\"); */\n            po_ += oc; /* next input line */\n            pw_ += kc; /* next mask line */\n          }\n          po_ += (or-kr)*oc; /* next output slice */\n          /* printf(\"\\n\"); */\n        }\n        t_++;\n      }\n    }\n  }\n}\n\n/*\n  3D Input, 3D kernel  : convolve given volume with the given kernel, full convolution.\n*/\nvoid THTensor_(fullXCorr3Dptr)(real *r_,\n                                      real alpha,\n                                      real *t_, long it, long ir, long ic,\n                                      real *k_, long kt, long kr, long kc,\n                                      long st, long sr, long sc)\n{\n  long or = (ir - 1) * sr + kr;\n  long oc = (ic - 1) * sc + kc;\n\n  long zz, xx, yy;\n\n  for(zz = 0; zz < it; zz++)\n  {\n    for(yy = 0; yy < ir; yy++)\n    {\n      for(xx = 0; xx < ic; xx++)\n      {\n        /* Outer product in two dimensions... 
(between input image and the mask) */\n        real *po_ = r_ + zz*st*or*oc + yy*sr*oc + xx*sc;\n        real *pw_ = k_ + kt*kr*kc -1;\n        long kz, kx, ky;\n        for(kz = 0; kz < kt; kz++)\n        {\n          for(ky = 0; ky < kr; ky++)\n          {\n            real z = *t_ * alpha;\n            for(kx = 0; kx < kc; kx++) {\n              po_[kx] += z * pw_[-kx];\n            }\n            po_ += oc; /* next input line */\n            pw_ -= kc; /* next mask line */\n          }\n          po_ += (or-kr)*oc; /* next output slice */\n        }\n        t_++;\n      }\n    }\n  }\n}\n\n/*\n  3D Input, 3D kernel  : convolve given image with the given kernel, valid convolution.\n  for sr,sc=1 this is equivalent to validXCorr3Dptr, but otherwise it is useful for\n  calculating derivatives wrt a kernel that is applied with stride sr,sc != 1\n*/\nvoid THTensor_(validXCorr3DRevptr)(real *r_,\n                                          real alpha,\n                                          real *t_, long it, long ir, long ic,\n                                          real *k_, long kt, long kr, long kc,\n                                          long st, long sr, long sc)\n{\n  long ot = it - (kt - 1) * st;\n  long or = ir - (kr - 1) * sr;\n  long oc = ic - (kc - 1) * sc;\n\n  long zz, xx, yy;\n  for(zz = 0; zz < kt; zz++)\n  {\n    for(yy = 0; yy < kr; yy++)\n    {\n      for(xx = 0; xx < kc; xx++)\n      {\n        real *po_ = r_;\n        real *pi_ = t_ + zz*st*ir*ic + yy*sr*ic + xx*sc;\n        real z = *k_++ * alpha;\n        long kz, kx, ky;\n        for(kz = 0; kz < ot; kz++)\n        {\n          for(ky = 0; ky < or; ky++)\n          {\n            for(kx = 0; kx < oc; kx++)\n              po_[kx] += z * pi_[kx];\n            pi_ += ic;\n            po_ += oc;\n          }\n          pi_ += (ir-or)*ic; /* next input slice */\n        }\n      }\n    }\n  }\n}\n\nvoid THTensor_(conv2d)(real* output_data,\n                       real alpha,\n                
       real* ptr_input, long nInputRows, long nInputCols,\n                       real* ptr_weight, long nKernelRows, long nKernelCols,\n                       long srow, long scol,\n                       const char *vf, const char *xc)\n{\n  THArgCheck(*vf == 'V' || *vf == 'F', 7, \"type of convolution can be 'V' or 'F'\");\n  THArgCheck(*xc == 'C' || *xc == 'X', 7, \"type of convolution can be 'X' or 'C'\");\n  if (*vf == 'F')\n    if (*xc == 'X')\n      THTensor_(fullXCorr2Dptr)(output_data,\n                                alpha,\n                                ptr_input,  nInputRows,  nInputCols,\n                                ptr_weight, nKernelRows, nKernelCols,\n                                srow, scol);\n    else\n      THTensor_(fullConv2Dptr)(output_data,\n                               alpha,\n                               ptr_input,  nInputRows,  nInputCols,\n                               ptr_weight, nKernelRows, nKernelCols,\n                               srow, scol);\n  else\n    if (*xc == 'X')\n      THTensor_(validXCorr2Dptr)(output_data,\n                                 alpha,\n                                 ptr_input,  nInputRows,  nInputCols,\n                                 ptr_weight, nKernelRows, nKernelCols,\n                                 srow, scol);\n    else\n      THTensor_(validConv2Dptr)(output_data,\n                                alpha,\n                                ptr_input,  nInputRows,  nInputCols,\n                                ptr_weight, nKernelRows, nKernelCols,\n                                srow, scol);\n}\n\nvoid THTensor_(conv3d)(real* output_data,\n                       real alpha,\n                       real* ptr_input, long nInputDepth, long nInputRows, long nInputCols,\n                       real* ptr_weight, long nKernelDepth, long nKernelRows, long nKernelCols,\n                       long sdepth, long srow, long scol,\n                       const char *vf, const char *xc)\n{\n  
THArgCheck(*vf == 'V' || *vf == 'F', 7, \"type of convolution can be 'V' or 'F'\");\n  THArgCheck(*xc == 'C' || *xc == 'X', 7, \"type of convolution can be 'X' or 'C'\");\n  if (*vf == 'F')\n    if (*xc == 'X')\n      THTensor_(fullXCorr3Dptr)(output_data,\n                                alpha,\n                                ptr_input, nInputDepth, nInputRows,  nInputCols,\n                                ptr_weight, nKernelDepth, nKernelRows, nKernelCols,\n                                sdepth, srow, scol);\n    else\n      THTensor_(fullConv3Dptr)(output_data,\n                               alpha,\n                               ptr_input, nInputDepth, nInputRows,  nInputCols,\n                               ptr_weight, nKernelDepth, nKernelRows, nKernelCols,\n                               sdepth, srow, scol);\n  else\n    if (*xc == 'X')\n      THTensor_(validXCorr3Dptr)(output_data,\n                                 alpha,\n                                 ptr_input, nInputDepth, nInputRows,  nInputCols,\n                                 ptr_weight, nKernelDepth, nKernelRows, nKernelCols,\n                                 sdepth, srow, scol);\n    else\n      THTensor_(validConv3Dptr)(output_data,\n                                alpha,\n                                ptr_input, nInputDepth, nInputRows,  nInputCols,\n                                ptr_weight, nKernelDepth, nKernelRows, nKernelCols,\n                                sdepth, srow, scol);\n}\n\nlong THTensor_(convsize)(long x, long k, long s, const char* vf)\n{\n  THArgCheck(*vf == 'V' || *vf == 'F', 1, \"type of convolution can be 'V' or 'F'\");\n  if (*vf == 'V')\n    return (x-k)/s + 1;\n  else\n    return (x-1)*s + k;\n}\n\n\n/*\n  3D input, 3D kernel, 4D output\n  like rank1 update\n  A <- xx' + beta*A\n  for sr,sc=1 this is equivalent to conv2Dger, but otherwise it is useful for\n  calculating derivatives wrt a kernel that is applied with stride sr,sc != 1\n*/\nvoid 
THTensor_(conv2DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol)\n{\n  long nInputPlane, nInputRows, nInputCols;\n  long nKernelPlane, nKernelRows, nKernelCols;\n  long nOutputPlane, nOutputRows, nOutputCols;\n  long istride0, kstride0;\n  THTensor *input;\n  THTensor *kernel;\n  real *input_data;\n  real *weight_data;\n  real *output_data;\n  ptrdiff_t nelem;\n  long k;\n\n  THArgCheck(t_->nDimension == 3 , 3, \"input: 3D Tensor expected\");\n  THArgCheck(k_->nDimension == 3 , 4, \"kernel: 3D Tensor expected\");\n  THArgCheck(srow >= 1, 5, \"Stride should be a positive integer\");\n  THArgCheck(scol >= 1, 6, \"Stride should be a positive integer\");\n\n  input = THTensor_(newContiguous)(t_);\n  kernel = THTensor_(newContiguous)(k_);\n\n  nInputPlane = input->size[0];\n  istride0    = input->stride[0];\n  nInputRows  = input->size[1];\n  nInputCols  = input->size[2];\n\n  kstride0 = kernel->stride[0];\n  nKernelPlane = kernel->size[0];\n  nKernelRows = kernel->size[1];\n  nKernelCols = kernel->size[2];\n  nOutputPlane = nInputPlane * kernel->size[0];\n\n  THArgCheck(nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, \"covn2DRevger : Input image is smaller than kernel\");\n\n  nOutputRows = nInputRows - (nKernelRows - 1) * srow;\n  nOutputCols = nInputCols - (nKernelCols - 1) * scol;\n\n  nelem = THTensor_(nElement)(r_);\n  THTensor_(resize4d)(r_,nKernelPlane, nInputPlane, nOutputRows, nOutputCols);\n\n  input_data = THTensor_(data)(input);\n  weight_data = THTensor_(data)(kernel);\n  output_data = THTensor_(data)(r_);\n\n  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))\n  {\n    /*THTensor_(zero)(r_);*/\n\n#pragma omp parallel for private(k)\n    for (k = 0; k < r_->size[0]*r_->size[1]; k++)\n    {\n      real* ptr_output = output_data + k*nOutputCols*nOutputRows;\n      long l;\n      for (l = 0; l < nOutputRows*nOutputCols; l++)\n        ptr_output[l] = 0.0;\n    }\n  }\n  else if (beta != 
1)\n  {\n    /*THTensor_(mul)(r_, beta);*/\n#pragma omp parallel for private(k)\n    for (k = 0; k < r_->size[0]*r_->size[1]; k++)\n    {\n      real* ptr_output = output_data + k*nOutputCols*nOutputRows;\n      long l;\n      for (l = 0; l < nOutputRows*nOutputCols; l++)\n        ptr_output[l] *= beta;\n    }\n  }\n\n#pragma omp parallel for private(k)\n  for(k = 0; k < nKernelPlane; k++)\n  {\n    long i;\n    /* get kernel */\n    real *ptr_weight = weight_data+k*kstride0;\n\n    for(i = 0; i < nInputPlane; i++)\n    {\n      /* get output */\n      real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows;\n      /* get input */\n      real *ptr_input = input_data+i*istride0;\n\n      /* do image, kernel convolution */\n      THTensor_(validXCorr2DRevptr)(ptr_output,\n                                    alpha,\n                                    ptr_input,  nInputRows,  nInputCols,\n                                    ptr_weight, nKernelRows, nKernelCols,\n                                    srow, scol);\n      /* Next output plane */\n      /* output_data += nOutputCols*nOutputRows; */\n    }\n  }\n  THTensor_(free)(input);\n  THTensor_(free)(kernel);\n}\n\n\n/*\n  3D input, 3D kernel, 4D output\n  like rank1 update\n  A <- xx' + beta*A\n  for sr,sc=1 this is equivalent to conv2Dger, but otherwise it is useful for\n  calculating derivatives wrt a kernel that is applied with stride sr,sc != 1\n*/\nvoid THTensor_(conv2DRevgerm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol)\n{\n  long nbatch, nInputPlane, nInputRows, nInputCols;\n  long nKernelPlane, nKernelRows, nKernelCols;\n  long nOutputRows, nOutputCols;\n  long istride0, kstride0, istride1, kstride1;\n  THTensor *input;\n  THTensor *kernel;\n  real *input_data;\n  real *weight_data;\n  real *output_data;\n  ptrdiff_t nelem;\n  long k;\n\n  THArgCheck(t_->nDimension == 4 , 3, \"input: 4D Tensor expected\");\n  
THArgCheck(k_->nDimension == 4 , 4, \"kernel: 4D Tensor expected\");\n  THArgCheck(srow >= 1, 5, \"Stride should be a positive integer\");\n  THArgCheck(scol >= 1, 6, \"Stride should be a positive integer\");\n\n  input = THTensor_(newContiguous)(t_);\n  kernel = THTensor_(newContiguous)(k_);\n\n  istride0    = input->stride[0];\n  istride1    = input->stride[1];\n  nbatch      = input->size[0];\n  nInputPlane = input->size[1];\n  nInputRows  = input->size[2];\n  nInputCols  = input->size[3];\n\n  kstride0 = kernel->stride[0];\n  kstride1 = kernel->stride[1];\n  nKernelPlane = kernel->size[1];\n  nKernelRows = kernel->size[2];\n  nKernelCols = kernel->size[3];\n\n  THArgCheck(nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, \"conv2DRevger : Input image is smaller than kernel\");\n  THArgCheck(kernel->size[0] == input->size[0] , 2, \"conv2DRevger : Input batch and kernel batch is not same size\");\n\n  nOutputRows = nInputRows - (nKernelRows - 1) * srow;\n  nOutputCols = nInputCols - (nKernelCols - 1) * scol;\n\n  nelem = THTensor_(nElement)(r_);\n  THTensor_(resize4d)(r_,nKernelPlane, nInputPlane, nOutputRows, nOutputCols);\n\n  input_data = THTensor_(data)(input);\n  weight_data = THTensor_(data)(kernel);\n  output_data = THTensor_(data)(r_);\n\n  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))\n  {\n    /*THTensor_(zero)(r_);*/\n\n#pragma omp parallel for private(k)\n    for (k = 0; k < r_->size[0]*r_->size[1]; k++)\n    {\n      real* ptr_output = output_data + k*nOutputCols*nOutputRows;\n      long l;\n      for (l = 0; l < nOutputRows*nOutputCols; l++)\n        ptr_output[l] = 0.0;\n    }\n  }\n  else if (beta != 1)\n  {\n    /*THTensor_(mul)(r_, beta);*/\n#pragma omp parallel for private(k)\n    for (k = 0; k < r_->size[0]*r_->size[1]; k++)\n    {\n      real* ptr_output = output_data + k*nOutputCols*nOutputRows;\n      long l;\n      for (l = 0; l < nOutputRows*nOutputCols; l++)\n        ptr_output[l] *= beta;\n    }\n  
}\n\n#pragma omp parallel for private(k)\n  for(k = 0; k < nKernelPlane; k++)\n  {\n    long i;\n    for(i = 0; i < nInputPlane; i++)\n    {\n      long p;\n      for(p = 0; p < nbatch; p++)\n      {\n        /* get kernel */\n        real *ptr_weight = weight_data + p*kstride0 + k*kstride1;\n        /* get output */\n        real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows;\n        /* get input */\n        real *ptr_input = input_data + p*istride0 + i*istride1;\n\n        /* do image, kernel convolution */\n        THTensor_(validXCorr2DRevptr)(ptr_output,\n                                      alpha,\n                                      ptr_input,  nInputRows,  nInputCols,\n                                      ptr_weight, nKernelRows, nKernelCols,\n                                      srow, scol);\n        /* Next output plane */\n        /* output_data += nOutputCols*nOutputRows; */\n      }\n    }\n  }\n  THTensor_(free)(input);\n  THTensor_(free)(kernel);\n}\n\n\n/*\n  3D input, 3D kernel, 4D output\n  like rank1 update\n  A <- xx' + beta*A\n*/\nvoid THTensor_(conv2Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc)\n{\n  long nInputPlane, nInputRows, nInputCols;\n  long nKernelPlane, nKernelRows, nKernelCols;\n  long nOutputPlane, nOutputRows, nOutputCols;\n  long istride0, kstride0;\n\n  THTensor *input;\n  THTensor *kernel;\n  real *input_data;\n  real *weight_data;\n  real *output_data;\n  ptrdiff_t nelem;\n  long k;\n\n  THArgCheck(t_->nDimension == 3 , 3, \"input: 3D Tensor expected\");\n  THArgCheck(k_->nDimension == 3 , 4, \"kernel: 3D Tensor expected\");\n  THArgCheck(srow >= 1, 5, \"Stride should be a positive integer\");\n  THArgCheck(scol >= 1, 6, \"Stride should be a positive integer\");\n  THArgCheck(*vf == 'V' || *vf == 'F', 7, \"type of convolution can 'V' or 'F'\");\n  THArgCheck(*xc == 'C' || *xc == 'X', 7, \"type of 
convolution can 'X' or 'C'\");\n\n  input = THTensor_(newContiguous)(t_);\n  kernel = THTensor_(newContiguous)(k_);\n\n  nInputPlane = input->size[0];\n  istride0    = input->stride[0];\n  nInputRows  = input->size[1];\n  nInputCols  = input->size[2];\n\n  kstride0 = kernel->stride[0];\n  nKernelPlane = kernel->size[0];\n  nKernelRows = kernel->size[1];\n  nKernelCols = kernel->size[2];\n  nOutputPlane = nInputPlane * kernel->size[0];\n\n  THArgCheck((nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, \"conv2Dger : Input image is smaller than kernel\");\n\n  if (*vf == 'F') {\n    nOutputRows = (nInputRows - 1) * srow + nKernelRows;\n    nOutputCols = (nInputCols - 1) * scol + nKernelCols;\n  } else { /* valid */\n    nOutputRows = (nInputRows - nKernelRows) / srow + 1;\n    nOutputCols = (nInputCols - nKernelCols) / scol + 1;\n  }\n\n  nelem = THTensor_(nElement)(r_);\n  THTensor_(resize4d)(r_, nKernelPlane, nInputPlane, nOutputRows, nOutputCols);\n\n  input_data = THTensor_(data)(input);\n  weight_data = THTensor_(data)(kernel);\n  output_data = THTensor_(data)(r_);\n\n  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))\n  {\n    /*THTensor_(zero)(r_);*/\n#pragma omp parallel for private(k)\n    for (k = 0; k < r_->size[0]*r_->size[1]; k++)\n    {\n      real* ptr_output = output_data + k*nOutputCols*nOutputRows;\n      long l;\n      for (l = 0; l < nOutputRows*nOutputCols; l++)\n        ptr_output[l] = 0.0;\n    }\n  }\n  else if (beta != 1)\n  {\n    /*THTensor_(mul)(r_, beta);*/\n#pragma omp parallel for private(k)\n    for (k = 0; k < r_->size[0]*r_->size[1]; k++)\n    {\n      real* ptr_output = output_data + k*nOutputCols*nOutputRows;\n      long l;\n      for (l = 0; l < nOutputRows*nOutputCols; l++)\n        ptr_output[l] *= beta;\n    }\n  }\n\n#pragma omp parallel for private(k)\n  for(k = 0; k < nKernelPlane; k++)\n  {\n    long i;\n    /* get kernel */\n    real *ptr_weight = weight_data+k*kstride0;\n\n    for(i 
= 0; i < nInputPlane; i++)\n    {\n      /* get output */\n      real *ptr_output = output_data + k*nInputPlane*nOutputCols*nOutputRows + i*nOutputCols*nOutputRows;\n      /* get input */\n      real *ptr_input = input_data+i*istride0;\n\n      /* do image, kernel convolution */\n      if (*vf == 'F')\n        if (*xc == 'X')\n          THTensor_(fullXCorr2Dptr)(ptr_output,\n                                    alpha,\n                                    ptr_input,  nInputRows,  nInputCols,\n                                    ptr_weight, nKernelRows, nKernelCols,\n                                    srow, scol);\n        else\n          THTensor_(fullConv2Dptr)(ptr_output,\n                                   alpha,\n                                   ptr_input,  nInputRows,  nInputCols,\n                                   ptr_weight, nKernelRows, nKernelCols,\n                                   srow, scol);\n      else\n        if (*xc == 'X')\n          THTensor_(validXCorr2Dptr)(ptr_output,\n                                     alpha,\n                                     ptr_input,  nInputRows,  nInputCols,\n                                     ptr_weight, nKernelRows, nKernelCols,\n                                     srow, scol);\n        else\n          THTensor_(validConv2Dptr)(ptr_output,\n                                    alpha,\n                                    ptr_input,  nInputRows,  nInputCols,\n                                    ptr_weight, nKernelRows, nKernelCols,\n                                    srow, scol);\n      /* Next output plane */\n      /* output_data += nOutputCols*nOutputRows; */\n    }\n  }\n  THTensor_(free)(input);\n  THTensor_(free)(kernel);\n}\n\n\n/*\n  3D input, 4D kernel, 3D output\n  matrix vector product like\n  y <- Ax + beta*y\n*/\nvoid THTensor_(conv2Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc)\n{\n  long nInputPlane, nInputRows, 
nInputCols;\n  long nKernelRows, nKernelCols;\n  long nOutputPlane, nOutputRows, nOutputCols;\n  long istride0, kstride0, kstride1;\n  THTensor *input;\n  THTensor* kernel;\n  real *input_data;\n  real *weight_data;\n  real *output_data;\n  ptrdiff_t nelem;\n  long k;\n\n  THArgCheck(t_->nDimension == 3 , 3, \"input: 3D Tensor expected\");\n  THArgCheck(k_->nDimension == 4 , 4, \"kernel: 4D Tensor expected\");\n  THArgCheck(srow >= 1, 5, \"Stride should be a positive integer\");\n  THArgCheck(scol >= 1, 6, \"Stride should be a positive integer\");\n  THArgCheck(*vf == 'V' || *vf == 'F', 7, \"type of convolution can 'V' or 'F'\");\n  THArgCheck(*xc == 'C' || *xc == 'X', 7, \"type of convolution can 'X' or 'C'\");\n\n  input = THTensor_(newContiguous)(t_);\n  if (!(k_->stride[3] == 1) || !(k_->stride[2] == k_->size[3])) {\n    kernel = THTensor_(newContiguous)(k_);\n  } else {\n    THTensor_(retain)(k_);\n    kernel = k_;\n  }\n\n  nInputPlane = input->size[0];\n  istride0    = input->stride[0];\n  nInputRows  = input->size[1];\n  nInputCols  = input->size[2];\n\n  kstride0    = kernel->stride[0];\n  kstride1    = kernel->stride[1];\n  nKernelRows = kernel->size[2];\n  nKernelCols = kernel->size[3];\n  nOutputPlane = kernel->size[0];\n  THArgCheck(kernel->size[1] == nInputPlane, 2, \"invalid number of input planes\");\n\n  THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, \"conv2Dmv : Input image is smaller than kernel\");\n\n  if (*vf == 'F') {\n    nOutputRows = (nInputRows - 1) * srow + nKernelRows;\n    nOutputCols = (nInputCols - 1) * scol + nKernelCols;\n  } else { /* valid */\n    nOutputRows = (nInputRows - nKernelRows) / srow + 1;\n    nOutputCols = (nInputCols - nKernelCols) / scol + 1;\n  }\n\n  nelem = THTensor_(nElement)(r_);\n  THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols);\n\n  input_data = THTensor_(data)(input);\n  weight_data = THTensor_(data)(kernel);\n  output_data = THTensor_(data)(r_);\n\n  
if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))\n  {\n    /*THTensor_(zero)(r_);*/\n#pragma omp parallel for private(k)\n    for (k = 0; k < r_->size[0]; k++)\n    {\n      real* ptr_output = output_data + k*nOutputCols*nOutputRows;\n      long l;\n      for (l = 0; l < nOutputRows*nOutputCols; l++)\n        ptr_output[l] = 0.0;\n    }\n  }\n  else if (beta != 1)\n  {\n    /*THTensor_(mul)(r_, beta);*/\n#pragma omp parallel for private(k)\n    for (k = 0; k < r_->size[0]; k++)\n    {\n      real* ptr_output = output_data + k*nOutputCols*nOutputRows;\n      long l;\n      for (l = 0; l < nOutputRows*nOutputCols; l++)\n        ptr_output[l] *= beta;\n    }\n  }\n\n#pragma omp parallel for private(k)\n  for(k = 0; k < nOutputPlane; k++)\n  {\n    long i;\n    /* get output */\n    real *ptr_output = output_data + k*nOutputCols*nOutputRows;\n    for(i = 0; i < nInputPlane; i++)\n    {\n      /* get kernel */\n      real *ptr_weight = weight_data + k*kstride0 + i*kstride1;\n      /* get input */\n      real *ptr_input = input_data + i*istride0;\n\n      /* do image, kernel convolution */\n      if (*vf == 'F')\n        if (*xc == 'X')\n          THTensor_(fullXCorr2Dptr)(ptr_output,\n                                    alpha,\n                                    ptr_input,  nInputRows,  nInputCols,\n                                    ptr_weight, nKernelRows, nKernelCols,\n                                    srow, scol);\n        else\n          THTensor_(fullConv2Dptr)(ptr_output,\n                                   alpha,\n                                   ptr_input,  nInputRows,  nInputCols,\n                                   ptr_weight, nKernelRows, nKernelCols,\n                                   srow, scol);\n      else\n        if (*xc == 'X')\n          THTensor_(validXCorr2Dptr)(ptr_output,\n                                     alpha,\n                                     ptr_input,  nInputRows,  nInputCols,\n                                  
   ptr_weight, nKernelRows, nKernelCols,\n                                     srow, scol);\n        else\n          THTensor_(validConv2Dptr)(ptr_output,\n                                    alpha,\n                                    ptr_input,  nInputRows,  nInputCols,\n                                    ptr_weight, nKernelRows, nKernelCols,\n                                    srow, scol);\n    }\n    /* Next output plane */\n    /* output_data += nOutputCols*nOutputRows;*/\n  }\n  THTensor_(free)(input);\n  THTensor_(free)(kernel);\n}\n\n\n/*\n  4D input, 4D kernel, 4D output\n  matrix matrix product like\n  Y <- AX + beta*Y\n*/\nvoid THTensor_(conv2Dmm)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc)\n{\n  long nInputPlane, nInputRows, nInputCols;\n  long nKernelRows, nKernelCols;\n  long nOutputPlane, nOutputRows, nOutputCols;\n  long kstride0, kstride1;\n  THTensor *input;\n  THTensor* kernel;\n  long nbatch;\n  ptrdiff_t nelem;\n  real *input_data;\n  real *weight_data;\n  real *output_data;\n  long p;\n\n  THArgCheck(t_->nDimension == 4 , 3, \"input: 4D Tensor expected\");\n  THArgCheck(k_->nDimension == 4 , 4, \"kernel: 4D Tensor expected\");\n  THArgCheck(srow >= 1, 5, \"Stride should be a positive integer\");\n  THArgCheck(scol >= 1, 6, \"Stride should be a positive integer\");\n  THArgCheck(*vf == 'V' || *vf == 'F', 7, \"type of convolution can 'V' or 'F'\");\n  THArgCheck(*xc == 'C' || *xc == 'X', 7, \"type of convolution can 'X' or 'C'\");\n\n  input = THTensor_(newContiguous)(t_);\n  if (!(k_->stride[3] == 1) || !(k_->stride[2] == k_->size[3])) {\n    kernel = THTensor_(newContiguous)(k_);\n  } else {\n    THTensor_(retain)(k_);\n    kernel = k_;\n  }\n\n  nbatch = input->size[0];\n  nInputPlane = input->size[1];\n  nInputRows  = input->size[2];\n  nInputCols  = input->size[3];\n\n  kstride0    = kernel->stride[0];\n  kstride1    = kernel->stride[1];\n  nKernelRows = 
kernel->size[2];\n  nKernelCols = kernel->size[3];\n  nOutputPlane = kernel->size[0];\n  THArgCheck(kernel->size[1] == nInputPlane, 2, \"invalid number of input planes\");\n\n  THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, \"conv2Dmm : Input image is smaller than kernel\");\n\n  if (*vf == 'F') {\n    nOutputRows = (nInputRows - 1) * srow + nKernelRows;\n    nOutputCols = (nInputCols - 1) * scol + nKernelCols;\n  } else { /* valid */\n    nOutputRows = (nInputRows - nKernelRows) / srow + 1;\n    nOutputCols = (nInputCols - nKernelCols) / scol + 1;\n  }\n\n  nelem = THTensor_(nElement)(r_);\n  THTensor_(resize4d)(r_, nbatch, nOutputPlane, nOutputRows, nOutputCols);\n\n  input_data = THTensor_(data)(input);\n  weight_data = THTensor_(data)(kernel);\n  output_data = THTensor_(data)(r_);\n\n  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))\n  {\n    /*THTensor_(zero)(r_);*/\n#pragma omp parallel for private(p)\n    for (p=0; p < r_->size[0]; p++)\n    {\n      long k;\n      for (k = 0; k < r_->size[1]; k++)\n      {\n        real* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows;\n        long l;\n        for (l = 0; l < nOutputRows*nOutputCols; l++)\n          ptr_output[l] = 0.0;\n      }\n    }\n  }\n  else if (beta != 1)\n  {\n    /*THTensor_(mul)(r_, beta);*/\n#pragma omp parallel for private(p)\n    for(p=0; p < r_->size[0]; p++)\n    {\n      long k;\n      for (k = 0; k < r_->size[1]; k++)\n      {\n        real* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows;\n        long l;\n        for (l = 0; l < nOutputRows*nOutputCols; l++)\n          ptr_output[l] *= beta;\n      }\n    }\n  }\n\n#pragma omp parallel for private(p)\n  for(p=0; p < nbatch; p++)\n  {\n    long k;\n    for(k = 0; k < nOutputPlane; k++)\n    {\n      long i;\n      /* get output */\n      real *ptr_output = output_data + 
p*nOutputPlane*nOutputCols*nOutputRows + k*nOutputCols*nOutputRows;\n      for(i = 0; i < nInputPlane; i++)\n      {\n        /* get kernel */\n        real *ptr_weight = weight_data + k*kstride0 + i*kstride1;\n        /* get input */\n        real *ptr_input = input_data + p*nInputPlane*nInputRows*nInputCols + i*nInputRows*nInputCols;\n\n        /* do image, kernel convolution */\n        if (*vf == 'F')\n          if (*xc == 'X')\n            THTensor_(fullXCorr2Dptr)(ptr_output,\n                                      alpha,\n                                      ptr_input,  nInputRows,  nInputCols,\n                                      ptr_weight, nKernelRows, nKernelCols,\n                                      srow, scol);\n          else\n            THTensor_(fullConv2Dptr)(ptr_output,\n                                     alpha,\n                                     ptr_input,  nInputRows,  nInputCols,\n                                     ptr_weight, nKernelRows, nKernelCols,\n                                     srow, scol);\n        else\n          if (*xc == 'X')\n            THTensor_(validXCorr2Dptr)(ptr_output,\n                                       alpha,\n                                       ptr_input,  nInputRows,  nInputCols,\n                                       ptr_weight, nKernelRows, nKernelCols,\n                                       srow, scol);\n          else\n            THTensor_(validConv2Dptr)(ptr_output,\n                                      alpha,\n                                      ptr_input,  nInputRows,  nInputCols,\n                                      ptr_weight, nKernelRows, nKernelCols,\n                                      srow, scol);\n      }\n      /* Next output plane */\n      /* output_data += nOutputCols*nOutputRows;*/\n    }\n  }\n  THTensor_(free)(input);\n  THTensor_(free)(kernel);\n}\n\n\n/*\n  2D input, 2D kernel, 2D output\n  scalar multiplication like\n  y <- x*y + beta*y\n*/\nvoid 
THTensor_(conv2Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc)\n{\n  THTensor *input;\n  THTensor* kernel;\n  long nInputRows;\n  long nInputCols;\n  long nKernelRows;\n  long nKernelCols;\n  long nOutputRows, nOutputCols;\n  real *ptr_input;\n  real *ptr_weight;\n  real *output_data;\n  ptrdiff_t nelem;\n\n  THArgCheck(t_->nDimension == 2 , 3, \"input: 2D Tensor expected\");\n  THArgCheck(k_->nDimension == 2 , 4, \"kernel: 2D Tensor expected\");\n  THArgCheck(srow >= 1, 5, \"Stride should be a positive integer\");\n  THArgCheck(scol >= 1, 6, \"Stride should be a positive integer\");\n\n  input = THTensor_(newContiguous)(t_);\n  kernel = THTensor_(newContiguous)(k_);\n\n  nInputRows  = input->size[0];\n  nInputCols  = input->size[1];\n  nKernelRows = kernel->size[0];\n  nKernelCols = kernel->size[1];\n\n  THArgCheck((nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, \"conv2Dmul : Input image is smaller than kernel\");\n\n  nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);\n  nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);\n\n  nelem = THTensor_(nElement)(r_);\n  THTensor_(resize2d)(r_, nOutputRows, nOutputCols);\n  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))\n    THTensor_(zero)(r_);\n  else if (beta != 1)\n    THTensor_(mul)(r_, r_, beta);\n\n  ptr_input = THTensor_(data)(input);\n  ptr_weight = THTensor_(data)(kernel);\n  output_data = THTensor_(data)(r_);\n\n\n  /* do image, kernel convolution */\n  THTensor_(conv2d)(output_data,\n                    alpha,\n                    ptr_input, nInputRows, nInputCols,\n                    ptr_weight, nKernelRows, nKernelCols,\n                    srow, scol, vf, xc);\n  THTensor_(free)(input);\n  THTensor_(free)(kernel);\n}\n\n/*\n  3D input, 3D kernel, 3D output\n  component wise multiplication like\n  y <- y.*x + beta*y\n*/\nvoid 
THTensor_(conv2Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, long srow, long scol, const char *vf, const char *xc)\n{\n  long nInputPlane, nInputRows, nInputCols;\n  long nKernelRows, nKernelCols;\n  long nOutputPlane, nOutputRows, nOutputCols;\n  long istride0, kstride0;\n  THTensor *input;\n  THTensor *kernel;\n  real *input_data;\n  real *weight_data;\n  real *output_data;\n  ptrdiff_t nelem;\n  long k;\n\n  THArgCheck(t_->nDimension == 3 , 3, \"input: 3D Tensor expected\");\n  THArgCheck(k_->nDimension == 3 , 4, \"kernel: 3D Tensor expected\");\n  THArgCheck(srow >= 1, 5, \"Stride should be a positive integer\");\n  THArgCheck(scol >= 1, 6, \"Stride should be a positive integer\");\n\n  input = THTensor_(newContiguous)(t_);\n  kernel = THTensor_(newContiguous)(k_);\n\n  istride0    = input->stride[0];\n  nInputPlane = input->size[0];\n  nInputRows  = input->size[1];\n  nInputCols  = input->size[2];\n\n  kstride0    = kernel->stride[0];\n  nOutputPlane = kernel->size[0];\n  nKernelRows = kernel->size[1];\n  nKernelCols = kernel->size[2];\n\n  THArgCheck(nOutputPlane == nInputPlane, 2, \"invalid number of input/kernel planes\");\n  THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, \"conv2Dcmul : Input image is smaller than kernel\");\n\n  nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);\n  nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);\n\n  nelem = THTensor_(nElement)(r_);\n  THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols);\n\n  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))\n  {\n    THTensor_(zero)(r_);\n  }\n  else if (beta != 1)\n    THTensor_(mul)(r_, r_, beta);\n\n  input_data = THTensor_(data)(input);\n  weight_data = THTensor_(data)(kernel);\n  output_data = THTensor_(data)(r_);\n\n  for(k = 0; k < nOutputPlane; k++)\n  {\n    /* get kernel */\n    real *ptr_weight = weight_data + k*kstride0;\n    /* get input */\n   
 real *ptr_input = input_data + k*istride0;\n\n    /* do image, kernel convolution */\n    THTensor_(conv2d)(output_data,\n                      alpha,\n                      ptr_input, nInputRows, nInputCols,\n                      ptr_weight, nKernelRows, nKernelCols,\n                      srow, scol, vf, xc);\n    /* Next output plane */\n    output_data += nOutputCols*nOutputRows;\n  }\n  THTensor_(free)(input);\n  THTensor_(free)(kernel);\n}\n\n/*\n  3D input, 3D kernel, 3D output\n  component wise multiplication like with a permutation map\n  y <- y.*x + beta*y\n*/\nvoid THTensor_(conv2Dmap)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, THTensor *map, long srow, long scol, const char *vf, const char *xc)\n{\n  long nInputPlane, nInputRows, nInputCols;\n  long nKernelRows, nKernelCols;\n  long nOutputPlane, nOutputRows, nOutputCols;\n  long istride0, kstride0;\n  THTensor *input;\n  THTensor* kernel;\n  real *input_data;\n  real *weight_data;\n  real *output_data;\n  long nmaps;\n  ptrdiff_t nelem;\n  long k;\n\n  THArgCheck(t_->nDimension == 3 , 3, \"input: 3D Tensor expected\");\n  THArgCheck(k_->nDimension == 3 , 4, \"kernel: 3D Tensor expected\");\n  THArgCheck(map->nDimension == 2 , 4, \"map: 2D Tensor expected\");\n  THArgCheck(srow >= 1, 6, \"Stride should be a positive integer\");\n  THArgCheck(scol >= 1, 7, \"Stride should be a positive integer\");\n\n  input = THTensor_(newContiguous)(t_);\n  kernel = THTensor_(newContiguous)(k_);\n\n  istride0    = input->stride[0];\n  nInputPlane = input->size[0];\n  nInputRows  = input->size[1];\n  nInputCols  = input->size[2];\n\n  kstride0    = kernel->stride[0];\n  nOutputPlane = kernel->size[0];\n  nKernelRows = kernel->size[1];\n  nKernelCols = kernel->size[2];\n\n  THArgCheck(nOutputPlane == nInputPlane, 2, \"invalid number of input/kernel planes\");\n  THArgCheck( (nInputRows >= nKernelRows && nInputCols >= nKernelCols)\n              || *vf == 'F', 2, \"conv2Dmap : Input image is 
smaller than kernel\");\n\n  nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);\n  nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);\n\n  nelem = THTensor_(nElement)(r_);\n  THTensor_(resize3d)(r_, nOutputPlane, nOutputRows, nOutputCols);\n\n  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))\n  {\n    THTensor_(zero)(r_);\n  }\n  else if (beta != 1)\n    THTensor_(mul)(r_, r_, beta);\n\n  input_data = THTensor_(data)(input);\n  weight_data = THTensor_(data)(kernel);\n  output_data = THTensor_(data)(r_);\n\n  nmaps = map->size[0];\n\n  for(k = 0; k < nmaps; k++)\n  {\n    /* get indices */\n    long from = (long)THTensor_(get2d)(map,k,0)-1;\n    long to   = (long)THTensor_(get2d)(map,k,1)-1;\n\n    /* get kernel */\n    real *ptr_weight = weight_data + k*kstride0;\n    /* get input */\n    real *ptr_input = input_data + from*istride0;\n    /* get output */\n    real *ptr_output = output_data + to*nOutputRows*nOutputCols;\n\n    /* do image, kernel convolution */\n    THTensor_(conv2d)(ptr_output,\n                      alpha,\n                      ptr_input, nInputRows, nInputCols,\n                      ptr_weight, nKernelRows, nKernelCols,\n                      srow, scol, vf, xc);\n  }\n  THTensor_(free)(input);\n  THTensor_(free)(kernel);\n}\n\n/*\n  4D input, 4D kernel, 5D output\n  like rank1 update\n  A <- xx' + beta*A\n  for sr,sc=1 this is equivalent to xcorr2Dger, but otherwise it is useful for\n  calculating derivatives wrt a kernel that is applied with stride sr,sc != 1\n*/\nvoid THTensor_(conv3DRevger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n                             long sdepth, long srow, long scol)\n{\n  long nInputPlane, nInputDepth, nInputRows, nInputCols;\n  long nKernelPlane, nKernelDepth, nKernelRows, nKernelCols;\n  long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;\n  long istride0, kstride0;\n  THTensor *input;\n  THTensor *kernel;\n  real 
*input_data;\n  real *weight_data;\n  real *output_data;\n  ptrdiff_t nelem;\n  long k, i;\n\n  THArgCheck(t_->nDimension == 4 , 3, \"input: 4D Tensor expected\");\n  THArgCheck(k_->nDimension == 4 , 4, \"kernel: 4D Tensor expected\");\n  THArgCheck(sdepth >= 1, 5, \"Stride should be a positive integer\");\n  THArgCheck(srow >= 1, 6, \"Stride should be a positive integer\");\n  THArgCheck(scol >= 1, 7, \"Stride should be a positive integer\");\n\n  input = THTensor_(newContiguous)(t_);\n  kernel = THTensor_(newContiguous)(k_);\n\n  nInputPlane = input->size[0];\n  istride0    = input->stride[0];\n  nInputDepth = input->size[1];\n  nInputRows  = input->size[2];\n  nInputCols  = input->size[3];\n\n  kstride0 = kernel->stride[0];\n  nKernelPlane = kernel->size[0];\n  nKernelDepth= kernel->size[1];\n  nKernelRows = kernel->size[2];\n  nKernelCols = kernel->size[3];\n  nOutputPlane = nInputPlane * kernel->size[0];\n\n  THArgCheck(nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols , 2, \"conv3DRevger : Input image is smaller than kernel\");\n\n  nOutputDepth = nInputDepth - (nKernelDepth - 1) * sdepth;\n  nOutputRows = nInputRows - (nKernelRows - 1) * srow;\n  nOutputCols = nInputCols - (nKernelCols - 1) * scol;\n\n  nelem = THTensor_(nElement)(r_);\n  THTensor_(resize5d)(r_,nKernelPlane, nInputPlane, nOutputDepth, nOutputRows, nOutputCols);\n\n  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))\n  {\n    THTensor_(zero)(r_);\n  }\n  else if (beta != 1)\n    THTensor_(mul)(r_, r_, beta);\n\n  input_data = THTensor_(data)(input);\n  weight_data = THTensor_(data)(kernel);\n  output_data = THTensor_(data)(r_);\n\n  for(k = 0; k < nKernelPlane; k++)\n  {\n    /* get kernel */\n    real *ptr_weight = weight_data+k*kstride0;\n\n    for(i = 0; i < nInputPlane; i++)\n    {\n      /* get input */\n      real *ptr_input = input_data+i*istride0;\n\n      /* do image, kernel convolution */\n      
THTensor_(validXCorr3DRevptr)(output_data,\n                                    alpha,\n                                    ptr_input,  nInputDepth, nInputRows,  nInputCols,\n                                    ptr_weight, nKernelDepth, nKernelRows, nKernelCols,\n                                    sdepth, srow, scol);\n      /* Next output plane */\n      output_data += nOutputDepth*nOutputCols*nOutputRows;\n    }\n  }\n  THTensor_(free)(input);\n  THTensor_(free)(kernel);\n}\n\n\n/*\n  4D input, 4D kernel, 5D output\n  like rank1 update\n  A <- xx' + beta*A\n*/\nvoid THTensor_(conv3Dger)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n                          long sdepth, long srow, long scol, const char *vf, const char *xc)\n{\n  long nInputPlane, nInputDepth, nInputRows, nInputCols;\n  long nKernelPlane, nKernelDepth, nKernelRows, nKernelCols;\n  long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;\n  long istride0, kstride0;\n  THTensor *input;\n  THTensor *kernel;\n  real *input_data;\n  real *weight_data;\n  real *output_data;\n  ptrdiff_t nelem;\n  long k, i;\n\n  THArgCheck(t_->nDimension == 4 , 3, \"input: 4D Tensor expected\");\n  THArgCheck(k_->nDimension == 4 , 4, \"kernel: 4D Tensor expected\");\n  THArgCheck(sdepth >= 1, 5, \"Stride should be a positive integer\");\n  THArgCheck(srow >= 1, 6, \"Stride should be a positive integer\");\n  THArgCheck(scol >= 1, 7, \"Stride should be a positive integer\");\n  THArgCheck(*vf == 'V' || *vf == 'F', 8, \"type of convolution can 'V' or 'F'\");\n  THArgCheck(*xc == 'C' || *xc == 'X', 8, \"type of convolution can 'X' or 'C'\");\n\n  input = THTensor_(newContiguous)(t_);\n  kernel = THTensor_(newContiguous)(k_);\n\n  nInputPlane = input->size[0];\n  istride0    = input->stride[0];\n  nInputDepth = input->size[1];\n  nInputRows  = input->size[2];\n  nInputCols  = input->size[3];\n\n  kstride0     = kernel->stride[0];\n  nKernelPlane = kernel->size[0];\n  nKernelDepth = kernel->size[1];\n  
nKernelRows  = kernel->size[2];\n  nKernelCols  = kernel->size[3];\n  nOutputPlane = nInputPlane * kernel->size[0];\n\n  THArgCheck((nInputDepth >= nKernelDepth\n              && nInputRows >= nKernelRows\n              && nInputCols >= nKernelCols)\n             || *vf == 'F', 2, \"conv3Dger : Input image is smaller than kernel\");\n\n  nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);\n  nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);\n  nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);\n\n  nelem = THTensor_(nElement)(r_);\n  THTensor_(resize5d)(r_,nKernelPlane, nInputPlane, nOutputDepth, nOutputRows, nOutputCols);\n\n  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))\n  {\n    THTensor_(zero)(r_);\n  }\n  else if (beta != 1)\n    THTensor_(mul)(r_, r_, beta);\n\n  input_data = THTensor_(data)(input);\n  weight_data = THTensor_(data)(kernel);\n  output_data = THTensor_(data)(r_);\n\n  for(k = 0; k < nKernelPlane; k++)\n  {\n    /* get kernel */\n    real *ptr_weight = weight_data+k*kstride0;\n\n    for(i = 0; i < nInputPlane; i++)\n    {\n      /* get input */\n      real *ptr_input = input_data+i*istride0;\n\n      /* do image, kernel convolution */\n      THTensor_(conv3d)(output_data,\n                        alpha,\n                        ptr_input,  nInputDepth, nInputRows,  nInputCols,\n                        ptr_weight, nKernelDepth, nKernelRows, nKernelCols,\n                        sdepth, srow, scol, vf, xc);\n\n      /* Next output plane */\n      output_data += nOutputDepth*nOutputCols*nOutputRows;\n    }\n  }\n  THTensor_(free)(input);\n  THTensor_(free)(kernel);\n}\n\n/*\n  4D input, 5D kernel, 4D output\n  matrix vector product like\n  y <- Ax + beta*y\n*/\nvoid THTensor_(conv3Dmv)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n                         long sdepth, long srow, long scol, const char *vf, const char *xc)\n{\n  long nInputPlane, 
nInputDepth, nInputRows, nInputCols;\n  long nKernelDepth, nKernelRows, nKernelCols;\n  long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;\n  long istride0, kstride0, kstride1;\n  THTensor *input;\n  THTensor *kernel;\n  real *input_data;\n  real *weight_data;\n  real *output_data;\n  ptrdiff_t nelem;\n  long k, i;\n\n  THArgCheck(t_->nDimension == 4 , 3, \"input: 4D Tensor expected\");\n  THArgCheck(k_->nDimension == 5 , 4, \"kernel: 5D Tensor expected\");\n  THArgCheck(sdepth >= 1, 5, \"Stride should be a positive integer\");\n  THArgCheck(srow >= 1, 6, \"Stride should be a positive integer\");\n  THArgCheck(scol >= 1, 7, \"Stride should be a positive integer\");\n  THArgCheck(*vf == 'V' || *vf == 'F', 8, \"type of convolution can 'V' or 'F'\");\n  THArgCheck(*xc == 'C' || *xc == 'X', 8, \"type of convolution can 'X' or 'C'\");\n\n  input = THTensor_(newContiguous)(t_);\n  if (!(k_->stride[4] == 1) || !(k_->stride[3] == k_->size[4])) {\n    kernel = THTensor_(newContiguous)(k_);\n  } else {\n    THTensor_(retain)(k_);\n    kernel = k_;\n  }\n\n  nInputPlane = input->size[0];\n  istride0    = input->stride[0];\n  nInputDepth = input->size[1];\n  nInputRows  = input->size[2];\n  nInputCols  = input->size[3];\n\n  kstride0    = kernel->stride[0];\n  kstride1    = kernel->stride[1];\n  nKernelDepth = kernel->size[2];\n  nKernelRows = kernel->size[3];\n  nKernelCols = kernel->size[4];\n  nOutputPlane = kernel->size[0];\n  THArgCheck(kernel->size[1] == nInputPlane, 2, \"invalid number of input planes\");\n\n  THArgCheck( (nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, \"conv3Dmv : Input image is smaller than kernel\");\n\n  nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);\n  nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);\n  nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);\n\n  nelem = THTensor_(nElement)(r_);\n  THTensor_(resize4d)(r_, 
nOutputPlane, nOutputDepth, nOutputRows, nOutputCols);\n\n  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))\n  {\n    THTensor_(zero)(r_);\n  }\n  else if (beta != 1)\n    THTensor_(mul)(r_, r_, beta);\n\n  input_data = THTensor_(data)(input);\n  weight_data = THTensor_(data)(kernel);\n  output_data = THTensor_(data)(r_);\n\n  for(k = 0; k < nOutputPlane; k++)\n  {\n    for(i = 0; i < nInputPlane; i++)\n    {\n      /* get kernel */\n      real *ptr_weight = weight_data + k*kstride0 + i*kstride1;\n      /* get input */\n      real *ptr_input = input_data + i*istride0;\n\n      /* do image, kernel convolution */\n      THTensor_(conv3d)(output_data,\n                        alpha,\n                        ptr_input,  nInputDepth, nInputRows,  nInputCols,\n                        ptr_weight, nKernelDepth, nKernelRows, nKernelCols,\n                        sdepth, srow, scol, vf, xc);\n    }\n    /* Next output plane */\n    output_data += nOutputDepth*nOutputCols*nOutputRows;\n  }\n  THTensor_(free)(input);\n  THTensor_(free)(kernel);\n}\n\n/*\n  3D input, 3D kernel, 3D output\n  scalar multiplication like\n  y <- x*y + beta*y\n*/\nvoid THTensor_(conv3Dmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n                          long sdepth, long srow, long scol, const char *vf, const char *xc)\n{\n  THTensor *input;\n  THTensor* kernel;\n  long nInputDepth;\n  long nInputRows;\n  long nInputCols;\n  long nKernelDepth;\n  long nKernelRows;\n  long nKernelCols;\n  long nOutputDepth, nOutputRows, nOutputCols;\n  real *ptr_input;\n  real *ptr_weight;\n  real *output_data;\n  ptrdiff_t nelem;\n\n  THArgCheck(t_->nDimension == 3 , 3, \"input: 3D Tensor expected\");\n  THArgCheck(k_->nDimension == 3 , 4, \"kernel: 3D Tensor expected\");\n  THArgCheck(sdepth >= 1, 5, \"Stride should be a positive integer\");\n  THArgCheck(srow >= 1, 6, \"Stride should be a positive integer\");\n  THArgCheck(scol >= 1, 7, \"Stride should be a positive 
integer\");\n  THArgCheck(*vf == 'V' || *vf == 'F', 8, \"type of convolution can 'V' or 'F'\");\n  THArgCheck(*xc == 'C' || *xc == 'X', 8, \"type of convolution can 'X' or 'C'\");\n\n  input = THTensor_(newContiguous)(t_);\n  kernel = THTensor_(newContiguous)(k_);\n\n  nInputDepth = input->size[0];\n  nInputRows  = input->size[1];\n  nInputCols  = input->size[2];\n  nKernelDepth = kernel->size[0];\n  nKernelRows = kernel->size[1];\n  nKernelCols = kernel->size[2];\n\n  THArgCheck((nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, \"conv3Dmul : Input image is smaller than kernel\");\n\n  nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);\n  nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);\n  nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);\n\n  nelem = THTensor_(nElement)(r_);\n  THTensor_(resize3d)(r_, nOutputDepth, nOutputRows, nOutputCols);\n  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))\n    THTensor_(zero)(r_);\n  else if (beta != 1)\n    THTensor_(mul)(r_, r_, beta);\n\n  ptr_input = THTensor_(data)(input);\n  ptr_weight = THTensor_(data)(kernel);\n  output_data = THTensor_(data)(r_);\n\n\n  /* do image, kernel convolution */\n  THTensor_(conv3d)(output_data,\n                    alpha,\n                    ptr_input,  nInputDepth, nInputRows,  nInputCols,\n                    ptr_weight, nKernelDepth, nKernelRows, nKernelCols,\n                    sdepth, srow, scol, vf, xc);\n  THTensor_(free)(input);\n  THTensor_(free)(kernel);\n}\n\n/*\n  4D input, 4D kernel, 4D output\n  component wise multiplication like\n  y <- y.*x + beta*y\n*/\nvoid THTensor_(conv3Dcmul)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n                           long sdepth, long srow, long scol, const char *vf, const char *xc)\n{\n  long nInputPlane, nInputDepth, nInputRows, nInputCols;\n  long nKernelDepth, nKernelRows, 
nKernelCols;\n  long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;\n  long istride0, kstride0;\n\n  THTensor *input;\n  THTensor *kernel;\n  real *input_data;\n  real *weight_data;\n  real *output_data;\n  ptrdiff_t nelem;\n  long k;\n\n  THArgCheck(t_->nDimension == 4 , 3, \"input: 4D Tensor expected\");\n  THArgCheck(k_->nDimension == 4 , 4, \"kernel: 4D Tensor expected\");\n  THArgCheck(srow >= 1, 5, \"Stride should be a positive integer\");\n  THArgCheck(scol >= 1, 6, \"Stride should be a positive integer\");\n  THArgCheck(*vf == 'V' || *vf == 'F', 7, \"type of convolution can 'V' or 'F'\");\n  THArgCheck(*xc == 'C' || *xc == 'X', 7, \"type of convolution can 'X' or 'C'\");\n\n  input = THTensor_(newContiguous)(t_);\n  kernel = THTensor_(newContiguous)(k_);\n\n  istride0    = input->stride[0];\n  nInputPlane = input->size[0];\n  nInputDepth = input->size[1];\n  nInputRows  = input->size[2];\n  nInputCols  = input->size[3];\n\n  kstride0    = kernel->stride[0];\n  nOutputPlane = kernel->size[0];\n  nKernelDepth = kernel->size[1];\n  nKernelRows = kernel->size[2];\n  nKernelCols = kernel->size[3];\n\n  THArgCheck(nOutputPlane == nInputPlane, 2, \"invalid number of input/kernel planes\");\n  THArgCheck( (nInputDepth >= nKernelDepth && nInputRows >= nKernelRows && nInputCols >= nKernelCols) || *vf == 'F', 2, \"conv3Dcmul : Input image is smaller than kernel\");\n\n  nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);\n  nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);\n  nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);\n\n  nelem = THTensor_(nElement)(r_);\n  THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols);\n\n  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))\n  {\n    THTensor_(zero)(r_);\n  }\n  else if (beta != 1)\n    THTensor_(mul)(r_, r_, beta);\n\n  input_data = THTensor_(data)(input);\n  weight_data = THTensor_(data)(kernel);\n  
output_data = THTensor_(data)(r_);\n\n  for(k = 0; k < nOutputPlane; k++)\n  {\n    /* get kernel */\n    real *ptr_weight = weight_data + k*kstride0;\n    /* get input */\n    real *ptr_input = input_data + k*istride0;\n\n    /* do image, kernel convolution */\n    THTensor_(conv3d)(output_data,\n                      alpha,\n                      ptr_input,  nInputDepth, nInputRows,  nInputCols,\n                      ptr_weight, nKernelDepth, nKernelRows, nKernelCols,\n                      sdepth, srow, scol, vf, xc);\n\n    /* Next output plane */\n    output_data += nOutputDepth*nOutputCols*nOutputRows;\n  }\n  THTensor_(free)(input);\n  THTensor_(free)(kernel);\n}\n\n/*\n  4D input, 4D kernel, 4D output\n  component wise multiplication like with a permutation map\n  y <- y.*x + beta*y\n*/\nvoid THTensor_(conv3Dmap)(THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_, THTensor *map,\n                          long sdepth, long srow, long scol, const char *vf, const char *xc)\n{\n  long nInputPlane, nInputDepth, nInputRows, nInputCols;\n  long nKernelDepth, nKernelRows, nKernelCols;\n  long nOutputPlane, nOutputDepth, nOutputRows, nOutputCols;\n  long istride0, kstride0;\n\n  THTensor *input;\n  THTensor *kernel;\n  ptrdiff_t nelem;\n  real *input_data;\n  real *weight_data;\n  real *output_data;\n  long nmaps;\n  long k;\n\n  THArgCheck(t_->nDimension == 4 , 3, \"input: 4D Tensor expected\");\n  THArgCheck(k_->nDimension == 4 , 4, \"kernel: 4D Tensor expected\");\n  THArgCheck(map->nDimension == 2 , 4, \"map: 2D Tensor expected\");\n  THArgCheck(srow >= 1, 6, \"Stride should be a positive integer\");\n  THArgCheck(scol >= 1, 7, \"Stride should be a positive integer\");\n  THArgCheck(*vf == 'V' || *vf == 'F', 8, \"type of convolution can 'V' or 'F'\");\n  THArgCheck(*xc == 'C' || *xc == 'X', 8, \"type of convolution can 'X' or 'C'\");\n\n  input = THTensor_(newContiguous)(t_);\n  kernel = THTensor_(newContiguous)(k_);\n\n  istride0    = 
input->stride[0];\n  nInputPlane = input->size[0];\n  nInputDepth = input->size[1];\n  nInputRows  = input->size[2];\n  nInputCols  = input->size[3];\n\n  kstride0    = kernel->stride[0];\n  nOutputPlane = kernel->size[0];\n  nKernelDepth = kernel->size[1];\n  nKernelRows = kernel->size[2];\n  nKernelCols = kernel->size[3];\n\n  THArgCheck(nOutputPlane == nInputPlane, 2, \"invalid number of input/kernel planes\");\n  THArgCheck((nInputDepth >= nKernelDepth\n              && nInputRows >= nKernelRows\n              && nInputCols >= nKernelCols) || *vf == 'F',\n             2, \"conv3Dmap : Input image is smaller than kernel\");\n\n  nOutputDepth = THTensor_(convsize)(nInputDepth, nKernelDepth, sdepth, vf);\n  nOutputRows = THTensor_(convsize)(nInputRows, nKernelRows, srow, vf);\n  nOutputCols = THTensor_(convsize)(nInputCols, nKernelCols, scol, vf);\n\n  nelem = THTensor_(nElement)(r_);\n  THTensor_(resize4d)(r_, nOutputPlane, nOutputDepth, nOutputRows, nOutputCols);\n\n  if (nelem == 0 || beta == 0 || nelem != THTensor_(nElement)(r_))\n  {\n    THTensor_(zero)(r_);\n  }\n  else if (beta != 1)\n    THTensor_(mul)(r_, r_, beta);\n\n  input_data = THTensor_(data)(input);\n  weight_data = THTensor_(data)(kernel);\n  output_data = THTensor_(data)(r_);\n\n  nmaps = map->size[0];\n\n  for(k = 0; k < nmaps; k++)\n  {\n    /* get indices */\n    long from = (long)THTensor_(get2d)(map,k,0)-1;\n    long to   = (long)THTensor_(get2d)(map,k,1)-1;\n\n    /* get kernel */\n    real *ptr_weight = weight_data + k*kstride0;\n    /* get input */\n    real *ptr_input = input_data + from*istride0;\n    /* get output */\n    real *ptr_output = output_data + to*nOutputDepth*nOutputRows*nOutputCols;\n\n    /* do image, kernel convolution */\n    THTensor_(conv3d)(ptr_output,\n                      alpha,\n                      ptr_input,  nInputDepth, nInputRows,  nInputCols,\n                      ptr_weight, nKernelDepth, nKernelRows, nKernelCols,\n                      sdepth, srow, 
scol, vf, xc);\n  }\n  THTensor_(free)(input);\n  THTensor_(free)(kernel);\n}\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THTensorConv.h",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THTensorConv.h\"\n#else\n\n/*\n * 2D routines operating on raw buffers.\n * NOTE(review): going by the parameter names, r_/t_/k_ look like the result,\n * input and kernel buffers, and (ir, ic) / (kr, kc) / (sr, sc) like the\n * row/column sizes of the input, the kernel and the stride; confirm against\n * the definitions in THTensorConv.c.\n */\nTH_API void THTensor_(validXCorr2Dptr)(\n    real *r_, real alpha,\n    real *t_, long ir, long ic,\n    real *k_, long kr, long kc,\n    long sr, long sc);\n\nTH_API void THTensor_(validConv2Dptr)(\n    real *r_, real alpha,\n    real *t_, long ir, long ic,\n    real *k_, long kr, long kc,\n    long sr, long sc);\n\nTH_API void THTensor_(fullXCorr2Dptr)(\n    real *r_, real alpha,\n    real *t_, long ir, long ic,\n    real *k_, long kr, long kc,\n    long sr, long sc);\n\nTH_API void THTensor_(fullConv2Dptr)(\n    real *r_, real alpha,\n    real *t_, long ir, long ic,\n    real *k_, long kr, long kc,\n    long sr, long sc);\n\nTH_API void THTensor_(validXCorr2DRevptr)(\n    real *r_, real alpha,\n    real *t_, long ir, long ic,\n    real *k_, long kr, long kc,\n    long sr, long sc);\n\n/*\n * 2D routines operating on tensors.\n * NOTE(review): presumably r_ is the result, t_ the input and k_ the kernel,\n * with vf / xc selecting the kind of convolution; verify against\n * THTensorConv.c.\n */\nTH_API void THTensor_(conv2DRevger)(\n    THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n    long srow, long scol);\nTH_API void THTensor_(conv2DRevgerm)(\n    THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n    long srow, long scol);\nTH_API void THTensor_(conv2Dger)(\n    THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n    long srow, long scol, const char *vf, const char *xc);\nTH_API void THTensor_(conv2Dmv)(\n    THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n    long srow, long scol, const char *vf, const char *xc);\nTH_API void THTensor_(conv2Dmm)(\n    THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n    long srow, long scol, const char *vf, const char *xc);\nTH_API void THTensor_(conv2Dmul)(\n    THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n    long srow, long scol, const char *vf, const char *xc);\nTH_API void THTensor_(conv2Dcmul)(\n    THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n    long srow, long scol, const char *vf, const char *xc);\n\n/*\n * 3D routines operating on raw buffers; same argument pattern as the 2D ones\n * with an extra leading extent (it, kt, st) per group.\n */\nTH_API void THTensor_(validXCorr3Dptr)(\n    real *r_, real alpha,\n    real *t_, long it, long ir, long ic,\n    real *k_, long kt, long kr, long kc,\n    long st, long sr, long sc);\n\nTH_API void THTensor_(validConv3Dptr)(\n    real *r_, real alpha,\n    real *t_, long it, long ir, long ic,\n    real *k_, long kt, long kr, long kc,\n    long st, long sr, long sc);\n\nTH_API void THTensor_(fullXCorr3Dptr)(\n    real *r_, real alpha,\n    real *t_, long it, long ir, long ic,\n    real *k_, long kt, long kr, long kc,\n    long st, long sr, long sc);\n\nTH_API void THTensor_(fullConv3Dptr)(\n    real *r_, real alpha,\n    real *t_, long it, long ir, long ic,\n    real *k_, long kt, long kr, long kc,\n    long st, long sr, long sc);\n\nTH_API void THTensor_(validXCorr3DRevptr)(\n    real *r_, real alpha,\n    real *t_, long it, long ir, long ic,\n    real *k_, long kt, long kr, long kc,\n    long st, long sr, long sc);\n\n/*\n * 3D routines operating on tensors; same argument pattern as the 2D tensor\n * routines with an extra sdepth stride.\n */\nTH_API void THTensor_(conv3DRevger)(\n    THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n    long sdepth, long srow, long scol);\nTH_API void THTensor_(conv3Dger)(\n    THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n    long sdepth, long srow, long scol, const char *vf, const char *xc);\nTH_API void THTensor_(conv3Dmv)(\n    THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n    long sdepth, long srow, long scol, const char *vf, const char *xc);\nTH_API void THTensor_(conv3Dmul)(\n    THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n    long sdepth, long srow, long scol, const char *vf, const char *xc);\nTH_API void THTensor_(conv3Dcmul)(\n    THTensor *r_, real beta, real alpha, THTensor *t_, THTensor *k_,\n    long sdepth, long srow, long scol, const char *vf, const char *xc);\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THTensorCopy.c",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THTensorCopy.c\"\n#else\n\n/*\n * Tells THTensor_(copy) whether the blocked transpose fast path may be used:\n * tensor must be contiguous, src must be a 2D matrix laid out as the transpose\n * of a contiguous matrix (stride 0 is 1, stride 1 equals size 0), both must\n * hold the same number of elements, and there must be at least MIN_SZ of them.\n * Returns non-zero when all of that holds.\n */\nint THTensor_(copyTransposeValid)(THTensor *tensor, THTensor *src) {\n  const int MIN_SZ = 60 * 60;\n  return THTensor_(isContiguous)(tensor) &&\n         THTensor_(nDimension)(src) == 2 &&\n         THTensor_(stride)(src, 0) == 1 &&\n         THTensor_(stride)(src, 1) == THTensor_(size)(src, 0) &&\n         /* copyTranspose memcpy's every element of src into tensor, so a\n            smaller tensor would be overrun: require matching element counts */\n         THTensor_(nElement)(tensor) == THTensor_(nElement)(src) &&\n         THTensor_(nElement)(tensor) >= MIN_SZ;\n}\n\n// special case copy where tensor is contiguous and src is a transposed matrix\n// This can be generalized to most copies, but it's trickier\n// Works block by block through a BLOCK_SZ x BLOCK_SZ scratch tensor; callers\n// are expected to have checked copyTransposeValid first.\nvoid THTensor_(copyTranspose)(THTensor *tensor, THTensor *src) {\n  #define MIN(x, y) (((x) < (y)) ? (x) : (y))\n  #define MAX(x, y) (((x) > (y)) ? (x) : (y))\n\n#ifdef TH_REAL_IS_BYTE\n  const int BLOCK_SZ = 120;\n#else\n  const int BLOCK_SZ = 60;\n#endif\n\n  THTensor *buf = THTensor_(newWithSize2d)(BLOCK_SZ, BLOCK_SZ);\n  real *sp = THTensor_(data)(src);\n  real *rp = THTensor_(data)(tensor);\n  real *bp = THTensor_(data)(buf);\n\n  long NR = THTensor_(size)(src, 0);\n  long NC = THTensor_(size)(src, 1);\n  for (long R = 0; R < NR; R += BLOCK_SZ) {\n    for (long C = 0; C < NC; C += BLOCK_SZ) {\n      real *spo = sp + R + C * NR;\n      real *rpo = rp + C + R * NC;\n\n      int nr = MIN(NR - R, BLOCK_SZ);\n      int nc = MIN(NC - C, BLOCK_SZ);\n\n      // 1. copy columns from src to buf\n      for (int c = 0; c < nc; c++) {\n        memcpy(bp + c * BLOCK_SZ, spo + c * NR, nr * sizeof(real));\n      }\n\n      // 2. transpose buf in place\n      int rc_max = MAX(nr, nc);\n      int rc_min = MIN(nr, nc);\n      for (int r = 0; r < rc_max; r++) {\n        int end = MIN(r, rc_min);\n        for (int c = 0; c < end; c++) {\n          real tmp = bp[r + BLOCK_SZ * c];\n          bp[r + BLOCK_SZ * c] = bp[r * BLOCK_SZ + c];\n          bp[r * BLOCK_SZ + c] = tmp;\n        }\n      }\n\n      // 3. copy rows from buf to dst\n      for (int r = 0; r < nr; r++) {\n        memcpy(rpo + r * NC, bp + r * BLOCK_SZ, nc * sizeof(real));\n      }\n    }\n  }\n  THTensor_(free)(buf);\n  #undef MIN\n  #undef MAX\n}\n\n/*\n * Copies src into tensor. Does nothing when both are the same object.\n * Contiguous tensors with equal element counts are copied as one flat vector;\n * otherwise the transpose fast path is tried (not for half), and everything\n * else goes through the generic TH_TENSOR_APPLY2 element loop.\n */\nvoid THTensor_(copy)(THTensor *tensor, THTensor *src)\n{\n  if (tensor == src) return;\n  if (THTensor_(isContiguous)(tensor) && THTensor_(isContiguous)(src) && THTensor_(nElement)(tensor) == THTensor_(nElement)(src)) {\n    real *sp = THTensor_(data)(src);\n    real *rp = THTensor_(data)(tensor);\n    ptrdiff_t sz = THTensor_(nElement)(tensor);\n#ifndef TH_REAL_IS_HALF\n    THVector_(copy)(rp, sp, sz);\n#else\n    memcpy(rp, sp, sz * sizeof(real));\n#endif\n#ifndef TH_REAL_IS_HALF\n  } else if (THTensor_(copyTransposeValid)(tensor, src)) {\n    THTensor_(copyTranspose)(tensor, src);\n#endif\n  } else {\n    TH_TENSOR_APPLY2(real, tensor, real, src, *tensor_data = *src_data;)\n  }\n}\n\n/* copy from another tensor type, converting each element with a cast to real */\n#define IMPLEMENT_THTensor_COPY(TYPENAMESRC, TYPE_SRC) \\\nvoid THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \\\n{ \\\n  TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = (real)(*src_data);) \\\n}\n\n/* copy into a half tensor: each element goes through float and TH_float2half */\n#define IMPLEMENT_THTensor_COPY_TO_HALF(TYPENAMESRC, TYPE_SRC) \\\nvoid THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \\\n{ \\\n TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = TH_float2half((float)*src_data);) \\\n}\n\n/* copy from a half tensor: each element goes through TH_half2float, then a cast to real */\n#define IMPLEMENT_THTensor_COPY_FROM_HALF(TYPENAMESRC, TYPE_SRC) \\\nvoid THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \\\n{ \\\n TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = (real)TH_half2float(*src_data);) \\\n}\n\n/* half to half: plain element assignment, no conversion */\n#define IMPLEMENT_THTensor_COPY_TO_FROM_HALF(TYPENAMESRC, TYPE_SRC) \\\nvoid THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \\\n{ \\\n TH_TENSOR_APPLY2(real, tensor, TYPE_SRC, src, *tensor_data = *src_data;) \\\n}\n\n#ifndef TH_REAL_IS_HALF\nIMPLEMENT_THTensor_COPY(Byte, unsigned char)\nIMPLEMENT_THTensor_COPY(Char, char)\nIMPLEMENT_THTensor_COPY(Short, short)\nIMPLEMENT_THTensor_COPY(Int, int)\nIMPLEMENT_THTensor_COPY(Long, long)\nIMPLEMENT_THTensor_COPY(Float, float)\nIMPLEMENT_THTensor_COPY(Double, double)\nIMPLEMENT_THTensor_COPY_FROM_HALF(Half, THHalf)\n#else\n/* only allow pass-through for Half */\nIMPLEMENT_THTensor_COPY_TO_FROM_HALF(Half, THHalf)\nIMPLEMENT_THTensor_COPY_TO_HALF(Byte, unsigned char)\nIMPLEMENT_THTensor_COPY_TO_HALF(Char, char)\nIMPLEMENT_THTensor_COPY_TO_HALF(Short, short)\nIMPLEMENT_THTensor_COPY_TO_HALF(Int, int)\nIMPLEMENT_THTensor_COPY_TO_HALF(Long, long)\nIMPLEMENT_THTensor_COPY_TO_HALF(Float, float)\nIMPLEMENT_THTensor_COPY_TO_HALF(Double, double)\n\n#endif /* REAL_IS_HALF */\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THTensorCopy.h",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THTensorCopy.h\"\n#else\n\n/*\n * Copy entry points, including copies between different Tensor types.\n * THTensor_(copy) takes a source of this tensor type; every other function\n * takes a source tensor of the element type named in its suffix.\n */\n\n/* source has the same tensor type */\nTH_API void THTensor_(copy)(\n    THTensor *tensor, THTensor *src);\n\n/* source has another tensor type */\nTH_API void THTensor_(copyByte)(\n    THTensor *tensor, struct THByteTensor *src);\nTH_API void THTensor_(copyChar)(\n    THTensor *tensor, struct THCharTensor *src);\nTH_API void THTensor_(copyShort)(\n    THTensor *tensor, struct THShortTensor *src);\nTH_API void THTensor_(copyInt)(\n    THTensor *tensor, struct THIntTensor *src);\nTH_API void THTensor_(copyLong)(\n    THTensor *tensor, struct THLongTensor *src);\nTH_API void THTensor_(copyFloat)(\n    THTensor *tensor, struct THFloatTensor *src);\nTH_API void THTensor_(copyDouble)(\n    THTensor *tensor, struct THDoubleTensor *src);\nTH_API void THTensor_(copyHalf)(\n    THTensor *tensor, struct THHalfTensor *src);\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THTensorLapack.c",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THTensorLapack.c\"\n#else\n\n/*\nCheck if self is transpose of a contiguous matrix\n*/\nstatic int THTensor_(isTransposedContiguous)(THTensor *self)\n{\n  return self->stride[0] == 1 && self->stride[1] == self->size[0];\n}\n/*\nIf a matrix is a regular contiguous matrix, make sure it is transposed\nbecause this is what we return from Lapack calls.\n*/\nstatic void THTensor_(checkTransposed)(THTensor *self)\n{\n  if(THTensor_(isContiguous)(self))\n    THTensor_(transpose)(self, NULL, 0, 1);\n  return;\n}\n/*\nnewContiguous followed by transpose\nSimilar to (newContiguous), but checks if the transpose of the matrix\nis contiguous and also limited to 2D matrices.\n*/\nstatic THTensor *THTensor_(newTransposedContiguous)(THTensor *self)\n{\n  THTensor *tensor;\n  if(THTensor_(isTransposedContiguous)(self))\n  {\n    THTensor_(retain)(self);\n    tensor = self;\n  }\n  else\n  {\n    tensor = THTensor_(newContiguous)(self);\n    THTensor_(transpose)(tensor, NULL, 0, 1);\n  }\n\n  return tensor;\n}\n\n/*\nGiven the result tensor and src tensor, decide if the lapack call should use the\nprovided result tensor or should allocate a new space to put the result in.\n\nThe returned tensor have to be freed by the calling function.\n\nnrows is required, because some lapack calls, require output space smaller than\ninput space, like underdetermined gels.\n*/\nstatic THTensor *THTensor_(checkLapackClone)(THTensor *result, THTensor *src, int nrows)\n{\n  /* check if user wants to reuse src and if it is correct shape/size */\n  if (src == result && THTensor_(isTransposedContiguous)(src) && src->size[1] == nrows)\n    THTensor_(retain)(result);\n  else if(src == result || result == NULL) /* in this case, user wants reuse of src, but its structure is not OK */\n    result = THTensor_(new)();\n  else\n    THTensor_(retain)(result);\n  return result;\n}\n\n/*\nSame as cloneColumnMajor, but accepts nrows argument, 
because some lapack calls require\nthe resulting tensor to be larger than src.\n*/\nstatic THTensor *THTensor_(cloneColumnMajorNrows)(THTensor *self, THTensor *src, int nrows)\n{\n  THTensor *result;\n  THTensor *view;\n\n  if (src == NULL)\n    src = self;\n  result = THTensor_(checkLapackClone)(self, src, nrows);\n  if (src == result)\n    return result;\n\n  THTensor_(resize2d)(result, src->size[1], nrows);\n  THTensor_(checkTransposed)(result);\n\n  if (src->size[0] == nrows)\n    THTensor_(copy)(result, src);\n  else\n  {\n    view = THTensor_(newNarrow)(result, 0, 0, src->size[0]);\n    THTensor_(copy)(view, src);\n    THTensor_(free)(view);\n  }\n  return result;\n}\n\n/*\nCreate a clone of src in self column major order for use with Lapack.\nIf src == self, a new tensor is allocated, in any case, the return tensor should be\nfreed by calling function.\n*/\nstatic THTensor *THTensor_(cloneColumnMajor)(THTensor *self, THTensor *src)\n{\n  return THTensor_(cloneColumnMajorNrows)(self, src, src->size[0]);\n}\n\nvoid THTensor_(gesv)(THTensor *rb_, THTensor *ra_, THTensor *b, THTensor *a)\n{\n  int free_b = 0;\n  if (a == NULL) a = ra_;\n  if (b == NULL) b = rb_;\n  THArgCheck(a->nDimension == 2, 2, \"A should have 2 dimensions, but has %d\",\n      a->nDimension);\n  THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, \"B should have 1 or 2 \"\n      \"dimensions, but has %d\", b->nDimension);\n  THArgCheck(a->size[0] == a->size[1], 2, \"A should be square, but is %ldx%ld\",\n      a->size[0], a->size[1]);\n  THArgCheck(a->size[0] == b->size[0], 2, \"A,B size incompatible - A has %ld \"\n      \"rows, B has %ld\", a->size[0], b->size[0]);\n\n  if (b->nDimension == 1) {\n    b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0],\n            b->stride[0], 1, 0);\n    free_b = 1;\n  }\n\n  int n, nrhs, lda, ldb, info;\n  THIntTensor *ipiv;\n  THTensor *ra__;  // working version of A matrix to be passed into lapack GELS\n  THTensor *rb__;  // 
working version of B matrix to be passed into lapack GELS\n\n  ra__ = THTensor_(cloneColumnMajor)(ra_, a);\n  rb__ = THTensor_(cloneColumnMajor)(rb_, b);\n\n  n    = (int)ra__->size[0];\n  nrhs = (int)rb__->size[1];\n  lda  = n;\n  ldb  = n;\n\n  ipiv = THIntTensor_newWithSize1d((long)n);\n  THLapack_(gesv)(n, nrhs,\n\t\t  THTensor_(data)(ra__), lda, THIntTensor_data(ipiv),\n\t\t  THTensor_(data)(rb__), ldb, &info);\n\n  THLapackCheckWithCleanup(\"Lapack Error in %s : U(%d,%d) is zero, singular U.\",\n                           THCleanup(\n                               THTensor_(free)(ra__);\n                               THTensor_(free)(rb__);\n                               THIntTensor_free(ipiv);\n                               if (free_b) THTensor_(free)(b);),\n                           \"gesv\", info, info);\n\n  THTensor_(freeCopyTo)(ra__, ra_);\n  THTensor_(freeCopyTo)(rb__, rb_);\n  THIntTensor_free(ipiv);\n  if (free_b) THTensor_(free)(b);\n}\n\nvoid THTensor_(trtrs)(THTensor *rb_, THTensor *ra_, THTensor *b, THTensor *a,\n                      const char *uplo, const char *trans, const char *diag)\n{\n  int free_b = 0;\n  if (a == NULL) a = ra_;\n  if (b == NULL) b = rb_;\n  THArgCheck(a->nDimension == 2, 2, \"A should have 2 dimensions, but has %d\",\n      a->nDimension);\n  THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, \"B should have 1 or 2 \"\n      \"dimensions, but has %d\", b->nDimension);\n  THArgCheck(a->size[0] == a->size[1], 2, \"A should be square, but is %ldx%ld\",\n      a->size[0], a->size[1]);\n  THArgCheck(a->size[0] == b->size[0], 2, \"A,B size incompatible - A has %ld \"\n      \"rows, B has %ld\", a->size[0], b->size[0]);\n\n  if (b->nDimension == 1) {\n    b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0],\n            b->stride[0], 1, 0);\n    free_b = 1;\n  }\n\n  int n, nrhs, lda, ldb, info;\n  THTensor *ra__; // working version of A matrix to be passed into lapack TRTRS\n  THTensor *rb__; // 
working version of B matrix to be passed into lapack TRTRS\n\n  ra__ = THTensor_(cloneColumnMajor)(ra_, a);\n  rb__ = THTensor_(cloneColumnMajor)(rb_, b);\n\n  n    = (int)ra__->size[0];\n  nrhs = (int)rb__->size[1];\n  lda  = n;\n  ldb  = n;\n\n  THLapack_(trtrs)(uplo[0], trans[0], diag[0], n, nrhs,\n                   THTensor_(data)(ra__), lda,\n                   THTensor_(data)(rb__), ldb, &info);\n\n\n  THLapackCheckWithCleanup(\"Lapack Error in %s : A(%d,%d) is zero, singular A\",\n                           THCleanup(\n                              THTensor_(free)(ra__);\n                              THTensor_(free)(rb__);\n                              if (free_b) THTensor_(free)(b);),\n                           \"trtrs\", info, info);\n\n  THTensor_(freeCopyTo)(ra__, ra_);\n  THTensor_(freeCopyTo)(rb__, rb_);\n  if (free_b) THTensor_(free)(b);\n}\n\nvoid THTensor_(gels)(THTensor *rb_, THTensor *ra_, THTensor *b, THTensor *a)\n{\n  int free_b = 0;\n  // Note that a = NULL is interpreted as a = ra_, and b = NULL as b = rb_.\n  if (a == NULL) a = ra_;\n  if (b == NULL) b = rb_;\n  THArgCheck(a->nDimension == 2, 2, \"A should have 2 dimensions, but has %d\",\n      a->nDimension);\n  THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, \"B should have 1 or 2 \"\n      \"dimensions, but has %d\", b->nDimension);\n  THArgCheck(a->size[0] == b->size[0], 2, \"A,B size incompatible - A has %ld \"\n      \"rows, B has %ld\", a->size[0], b->size[0]);\n\n  if (b->nDimension == 1) {\n    b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0],\n            b->stride[0], 1, 0);\n    free_b = 1;\n  }\n\n  int m, n, nrhs, lda, ldb, info, lwork;\n  THTensor *work = NULL;\n  real wkopt = 0;\n\n  THTensor *ra__ = NULL;  // working version of A matrix to be passed into lapack GELS\n  THTensor *rb__ = NULL;  // working version of B matrix to be passed into lapack GELS\n\n  ra__ = THTensor_(cloneColumnMajor)(ra_, a);\n\n  m = ra__->size[0];\n  n = 
ra__->size[1];\n  lda = m;\n  ldb = (m > n) ? m : n;\n\n  rb__ = THTensor_(cloneColumnMajorNrows)(rb_, b, ldb);\n\n  nrhs = rb__->size[1];\n  info = 0;\n\n\n  /* get optimal workspace size */\n  THLapack_(gels)('N', m, n, nrhs, THTensor_(data)(ra__), lda,\n\t\t  THTensor_(data)(rb__), ldb,\n\t\t  &wkopt, -1, &info);\n  lwork = (int)wkopt;\n  work = THTensor_(newWithSize1d)(lwork);\n  THLapack_(gels)('N', m, n, nrhs, THTensor_(data)(ra__), lda,\n\t\t  THTensor_(data)(rb__), ldb,\n\t\t  THTensor_(data)(work), lwork, &info);\n\n  THLapackCheckWithCleanup(\"Lapack Error in %s : The %d-th diagonal element of the triangular factor of A is zero\",\n                           THCleanup(THTensor_(free)(ra__);\n                                     THTensor_(free)(rb__);\n                                     THTensor_(free)(work);\n                                     if (free_b) THTensor_(free)(b);),\n                           \"gels\", info,\"\");\n\n  /* rb__ is currently ldb by nrhs; resize it to n by nrhs */\n  rb__->size[0] = n;\n  if (rb__ != rb_)\n    THTensor_(resize2d)(rb_, n, nrhs);\n\n  THTensor_(freeCopyTo)(ra__, ra_);\n  THTensor_(freeCopyTo)(rb__, rb_);\n  THTensor_(free)(work);\n  if (free_b) THTensor_(free)(b);\n}\n\nvoid THTensor_(geev)(THTensor *re_, THTensor *rv_, THTensor *a_, const char *jobvr)\n{\n  int n, lda, lwork, info, ldvr;\n  THTensor *work, *wi, *wr, *a;\n  real wkopt;\n  real *rv_data;\n  long i;\n\n  THTensor *re__ = NULL;\n  THTensor *rv__ = NULL;\n\n  THArgCheck(a_->nDimension == 2, 1, \"A should be 2 dimensional\");\n  THArgCheck(a_->size[0] == a_->size[1], 1,\"A should be square\");\n\n  /* we want to definitely clone a_ for geev*/\n  a = THTensor_(cloneColumnMajor)(NULL, a_);\n\n  n = a->size[0];\n  lda = n;\n\n  wi = THTensor_(newWithSize1d)(n);\n  wr = THTensor_(newWithSize1d)(n);\n\n  rv_data = NULL;\n  ldvr = 1;\n  if (*jobvr == 'V')\n  {\n    THTensor_(resize2d)(rv_,n,n);\n    /* guard against someone passing a correct size, but 
wrong stride */\n    rv__ = THTensor_(newTransposedContiguous)(rv_);\n    rv_data = THTensor_(data)(rv__);\n    ldvr = n;\n  }\n  THTensor_(resize2d)(re_,n,2);\n  re__ = THTensor_(newContiguous)(re_);\n\n  /* get optimal workspace size */\n  THLapack_(geev)('N', jobvr[0], n, THTensor_(data)(a), lda, THTensor_(data)(wr), THTensor_(data)(wi),\n      NULL, 1, rv_data, ldvr, &wkopt, -1, &info);\n\n  lwork = (int)wkopt;\n  work = THTensor_(newWithSize1d)(lwork);\n\n  THLapack_(geev)('N', jobvr[0], n, THTensor_(data)(a), lda, THTensor_(data)(wr), THTensor_(data)(wi),\n      NULL, 1, rv_data, ldvr, THTensor_(data)(work), lwork, &info);\n\n  THLapackCheckWithCleanup(\" Lapack Error in %s : %d off-diagonal elements of an didn't converge to zero\",\n                           THCleanup(THTensor_(free)(re__);\n                                     THTensor_(free)(rv__);\n                                     THTensor_(free)(a);\n                                     THTensor_(free)(wi);\n                                     THTensor_(free)(wr);\n                                     THTensor_(free)(work);),\n                           \"geev\", info,\"\");\n\n  {\n    real *re_data = THTensor_(data)(re__);\n    real *wi_data = THTensor_(data)(wi);\n    real *wr_data = THTensor_(data)(wr);\n    for (i=0; i<n; i++)\n    {\n      re_data[2*i] = wr_data[i];\n      re_data[2*i+1] = wi_data[i];\n    }\n  }\n\n  if (*jobvr == 'V')\n  {\n    THTensor_(checkTransposed)(rv_);\n    THTensor_(freeCopyTo)(rv__, rv_);\n  }\n  THTensor_(freeCopyTo)(re__, re_);\n  THTensor_(free)(a);\n  THTensor_(free)(wi);\n  THTensor_(free)(wr);\n  THTensor_(free)(work);\n}\n\nvoid THTensor_(syev)(THTensor *re_, THTensor *rv_, THTensor *a, const char *jobz, const char *uplo)\n{\n  if (a == NULL) a = rv_;\n  THArgCheck(a->nDimension == 2, 1, \"A should be 2 dimensional\");\n  THArgCheck(a->size[0] == a->size[1], 1,\"A should be square\");\n\n  int n, lda, lwork, info;\n  THTensor *work;\n  real wkopt;\n\n  
THTensor *rv__ = NULL;\n  THTensor *re__ = NULL;\n\n  rv__ = THTensor_(cloneColumnMajor)(rv_, a);\n\n  n = rv__->size[0];\n  lda = n;\n\n  THTensor_(resize1d)(re_,n);\n  re__ = THTensor_(newContiguous)(re_);\n\n  /* get optimal workspace size */\n  THLapack_(syev)(jobz[0], uplo[0], n, THTensor_(data)(rv__), lda,\n\t\t  THTensor_(data)(re_), &wkopt, -1, &info);\n  lwork = (int)wkopt;\n  work = THTensor_(newWithSize1d)(lwork);\n  THLapack_(syev)(jobz[0], uplo[0], n, THTensor_(data)(rv__), lda,\n\t\t  THTensor_(data)(re_), THTensor_(data)(work), lwork, &info);\n\n  THLapackCheckWithCleanup(\"Lapack Error %s : %d off-diagonal elements didn't converge to zero\",\n                           THCleanup(THTensor_(free)(rv__);\n                                     THTensor_(free)(re__);\n                                     THTensor_(free)(work);),\n                           \"syev\", info,\"\");\n\n  THTensor_(freeCopyTo)(rv__, rv_);\n  THTensor_(freeCopyTo)(re__, re_);\n  THTensor_(free)(work);\n}\n\nvoid THTensor_(gesvd)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *a, const char* jobu)\n{\n  THTensor *ra_ = THTensor_(new)();\n  THTensor_(gesvd2)(ru_, rs_, rv_,  ra_, a, jobu);\n  THTensor_(free)(ra_);\n}\n\nvoid THTensor_(gesvd2)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *ra_, THTensor *a, const char* jobu)\n{\n  if (a == NULL) a = ra_;\n  THArgCheck(a->nDimension == 2, 1, \"A should be 2 dimensional\");\n\n  int k,m, n, lda, ldu, ldvt, lwork, info;\n  THTensor *work;\n  THTensor *rvf_ = THTensor_(new)();\n  real wkopt;\n\n  THTensor *ra__ = NULL;\n  THTensor *ru__ = NULL;\n  THTensor *rs__ = NULL;\n  THTensor *rv__ = NULL;\n\n  ra__ = THTensor_(cloneColumnMajor)(ra_, a);\n\n  m = ra__->size[0];\n  n = ra__->size[1];\n  k = (m < n ? 
m : n);\n\n  lda = m;\n  ldu = m;\n  ldvt = n;\n\n  THTensor_(resize1d)(rs_,k);\n  THTensor_(resize2d)(rvf_,ldvt,n);\n  if (*jobu == 'A')\n    THTensor_(resize2d)(ru_,m,ldu);\n  else\n    THTensor_(resize2d)(ru_,k,ldu);\n\n  THTensor_(checkTransposed)(ru_);\n\n  /* guard against someone passing a correct size, but wrong stride */\n  ru__ = THTensor_(newTransposedContiguous)(ru_);\n  rs__ = THTensor_(newContiguous)(rs_);\n  rv__ = THTensor_(newContiguous)(rvf_);\n\n  THLapack_(gesvd)(jobu[0],jobu[0],\n\t\t   m,n,THTensor_(data)(ra__),lda,\n\t\t   THTensor_(data)(rs__),\n\t\t   THTensor_(data)(ru__),\n\t\t   ldu,\n\t\t   THTensor_(data)(rv__), ldvt,\n\t\t   &wkopt, -1, &info);\n  lwork = (int)wkopt;\n  work = THTensor_(newWithSize1d)(lwork);\n  THLapack_(gesvd)(jobu[0],jobu[0],\n\t\t   m,n,THTensor_(data)(ra__),lda,\n\t\t   THTensor_(data)(rs__),\n\t\t   THTensor_(data)(ru__),\n\t\t   ldu,\n\t\t   THTensor_(data)(rv__), ldvt,\n\t\t   THTensor_(data)(work),lwork, &info);\n\n  THLapackCheckWithCleanup(\" Lapack Error %s : %d superdiagonals failed to converge.\",\n                           THCleanup(\n                               THTensor_(free)(ru__);\n                               THTensor_(free)(rs__);\n                               THTensor_(free)(rv__);\n                               THTensor_(free)(ra__);\n                               THTensor_(free)(work);),\n                           \"gesvd\", info,\"\");\n\n  if (*jobu == 'S')\n    THTensor_(narrow)(rv__,NULL,1,0,k);\n\n  THTensor_(freeCopyTo)(ru__, ru_);\n  THTensor_(freeCopyTo)(rs__, rs_);\n  THTensor_(freeCopyTo)(rv__, rvf_);\n  THTensor_(freeCopyTo)(ra__, ra_);\n  THTensor_(free)(work);\n\n  if (*jobu == 'S') {\n    THTensor_(narrow)(rvf_,NULL,1,0,k);\n  }\n  THTensor_(resizeAs)(rv_, rvf_);\n  THTensor_(copy)(rv_, rvf_);\n  THTensor_(free)(rvf_);\n}\n\nvoid THTensor_(getri)(THTensor *ra_, THTensor *a)\n{\n  if (a == NULL) a = ra_;\n  THArgCheck(a->nDimension == 2, 1, \"A should be 2 
dimensional\");\n  THArgCheck(a->size[0] == a->size[1], 1, \"A should be square\");\n\n  int m, n, lda, info, lwork;\n  real wkopt;\n  THIntTensor *ipiv;\n  THTensor *work;\n  THTensor *ra__ = NULL;\n\n  ra__ = THTensor_(cloneColumnMajor)(ra_, a);\n\n  m = ra__->size[0];\n  n = ra__->size[1];\n  lda = m;\n  ipiv = THIntTensor_newWithSize1d((long)m);\n\n  /* Run LU */\n  THLapack_(getrf)(n, n, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), &info);\n  THLapackCheckWithCleanup(\"Lapack Error %s : U(%d,%d) is 0, U is singular\",\n                           THCleanup(\n                               THTensor_(free)(ra__);\n                               THIntTensor_free(ipiv);),\n                           \"getrf\", info, info);\n\n  /* Run inverse */\n  THLapack_(getri)(n, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), &wkopt, -1, &info);\n  lwork = (int)wkopt;\n  work = THTensor_(newWithSize1d)(lwork);\n  THLapack_(getri)(n, THTensor_(data)(ra__), lda, THIntTensor_data(ipiv), THTensor_(data)(work), lwork, &info);\n  THLapackCheckWithCleanup(\"Lapack Error %s : U(%d,%d) is 0, U is singular\",\n                           THCleanup(\n                               THTensor_(free)(ra__);\n                               THTensor_(free)(work);\n                               THIntTensor_free(ipiv);),\n                           \"getri\", info, info);\n\n  THTensor_(freeCopyTo)(ra__, ra_);\n  THTensor_(free)(work);\n  THIntTensor_free(ipiv);\n}\n\nvoid THTensor_(clearUpLoTriangle)(THTensor *a, const char *uplo)\n{\n  THArgCheck(a->nDimension == 2, 1, \"A should be 2 dimensional\");\n  THArgCheck(a->size[0] == a->size[1], 1, \"A should be square\");\n\n  int n = a->size[0];\n\n  /* Build full matrix */\n  real *p = THTensor_(data)(a);\n  long i, j;\n\n  /* Upper Triangular Case */\n  if (uplo[0] == 'U')\n  {\n    /* Clear lower triangle (excluding diagonals) */\n    for (i=0; i<n; i++) {\n     for (j=i+1; j<n; j++) {\n        p[n*i + j] = 0;\n      }\n    }\n  }\n 
 /* Lower Triangular Case */\n  else if (uplo[0] == 'L')\n  {\n    /* Clear upper triangle (excluding diagonals) */\n    for (i=0; i<n; i++) {\n      for (j=0; j<i; j++) {\n        p[n*i + j] = 0;\n      }\n    }\n  }\n}\n\nvoid THTensor_(copyUpLoTriangle)(THTensor *a, const char *uplo)\n{\n  THArgCheck(a->nDimension == 2, 1, \"A should be 2 dimensional\");\n  THArgCheck(a->size[0] == a->size[1], 1, \"A should be square\");\n\n  int n = a->size[0];\n\n  /* Build full matrix */\n  real *p = THTensor_(data)(a);\n  long i, j;\n\n  /* Upper Triangular Case */\n  if (uplo[0] == 'U')\n  {\n    /* Copy upper triangle into lower triangle (excluding diagonals) */\n    for (i=0; i<n; i++) {\n     for (j=i+1; j<n; j++) {\n        p[n*i + j] = p[n*j+i];\n      }\n    }\n  }\n  /* Lower Triangular Case */\n  else if (uplo[0] == 'L')\n  {\n    /* Copy lower triangle into upper triangle (excluding diagonals) */\n    for (i=0; i<n; i++) {\n      for (j=0; j<i; j++) {\n        p[n*i + j] = p[n*j+i];\n      }\n    }\n  }\n}\n\nvoid THTensor_(potrf)(THTensor *ra_, THTensor *a, const char *uplo)\n{\n  if (a == NULL) a = ra_;\n  THArgCheck(a->nDimension == 2, 1, \"A should be 2 dimensional\");\n  THArgCheck(a->size[0] == a->size[1], 1, \"A should be square\");\n\n  int n, lda, info;\n  THTensor *ra__ = NULL;\n\n  ra__ = THTensor_(cloneColumnMajor)(ra_, a);\n\n  n = ra__->size[0];\n  lda = n;\n\n  /* Run Factorization */\n  THLapack_(potrf)(uplo[0], n, THTensor_(data)(ra__), lda, &info);\n  THLapackCheckWithCleanup(\"Lapack Error in %s : the leading minor of order %d is not positive definite\",\n                           THCleanup(THTensor_(free)(ra__);),\n                           \"potrf\", info, \"\");\n\n  THTensor_(clearUpLoTriangle)(ra__, uplo);\n  THTensor_(freeCopyTo)(ra__, ra_);\n}\n\nvoid THTensor_(potrs)(THTensor *rb_, THTensor *b, THTensor *a, const char *uplo)\n{\n  int free_b = 0;\n  if (b == NULL) b = rb_;\n\n  THArgCheck(a->nDimension == 2, 2, \"A should have 2 dimensions, but has %d\",\n      
a->nDimension);\n  THArgCheck(b->nDimension == 1 || b->nDimension == 2, 1, \"B should have 1 or 2 \"\n      \"dimensions, but has %d\", b->nDimension);\n  THArgCheck(a->size[0] == a->size[1], 2, \"A should be square, but is %ldx%ld\",\n      a->size[0], a->size[1]);\n  THArgCheck(a->size[0] == b->size[0], 2, \"A,B size incompatible - A has %ld \"\n      \"rows, B has %ld\", a->size[0], b->size[0]);\n\n  if (b->nDimension == 1) {\n    b = THTensor_(newWithStorage2d)(b->storage, b->storageOffset, b->size[0],\n            b->stride[0], 1, 0);\n    free_b = 1;\n  }\n\n  int n, nrhs, lda, ldb, info;\n  THTensor *ra__; // working version of A matrix to be passed into lapack TRTRS\n  THTensor *rb__; // working version of B matrix to be passed into lapack TRTRS\n\n  ra__ = THTensor_(cloneColumnMajor)(NULL, a);\n  rb__ = THTensor_(cloneColumnMajor)(rb_, b);\n\n  n    = (int)ra__->size[0];\n  nrhs = (int)rb__->size[1];\n  lda  = n;\n  ldb  = n;\n\n  THLapack_(potrs)(uplo[0], n, nrhs, THTensor_(data)(ra__),\n                   lda, THTensor_(data)(rb__), ldb, &info);\n\n\n  THLapackCheckWithCleanup(\"Lapack Error in %s : A(%d,%d) is zero, singular A\",\n                           THCleanup(\n                               THTensor_(free)(ra__);\n                               THTensor_(free)(rb__);\n                               if (free_b) THTensor_(free)(b);),\n                           \"potrs\", info, info);\n\n  if (free_b) THTensor_(free)(b);\n  THTensor_(free)(ra__);\n  THTensor_(freeCopyTo)(rb__, rb_);\n}\n\nvoid THTensor_(potri)(THTensor *ra_, THTensor *a, const char *uplo)\n{\n  if (a == NULL) a = ra_;\n  THArgCheck(a->nDimension == 2, 1, \"A should be 2 dimensional\");\n  THArgCheck(a->size[0] == a->size[1], 1, \"A should be square\");\n\n  int n, lda, info;\n  THTensor *ra__ = NULL;\n\n  ra__ = THTensor_(cloneColumnMajor)(ra_, a);\n\n  n = ra__->size[0];\n  lda = n;\n\n  /* Run inverse */\n  THLapack_(potri)(uplo[0], n, THTensor_(data)(ra__), lda, &info);\n  
THLapackCheckWithCleanup(\"Lapack Error %s : A(%d,%d) is 0, A cannot be factorized\",\n                           THCleanup(THTensor_(free)(ra__);),\n                           \"potri\", info, info);\n\n  THTensor_(copyUpLoTriangle)(ra__, uplo);\n  THTensor_(freeCopyTo)(ra__, ra_);\n}\n\n/*\n Computes the Cholesky factorization with complete pivoting of a real symmetric\n positive semidefinite matrix.\n\n Args:\n * `ra_`    - result Tensor in which to store the factor U or L from the\n              Cholesky factorization.\n * `rpiv_`  - result IntTensor containing sparse permutation matrix P, encoded\n              as P[rpiv_[k], k] = 1.\n * `a`      - input Tensor; the input matrix to factorize.\n * `uplo`   - string; specifies whether the upper or lower triangular part of\n              the symmetric matrix A is stored. \"U\"/\"L\" for upper/lower\n              triangular.\n * `tol`    - double; user defined tolerance, or < 0 for automatic choice.\n              The algorithm terminates when the pivot <= tol.\n */\nvoid THTensor_(pstrf)(THTensor *ra_, THIntTensor *rpiv_, THTensor *a, const char *uplo, real tol) {\n  THArgCheck(a->nDimension == 2, 1, \"A should be 2 dimensional\");\n  THArgCheck(a->size[0] == a->size[1], 1, \"A should be square\");\n\n  int n = a->size[0];\n\n  THTensor *ra__ = THTensor_(cloneColumnMajor)(ra_, a);\n  THIntTensor_resize1d(rpiv_, n);\n\n  // Allocate working tensor\n  THTensor *work = THTensor_(newWithSize1d)(2 * n);\n\n  // Run Cholesky factorization\n  int lda = n;\n  int rank, info;\n\n  THLapack_(pstrf)(uplo[0], n, THTensor_(data)(ra__), lda,\n                   THIntTensor_data(rpiv_), &rank, tol,\n                   THTensor_(data)(work), &info);\n\n  THLapackCheckWithCleanup(\"Lapack Error %s : matrix is rank deficient or not positive semidefinite\",\n                           THCleanup(\n                               THTensor_(free)(ra__);\n                               THTensor_(free)(work);),\n                         
  \"pstrf\", info,\"\");\n\n  THTensor_(clearUpLoTriangle)(ra__, uplo);\n\n  THTensor_(freeCopyTo)(ra__, ra_);\n  THTensor_(free)(work);\n}\n\n/*\n  Perform a QR decomposition of a matrix.\n\n  In LAPACK, two parts of the QR decomposition are implemented as two separate\n  functions: geqrf and orgqr. For flexibility and efficiency, these are wrapped\n  directly, below - but to make the common usage convenient, we also provide\n  this function, which calls them both and returns the results in a more\n  intuitive form.\n\n  Args:\n  * `rq_` - result Tensor in which to store the Q part of the decomposition.\n  * `rr_` - result Tensor in which to store the R part of the decomposition.\n  * `a`   - input Tensor; the matrix to decompose.\n\n*/\nvoid THTensor_(qr)(THTensor *rq_, THTensor *rr_, THTensor *a)\n{\n  int m = a->size[0];\n  int n = a->size[1];\n  int k = (m < n ? m : n);\n  THTensor *ra_ = THTensor_(new)();\n  THTensor *rtau_ = THTensor_(new)();\n  THTensor *rr__ = THTensor_(new)();\n  THTensor_(geqrf)(ra_, rtau_, a);\n  THTensor_(resize2d)(rr__, k, ra_->size[1]);\n  THTensor_(narrow)(rr__, ra_, 0, 0, k);\n  THTensor_(triu)(rr_, rr__, 0);\n  THTensor_(resize2d)(rq_, ra_->size[0], k);\n  THTensor_(orgqr)(rq_, ra_, rtau_);\n  THTensor_(narrow)(rq_, rq_, 1, 0, k);\n  THTensor_(free)(ra_);\n  THTensor_(free)(rtau_);\n  THTensor_(free)(rr__);\n}\n\n/*\n  The geqrf function does the main work of QR-decomposing a matrix.\n  However, rather than producing a Q matrix directly, it produces a sequence of\n  elementary reflectors which may later be composed to construct Q - for example\n  with the orgqr function, below.\n\n  Args:\n  * `ra_`   - Result matrix which will contain:\n              i)  The elements of R, on and above the diagonal.\n              ii) Directions of the reflectors implicitly defining Q.\n  * `rtau_` - Result tensor which will contain the magnitudes of the reflectors\n              implicitly defining Q.\n  * `a`     - Input matrix, to decompose. 
If NULL, `ra_` is used as input.\n\n  For further details, please see the LAPACK documentation.\n\n*/\nvoid THTensor_(geqrf)(THTensor *ra_, THTensor *rtau_, THTensor *a)\n{\n  if (a == NULL) a = ra_;\n  THArgCheck(a->nDimension == 2, 1, \"A should be 2 dimensional\");\n\n  THTensor *ra__ = NULL;\n\n  /* Prepare the input for LAPACK, making a copy if necessary. */\n  ra__ = THTensor_(cloneColumnMajor)(ra_, a);\n\n  int m = ra__->size[0];\n  int n = ra__->size[1];\n  int k = (m < n ? m : n);\n  int lda = m;\n  THTensor_(resize1d)(rtau_, k);\n\n  /* Dry-run to query the suggested size of the workspace. */\n  int info = 0;\n  real wkopt = 0;\n  THLapack_(geqrf)(m, n, THTensor_(data)(ra__), lda,\n                   THTensor_(data)(rtau_),\n                   &wkopt, -1, &info);\n\n  /* Allocate the workspace and call LAPACK to do the real work. */\n  int lwork = (int)wkopt;\n  THTensor *work = THTensor_(newWithSize1d)(lwork);\n  THLapack_(geqrf)(m, n, THTensor_(data)(ra__), lda,\n                   THTensor_(data)(rtau_),\n                   THTensor_(data)(work), lwork, &info);\n\n  THLapackCheckWithCleanup(\"Lapack Error %s : unknown Lapack error. info = %i\",\n                           THCleanup(\n                               THTensor_(free)(ra__);\n                               THTensor_(free)(work);),\n                           \"geqrf\", info,\"\");\n\n  THTensor_(freeCopyTo)(ra__, ra_);\n  THTensor_(free)(work);\n}\n\n/*\n  The orgqr function allows reconstruction of a matrix Q with orthogonal\n  columns, from a sequence of elementary reflectors, such as is produced by the\n  geqrf function.\n\n  Args:\n  * `ra_` - result Tensor, which will contain the matrix Q.\n  * `a`   - input Tensor, which should be a matrix with the directions of the\n            elementary reflectors below the diagonal. 
If NULL, `ra_` is used as\n            input.\n  * `tau` - input Tensor, containing the magnitudes of the elementary\n            reflectors.\n\n  For further details, please see the LAPACK documentation.\n\n*/\nvoid THTensor_(orgqr)(THTensor *ra_, THTensor *a, THTensor *tau)\n{\n  if (a == NULL) a = ra_;\n  THArgCheck(a->nDimension == 2, 1, \"A should be 2 dimensional\");\n\n  THTensor *ra__ = NULL;\n  ra__ = THTensor_(cloneColumnMajor)(ra_, a);\n\n  int m = ra__->size[0];\n  int n = ra__->size[1];\n  int k = tau->size[0];\n  int lda = m;\n\n  /* Dry-run to query the suggested size of the workspace. */\n  int info = 0;\n  real wkopt = 0;\n  THLapack_(orgqr)(m, k, k, THTensor_(data)(ra__), lda,\n                   THTensor_(data)(tau),\n                   &wkopt, -1, &info);\n\n  /* Allocate the workspace and call LAPACK to do the real work. */\n  int lwork = (int)wkopt;\n  THTensor *work = THTensor_(newWithSize1d)(lwork);\n  THLapack_(orgqr)(m, k, k, THTensor_(data)(ra__), lda,\n                   THTensor_(data)(tau),\n                   THTensor_(data)(work), lwork, &info);\n\n  THLapackCheckWithCleanup(\" Lapack Error %s : unknown Lapack error. info = %i\",\n                           THCleanup(\n                               THTensor_(free)(ra__);\n                               THTensor_(free)(work);),\n                           \"orgqr\", info,\"\");\n  THTensor_(freeCopyTo)(ra__, ra_);\n  THTensor_(free)(work);\n}\n\n/*\n  The ormqr function multiplies Q with another matrix from a sequence of\n  elementary reflectors, such as is produced by the geqrf function.\n\n  Args:\n  * `ra_`   - result Tensor, which will contain the matrix Q' c.\n  * `a`     - input Tensor, which should be a matrix with the directions of the\n              elementary reflectors below the diagonal. 
If NULL, `ra_` is used as\n              input.\n  * `tau`   - input Tensor, containing the magnitudes of the elementary\n              reflectors.\n  * `c`     - input Tensor, containing the matrix to be multiplied.\n  * `side`  - char, determining whether c is left- or right-multiplied with Q.\n  * `trans` - char, determining whether to transpose Q before multiplying.\n\n  For further details, please see the LAPACK documentation.\n\n*/\nvoid THTensor_(ormqr)(THTensor *ra_, THTensor *a, THTensor *tau, THTensor *c, const char *side, const char *trans)\n{\n  if (a == NULL) a = ra_;\n  THArgCheck(a->nDimension == 2, 1, \"A should be 2 dimensional\");\n\n  THTensor *ra__ = NULL;\n  ra__ = THTensor_(cloneColumnMajor)(ra_, c);\n\n  int m = c->size[0];\n  int n = c->size[1];\n  int k = tau->size[0];\n  int lda;\n  if (*side == 'L')\n  {\n    lda = m;\n  }\n  else\n  {\n    lda = n;\n  }\n  int ldc = m;\n\n  /* Dry-run to query the suggested size of the workspace. */\n  int info = 0;\n  real wkopt = 0;\n  THLapack_(ormqr)(side[0], trans[0], m, n, k, THTensor_(data)(a), lda,\n                   THTensor_(data)(tau), THTensor_(data)(ra__), ldc,\n                   &wkopt, -1, &info);\n\n  /* Allocate the workspace and call LAPACK to do the real work. */\n  int lwork = (int)wkopt;\n  THTensor *work = THTensor_(newWithSize1d)(lwork);\n  THLapack_(ormqr)(side[0], trans[0], m, n, k, THTensor_(data)(a), lda,\n                   THTensor_(data)(tau), THTensor_(data)(ra__), ldc,\n                   THTensor_(data)(work), lwork, &info);\n\n  THLapackCheckWithCleanup(\" Lapack Error %s : unknown Lapack error. 
info = %i\",\n                           THCleanup(\n                               THTensor_(free)(ra__);\n                               THTensor_(free)(work);),\n                           \"ormqr\", info,\"\");\n  THTensor_(freeCopyTo)(ra__, ra_);\n  THTensor_(free)(work);\n}\n\nvoid THTensor_(btrifact)(THTensor *ra_, THIntTensor *rpivots_, THIntTensor *rinfo_, int pivot, THTensor *a)\n{\n  THArgCheck(THTensor_(nDimension)(a) == 3, 1, \"expected 3D tensor, got %dD\", THTensor_(nDimension)(a));\n  if (!pivot) {\n    THError(\"btrifact without pivoting is not implemented on the CPU\");\n  }\n\n  if (ra_ != a) {\n    THTensor_(resizeAs)(ra_, a);\n    THTensor_(copy)(ra_, a);\n  }\n\n  int m = a->size[1];\n  int n = a->size[2];\n  if (m != n) {\n    THError(\"btrifact is only implemented for square matrices\");\n  }\n  long num_batches = THTensor_(size)(a, 0);\n  THTensor *ra__;\n  int lda;\n\n  if (ra_->stride[1] == 1) {\n    // column ordered, what BLAS wants\n    lda = ra_->stride[2];\n    ra__ = ra_;\n  } else {\n    // not column ordered, need to make it such (requires copy)\n    THTensor *transp_r_ = THTensor_(newTranspose)(ra_, 1, 2);\n    ra__ = THTensor_(newClone)(transp_r_);\n    THTensor_(free)(transp_r_);\n    THTensor_(transpose)(ra__, NULL, 1, 2);\n    lda = ra__->stride[2];\n  }\n\n  THTensor *ai = THTensor_(new)();\n  THTensor *rai = THTensor_(new)();\n  THIntTensor *rpivoti = THIntTensor_new();\n\n  int info = 0;\n  int *info_ptr = &info;\n  if (rinfo_) {\n    THIntTensor_resize1d(rinfo_, num_batches);\n    info_ptr = THIntTensor_data(rinfo_);\n  }\n\n  THIntTensor_resize2d(rpivots_, num_batches, n);\n\n  long batch = 0;\n  for (; batch < num_batches; ++batch) {\n    THTensor_(select)(ai, a, 0, batch);\n    THTensor_(select)(rai, ra__, 0, batch);\n    THIntTensor_select(rpivoti, rpivots_, 0, batch);\n\n    THLapack_(getrf)(n, n, THTensor_(data)(rai), lda,\n                     THIntTensor_data(rpivoti), info_ptr);\n    if (rinfo_) {\n      
info_ptr++;\n    } else if (info != 0) {\n      break;\n    }\n  }\n\n  THTensor_(free)(ai);\n  THTensor_(free)(rai);\n  THIntTensor_free(rpivoti);\n\n  if (ra__ != ra_) {\n    THTensor_(freeCopyTo)(ra__, ra_);\n  }\n\n  if (!rinfo_ && info != 0) {\n    THError(\"failed to factorize batch element %ld (info == %d)\", batch, info);\n  }\n}\n\nvoid THTensor_(btrisolve)(THTensor *rb_, THTensor *b, THTensor *atf, THIntTensor *pivots)\n{\n  THArgCheck(THTensor_(nDimension)(atf) == 3, 1, \"expected 3D tensor, got %dD\",\n             THTensor_(nDimension)(atf));\n  THArgCheck(THTensor_(nDimension)(b) == 3 ||\n             THTensor_(nDimension)(b) == 2, 4, \"expected 2D or 3D tensor\");\n  THArgCheck(THTensor_(size)(atf, 0) ==\n             THTensor_(size)(b, 0), 3, \"number of batches must be equal\");\n  THArgCheck(THTensor_(size)(atf, 1) ==\n             THTensor_(size)(atf, 2), 3, \"A matrices must be square\");\n  THArgCheck(THTensor_(size)(atf, 1) ==\n             THTensor_(size)(b, 1), 3, \"dimensions of A and b must be equal\");\n\n  if (rb_ != b) {\n    THTensor_(resizeAs)(rb_, b);\n    THTensor_(copy)(rb_, b);\n  }\n\n  long num_batches = atf->size[0];\n  long n = atf->size[1];\n  int nrhs = rb_->nDimension > 2 ? 
rb_->size[2] : 1;\n\n  int lda, ldb;\n  THTensor *atf_;\n  THTensor *rb__;\n\n  // correct ordering of A\n  if (atf->stride[1] == 1) {\n    // column ordered, what BLAS wants\n    lda = atf->stride[2];\n    atf_ = atf;\n  } else {\n    // not column ordered, need to make it such (requires copy)\n    // it would be nice if we could use the op(A) flags to automatically\n    // transpose A if needed, but this leads to unpredictable behavior if the\n    // user clones A_tf later with a different ordering\n    THTensor *transp_r_ = THTensor_(newTranspose)(atf, 1, 2);\n    atf_ = THTensor_(newClone)(transp_r_);\n    THTensor_(free)(transp_r_);\n    THTensor_(transpose)(atf_, NULL, 1, 2);\n    lda = atf_->stride[2];\n  }\n\n  // correct ordering of B\n  if (rb_->stride[1] == 1) {\n    // column ordered\n    if (rb_->nDimension == 2 || rb_->size[2] == 1) {\n      ldb = n;\n    } else {\n      ldb = rb_->stride[2];\n    }\n    rb__ = rb_;\n  } else {\n    // make column ordered\n    if (rb_->nDimension > 2) {\n      THTensor *transp_r_ = THTensor_(newTranspose)(rb_, 1, 2);\n      rb__ = THTensor_(newClone)(transp_r_);\n      THTensor_(free)(transp_r_);\n      THTensor_(transpose)(rb__, NULL, 1, 2);\n      ldb = rb__->stride[2];\n    } else {\n      rb__ = THTensor_(newClone)(rb_);\n      ldb = n;\n    }\n  }\n\n  THTensor *ai = THTensor_(new)();\n  THTensor *rbi = THTensor_(new)();\n  THIntTensor *pivoti = THIntTensor_new();\n\n  if (!THIntTensor_isContiguous(pivots)) {\n      THError(\"Error: rpivots_ is not contiguous.\");\n  }\n\n  for (long batch = 0; batch < num_batches; ++batch) {\n    THTensor_(select)(ai, atf_, 0, batch);\n    THTensor_(select)(rbi, rb__, 0, batch);\n    THIntTensor_select(pivoti, pivots, 0, batch);\n\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)\n    int info;\n    THLapack_(getrs)('N', n, nrhs, THTensor_(data)(ai), lda,\n                     THIntTensor_data(pivoti), THTensor_(data)(rbi),\n                     ldb, &info);\n    if 
(info != 0) {\n      THError(\"Error: Nonzero info.\");\n    }\n#else\n    THError(\"Unimplemented\");\n#endif\n  }\n\n  THTensor_(free)(ai);\n  THTensor_(free)(rbi);\n  THIntTensor_free(pivoti);\n\n  if (atf_ != atf) {\n    THTensor_(free)(atf_);\n  }\n\n  if (rb__ != rb_) {\n    THTensor_(freeCopyTo)(rb__, rb_);\n  }\n}\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THTensorLapack.h",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THTensorLapack.h\"\n#else\n\nTH_API void THTensor_(gesv)(THTensor *rb_, THTensor *ra_, THTensor *b_, THTensor *a_);\nTH_API void THTensor_(trtrs)(THTensor *rb_, THTensor *ra_, THTensor *b_, THTensor *a_, const char *uplo, const char *trans, const char *diag);\nTH_API void THTensor_(gels)(THTensor *rb_, THTensor *ra_, THTensor *b_, THTensor *a_);\nTH_API void THTensor_(syev)(THTensor *re_, THTensor *rv_, THTensor *a_, const char *jobz, const char *uplo);\nTH_API void THTensor_(geev)(THTensor *re_, THTensor *rv_, THTensor *a_, const char *jobvr);\nTH_API void THTensor_(gesvd)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *a, const char *jobu);\nTH_API void THTensor_(gesvd2)(THTensor *ru_, THTensor *rs_, THTensor *rv_, THTensor *ra_, THTensor *a, const char *jobu);\nTH_API void THTensor_(getri)(THTensor *ra_, THTensor *a);\nTH_API void THTensor_(potrf)(THTensor *ra_, THTensor *a, const char *uplo);\nTH_API void THTensor_(potrs)(THTensor *rb_, THTensor *b_, THTensor *a_,  const char *uplo);\nTH_API void THTensor_(potri)(THTensor *ra_, THTensor *a, const char *uplo);\nTH_API void THTensor_(qr)(THTensor *rq_, THTensor *rr_, THTensor *a);\nTH_API void THTensor_(geqrf)(THTensor *ra_, THTensor *rtau_, THTensor *a);\nTH_API void THTensor_(orgqr)(THTensor *ra_, THTensor *a, THTensor *tau);\nTH_API void THTensor_(ormqr)(THTensor *ra_, THTensor *a, THTensor *tau, THTensor *c, const char *side, const char *trans);\nTH_API void THTensor_(pstrf)(THTensor *ra_, THIntTensor *rpiv_, THTensor*a, const char* uplo, real tol);\n\nTH_API void THTensor_(btrifact)(THTensor *ra_, THIntTensor *rpivots_, THIntTensor *rinfo_, int pivot, THTensor *a);\nTH_API void THTensor_(btrisolve)(THTensor *rb_, THTensor *b, THTensor *atf, THIntTensor *pivots);\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THTensorMath.c",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THTensorMath.c\"\n#else\n\n#ifndef NAN\n  #define NAN (nan(NULL))\n#endif\n\n#ifdef _OPENMP\n#include <omp.h>\n#endif\n\n#define TH_OMP_OVERHEAD_THRESHOLD 100000\n\n#ifdef _OPENMP\n\n#ifndef _WIN32\n#define PRAGMA(P) _Pragma(#P)\n#else\n#define PRAGMA(P) __pragma(P)\n#endif\n\n#define TH_TENSOR_APPLY_CONTIG(TYPE, TENSOR, CODE) \\\n{ \\\n  ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR); \\\n  PRAGMA(omp parallel if (TH_TENSOR_size > TH_OMP_OVERHEAD_THRESHOLD)) \\\n  { \\\n    size_t num_threads = omp_get_num_threads(); \\\n    size_t tid = omp_get_thread_num(); \\\n    ptrdiff_t TH_TENSOR_offset = tid * (TH_TENSOR_size / num_threads); \\\n    ptrdiff_t TH_TENSOR_end = tid == num_threads - 1 ? TH_TENSOR_size : \\\n      TH_TENSOR_offset + TH_TENSOR_size / num_threads; \\\n    ptrdiff_t TENSOR##_len = TH_TENSOR_end - TH_TENSOR_offset; \\\n    TYPE *TENSOR##_data = THTensor_(data)(TENSOR) + TH_TENSOR_offset; \\\n    CODE \\\n  } \\\n}\n#else\n#define TH_TENSOR_APPLY_CONTIG(TYPE, TENSOR, CODE) \\\n{ \\\n  TYPE *TENSOR##_data = THTensor_(data)(TENSOR); \\\n  ptrdiff_t TENSOR##_len = THTensor_(nElement)(TENSOR); \\\n  CODE \\\n}\n#endif\n\n#ifdef _OPENMP\n#define TH_TENSOR_APPLY2_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \\\n{ \\\n  ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR1); \\\n  PRAGMA(omp parallel if (TH_TENSOR_size > TH_OMP_OVERHEAD_THRESHOLD)) \\\n  { \\\n    size_t num_threads = omp_get_num_threads(); \\\n    size_t tid = omp_get_thread_num(); \\\n    ptrdiff_t TH_TENSOR_offset = tid * (TH_TENSOR_size / num_threads); \\\n    ptrdiff_t TH_TENSOR_end = tid == num_threads - 1 ? 
TH_TENSOR_size : \\\n      TH_TENSOR_offset + TH_TENSOR_size / num_threads; \\\n    ptrdiff_t TENSOR1##_len = TH_TENSOR_end - TH_TENSOR_offset; \\\n    TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1) + TH_TENSOR_offset; \\\n    TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2) + TH_TENSOR_offset; \\\n    CODE \\\n  } \\\n}\n#else\n#define TH_TENSOR_APPLY2_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, CODE) \\\n{ \\\n  TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1); \\\n  TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2); \\\n  ptrdiff_t TENSOR1##_len = THTensor_(nElement)(TENSOR1); \\\n  CODE \\\n}\n#endif\n\n#ifdef _OPENMP\n#define TH_TENSOR_APPLY3_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \\\n{ \\\n  ptrdiff_t TH_TENSOR_size = THTensor_(nElement)(TENSOR1); \\\n  PRAGMA(omp parallel if (TH_TENSOR_size > TH_OMP_OVERHEAD_THRESHOLD)) \\\n  { \\\n    size_t num_threads = omp_get_num_threads(); \\\n    size_t tid = omp_get_thread_num(); \\\n    ptrdiff_t TH_TENSOR_offset = tid * (TH_TENSOR_size / num_threads); \\\n    ptrdiff_t TH_TENSOR_end = tid == num_threads - 1 ? 
TH_TENSOR_size : \\\n      TH_TENSOR_offset + TH_TENSOR_size / num_threads; \\\n    ptrdiff_t TENSOR1##_len = TH_TENSOR_end - TH_TENSOR_offset; \\\n    TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1) + TH_TENSOR_offset; \\\n    TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2) + TH_TENSOR_offset; \\\n    TYPE3 *TENSOR3##_data = THTensor_(data)(TENSOR3) + TH_TENSOR_offset; \\\n    CODE \\\n  } \\\n}\n#else\n#define TH_TENSOR_APPLY3_CONTIG(TYPE1, TENSOR1, TYPE2, TENSOR2, TYPE3, TENSOR3, CODE) \\\n{ \\\n  TYPE1 *TENSOR1##_data = THTensor_(data)(TENSOR1); \\\n  TYPE2 *TENSOR2##_data = THTensor_(data)(TENSOR2); \\\n  TYPE3 *TENSOR3##_data = THTensor_(data)(TENSOR3); \\\n  ptrdiff_t TENSOR1##_len = THTensor_(nElement)(TENSOR1); \\\n  CODE \\\n}\n#endif\n\nvoid THTensor_(fill)(THTensor *r_, real value)\n{\n  if (THTensor_(isContiguous)(r_) || THTensor_(isTransposed)(r_)) {\n    TH_TENSOR_APPLY_CONTIG(real, r_, THVector_(fill)(r__data, value, r__len););\n  } else {\n    TH_TENSOR_APPLY(real, r_,\n      if (r__stride == 1) {\n        THVector_(fill)(r__data, value, r__size);\n\tr__i = r__size;\n\tr__data += r__stride * r__size;\n\tbreak;\n      } else {\n        *r__data = value;\n      }\n      );\n  }\n}\n\nvoid THTensor_(zero)(THTensor *r_)\n{\n  THTensor_(fill)(r_, 0);\n}\n\nvoid THTensor_(maskedFill)(THTensor *tensor, THByteTensor *mask, real value)\n{\n  TH_TENSOR_APPLY2(real, tensor, unsigned char, mask,\n                   if (*mask_data > 1)\n                   {\n                     THFree(mask_counter);\n                     THFree(tensor_counter);\n                     THError(\"Mask tensor can take 0 and 1 values only\");\n                   }\n                   else if (*mask_data == 1)\n                   {\n                     *tensor_data = value;\n                   });\n}\n\nvoid THTensor_(maskedCopy)(THTensor *tensor, THByteTensor *mask, THTensor* src )\n{\n  THTensor *srct = THTensor_(newContiguous)(src);\n  real *src_data = THTensor_(data)(srct);\n  
ptrdiff_t cntr = 0;\n  ptrdiff_t nelem = THTensor_(nElement)(srct);\n  if (THTensor_(nElement)(tensor) != THByteTensor_nElement(mask))\n  {\n    THTensor_(free)(srct);\n    THError(\"Number of elements of destination tensor != Number of elements in mask\");\n  }\n  TH_TENSOR_APPLY2(real, tensor, unsigned char, mask,\n                   if (*mask_data > 1)\n                   {\n                     THTensor_(free)(srct);\n                     THFree(mask_counter);\n                     THFree(tensor_counter);\n                     THError(\"Mask tensor can take 0 and 1 values only\");\n                   }\n                   else if (*mask_data == 1)\n                   {\n                     if (cntr == nelem)\n                     {\n                       THTensor_(free)(srct);\n                       THFree(mask_counter);\n                       THFree(tensor_counter);\n                       THError(\"Number of elements of src < number of ones in mask\");\n                     }\n                     *tensor_data = *src_data;\n                     src_data++;\n                     cntr++;\n                   });\n  THTensor_(free)(srct);\n}\n\nvoid THTensor_(maskedSelect)(THTensor *tensor, THTensor *src, THByteTensor *mask)\n{\n  ptrdiff_t numel = THByteTensor_sumall(mask);\n  real *tensor_data;\n\n#ifdef DEBUG\n  THAssert(numel <= LONG_MAX);\n#endif\n  THTensor_(resize1d)(tensor,numel);\n  tensor_data = THTensor_(data)(tensor);\n  TH_TENSOR_APPLY2(real, src, unsigned char, mask,\n                   if (*mask_data > 1)\n                   {\n                     THFree(mask_counter);\n                     THFree(src_counter);\n                     THError(\"Mask tensor can take 0 and 1 values only\");\n                   }\n                   else if (*mask_data == 1)\n                   {\n                     *tensor_data = *src_data;\n                     tensor_data++;\n                   });\n}\n\n// Finds non-zero elements of a tensor and returns their 
subscripts\nvoid THTensor_(nonzero)(THLongTensor *subscript, THTensor *tensor)\n{\n  ptrdiff_t numel = 0;\n  long *subscript_data;\n  long i = 0;\n  long dim;\n  long div = 1;\n#ifdef TH_REAL_IS_HALF\n#define IS_NONZERO(val) ((val.x & 0x7fff) != 0)\n#else\n#define IS_NONZERO(val) ((val)!=0)\n#endif\n\n  /* First Pass to determine size of subscripts */\n  TH_TENSOR_APPLY(real, tensor,\n                  if IS_NONZERO(*tensor_data) {\n                    ++numel;\n                  });\n#ifdef DEBUG\n  THAssert(numel <= LONG_MAX);\n#endif\n  THLongTensor_resize2d(subscript, numel, tensor->nDimension);\n\n  /* Second pass populates subscripts */\n  subscript_data = THLongTensor_data(subscript);\n  TH_TENSOR_APPLY(real, tensor,\n                  if IS_NONZERO(*tensor_data) {\n                    div = 1;\n\n                    for (dim = tensor->nDimension - 1; dim >= 0; dim--) {\n                      *(subscript_data + dim) = (i/div) % tensor->size[dim];\n                      div *= tensor->size[dim];\n                    }\n\n                    subscript_data += tensor->nDimension;\n                  }\n                  ++i;);\n}\n\nvoid THTensor_(indexSelect)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index)\n{\n  ptrdiff_t i, numel;\n  THLongStorage *newSize;\n  THTensor *tSlice, *sSlice;\n  long *index_data;\n  real *tensor_data, *src_data;\n\n  THArgCheck(index->nDimension == 1, 3, \"Index is supposed to be a vector\");\n  THArgCheck(dim < src->nDimension, 4,\"Indexing dim %d is out of bounds of tensor\", dim + TH_INDEX_BASE);\n  THArgCheck(src->nDimension > 0,2,\"Source tensor is empty\");\n\n  numel = THLongTensor_nElement(index);\n\n  newSize = THLongStorage_newWithSize(src->nDimension);\n  THLongStorage_rawCopy(newSize,src->size);\n#ifdef DEBUG\n  THAssert(numel <= LONG_MAX);\n#endif\n  newSize->data[dim] = numel;\n  THTensor_(resize)(tensor,newSize,NULL);\n  THLongStorage_free(newSize);\n\n  index = THLongTensor_newContiguous(index);\n  
index_data = THLongTensor_data(index);\n\n  if (dim == 0 && THTensor_(isContiguous)(src) && THTensor_(isContiguous)(tensor))\n  {\n    tensor_data = THTensor_(data)(tensor);\n    src_data = THTensor_(data)(src);\n    ptrdiff_t rowsize = THTensor_(nElement)(src) / src->size[0];\n\n    // check that the indices are within range\n    long max = src->size[0] - 1 + TH_INDEX_BASE;\n    for (i=0; i<numel; i++) {\n      if (index_data[i] < TH_INDEX_BASE || index_data[i] > max) {\n        THLongTensor_free(index);\n        THError(\"index out of range\");\n      }\n    }\n\n    if (src->nDimension == 1) {\n      #pragma omp parallel for if(numel > TH_OMP_OVERHEAD_THRESHOLD) private(i)\n      for (i=0; i<numel; i++)\n        tensor_data[i] = src_data[index_data[i] - TH_INDEX_BASE];\n    } else {\n      #pragma omp parallel for if(numel*rowsize > TH_OMP_OVERHEAD_THRESHOLD) private(i)\n      for (i=0; i<numel; i++)\n        memcpy(tensor_data + i*rowsize, src_data + (index_data[i] - TH_INDEX_BASE)*rowsize, rowsize*sizeof(real));\n    }\n  }\n  else if (src->nDimension == 1)\n  {\n    for (i=0; i<numel; i++)\n      THTensor_(set1d)(tensor,i,THTensor_(get1d)(src,index_data[i] - TH_INDEX_BASE));\n  }\n  else\n  {\n    for (i=0; i<numel; i++)\n    {\n      tSlice = THTensor_(new)();\n      sSlice = THTensor_(new)();\n      THTensor_(select)(tSlice, tensor, dim, i);\n      THTensor_(select)(sSlice, src, dim, index_data[i] - TH_INDEX_BASE);\n      THTensor_(copy)(tSlice, sSlice);\n      THTensor_(free)(tSlice);\n      THTensor_(free)(sSlice);\n    }\n  }\n\n  THLongTensor_free(index);\n}\n\nvoid THTensor_(indexCopy)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)\n{\n  ptrdiff_t i, numel;\n  THTensor *tSlice, *sSlice;\n  long *index_data;\n\n  numel = THLongTensor_nElement(index);\n  THArgCheck(index->nDimension == 1, 3, \"Index is supposed to be a vector\");\n  THArgCheck(dim < src->nDimension, 4, \"Indexing dim %d is out of bounds of tensor\", dim + 
TH_INDEX_BASE);\n  THArgCheck(numel == src->size[dim],4,\"Number of indices should be equal to source:size(dim)\");\n\n  index = THLongTensor_newContiguous(index);\n  index_data = THLongTensor_data(index);\n\n  if (tensor->nDimension > 1 )\n  {\n    tSlice = THTensor_(new)();\n    sSlice = THTensor_(new)();\n\n    for (i=0; i<numel; i++)\n    {\n      THTensor_(select)(tSlice, tensor, dim, index_data[i] - TH_INDEX_BASE);\n      THTensor_(select)(sSlice, src, dim, i);\n      THTensor_(copy)(tSlice, sSlice);\n    }\n\n    THTensor_(free)(tSlice);\n    THTensor_(free)(sSlice);\n  }\n  else\n  {\n    for (i=0; i<numel; i++)\n    {\n      THTensor_(set1d)(tensor, index_data[i] - TH_INDEX_BASE, THTensor_(get1d)(src,i));\n    }\n  }\n  THLongTensor_free(index);\n}\n\nvoid THTensor_(indexAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)\n{\n  ptrdiff_t i, numel;\n  THTensor *tSlice, *sSlice;\n  long *index_data;\n\n  numel = THLongTensor_nElement(index);\n  THArgCheck(index->nDimension == 1, 3, \"Index is supposed to be a vector\");\n  THArgCheck(dim < src->nDimension, 4,\"Indexing dim %d is out of bounds of tensor\", dim + TH_INDEX_BASE);\n  THArgCheck(numel == src->size[dim],4,\"Number of indices should be equal to source:size(dim)\");\n\n  index = THLongTensor_newContiguous(index);\n  index_data = THLongTensor_data(index);\n\n  if (tensor->nDimension > 1)\n  {\n    tSlice = THTensor_(new)();\n    sSlice = THTensor_(new)();\n\n    for (i=0; i<numel; i++)\n    {\n      THTensor_(select)(tSlice, tensor, dim, index_data[i] - TH_INDEX_BASE);\n      THTensor_(select)(sSlice, src, dim, i);\n      THTensor_(cadd)(tSlice, tSlice, 1.0, sSlice);\n    }\n\n    THTensor_(free)(tSlice);\n    THTensor_(free)(sSlice);\n  }\n  else\n  {\n    for (i=0; i<numel; i++)\n    {\n      THTensor_(set1d)(tensor,\n              index_data[i] - TH_INDEX_BASE,\n              THTensor_(get1d)(src,i) + THTensor_(get1d)(tensor,index_data[i] - TH_INDEX_BASE));\n    }\n  }\n  
THLongTensor_free(index);\n}\n\nvoid THTensor_(indexFill)(THTensor *tensor, int dim, THLongTensor *index, real val)\n{\n  ptrdiff_t i, numel;\n  THTensor *tSlice;\n  long *index_data;\n\n  numel = THLongTensor_nElement(index);\n  THArgCheck(index->nDimension == 1, 3, \"Index is supposed to be a vector\");\n  THArgCheck(dim < tensor->nDimension, 4,\"Indexing dim %d is out of bounds of tensor\", dim + TH_INDEX_BASE);\n\n  index = THLongTensor_newContiguous(index);\n  index_data = THLongTensor_data(index);\n\n  for (i=0; i<numel; i++)\n  {\n    if (tensor->nDimension > 1)\n    {\n      tSlice = THTensor_(new)();\n      THTensor_(select)(tSlice, tensor,dim,index_data[i] - TH_INDEX_BASE);\n      THTensor_(fill)(tSlice, val);\n      THTensor_(free)(tSlice);\n    }\n    else\n    {\n      THTensor_(set1d)(tensor, index_data[i] - TH_INDEX_BASE, val);\n    }\n  }\n  THLongTensor_free(index);\n}\n\nvoid THTensor_(gather)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index)\n{\n  long elems_per_row, i, idx;\n\n  THArgCheck(THTensor_(nDimension)(src) == THTensor_(nDimension)(tensor), 2,\n             \"Input tensor must have same dimensions as output tensor\");\n  THArgCheck(dim < THTensor_(nDimension)(tensor), 3, \"Index dimension is out of bounds\");\n  THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(src), 4,\n             \"Index tensor must have same dimensions as input tensor\");\n\n  elems_per_row = THLongTensor_size(index, dim);\n\n  TH_TENSOR_DIM_APPLY3(real, tensor, real, src, long, index, dim,\n                       for (i = 0; i < elems_per_row; ++i)\n                       {\n                         idx = *(index_data + i*index_stride);\n                         if (idx < TH_INDEX_BASE || idx >= src_size + TH_INDEX_BASE)\n                         {\n                           THFree(TH_TENSOR_DIM_APPLY_counter);\n                           THError(\"Invalid index in gather\");\n                         }\n                         
*(tensor_data + i*tensor_stride) = src_data[(idx - TH_INDEX_BASE) * src_stride];\n                       })\n}\n\nvoid THTensor_(scatter)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)\n{\n  long elems_per_row, i, idx;\n\n  THArgCheck(dim < THTensor_(nDimension)(tensor), 2, \"Index dimension is out of bounds\");\n  THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(tensor), 3,\n             \"Index tensor must have same dimensions as output tensor\");\n  THArgCheck(THTensor_(nDimension)(src) == THTensor_(nDimension)(tensor), 4,\n             \"Input tensor must have same dimensions as output tensor\");\n\n  elems_per_row = THLongTensor_size(index, dim);\n\n  TH_TENSOR_DIM_APPLY3(real, tensor, real, src, long, index, dim,\n                       for (i = 0; i < elems_per_row; ++i)\n                       {\n                         idx = *(index_data + i*index_stride);\n                         if (idx < TH_INDEX_BASE || idx >= tensor_size + TH_INDEX_BASE)\n                         {\n                           THFree(TH_TENSOR_DIM_APPLY_counter);\n                           THError(\"Invalid index in scatter\");\n                         }\n                         tensor_data[(idx - TH_INDEX_BASE) * tensor_stride] = *(src_data + i*src_stride);\n                       })\n}\n\nvoid THTensor_(scatterAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src)\n{\n  long elems_per_row, i, idx;\n\n  THArgCheck(dim < THTensor_(nDimension)(tensor), 2, \"Index dimension is out of bounds\");\n  THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(tensor), 3,\n             \"Index tensor must have same dimensions as output tensor\");\n  THArgCheck(THTensor_(nDimension)(src) == THTensor_(nDimension)(tensor), 4,\n             \"Input tensor must have same dimensions as output tensor\");\n\n  elems_per_row = THLongTensor_size(index, dim);\n\n  TH_TENSOR_DIM_APPLY3(real, tensor, real, src, long, index, dim,\n                 
      for (i = 0; i < elems_per_row; ++i)\n                       {\n                         idx = *(index_data + i*index_stride);\n                         if (idx < TH_INDEX_BASE || idx >= tensor_size + TH_INDEX_BASE)\n                         {\n                           THFree(TH_TENSOR_DIM_APPLY_counter);\n                           THError(\"Invalid index in scatterAdd\");\n                         }\n                         tensor_data[(idx - TH_INDEX_BASE) * tensor_stride] += *(src_data + i*src_stride);\n                       })\n}\n\nvoid THTensor_(scatterFill)(THTensor *tensor, int dim, THLongTensor *index, real val)\n{\n  long elems_per_row, i, idx;\n\n  THArgCheck(dim < THTensor_(nDimension)(tensor), 2, \"Index dimension is out of bounds\");\n  THArgCheck(THLongTensor_nDimension(index) == THTensor_(nDimension)(tensor), 3,\n             \"Index tensor must have same dimensions as output tensor\");\n\n  elems_per_row = THLongTensor_size(index, dim);\n\n  TH_TENSOR_DIM_APPLY2(real, tensor, long, index, dim,\n                       for (i = 0; i < elems_per_row; ++i)\n                       {\n                         idx = *(index_data + i*index_stride);\n                         if (idx < TH_INDEX_BASE || idx >= tensor_size + TH_INDEX_BASE)\n                         {\n                           THFree(TH_TENSOR_DIM_APPLY_counter);\n                           THError(\"Invalid index in scatter\");\n                         }\n                         tensor_data[(idx - TH_INDEX_BASE) * tensor_stride] = val;\n                       })\n}\n\naccreal THTensor_(dot)(THTensor *tensor, THTensor *src)\n{\n  accreal sum = 0;\n  /* we use a trick here. careful with that. */\n  TH_TENSOR_APPLY2(real, tensor, real, src,\n                   long sz = (tensor_size-tensor_i < src_size-src_i ? 
tensor_size-tensor_i : src_size-src_i);\n                   sum += THBlas_(dot)(sz, src_data, src_stride, tensor_data, tensor_stride);\n                   tensor_i += sz;\n                   src_i += sz;\n                   tensor_data += sz*tensor_stride;\n                   src_data += sz*src_stride;\n                   break;);\n  return sum;\n}\n\n\n#undef th_isnan\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)\n#define th_isnan(val) \\\n(isnan(val))\n#else\n#define th_isnan(val) (0)\n#endif\n\n#undef th_isnan_break\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)\n#define th_isnan_break(val) \\\nif (isnan(val)) break;\n#else\n#define th_isnan_break(val)\n#endif\n\nreal THTensor_(minall)(THTensor *tensor)\n{\n  real theMin;\n  real value;\n\n  THArgCheck(tensor->nDimension > 0, 1, \"tensor must have one dimension\");\n  theMin = THTensor_(data)(tensor)[0];\n  TH_TENSOR_APPLY(real, tensor,\n                  value = *tensor_data;\n                  /* This is not the same as value<theMin in the case of NaNs */\n                  if(!(value >= theMin))\n                  {\n                    theMin = value;\n                    th_isnan_break(value)\n                  });\n  return theMin;\n}\n\nreal THTensor_(maxall)(THTensor *tensor)\n{\n  real theMax;\n  real value;\n\n  THArgCheck(tensor->nDimension > 0, 1, \"tensor must have one dimension\");\n  theMax = THTensor_(data)(tensor)[0];\n  TH_TENSOR_APPLY(real, tensor,\n                  value = *tensor_data;\n                  /* This is not the same as value>theMax in the case of NaNs */\n                  if(!(value <= theMax))\n                  {\n                    theMax = value;\n                    th_isnan_break(value)\n                  });\n  return theMax;\n}\n\nstatic void THTensor_(quickselectnoidx)(real *arr, long k, long elements, long stride);\n\nreal THTensor_(medianall)(THTensor *tensor)\n{\n  THArgCheck(tensor->nDimension > 0, 1, \"tensor must have one 
dimension\");\n\n  real theMedian;\n  ptrdiff_t numel;\n  long k;\n  THTensor *temp_;\n  real *temp__data;\n\n  numel = THTensor_(nElement)(tensor);\n  k = (numel-1) >> 1;\n\n  temp_ = THTensor_(newClone)(tensor);\n  temp__data = THTensor_(data)(temp_);\n\n  THTensor_(quickselectnoidx)(temp__data, k, numel, 1);\n\n  theMedian = temp__data[k];\n\n  THTensor_(free)(temp_);\n\n  return theMedian;\n}\n\naccreal THTensor_(sumall)(THTensor *tensor)\n{\n  accreal sum = 0;\n  TH_TENSOR_APPLY(real, tensor, sum += *tensor_data;);\n  return sum;\n}\n\naccreal THTensor_(prodall)(THTensor *tensor)\n{\n  accreal prod = 1;\n  TH_TENSOR_APPLY(real, tensor, prod *= *tensor_data;);\n  return prod;\n}\n\nvoid THTensor_(add)(THTensor *r_, THTensor *t, real value)\n{\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {\n    TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(adds)(r__data, t_data, value, r__len););\n  } else {\n    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data + value;);\n  }\n}\n\nvoid THTensor_(sub)(THTensor *r_, THTensor *t, real value)\n{\n  THTensor_(add)(r_, t, -value);\n}\n\nvoid THTensor_(mul)(THTensor *r_, THTensor *t, real value)\n{\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {\n    TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(muls)(r__data, t_data, value, r__len););\n  } else {\n    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data * value;);\n  }\n}\n\nvoid THTensor_(div)(THTensor *r_, THTensor *t, real value)\n{\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {\n    TH_TENSOR_APPLY2_CONTIG(real, r_, real, t, THVector_(divs)(r__data, t_data, value, r__len););\n  } else {\n    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data / 
value;);\n  }\n}\n\nvoid THTensor_(lshift)(THTensor *r_, THTensor *t, real value)\n{\n#if defined(TH_REAL_IS_FLOAT)\n  return THTensor_(mul)(r_, t, powf(2, value));\n#elif defined(TH_REAL_IS_DOUBLE)\n  return THTensor_(mul)(r_, t, pow(2, value));\n#elif defined(TH_REAL_IS_HALF)\n  return THError(\"lshift is not supported for torch.HalfTensor\");\n#else\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) &&\n      THTensor_(isContiguous)(t) &&\n      THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {\n      real *tp = THTensor_(data)(t);\n      real *rp = THTensor_(data)(r_);\n      long sz = THTensor_(nElement)(t);\n      long i;\n      #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i)\n      for (i=0; i<sz; i++) {\n#if defined(TH_REAL_IS_BYTE)\n          rp[i] = ((real) tp[i]) << value;\n#else\n          rp[i] = ((unsigned real) tp[i]) << value;\n#endif\n      }\n  } else {\n#if defined(TH_REAL_IS_BYTE)\n      TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (((real) *t_data) << value););\n#else\n      TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (((unsigned real) *t_data) << value););\n#endif\n  }\n#endif\n}\n\nvoid THTensor_(rshift)(THTensor *r_, THTensor *t, real value)\n{\n#if defined(TH_REAL_IS_FLOAT)\n  return THTensor_(div)(r_, t, powf(2, value));\n#elif defined(TH_REAL_IS_DOUBLE)\n  return THTensor_(div)(r_, t, pow(2, value));\n#elif defined(TH_REAL_IS_HALF)\n  return THError(\"rshift is not supported for torch.HalfTensor\");\n#else\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) &&\n      THTensor_(isContiguous)(t) &&\n      THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {\n      real *tp = THTensor_(data)(t);\n      real *rp = THTensor_(data)(r_);\n      long sz = THTensor_(nElement)(t);\n      long i;\n      #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i)\n      for (i=0; i<sz; i++) {\n#if defined(TH_REAL_IS_BYTE)\n          rp[i] = ((real) tp[i]) >> 
value;\n#else\n          rp[i] = ((unsigned real) tp[i]) >> value;\n#endif\n      }\n  } else {\n#if defined(TH_REAL_IS_BYTE)\n      TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (((real) *t_data) >> value););\n#else\n      TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (((unsigned real) *t_data) >> value););\n#endif\n  }\n#endif\n}\n\nvoid THTensor_(fmod)(THTensor *r_, THTensor *t, real value)\n{\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {\n\n      real *tp = THTensor_(data)(t);\n      real *rp = THTensor_(data)(r_);\n      ptrdiff_t sz = THTensor_(nElement)(t);\n      ptrdiff_t i;\n      #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)\n      for (i=0; i<sz; i++) {\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)\n          rp[i] = fmod(tp[i], value);\n#else\n          rp[i] = tp[i] % value;\n#endif\n      }\n  } else {\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)\n      TH_TENSOR_APPLY2(real, r_, real, t, *r__data = fmod(*t_data, value););\n#else\n      TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (*t_data % value););\n#endif\n  }\n}\n\nvoid THTensor_(remainder)(THTensor *r_, THTensor *t, real value)\n{\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {\n      real *tp = THTensor_(data)(t);\n      real *rp = THTensor_(data)(r_);\n      ptrdiff_t sz = THTensor_(nElement)(t);\n      ptrdiff_t i;\n      #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)\n      for (i=0; i<sz; i++) {\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)\n          rp[i] = (value == 0)? 
NAN : tp[i] - value * floor(tp[i] / value);\n#else\n          // There is no NAN for integers\n          rp[i] = tp[i] % value;\n          if (rp[i] * value < 0)\n            rp[i] += value;\n#endif\n      }\n  } else {\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)\n      TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (value == 0)? NAN : *t_data - value * floor(*t_data / value););\n#else\n       // There is no NAN for integers\n      TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data % value;\n                                          if (*r__data * value < 0) *r__data += value;);\n#endif\n  }\n}\n\nvoid THTensor_(bitand)(THTensor *r_, THTensor *t, real value)\n{\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)\n  return THError(\"bitand is only supported for integer type tensors\");\n#else\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) &&\n      THTensor_(isContiguous)(t) &&\n      THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {\n      real *tp = THTensor_(data)(t);\n      real *rp = THTensor_(data)(r_);\n      long sz = THTensor_(nElement)(t);\n      long i;\n      #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i)\n      for (i=0; i<sz; i++) {\n          rp[i] = tp[i] & value;\n      }\n  } else {\n      TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data & value;);\n  }\n#endif\n}\n\nvoid THTensor_(bitor)(THTensor *r_, THTensor *t, real value)\n{\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)\n  return THError(\"bitor is only supported for integer type tensors\");\n#else\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) &&\n      THTensor_(isContiguous)(t) &&\n      THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {\n      real *tp = THTensor_(data)(t);\n      real *rp = THTensor_(data)(r_);\n      long sz = THTensor_(nElement)(t);\n      long i;\n      #pragma omp parallel for if(sz > 
TH_OMP_OVERHEAD_THRESHOLD * 100) private(i)\n      for (i=0; i<sz; i++) {\n          rp[i] = tp[i] | value;\n      }\n  } else {\n      TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data | value;);\n  }\n#endif\n}\n\nvoid THTensor_(bitxor)(THTensor *r_, THTensor *t, real value)\n{\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)\n  return THError(\"bitxor is only supported for integer type tensors\");\n#else\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) &&\n      THTensor_(isContiguous)(t) &&\n      THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {\n      real *tp = THTensor_(data)(t);\n      real *rp = THTensor_(data)(r_);\n      long sz = THTensor_(nElement)(t);\n      long i;\n      #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD * 100) private(i)\n      for (i=0; i<sz; i++) {\n          rp[i] = tp[i] ^ value;\n      }\n  } else {\n      TH_TENSOR_APPLY2(real, r_, real, t, *r__data = *t_data ^ value;);\n  }\n#endif\n}\n\nvoid THTensor_(clamp)(THTensor *r_, THTensor *t, real min_value, real max_value)\n{\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {\n    real *tp = THTensor_(data)(t);\n    real *rp = THTensor_(data)(r_);\n    /* real t_val; */\n    ptrdiff_t sz = THTensor_(nElement)(t);\n    ptrdiff_t i;\n    #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)\n    for (i=0; i<sz; i++)\n      rp[i] = (tp[i] < min_value) ? min_value : (tp[i] > max_value ? max_value : tp[i]);\n  } else {\n    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = (*t_data < min_value) ? min_value : (*t_data > max_value ? 
max_value : *t_data););\n  }\n}\n\nvoid THTensor_(cadd)(THTensor *r_, THTensor *t, real value, THTensor *src)\n{\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) {\n    if(r_ == t) {\n      THBlas_(axpy)(THTensor_(nElement)(t), value, THTensor_(data)(src), 1, THTensor_(data)(r_), 1);\n    } else {\n      TH_TENSOR_APPLY3_CONTIG(real, r_, real, t, real, src, THVector_(cadd)(r__data, t_data, src_data, value, r__len););\n    }\n  } else {\n    TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data + value * *src_data;);\n  }\n}\n\nvoid THTensor_(csub)(THTensor *r_, THTensor *t, real value,THTensor *src)\n{\n  THTensor_(cadd)(r_, t, -value, src);\n}\n\nvoid THTensor_(cmul)(THTensor *r_, THTensor *t, THTensor *src)\n{\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) {\n    TH_TENSOR_APPLY3_CONTIG(real, r_, real, t, real, src, THVector_(cmul)(r__data, t_data, src_data, r__len););\n  } else {\n    TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data * *src_data;);\n  }\n}\n\nvoid THTensor_(cpow)(THTensor *r_, THTensor *t, THTensor *src)\n{\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) {\n    real *tp = THTensor_(data)(t);\n    real *sp = THTensor_(data)(src);\n    real *rp = THTensor_(data)(r_);\n    ptrdiff_t sz = THTensor_(nElement)(t);\n    ptrdiff_t i;\n    #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)\n    for (i=0; i<sz; i++)\n      rp[i] = pow(tp[i], sp[i]);\n  } else {\n    TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = pow(*t_data, *src_data););\n  }\n}\n\nvoid THTensor_(cdiv)(THTensor *r_, THTensor *t, 
THTensor *src)\n{\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) {\n    TH_TENSOR_APPLY3_CONTIG(real, r_, real, t, real, src, THVector_(cdiv)(r__data, t_data, src_data, r__len););\n  } else {\n    TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data / *src_data;);\n  }\n}\n\nvoid THTensor_(clshift)(THTensor *r_, THTensor *t, THTensor *src)\n{\n#if defined(TH_REAL_IS_HALF)\n  return THError(\"clshift is not supported for torch.HalfTensor\");\n#endif\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) &&\n      THTensor_(isContiguous)(t) &&\n      THTensor_(isContiguous)(src) &&\n      THTensor_(nElement)(r_) == THTensor_(nElement)(src)) {\n      real *tp = THTensor_(data)(t);\n      real *sp = THTensor_(data)(src);\n      real *rp = THTensor_(data)(r_);\n      ptrdiff_t sz = THTensor_(nElement)(t);\n      ptrdiff_t i;\n      #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)\n    for (i=0; i<sz; i++) {\n#if defined(TH_REAL_IS_FLOAT)\n      rp[i] = tp[i] * powf(2, sp[i]);\n#elif defined(TH_REAL_IS_DOUBLE)\n      rp[i] = tp[i] * pow(2, sp[i]);\n#elif defined(TH_REAL_IS_BYTE)\n      rp[i] = ((real) tp[i]) << sp[i];\n#else\n      rp[i] = ((unsigned real) tp[i]) << sp[i];\n#endif\n    }\n  } else {\n#if defined(TH_REAL_IS_FLOAT)\n      TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data * powf(2, *src_data););\n#elif defined(TH_REAL_IS_DOUBLE)\n      TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data * pow(2, *src_data););\n#elif defined(TH_REAL_IS_BYTE)\n      TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = ((real)*t_data) << *src_data;);\n#else\n      TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = ((unsigned real)*t_data) << *src_data;);\n#endif\n  }\n}\n\nvoid THTensor_(crshift)(THTensor *r_, THTensor *t, THTensor *src)\n{\n#if 
defined(TH_REAL_IS_HALF)\n  return THError(\"crshift is not supported for torch.HalfTensor\");\n#endif\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) &&\n      THTensor_(isContiguous)(t) &&\n      THTensor_(isContiguous)(src) &&\n      THTensor_(nElement)(r_) == THTensor_(nElement)(src)) {\n      real *tp = THTensor_(data)(t);\n      real *sp = THTensor_(data)(src);\n      real *rp = THTensor_(data)(r_);\n      ptrdiff_t sz = THTensor_(nElement)(t);\n      ptrdiff_t i;\n      #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)\n    for (i=0; i<sz; i++) {\n#if defined(TH_REAL_IS_FLOAT)\n      rp[i] = tp[i] / powf(2, sp[i]);\n#elif defined(TH_REAL_IS_DOUBLE)\n      rp[i] = tp[i] / pow(2, sp[i]);\n#elif defined(TH_REAL_IS_BYTE)\n      rp[i] = ((real) tp[i]) >> sp[i];\n#else\n      rp[i] = ((unsigned real) tp[i]) >> sp[i];\n#endif\n    }\n  } else {\n#if defined(TH_REAL_IS_FLOAT)\n      TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data / powf(2, *src_data););\n#elif defined(TH_REAL_IS_DOUBLE)\n      TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data / pow(2, *src_data););\n#elif defined(TH_REAL_IS_BYTE)\n      TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = ((real)*t_data) >> *src_data;);\n#else\n      TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = ((unsigned real)*t_data) >> *src_data;);\n#endif\n  }\n}\n\nvoid THTensor_(cfmod)(THTensor *r_, THTensor *t, THTensor *src)\n{\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) {\n      real *tp = THTensor_(data)(t);\n      real *sp = THTensor_(data)(src);\n      real *rp = THTensor_(data)(r_);\n      ptrdiff_t sz = THTensor_(nElement)(t);\n      ptrdiff_t i;\n      #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)\n      for (i=0; i<sz; i++) {\n#if defined(TH_REAL_IS_FLOAT) || 
defined(TH_REAL_IS_DOUBLE)\n          rp[i] = fmod(tp[i], sp[i]);\n#else\n          rp[i] = tp[i] % sp[i];\n#endif\n      }\n  } else {\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)\n      TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = fmod(*t_data, *src_data););\n#else\n      TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = (*t_data % *src_data););\n#endif\n\n  }\n}\n\nvoid THTensor_(cremainder)(THTensor *r_, THTensor *t, THTensor *src)\n{\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(isContiguous)(src) && THTensor_(nElement)(r_) == THTensor_(nElement)(src)) {\n      real *tp = THTensor_(data)(t);\n      real *sp = THTensor_(data)(src);\n      real *rp = THTensor_(data)(r_);\n      ptrdiff_t sz = THTensor_(nElement)(t);\n      ptrdiff_t i;\n      #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)\n      for (i=0; i<sz; i++) {\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)\n          rp[i] = (sp[i] == 0)? NAN : tp[i] - sp[i] * floor(tp[i] / sp[i]);\n#else\n          // There is no NAN for integers\n          rp[i] = tp[i] % sp[i];\n          if (rp[i] * sp[i] < 0)\n            rp[i] += sp[i];\n#endif\n      }\n  } else {\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)\n      TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = (*src_data == 0)? 
NAN : *t_data - *src_data * floor(*t_data / *src_data););\n#else\n      // There is no NAN for integers\n      TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data % *src_data;\n                                                     if (*r__data * *src_data < 0) *r__data += *src_data;);\n#endif\n\n  }\n}\n\nvoid THTensor_(cbitand)(THTensor *r_, THTensor *t, THTensor *src)\n{\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)\n  return THError(\"cbitand is only supported for integer type tensors\");\n#else\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) &&\n      THTensor_(isContiguous)(t) &&\n      THTensor_(isContiguous)(src) &&\n      THTensor_(nElement)(r_) == THTensor_(nElement)(src)) {\n      real *tp = THTensor_(data)(t);\n      real *sp = THTensor_(data)(src);\n      real *rp = THTensor_(data)(r_);\n      ptrdiff_t sz = THTensor_(nElement)(t);\n      ptrdiff_t i;\n      #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)\n    for (i=0; i<sz; i++) {\n      rp[i] = tp[i] & sp[i];\n    }\n  } else {\n      TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data & *src_data;);\n  }\n#endif\n}\n\nvoid THTensor_(cbitor)(THTensor *r_, THTensor *t, THTensor *src)\n{\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)\n  return THError(\"cbitor is only supported for integer type tensors\");\n#else\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) &&\n      THTensor_(isContiguous)(t) &&\n      THTensor_(isContiguous)(src) &&\n      THTensor_(nElement)(r_) == THTensor_(nElement)(src)) {\n      real *tp = THTensor_(data)(t);\n      real *sp = THTensor_(data)(src);\n      real *rp = THTensor_(data)(r_);\n      ptrdiff_t sz = THTensor_(nElement)(t);\n      ptrdiff_t i;\n      #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)\n    for (i=0; i<sz; i++) {\n      rp[i] = tp[i] | sp[i];\n    }\n  } else {\n    
  TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data | *src_data;);\n  }\n#endif\n}\n\nvoid THTensor_(cbitxor)(THTensor *r_, THTensor *t, THTensor *src)\n{\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_HALF)\n  return THError(\"cbitxor is only supported for integer type tensors\");\n#else\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) &&\n      THTensor_(isContiguous)(t) &&\n      THTensor_(isContiguous)(src) &&\n      THTensor_(nElement)(r_) == THTensor_(nElement)(src)) {\n      real *tp = THTensor_(data)(t);\n      real *sp = THTensor_(data)(src);\n      real *rp = THTensor_(data)(r_);\n      ptrdiff_t sz = THTensor_(nElement)(t);\n      ptrdiff_t i;\n      #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)\n    for (i=0; i<sz; i++) {\n      rp[i] = tp[i] ^ sp[i];\n    }\n  } else {\n      TH_TENSOR_APPLY3(real, r_, real, t, real, src, *r__data = *t_data ^ *src_data;);\n  }\n#endif\n}\n\nvoid THTensor_(tpow)(THTensor *r_, real value, THTensor *t)\n{\n  THTensor_(resizeAs)(r_, t);\n  if (THTensor_(isContiguous)(r_) && THTensor_(isContiguous)(t) && THTensor_(nElement)(r_) == THTensor_(nElement)(t)) {\n    real *tp = THTensor_(data)(t);\n    real *rp = THTensor_(data)(r_);\n    ptrdiff_t sz = THTensor_(nElement)(t);\n    ptrdiff_t i;\n    #pragma omp parallel for if(sz > TH_OMP_OVERHEAD_THRESHOLD) private(i)\n    for (i=0; i<sz; i++)\n      rp[i] = pow(value, tp[i]);\n  } else {\n    TH_TENSOR_APPLY2(real, r_, real, t, *r__data = pow(value, *t_data););\n  }\n}\n\nvoid THTensor_(addcmul)(THTensor *r_, THTensor *t, real value, THTensor *src1, THTensor *src2)\n{\n  if(r_ != t)\n  {\n    THTensor_(resizeAs)(r_, t);\n    THTensor_(copy)(r_, t);\n  }\n\n  TH_TENSOR_APPLY3(real, r_, real, src1, real, src2, *r__data += value * *src1_data * *src2_data;);\n}\n\n\nvoid THTensor_(addcdiv)(THTensor *r_, THTensor *t, real value, THTensor *src1, THTensor *src2)\n{\n  if(r_ != t)\n  {\n    
THTensor_(resizeAs)(r_, t);\n    THTensor_(copy)(r_, t);\n  }\n\n  TH_TENSOR_APPLY3(real, r_, real, src1, real, src2, *r__data += value * *src1_data / *src2_data;);\n}\n\nvoid THTensor_(addmv)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *mat, THTensor *vec)\n{\n  if( (mat->nDimension != 2) || (vec->nDimension != 1) )\n    THError(\"matrix and vector expected, got %dD, %dD\",\n      mat->nDimension, vec->nDimension);\n\n  if( mat->size[1] != vec->size[0] ) {\n    THDescBuff bm = THTensor_(sizeDesc)(mat);\n    THDescBuff bv = THTensor_(sizeDesc)(vec);\n    THError(\"size mismatch, %s, %s\", bm.str, bv.str);\n  }\n\n  if(t->nDimension != 1)\n    THError(\"vector expected, got t: %dD\", t->nDimension);\n\n  if(t->size[0] != mat->size[0]) {\n    THDescBuff bt = THTensor_(sizeDesc)(t);\n    THDescBuff bm = THTensor_(sizeDesc)(mat);\n    THError(\"size mismatch, t: %s, mat: %s\", bt.str, bm.str);\n  }\n\n  if(r_ != t)\n  {\n    THTensor_(resizeAs)(r_, t);\n    THTensor_(copy)(r_, t);\n  }\n\n  if(mat->stride[0] == 1)\n  {\n    THBlas_(gemv)('n', mat->size[0], mat->size[1],\n                  alpha, THTensor_(data)(mat), mat->stride[1],\n                  THTensor_(data)(vec), vec->stride[0],\n                  beta, THTensor_(data)(r_), r_->stride[0]);\n  }\n  else if(mat->stride[1] == 1)\n  {\n    THBlas_(gemv)('t',  mat->size[1], mat->size[0],\n                  alpha, THTensor_(data)(mat), mat->stride[0],\n                  THTensor_(data)(vec), vec->stride[0],\n                  beta, THTensor_(data)(r_), r_->stride[0]);\n  }\n  else\n  {\n    THTensor *cmat = THTensor_(newContiguous)(mat);\n\n    THBlas_(gemv)('t',  mat->size[1], mat->size[0],\n                  alpha, THTensor_(data)(cmat), cmat->stride[0],\n                  THTensor_(data)(vec), vec->stride[0],\n                  beta, THTensor_(data)(r_), r_->stride[0]);\n\n    THTensor_(free)(cmat);\n  }\n}\n\nvoid THTensor_(match)(THTensor *r_, THTensor *m1, THTensor *m2, real gain)\n{\n  long N1 
= m1->size[0];\n  long N2 = m2->size[0];\n  long dim;\n  real *m1_p;\n  real *m2_p;\n  real *r_p;\n  long i;\n\n  THTensor_(resize2d)(r_, N1, N2);\n\n  m1 = THTensor_(newContiguous)(m1);\n  m2 = THTensor_(newContiguous)(m2);\n\n  THTensor_(resize2d)(m1, N1, THTensor_(nElement)(m1) / N1);\n  THTensor_(resize2d)(m2, N2, THTensor_(nElement)(m2) / N2);\n\n  dim = m1->size[1];\n  THArgCheck(m1->size[1] == m2->size[1], 3, \"m1 and m2 must have the same inner vector dim\");\n\n  m1_p = THTensor_(data)(m1);\n  m2_p = THTensor_(data)(m2);\n  r_p = THTensor_(data)(r_);\n\n#pragma omp parallel for private(i)\n  for (i=0; i<N1; i++) {\n    long j,k;\n    for (j=0; j<N2; j++) {\n      real sum = 0;\n      for (k=0; k<dim; k++) {\n        real term = m1_p[ i*dim + k ] - m2_p[ j*dim + k ];\n        sum += term*term;\n      }\n      r_p[ i*N2 + j ] = gain * sum;\n    }\n  }\n\n  THTensor_(free)(m1);\n  THTensor_(free)(m2);\n}\n\nvoid THTensor_(addmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *m1, THTensor *m2)\n{\n  char transpose_r, transpose_m1, transpose_m2;\n  THTensor *r__, *m1_, *m2_;\n\n  if( (m1->nDimension != 2) || (m2->nDimension != 2))\n    THError(\"matrices expected, got %dD, %dD tensors\", m1->nDimension, m2->nDimension);\n\n  if(m1->size[1] != m2->size[0]) {\n    THDescBuff bm1 = THTensor_(sizeDesc)(m1);\n    THDescBuff bm2 = THTensor_(sizeDesc)(m2);\n    THError(\"size mismatch, m1: %s, m2: %s\", bm1.str, bm2.str);\n  }\n\n  if( t->nDimension != 2 )\n    THError(\"matrix expected, got %dD tensor for t\", t->nDimension);\n\n  if( (t->size[0] != m1->size[0]) || (t->size[1] != m2->size[1]) ) {\n    THDescBuff bt  = THTensor_(sizeDesc)(t);\n    THDescBuff bm1 = THTensor_(sizeDesc)(m1);\n    THDescBuff bm2 = THTensor_(sizeDesc)(m2);\n    THError(\"size mismatch, t: %s, m1: %s, m2: %s\", bt.str, bm1.str, bm2.str);\n  }\n\n  if(t != r_)\n  {\n    THTensor_(resizeAs)(r_, t);\n    if (beta != 0.0) {\n      THTensor_(copy)(r_, t);\n    }\n  }\n\n  /* r_ */\n 
 if(r_->stride[0] == 1 &&\n     r_->stride[1] != 0)\n  {\n    transpose_r = 'n';\n    r__ = r_;\n  }\n  else if(r_->stride[1] == 1 &&\n          r_->stride[0] != 0)\n  {\n    THTensor *swap = m2;\n    m2 = m1;\n    m1 = swap;\n    transpose_r = 't';\n    r__ = r_;\n  }\n  else\n  {\n    transpose_r = 'n';\n\n    THTensor *transp_r_ = THTensor_(newTranspose)(r_, 0, 1);\n    r__ = THTensor_(newClone)(transp_r_);\n    THTensor_(free)(transp_r_);\n    THTensor_(transpose)(r__, NULL, 0, 1);\n  }\n\n  /* m1 */\n  if(m1->stride[(transpose_r == 'n' ? 0 : 1)] == 1 &&\n     m1->stride[(transpose_r == 'n' ? 1 : 0)] != 0)\n  {\n    transpose_m1 = 'n';\n    m1_ = m1;\n  }\n  else if(m1->stride[(transpose_r == 'n' ? 1 : 0)] == 1 &&\n          m1->stride[(transpose_r == 'n' ? 0 : 1)] != 0)\n  {\n    transpose_m1 = 't';\n    m1_ = m1;\n  }\n  else\n  {\n    transpose_m1 = (transpose_r == 'n' ? 't' : 'n');\n    m1_ = THTensor_(newContiguous)(m1);\n  }\n\n  /* m2 */\n  if(m2->stride[(transpose_r == 'n' ? 0 : 1)] == 1 &&\n     m2->stride[(transpose_r == 'n' ? 1 : 0)] != 0)\n  {\n    transpose_m2 = 'n';\n    m2_ = m2;\n  }\n  else if(m2->stride[(transpose_r == 'n' ? 1 : 0)] == 1 &&\n          m2->stride[(transpose_r == 'n' ? 0 : 1)] != 0)\n  {\n    transpose_m2 = 't';\n    m2_ = m2;\n  }\n  else\n  {\n    transpose_m2 = (transpose_r == 'n' ? 't' : 'n');\n    m2_ = THTensor_(newContiguous)(m2);\n  }\n\n#pragma omp critical(blasgemm)\n  /* do the operation */\n  THBlas_(gemm)(transpose_m1,\n                transpose_m2,\n                r__->size[(transpose_r == 'n' ? 0 : 1)],\n                r__->size[(transpose_r == 'n' ? 1 : 0)],\n                m1_->size[(transpose_r == 'n' ? 1 : 0)],\n                alpha,\n                THTensor_(data)(m1_),\n                (transpose_m1 == 'n' ? m1_->stride[(transpose_r == 'n' ? 1 : 0)] : m1_->stride[(transpose_r == 'n' ? 0 : 1)]),\n                THTensor_(data)(m2_),\n                (transpose_m2 == 'n' ? 
m2_->stride[(transpose_r == 'n' ? 1 : 0)] : m2_->stride[(transpose_r == 'n' ? 0 : 1)]),\n                beta,\n                THTensor_(data)(r__),\n                r__->stride[(transpose_r == 'n' ? 1 : 0)]);\n\n  /* free intermediate variables */\n  if(m1_ != m1)\n    THTensor_(free)(m1_);\n\n  if(m2_ != m2)\n    THTensor_(free)(m2_);\n\n  if(r__ != r_)\n    THTensor_(freeCopyTo)(r__, r_);\n}\n\nvoid THTensor_(addr)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *vec1, THTensor *vec2)\n{\n  if( (vec1->nDimension != 1) || (vec2->nDimension != 1) )\n    THError(\"vector and vector expected, got %dD, %dD tensors\",\n        vec1->nDimension, vec2->nDimension);\n\n  if(t->nDimension != 2)\n    THError(\"expected matrix, got %dD tensor for t\", t->nDimension);\n\n  if( (t->size[0] != vec1->size[0]) || (t->size[1] != vec2->size[0]) ) {\n    THDescBuff bt  = THTensor_(sizeDesc)(t);\n    THDescBuff bv1 = THTensor_(sizeDesc)(vec1);\n    THDescBuff bv2 = THTensor_(sizeDesc)(vec2);\n    THError(\"size mismatch, t: %s, vec1: %s, vec2: %s\", bt.str, bv1.str, bv2.str);\n  }\n\n  if(r_ != t)\n  {\n    THTensor_(resizeAs)(r_, t);\n    THTensor_(copy)(r_, t);\n  }\n\n  if(beta == 0) {\n    THTensor_(zero)(r_);\n  }\n  else if(beta != 1)\n    THTensor_(mul)(r_, r_, beta);\n\n  if(r_->stride[0] == 1)\n  {\n    THBlas_(ger)(vec1->size[0], vec2->size[0],\n                 alpha, THTensor_(data)(vec1), vec1->stride[0],\n                 THTensor_(data)(vec2), vec2->stride[0],\n                 THTensor_(data)(r_), r_->stride[1]);\n  }\n  else if(r_->stride[1] == 1)\n  {\n    THBlas_(ger)(vec2->size[0], vec1->size[0],\n                 alpha, THTensor_(data)(vec2), vec2->stride[0],\n                 THTensor_(data)(vec1), vec1->stride[0],\n                 THTensor_(data)(r_), r_->stride[0]);\n  }\n  else\n  {\n    THTensor *cr = THTensor_(newClone)(r_);\n\n    THBlas_(ger)(vec2->size[0], vec1->size[0],\n                 alpha, THTensor_(data)(vec2), vec2->stride[0],\n     
            THTensor_(data)(vec1), vec1->stride[0],\n                 THTensor_(data)(cr), cr->stride[0]);\n\n    THTensor_(freeCopyTo)(cr, r_);\n  }\n}\n\nvoid THTensor_(addbmm)(THTensor *result, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2)\n{\n  long batch;\n\n  THArgCheck(THTensor_(nDimension)(batch1) == 3, 1, \"expected 3D tensor\");\n  THArgCheck(THTensor_(nDimension)(batch2) == 3, 2, \"expected 3D tensor\");\n  THArgCheck(THTensor_(size)(batch1, 0) == THTensor_(size)(batch2, 0), 2,\n             \"equal number of batches expected, got %d, %d\",\n             THTensor_(size)(batch1, 0), THTensor_(size)(batch2, 0));\n  THArgCheck(THTensor_(size)(batch1, 2) == THTensor_(size)(batch2, 1), 2,\n             \"wrong matrix size, batch1: %dx%d, batch2: %dx%d\",\n             THTensor_(size)(batch1, 1), THTensor_(size)(batch1,2),\n             THTensor_(size)(batch2, 1), THTensor_(size)(batch2,2));\n\n  long dim1 = THTensor_(size)(batch1, 1);\n  long dim2 = THTensor_(size)(batch2, 2);\n  THArgCheck(THTensor_(size)(t, 0) == dim1, 1, \"output tensor of incorrect size\");\n  THArgCheck(THTensor_(size)(t, 1) == dim2, 1, \"output tensor of incorrect size\");\n\n  if (t != result) {\n    THTensor_(resizeAs)(result, t);\n    if (beta != 0.0) {\n      THTensor_(copy)(result, t);\n    }\n  }\n\n  THTensor *matrix1 = THTensor_(new)();\n  THTensor *matrix2 = THTensor_(new)();\n\n  for (batch = 0; batch < THTensor_(size)(batch1, 0); ++batch) {\n    THTensor_(select)(matrix1, batch1, 0, batch);\n    THTensor_(select)(matrix2, batch2, 0, batch);\n\n    THTensor_(addmm)(result, beta, result, alpha, matrix1, matrix2);\n    beta = 1; // accumulate output once\n  }\n\n  THTensor_(free)(matrix1);\n  THTensor_(free)(matrix2);\n}\n\nvoid THTensor_(baddbmm)(THTensor *result, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2)\n{\n  long batch;\n\n  THArgCheck(THTensor_(nDimension)(batch1) == 3, 1, \"expected 3D tensor, got %dD\", 
THTensor_(nDimension)(batch1));\n  THArgCheck(THTensor_(nDimension)(batch2) == 3, 2, \"expected 3D tensor, got %dD\", THTensor_(nDimension)(batch2));\n  THArgCheck(THTensor_(size)(batch1, 0) == THTensor_(size)(batch2, 0), 2,\n             \"equal number of batches expected, got %d, %d\",\n             THTensor_(size)(batch1, 0), THTensor_(size)(batch2, 0));\n  THArgCheck(THTensor_(size)(batch1, 2) == THTensor_(size)(batch2, 1), 2,\n             \"wrong matrix size, batch1: %dx%d, batch2: %dx%d\",\n             THTensor_(size)(batch1, 1), THTensor_(size)(batch1, 2),\n             THTensor_(size)(batch2, 1), THTensor_(size)(batch2, 2));\n\n  long bs = THTensor_(size)(batch1, 0);\n  long dim1 = THTensor_(size)(batch1, 1);\n  long dim2 = THTensor_(size)(batch2, 2);\n  THArgCheck(THTensor_(size)(t, 0) == bs, 1,   \"output tensor of incorrect size\");\n  THArgCheck(THTensor_(size)(t, 1) == dim1, 1, \"output tensor of incorrect size\");\n  THArgCheck(THTensor_(size)(t, 2) == dim2, 1, \"output tensor of incorrect size\");\n\n  if (t != result) {\n    THTensor_(resizeAs)(result, t);\n    if (beta != 0.0) {\n      THTensor_(copy)(result, t);\n    }\n  }\n\n  THTensor *matrix1 = THTensor_(new)();\n  THTensor *matrix2 = THTensor_(new)();\n  THTensor *result_matrix = THTensor_(new)();\n\n  for (batch = 0; batch < THTensor_(size)(batch1, 0); ++batch) {\n    THTensor_(select)(matrix1, batch1, 0, batch);\n    THTensor_(select)(matrix2, batch2, 0, batch);\n    THTensor_(select)(result_matrix, result, 0, batch);\n\n    THTensor_(addmm)(result_matrix, beta, result_matrix, alpha, matrix1, matrix2);\n  }\n\n  THTensor_(free)(matrix1);\n  THTensor_(free)(matrix2);\n  THTensor_(free)(result_matrix);\n}\n\nptrdiff_t THTensor_(numel)(THTensor *t)\n{\n  return THTensor_(nElement)(t);\n}\n\nvoid THTensor_(max)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim)\n{\n  THLongStorage *dim;\n\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, 
\"dimension %d out of range\",\n      dimension + TH_INDEX_BASE);\n\n  dim = THTensor_(newSizeOf)(t);\n  THLongStorage_set(dim, dimension, 1);\n  THTensor_(resize)(values_, dim, NULL);\n  THLongTensor_resize(indices_, dim, NULL);\n  THLongStorage_free(dim);\n\n  // two implementations optimized for data locality\n  if (t->stride[dimension] == 1) {\n    real theMax;\n    real value;\n    long theIndex;\n    long i;\n    TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, indices_, dimension,\n                         theMax = t_data[0];\n                         theIndex = 0;\n\n                         for(i = 0; i < t_size; i++)\n                         {\n                           value = t_data[i*t_stride];\n                           /* This is not the same as value>theMax in the case of NaNs */\n                           if(!(value <= theMax))\n                           {\n                             theIndex = i;\n                             theMax = value;\n                             th_isnan_break(value)\n                           }\n                         }\n                         *indices__data = theIndex;\n                         *values__data = theMax;);\n  } else {\n    if (THTensor_(nDimension)(t) > 1) {\n      THTensor *t0 = THTensor_(newSelect)(t, dimension, 0);\n      THTensor_(copy)(values_, t0);\n      THTensor_(free)(t0);\n    } else {\n      THTensor_(fill)(values_, THTensor_(get1d)(t, 0));\n    }\n    THLongTensor_zero(indices_);\n\n    if(t->size[dimension] == 1) {\n      if (!keepdim) {\n        THTensor_(squeeze1d)(values_, values_, dimension);\n        THLongTensor_squeeze1d(indices_, indices_, dimension);\n      }\n      return;\n    }\n\n    THTensor *tempValues_ = THTensor_(newWithTensor)(values_);\n    // tempValues_.expand_as(t)\n    tempValues_->size[dimension] = t->size[dimension];\n    tempValues_->stride[dimension] = 0;\n\n    THLongTensor *tempIndices_ = THLongTensor_newWithTensor(indices_);\n    // 
tempIndices_.expand_as(t)\n    tempIndices_->size[dimension] = t->size[dimension];\n    tempIndices_->stride[dimension] = 0;\n\n    TH_TENSOR_APPLY3_D(real, t, real, tempValues_, long, tempIndices_, dimension,\n                          if(!(*t_data <= *tempValues__data) && !th_isnan(*tempValues__data)) {\n                            *tempValues__data = *t_data;\n                            *tempIndices__data = *tempIndices__dimOffset;\n                          });\n\n    THTensor_(free)(tempValues_);\n    THLongTensor_free(tempIndices_);\n  }\n\n  if (!keepdim) {\n    THTensor_(squeeze1d)(values_, values_, dimension);\n    THLongTensor_squeeze1d(indices_, indices_, dimension);\n  }\n}\n\nvoid THTensor_(min)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim)\n{\n  THLongStorage *dim;\n\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, \"dimension %d out of range\",\n      dimension + TH_INDEX_BASE);\n\n  dim = THTensor_(newSizeOf)(t);\n  THLongStorage_set(dim, dimension, 1);\n  THTensor_(resize)(values_, dim, NULL);\n  THLongTensor_resize(indices_, dim, NULL);\n  THLongStorage_free(dim);\n\n  // two implementations optimized for data locality\n  if (t->stride[dimension] == 1) {\n    real theMax;\n    real value;\n    long theIndex;\n    long i;\n    TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, indices_, dimension,\n                         theMax = t_data[0];\n                         theIndex = 0;\n\n                         for(i = 0; i < t_size; i++)\n                         {\n                           value = t_data[i*t_stride];\n                           /* This is not the same as value>theMax in the case of NaNs */\n                           if(!(value >= theMax))\n                           {\n                             theIndex = i;\n                             theMax = value;\n                             th_isnan_break(value)\n                           }\n                         }\n  
                       *indices__data = theIndex;\n                         *values__data = theMax;);\n  } else {\n    if (THTensor_(nDimension)(t) > 1) {\n      THTensor *t0 = THTensor_(newSelect)(t, dimension, 0);\n      THTensor_(copy)(values_, t0);\n      THTensor_(free)(t0);\n    } else {\n      THTensor_(fill)(values_, THTensor_(get1d)(t, 0));\n    }\n    THLongTensor_zero(indices_);\n\n    if(t->size[dimension] == 1) {\n      if (!keepdim) {\n        THTensor_(squeeze1d)(values_, values_, dimension);\n        THLongTensor_squeeze1d(indices_, indices_, dimension);\n      }\n      return;\n    }\n\n    THTensor *tempValues_ = THTensor_(newWithTensor)(values_);\n    // tempValues_.expand_as(t)\n    tempValues_->size[dimension] = t->size[dimension];\n    tempValues_->stride[dimension] = 0;\n\n    THLongTensor *tempIndices_ = THLongTensor_newWithTensor(indices_);\n    // tempIndices_.expand_as(t)\n    tempIndices_->size[dimension] = t->size[dimension];\n    tempIndices_->stride[dimension] = 0;\n\n    TH_TENSOR_APPLY3_D(real, t, real, tempValues_, long, tempIndices_, dimension,\n                          if(!(*t_data >= *tempValues__data) && !th_isnan(*tempValues__data)) {\n                            *tempValues__data = *t_data;\n                            *tempIndices__data = *tempIndices__dimOffset;\n                          });\n  }\n\n  if (!keepdim) {\n    THTensor_(squeeze1d)(values_, values_, dimension);\n    THLongTensor_squeeze1d(indices_, indices_, dimension);\n  }\n}\n\n\nvoid THTensor_(sum)(THTensor *r_, THTensor *t, int dimension, int keepdim)\n{\n  THLongStorage *dim;\n\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, \"dimension %d out of range\",\n      dimension + TH_INDEX_BASE);\n\n  dim = THTensor_(newSizeOf)(t);\n  THLongStorage_set(dim, dimension, 1);\n  THTensor_(resize)(r_, dim, NULL);\n  THLongStorage_free(dim);\n\n  // two implementations optimized for data locality\n  if (t->stride[dimension] == 1) {\n    
TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,\n                         accreal sum = 0;\n                         long i;\n                         for(i = 0; i < t_size; i++)\n                           sum += t_data[i*t_stride];\n                         *r__data = (real)sum;);\n  } else {\n    THTensor_(zero)(r_);\n    THTensor *temp_ = THTensor_(newWithTensor)(r_);\n    // r_.expand_as(t)\n    temp_->size[dimension] = t->size[dimension];\n    temp_->stride[dimension] = 0;\n\n    TH_TENSOR_APPLY2(real, temp_, real, t, *temp__data = *temp__data + *t_data;);\n    THTensor_(free)(temp_);\n  }\n\n  if (!keepdim) {\n    THTensor_(squeeze1d)(r_, r_, dimension);\n  }\n}\n\nvoid THTensor_(prod)(THTensor *r_, THTensor *t, int dimension, int keepdim)\n{\n  THLongStorage *dim;\n\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, \"dimension %d out of range\",\n      dimension + TH_INDEX_BASE);\n\n  dim = THTensor_(newSizeOf)(t);\n  THLongStorage_set(dim, dimension, 1);\n  THTensor_(resize)(r_, dim, NULL);\n  THLongStorage_free(dim);\n\n  // two implementations optimized for data locality\n  if (t->stride[dimension] == 1) {\n    TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,\n                         accreal prod = 1;\n                         long i;\n                         for(i = 0; i < t_size; i++)\n                           prod *= t_data[i*t_stride];\n                         *r__data = (real)prod;);\n  } else {\n    THTensor_(fill)(r_, 1);\n    THTensor *temp_ = THTensor_(newWithTensor)(r_);\n    // r_.expand_as(t)\n    temp_->size[dimension] = t->size[dimension];\n    temp_->stride[dimension] = 0;\n\n    TH_TENSOR_APPLY2(real, temp_, real, t, *temp__data = *temp__data * *t_data;);\n    THTensor_(free)(temp_);\n  }\n\n  if (!keepdim) {\n    THTensor_(squeeze1d)(r_, r_, dimension);\n  }\n}\n\nvoid THTensor_(cumsum)(THTensor *r_, THTensor *t, int dimension)\n{\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, 
\"dimension %d out of range\",\n      dimension + TH_INDEX_BASE);\n\n  THTensor_(resizeAs)(r_, t);\n\n  TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,\n                       accreal cumsum = 0;\n                       long i;\n                       for(i = 0; i < t_size; i++)\n                       {\n                         cumsum += t_data[i*t_stride];\n                         r__data[i*r__stride] = (real)cumsum;\n                       });\n}\n\nvoid THTensor_(cumprod)(THTensor *r_, THTensor *t, int dimension)\n{\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, \"dimension %d out of range\",\n      dimension + TH_INDEX_BASE);\n\n  THTensor_(resizeAs)(r_, t);\n\n  TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,\n                       accreal cumprod = 1;\n                       long i;\n                       for(i = 0; i < t_size; i++)\n                       {\n                         cumprod *= t_data[i*t_stride];\n                         r__data[i*r__stride] = (real)cumprod;\n                       });\n}\n\n\nvoid THTensor_(sign)(THTensor *r_, THTensor *t)\n{\n  THTensor_(resizeAs)(r_, t);\n\n#if defined (TH_REAL_IS_BYTE)\n  TH_TENSOR_APPLY2(real, r_, real, t,\n    if (*t_data > 0) *r__data = 1;\n    else *r__data = 0;);\n#else\n  TH_TENSOR_APPLY2(real, r_, real, t,\n    if (*t_data > 0) *r__data = 1;\n    else if (*t_data < 0) *r__data = -1;\n    else *r__data = 0;);\n#endif\n}\n\n\naccreal THTensor_(trace)(THTensor *t)\n{\n  real *t_data = THTensor_(data)(t);\n  accreal sum = 0;\n  long i = 0;\n  long t_stride_0, t_stride_1, t_diag_size;\n\n  THArgCheck(THTensor_(nDimension)(t) == 2, 1, \"expected a matrix\");\n\n  t_stride_0 = THTensor_(stride)(t, 0);\n  t_stride_1 = THTensor_(stride)(t, 1);\n  t_diag_size = THMin(THTensor_(size)(t, 0), THTensor_(size)(t, 1));\n  while(i < t_diag_size)\n  {\n    sum += t_data[i*(t_stride_0+t_stride_1)];\n    i++;\n  }\n\n  return sum;\n}\n\nvoid THTensor_(cross)(THTensor *r_, 
THTensor *a, THTensor *b, int dimension)\n{\n  int i;\n\n  if(THTensor_(nDimension)(a) != THTensor_(nDimension)(b))\n    THError(\"inconsistent tensor dimension %dD, %dD\",\n        THTensor_(nDimension)(a), THTensor_(nDimension)(b));\n\n  for(i = 0; i < THTensor_(nDimension)(a); i++)\n  {\n    if(THTensor_(size)(a, i) != THTensor_(size)(b, i)) {\n        THDescBuff ba = THTensor_(sizeDesc)(a);\n        THDescBuff bb = THTensor_(sizeDesc)(b);\n        THError(\"inconsistent tensor sizes %s, %s\", ba.str, bb.str);\n    }\n  }\n\n  if(dimension < 0)\n  {\n    for(i = 0; i < THTensor_(nDimension)(a); i++)\n    {\n      if(THTensor_(size)(a, i) == 3)\n      {\n        dimension = i;\n        break;\n      }\n    }\n    if(dimension < 0) {\n      THDescBuff ba = THTensor_(sizeDesc)(a);\n      THError(\"no dimension of size 3 in a: %s\", ba.str);\n    }\n  }\n\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(a), 3, \"dimension %d out of range\",\n      dimension + TH_INDEX_BASE);\n  THArgCheck(THTensor_(size)(a, dimension) == 3, 3, \"dimension %d does not have size 3\",\n      dimension + TH_INDEX_BASE);\n\n  THTensor_(resizeAs)(r_, a);\n\n  TH_TENSOR_DIM_APPLY3(real, a, real, b, real, r_, dimension,\n                       r__data[0*r__stride] = a_data[1*a_stride]*b_data[2*b_stride] - a_data[2*a_stride]*b_data[1*b_stride];\n                       r__data[1*r__stride] = a_data[2*a_stride]*b_data[0*b_stride] - a_data[0*a_stride]*b_data[2*b_stride];\n                       r__data[2*r__stride] = a_data[0*a_stride]*b_data[1*b_stride] - a_data[1*a_stride]*b_data[0*b_stride];);\n}\n\nvoid THTensor_(cmax)(THTensor *r, THTensor *t, THTensor *src) {\n  THTensor_(resizeAs)(r, t);\n  TH_TENSOR_APPLY3(real, r, real, t, real, src,\n                   *r_data = *t_data > *src_data ? 
*t_data : *src_data;);\n}\n\nvoid THTensor_(cmin)(THTensor *r, THTensor *t, THTensor *src) {\n  THTensor_(resizeAs)(r, t);\n  TH_TENSOR_APPLY3(real, r, real, t, real, src,\n                   *r_data = *t_data < *src_data ? *t_data : *src_data;);\n}\n\nvoid THTensor_(cmaxValue)(THTensor *r, THTensor *t, real value) {\n  THTensor_(resizeAs)(r, t);\n  TH_TENSOR_APPLY2(real, r, real, t,\n                   *r_data = *t_data > value ? *t_data : value;);\n}\n\nvoid THTensor_(cminValue)(THTensor *r, THTensor *t, real value) {\n  THTensor_(resizeAs)(r, t);\n  TH_TENSOR_APPLY2(real, r, real, t,\n                   *r_data = *t_data < value ? *t_data : value;);\n}\n\nvoid THTensor_(zeros)(THTensor *r_, THLongStorage *size)\n{\n  THTensor_(resize)(r_, size, NULL);\n  THTensor_(zero)(r_);\n}\n\nvoid THTensor_(zerosLike)(THTensor *r_, THTensor *input)\n{\n  THTensor_(resizeAs)(r_, input);\n  THTensor_(zero)(r_);\n}\n\nvoid THTensor_(onesLike)(THTensor *r_, THTensor *input)\n{\n  THTensor_(resizeAs)(r_, input);\n  THTensor_(fill)(r_, 1);\n}\n\nvoid THTensor_(ones)(THTensor *r_, THLongStorage *size)\n{\n  THTensor_(resize)(r_, size, NULL);\n  THTensor_(fill)(r_, 1);\n}\n\nvoid THTensor_(diag)(THTensor *r_, THTensor *t, int k)\n{\n  THArgCheck(THTensor_(nDimension)(t) == 1 || THTensor_(nDimension)(t) == 2, 1, \"matrix or a vector expected\");\n\n  if(THTensor_(nDimension)(t) == 1)\n  {\n    real *t_data = THTensor_(data)(t);\n    long t_stride_0 = THTensor_(stride)(t, 0);\n    long t_size = THTensor_(size)(t, 0);\n    long sz = t_size + (k >= 0 ? k : -k);\n    real *r__data;\n    long r__stride_0;\n    long r__stride_1;\n    long i;\n\n    THTensor_(resize2d)(r_, sz, sz);\n    THTensor_(zero)(r_);\n    r__data = THTensor_(data)(r_);\n    r__stride_0 = THTensor_(stride)(r_, 0);\n    r__stride_1 = THTensor_(stride)(r_, 1);\n    r__data += (k >= 0 ? 
k*r__stride_1 : -k*r__stride_0);\n\n    for(i = 0; i < t_size; i++)\n      r__data[i*(r__stride_0+r__stride_1)] = t_data[i*t_stride_0];\n  }\n  else\n  {\n    real *t_data = THTensor_(data)(t);\n    long t_stride_0 = THTensor_(stride)(t, 0);\n    long t_stride_1 = THTensor_(stride)(t, 1);\n    long sz;\n    real *r__data;\n    long r__stride_0;\n    long i;\n\n    if(k >= 0)\n      sz = THMin(THTensor_(size)(t, 0), THTensor_(size)(t, 1)-k);\n    else\n      sz = THMin(THTensor_(size)(t, 0)+k, THTensor_(size)(t, 1));\n    THTensor_(resize1d)(r_, sz);\n    r__data = THTensor_(data)(r_);\n    r__stride_0 = THTensor_(stride)(r_, 0);\n\n    t_data += (k >= 0 ? k*t_stride_1 : -k*t_stride_0);\n    for(i = 0; i < sz; i++)\n      r__data[i*r__stride_0] = t_data[i*(t_stride_0+t_stride_1)];\n  }\n}\n\nvoid THTensor_(eye)(THTensor *r_, long n, long m)\n{\n  real *r__data;\n  long i, sz;\n\n  THArgCheck(n > 0, 1, \"invalid argument\");\n\n  if(m <= 0)\n    m = n;\n\n  THTensor_(resize2d)(r_, n, m);\n  THTensor_(zero)(r_);\n\n  i = 0;\n  r__data = THTensor_(data)(r_);\n  sz = THMin(THTensor_(size)(r_, 0), THTensor_(size)(r_, 1));\n  for(i = 0; i < sz; i++)\n    r__data[i*(r_->stride[0]+r_->stride[1])] = 1;\n}\n\n\nvoid THTensor_(range)(THTensor *r_, accreal xmin, accreal xmax, accreal step)\n{\n  ptrdiff_t size;\n  real i = 0;\n\n  THArgCheck(step > 0 || step < 0, 3, \"step must be a non-null number\");\n  THArgCheck(((step > 0) && (xmax >= xmin)) || ((step < 0) && (xmax <= xmin))\n              , 2, \"upper bound and larger bound incoherent with step sign\");\n\n  size = (ptrdiff_t) (((xmax - xmin) / step) + 1);\n\n  if (THTensor_(nElement)(r_) != size) {\n    THTensor_(resize1d)(r_, size);\n  }\n\n  TH_TENSOR_APPLY(real, r_, *r__data = xmin + (i++)*step;);\n}\n\nvoid THTensor_(arange)(THTensor *r_, accreal xmin, accreal xmax, accreal step) {\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)\n  int m = fmod(xmax - xmin,step) == 0;\n#else\n  int m = (xmax - xmin) % 
step == 0;\n#endif\n  if (m)\n    xmax -= step;\n  THTensor_(range)(r_,xmin,xmax,step);\n}\n\nvoid THTensor_(randperm)(THTensor *r_, THGenerator *_generator, long n)\n{\n  real *r__data;\n  long r__stride_0;\n  long i;\n\n  THArgCheck(n > 0, 1, \"must be strictly positive\");\n\n  THTensor_(resize1d)(r_, n);\n  r__data = THTensor_(data)(r_);\n  r__stride_0 = THTensor_(stride)(r_,0);\n\n  for(i = 0; i < n; i++)\n    r__data[i*r__stride_0] = (real)(i);\n\n  for(i = 0; i < n-1; i++)\n  {\n    long z = THRandom_random(_generator) % (n-i);\n    real sav = r__data[i*r__stride_0];\n    r__data[i*r__stride_0] = r__data[(z+i)*r__stride_0];\n    r__data[(z+i)*r__stride_0] = sav;\n  }\n}\n\nvoid THTensor_(reshape)(THTensor *r_, THTensor *t, THLongStorage *size)\n{\n  THTensor_(resize)(r_, size, NULL);\n  THTensor_(copy)(r_, t);\n}\n\n/* I cut and pasted (slightly adapted) the quicksort code from\n   Sedgewick's 1978 \"Implementing Quicksort Programs\" article\n   http://www.csie.ntu.edu.tw/~b93076/p847-sedgewick.pdf\n\n   It is the state of the art existing implementation. The macros\n   are here to make as close a match as possible to the pseudocode of\n   Program 2 p.851\n\n   Note that other partition schemes exist, and are typically presented\n   in textbook, but those are less efficient. 
See e.g.\n   http://cs.stackexchange.com/questions/11458/quicksort-partitioning-hoare-vs-lomuto\n\n   Julien, November 12th 2013\n*/\n#define MAX_LEVELS  300\n#define M_SMALL 10 /* Limit for small subfiles */\n\n#define ARR(III) arr[(III)*stride]\n#define IDX(III) idx[(III)*stride]\n\n#define LONG_SWAP(AAA, BBB) swap = AAA; AAA = BBB; BBB = swap\n#define REAL_SWAP(AAA, BBB) rswap = AAA; AAA = BBB; BBB = rswap\n\n#define ARR_SWAP(III, JJJ) \\\n  REAL_SWAP(ARR(III), ARR(JJJ));\n\n#define BOTH_SWAP(III, JJJ) \\\n  REAL_SWAP(ARR(III), ARR(JJJ)); \\\n  LONG_SWAP(IDX(III), IDX(JJJ))\n\nstatic void THTensor_(quicksortascend)(real *arr, long *idx, long elements, long stride)\n{\n  long beg[MAX_LEVELS], end[MAX_LEVELS], i, j, L, R, P, swap, pid, stack = 0, sz_right, sz_left;\n  real rswap, piv;\n  unsigned char done = 0;\n\n  /* beg[0]=0; end[0]=elements; */\n  stack = 0;\n  L = 0; R = elements-1;\n  done = elements-1 <= M_SMALL;\n\n  while(!done) {\n      /* Use median of three for pivot choice */\n    P=(L+R)>>1;\n    BOTH_SWAP(P, L+1);\n    if (ARR(L+1) > ARR(R)) { BOTH_SWAP(L+1, R); }\n    if (ARR(L) > ARR(R)) { BOTH_SWAP(L, R); }\n    if (ARR(L+1) > ARR(L)) { BOTH_SWAP(L+1, L); }\n\n    i = L+1; j = R; piv = ARR(L); pid = IDX(L);\n\n    do {\n      do { i = i+1; } while(ARR(i) < piv);\n      do { j = j-1; } while(ARR(j) > piv);\n      if (j < i)\n          break;\n      BOTH_SWAP(i, j);\n    } while(1);\n    BOTH_SWAP(L, j);\n    /* Left subfile is (L, j-1) */\n    /* Right subfile is (i, R) */\n    sz_left = j-L;\n    sz_right = R-i+1;\n    if (sz_left <= M_SMALL && sz_right <= M_SMALL) {\n      /* both subfiles are small */\n      /* if stack empty */\n      if (stack == 0) {\n        done = 1;\n      } else {\n        stack--;\n        L = beg[stack];\n        R = end[stack];\n      }\n    } else if (sz_left <= M_SMALL || sz_right <= M_SMALL) {\n      /* exactly one of the subfiles is small */\n      /* (L,R) = large subfile */\n      if (sz_left > sz_right) {\n     
   /* Implicit: L = L; */\n        R = j-1;\n      } else {\n        L = i;\n        /* Implicit: R = R; */\n      }\n    } else {\n      /* none of the subfiles is small */\n      /* push large subfile */\n      /* (L,R) = small subfile */\n      if (sz_left > sz_right) {\n        beg[stack] = L;\n        end[stack] = j-1;\n        stack++;\n        L = i;\n        /* Implicit: R = R */\n      } else {\n        beg[stack] = i;\n        end[stack] = R;\n        stack++;\n        /* Implicit: L = L; */\n        R = j-1;\n      }\n    }\n  } /* while not done */\n  /* Now insertion sort on the concatenation of subfiles */\n  for(i=elements-2; i>=0; i--) {\n    if (ARR(i) > ARR(i+1)) {\n      piv = ARR(i);\n      pid = IDX(i);\n      j = i+1;\n      do {\n        ARR(j-1) = ARR(j);\n        IDX(j-1) = IDX(j);\n        j = j+1;\n      } while(j < elements && ARR(j) < piv);\n      ARR(j-1) = piv;\n      IDX(j-1) = pid;\n     }\n  }\n}\n\nstatic void THTensor_(quicksortdescend)(real *arr, long *idx, long elements, long stride)\n{\n  long beg[MAX_LEVELS], end[MAX_LEVELS], i, j, L, R, P, swap, pid, stack = 0, sz_right, sz_left;\n  real rswap, piv;\n  unsigned char done = 0;\n\n  /* beg[0]=0; end[0]=elements; */\n  stack = 0;\n  L = 0; R = elements-1;\n  done = elements-1 <= M_SMALL;\n\n  while(!done) {\n      /* Use median of three for pivot choice */\n    P=(L+R)>>1;\n    BOTH_SWAP(P, L+1);\n    if (ARR(L+1) < ARR(R)) { BOTH_SWAP(L+1, R); }\n    if (ARR(L) < ARR(R)) { BOTH_SWAP(L, R); }\n    if (ARR(L+1) < ARR(L)) { BOTH_SWAP(L+1, L); }\n\n    i = L+1; j = R; piv = ARR(L); pid = IDX(L);\n\n    do {\n      do { i = i+1; } while(ARR(i) > piv);\n      do { j = j-1; } while(ARR(j) < piv);\n      if (j < i)\n          break;\n      BOTH_SWAP(i, j);\n    } while(1);\n    BOTH_SWAP(L, j);\n    /* Left subfile is (L, j-1) */\n    /* Right subfile is (i, R) */\n    sz_left = j-L;\n    sz_right = R-i+1;\n    if (sz_left <= M_SMALL && sz_right <= M_SMALL) {\n      /* both subfiles 
are small */\n      /* if stack empty */\n      if (stack == 0) {\n        done = 1;\n      } else {\n        stack--;\n        L = beg[stack];\n        R = end[stack];\n      }\n    } else if (sz_left <= M_SMALL || sz_right <= M_SMALL) {\n      /* exactly one of the subfiles is small */\n      /* (L,R) = large subfile */\n      if (sz_left > sz_right) {\n        /* Implicit: L = L; */\n        R = j-1;\n      } else {\n        L = i;\n        /* Implicit: R = R; */\n      }\n    } else {\n      /* none of the subfiles is small */\n      /* push large subfile */\n      /* (L,R) = small subfile */\n      if (sz_left > sz_right) {\n        beg[stack] = L;\n        end[stack] = j-1;\n        stack++;\n        L = i;\n        /* Implicit: R = R */\n      } else {\n        beg[stack] = i;\n        end[stack] = R;\n        stack++;\n        /* Implicit: L = L; */\n        R = j-1;\n      }\n    }\n  } /* while not done */\n  /* Now insertion sort on the concatenation of subfiles */\n  for(i=elements-2; i>=0; i--) {\n    if (ARR(i) < ARR(i+1)) {\n      piv = ARR(i);\n      pid = IDX(i);\n      j = i+1;\n      do {\n        ARR(j-1) = ARR(j);\n        IDX(j-1) = IDX(j);\n        j = j+1;\n      } while(j < elements && ARR(j) > piv);\n      ARR(j-1) = piv;\n      IDX(j-1) = pid;\n     }\n  }\n}\n\n#undef MAX_LEVELS\n#undef M_SMALL\n\nvoid THTensor_(sort)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int dimension, int descendingOrder)\n{\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, \"invalid dimension %d\",\n      dimension + TH_INDEX_BASE);\n\n  THTensor_(resizeAs)(rt_, t);\n  THTensor_(copy)(rt_, t);\n\n  {\n    THLongStorage *size = THTensor_(newSizeOf)(t);\n    THLongTensor_resize(ri_, size, NULL);\n    THLongStorage_free(size);\n  }\n\n  if(descendingOrder)\n  {\n    TH_TENSOR_DIM_APPLY2(real, rt_, long, ri_, dimension,\n                         long i;\n                         for(i = 0; i < ri__size; i++)\n                           
ri__data[i*ri__stride] = i;\n                         THTensor_(quicksortdescend)(rt__data, ri__data, rt__size, rt__stride);)\n      }\n  else\n  {\n    TH_TENSOR_DIM_APPLY2(real, rt_, long, ri_, dimension,\n                         long i;\n                         for(i = 0; i < ri__size; i++)\n                           ri__data[i*ri__stride] = i;\n                         THTensor_(quicksortascend)(rt__data, ri__data, rt__size, rt__stride);)\n      }\n}\n\n/* Implementation of the Quickselect algorithm, based on Nicolas Devillard's\npublic domain implementation at http://ndevilla.free.fr/median/median/\nAdapted similarly to the above Quicksort algorithm.\nThis version does not produce indices along with values. */\nstatic void THTensor_(quickselectnoidx)(real *arr, long k, long elements, long stride)\n{\n  long P, L, R, i, j, swap;\n  real rswap, piv;\n  L = 0;\n  R = elements-1;\n\n  do {\n    if (R <= L) /* One element only */\n      return;\n\n    if (R == L+1) {  /* Two elements only */\n      if (ARR(L) > ARR(R)) {\n        ARR_SWAP(L, R);\n      }\n      return;\n    }\n\n    /* Use median of three for pivot choice */\n    P=(L+R)>>1;\n    ARR_SWAP(P, L+1);\n    if (ARR(L+1) > ARR(R)) { ARR_SWAP(L+1, R); }\n    if (ARR(L) > ARR(R)) { ARR_SWAP(L, R); }\n    if (ARR(L+1) > ARR(L)) { ARR_SWAP(L+1, L); }\n\n    i = L+1;\n    j = R;\n    piv = ARR(L);\n    do {\n      do i++; while(ARR(i) < piv);\n      do j--; while(ARR(j) > piv);\n      if (j < i)\n        break;\n      ARR_SWAP(i, j);\n    } while(1);\n    ARR_SWAP(L, j);\n\n    /* Re-set active partition */\n    if (j <= k) L=i;\n    if (j >= k) R=j-1;\n  } while(1);\n}\n\n/* Implementation of the Quickselect algorithm, based on Nicolas Devillard's\npublic domain implementation at http://ndevilla.free.fr/median/median/\nAdapted similarly to the above Quicksort algorithm. 
*/\nstatic void THTensor_(quickselect)(real *arr, long *idx, long k, long elements, long stride)\n{\n  long P, L, R, i, j, swap, pid;\n  real rswap, piv;\n  L = 0;\n  R = elements-1;\n\n  do {\n    if (R <= L) /* One element only */\n      return;\n\n    if (R == L+1) {  /* Two elements only */\n      if (ARR(L) > ARR(R)) {\n        BOTH_SWAP(L, R);\n      }\n      return;\n    }\n\n    /* Use median of three for pivot choice */\n    P=(L+R)>>1;\n    BOTH_SWAP(P, L+1);\n    if (ARR(L+1) > ARR(R)) { BOTH_SWAP(L+1, R); }\n    if (ARR(L) > ARR(R)) { BOTH_SWAP(L, R); }\n    if (ARR(L+1) > ARR(L)) { BOTH_SWAP(L+1, L); }\n\n    i = L+1;\n    j = R;\n    piv = ARR(L);\n    pid = IDX(L);\n    do {\n      do i++; while(ARR(i) < piv);\n      do j--; while(ARR(j) > piv);\n      if (j < i)\n        break;\n      BOTH_SWAP(i, j);\n    } while(1);\n    BOTH_SWAP(L, j);\n\n    /* Re-set active partition */\n    if (j <= k) L=i;\n    if (j >= k) R=j-1;\n  } while(1);\n}\n\n#undef ARR\n#undef IDX\n#undef LONG_SWAP\n#undef REAL_SWAP\n#undef BOTH_SWAP\n\nvoid THTensor_(mode)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim)\n{\n  THLongStorage *dim;\n  THTensor *temp_;\n  THLongTensor *tempi_;\n  real *temp__data;\n  long *tempi__data;\n  long t_size_dim;\n\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, \"dimension out of range\");\n\n  dim = THTensor_(newSizeOf)(t);\n  THLongStorage_set(dim, dimension, 1);\n  THTensor_(resize)(values_, dim, NULL);\n  THLongTensor_resize(indices_, dim, NULL);\n  THLongStorage_free(dim);\n\n  t_size_dim = THTensor_(size)(t, dimension);\n\n  temp_ = THTensor_(new)();\n  THTensor_(resize1d)(temp_, t_size_dim);\n  temp__data = THTensor_(data)(temp_);\n\n  tempi_ = THLongTensor_new();\n  THLongTensor_resize1d(tempi_, t_size_dim);\n  tempi__data = THLongTensor_data(tempi_);\n\n  TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, indices_, dimension,\n                       long i;\n                
       real mode = 0;\n                       long modei = 0;\n                       long temp_freq = 0;\n                       long max_freq = 0;\n                       for(i = 0; i < t_size_dim; i++)\n                          temp__data[i] = t_data[i*t_stride];\n                       for(i = 0; i < t_size_dim; i++)\n                          tempi__data[i] = i;\n                       THTensor_(quicksortascend)(temp__data, tempi__data, t_size_dim, 1);\n\n                       for(i = 0; i < t_size_dim; i++)\n                       {\n                          temp_freq++;\n                          if ((i == t_size_dim - 1) || (temp__data[i] != temp__data[i+1]))\n                          {\n                              if (temp_freq > max_freq)\n                              {\n                                 mode = temp__data[i];\n                                 modei = tempi__data[i];\n                                 max_freq = temp_freq;\n                              }\n                              temp_freq = 0;\n                          }\n                       }\n                       *values__data = mode;\n                       *indices__data = modei;);\n\n  THTensor_(free)(temp_);\n  THLongTensor_free(tempi_);\n  if (!keepdim) {\n    THTensor_(squeeze1d)(values_, values_, dimension);\n    THLongTensor_squeeze1d(indices_, indices_, dimension);\n  }\n}\n\nvoid THTensor_(kthvalue)(THTensor *values_, THLongTensor *indices_, THTensor *t, long k, int dimension, int keepdim)\n{\n  THLongStorage *dim;\n  THTensor *temp_;\n  THLongTensor *tempi_;\n  real *temp__data;\n  long *tempi__data;\n  long t_size_dim;\n\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, \"dimension out of range\");\n  THArgCheck(k > 0 && k <= t->size[dimension], 2, \"selected index out of range\");\n\n  dim = THTensor_(newSizeOf)(t);\n  THLongStorage_set(dim, dimension, 1);\n  THTensor_(resize)(values_, dim, NULL);\n  THLongTensor_resize(indices_, dim, 
NULL);\n  THLongStorage_free(dim);\n\n  t_size_dim = THTensor_(size)(t, dimension);\n\n  temp_ = THTensor_(new)();\n  THTensor_(resize1d)(temp_, t_size_dim);\n  temp__data = THTensor_(data)(temp_);\n\n  tempi_ = THLongTensor_new();\n  THLongTensor_resize1d(tempi_, t_size_dim);\n  tempi__data = THLongTensor_data(tempi_);\n\n  TH_TENSOR_DIM_APPLY3(real, t, real, values_, long, indices_, dimension,\n                       long i;\n                       for(i = 0; i < t_size_dim; i++)\n                          temp__data[i] = t_data[i*t_stride];\n                       for(i = 0; i < t_size_dim; i++)\n                          tempi__data[i] = i;\n                       THTensor_(quickselect)(temp__data, tempi__data, k - 1, t_size_dim, 1);\n                       *values__data = temp__data[k-1];\n                       *indices__data = tempi__data[k-1];);\n\n  THTensor_(free)(temp_);\n  THLongTensor_free(tempi_);\n  if (!keepdim) {\n    THTensor_(squeeze1d)(values_, values_, dimension);\n    THLongTensor_squeeze1d(indices_, indices_, dimension);\n  }\n}\n\nvoid THTensor_(median)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim)\n{\n  long t_size_dim, k;\n\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, \"dimension out of range\");\n\n  t_size_dim = THTensor_(size)(t, dimension);\n  k = (t_size_dim-1) >> 1; /* take middle or one-before-middle element */\n\n  THTensor_(kthvalue)(values_, indices_, t, k+1, dimension, keepdim);\n}\n\nvoid THTensor_(topk)(THTensor *rt_, THLongTensor *ri_, THTensor *t, long k, int dim, int dir, int sorted)\n{\n  int numDims = THTensor_(nDimension)(t);\n  THArgCheck(dim >= 0 && dim < numDims, 3, \"dim not in range\");\n\n  long sliceSize = THTensor_(size)(t, dim);\n  THArgCheck(k > 0 && k <= sliceSize, 2, \"k not in range for dimension\");\n\n  THTensor *tmpResults = THTensor_(new)();\n  THTensor_(resize1d)(tmpResults, sliceSize);\n  real *tmp__data = 
THTensor_(data)(tmpResults);\n\n  THLongTensor *tmpIndices = THLongTensor_new();\n  THLongTensor_resize1d(tmpIndices, sliceSize);\n  long *tmpi__data = THLongTensor_data(tmpIndices);\n\n  THLongStorage *topKSize = THTensor_(newSizeOf)(t);\n  THLongStorage_set(topKSize, dim, k);\n  THTensor_(resize)(rt_, topKSize, NULL);\n  THLongTensor_resize(ri_, topKSize, NULL);\n  THLongStorage_free(topKSize);\n\n  if (dir) {\n    /* k largest elements, descending order (optional: see sorted) */\n    long K = sliceSize - k;\n    TH_TENSOR_DIM_APPLY3(real, t, real, rt_, long, ri_, dim,\n                         long i;\n                         for(i = 0; i < sliceSize; i++)\n                         {\n                           tmp__data[i] = t_data[i*t_stride];\n                           tmpi__data[i] = i;\n                         }\n                         if (K > 0)\n                           THTensor_(quickselect)(tmp__data, tmpi__data, K - 1, sliceSize, 1);\n                         if (sorted)\n                           THTensor_(quicksortdescend)(tmp__data + K, tmpi__data + K, k, 1);\n                         for(i = 0; i < k; i++)\n                         {\n                           rt__data[i*rt__stride] = tmp__data[i + K];\n                           ri__data[i*ri__stride] = tmpi__data[i + K];\n                         })\n  }\n  else {\n    /* k smallest elements, ascending order (optional: see sorted) */\n    TH_TENSOR_DIM_APPLY3(real, t, real, rt_, long, ri_, dim,\n                         long i;\n                         for(i = 0; i < sliceSize; i++)\n                         {\n                           tmp__data[i] = t_data[i*t_stride];\n                           tmpi__data[i] = i;\n                         }\n                         THTensor_(quickselect)(tmp__data, tmpi__data, k - 1, sliceSize, 1);\n                         if (sorted)\n                           THTensor_(quicksortascend)(tmp__data, tmpi__data, k - 1, 1);\n                        
 for(i = 0; i < k; i++)\n                         {\n                           rt__data[i*rt__stride] = tmp__data[i];\n                           ri__data[i*ri__stride] = tmpi__data[i];\n                         })\n  }\n\n  THTensor_(free)(tmpResults);\n  THLongTensor_free(tmpIndices);\n}\n\nvoid THTensor_(tril)(THTensor *r_, THTensor *t, long k)\n{\n  long t_size_0, t_size_1;\n  long t_stride_0, t_stride_1;\n  long r__stride_0, r__stride_1;\n  real *t_data, *r__data;\n  long r, c;\n\n  THArgCheck(THTensor_(nDimension)(t) == 2, 1, \"expected a matrix\");\n\n  THTensor_(resizeAs)(r_, t);\n\n  t_size_0 = THTensor_(size)(t, 0);\n  t_size_1 = THTensor_(size)(t, 1);\n  t_stride_0 = THTensor_(stride)(t, 0);\n  t_stride_1 = THTensor_(stride)(t, 1);\n  r__stride_0 = THTensor_(stride)(r_, 0);\n  r__stride_1 = THTensor_(stride)(r_, 1);\n  r__data = THTensor_(data)(r_);\n  t_data = THTensor_(data)(t);\n\n  for(r = 0; r < t_size_0; r++)\n  {\n    long sz = THMin(r+k+1, t_size_1);\n    for(c = THMax(0, r+k+1); c < t_size_1; c++)\n      r__data[r*r__stride_0+c*r__stride_1] = 0;\n    for(c = 0; c < sz; c++)\n      r__data[r*r__stride_0+c*r__stride_1] = t_data[r*t_stride_0+c*t_stride_1];\n  }\n}\n\nvoid THTensor_(triu)(THTensor *r_, THTensor *t, long k)\n{\n  long t_size_0, t_size_1;\n  long t_stride_0, t_stride_1;\n  long r__stride_0, r__stride_1;\n  real *t_data, *r__data;\n  long r, c;\n\n  THArgCheck(THTensor_(nDimension)(t) == 2, 1, \"expected a matrix\");\n\n  THTensor_(resizeAs)(r_, t);\n\n  t_size_0 = THTensor_(size)(t, 0);\n  t_size_1 = THTensor_(size)(t, 1);\n  t_stride_0 = THTensor_(stride)(t, 0);\n  t_stride_1 = THTensor_(stride)(t, 1);\n  r__stride_0 = THTensor_(stride)(r_, 0);\n  r__stride_1 = THTensor_(stride)(r_, 1);\n  r__data = THTensor_(data)(r_);\n  t_data = THTensor_(data)(t);\n\n  for(r = 0; r < t_size_0; r++)\n  {\n    long sz = THMin(r+k, t_size_1);\n    for(c = THMax(0, r+k); c < t_size_1; c++)\n      r__data[r*r__stride_0+c*r__stride_1] = 
t_data[r*t_stride_0+c*t_stride_1];\n    for(c = 0; c < sz; c++)\n      r__data[r*r__stride_0+c*r__stride_1] = 0;\n  }\n}\n\nvoid THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension)\n{\n  THTensor* inputs[2];\n  inputs[0] = ta;\n  inputs[1] = tb;\n  THTensor_(catArray)(r_, inputs, 2, dimension);\n}\n\nvoid THTensor_(catArray)(THTensor *result, THTensor **inputs, int numInputs, int dimension)\n{\n  THLongStorage *size;\n  int i, j;\n  long offset;\n  int maxDim = dimension + 1;\n  int allEmpty = 1;\n  int allContiguous = 1;\n\n  // cat_dimension is the actual dimension we cat along\n  int cat_dimension = dimension;\n\n  for (i = 0; i < numInputs; i++)\n  {\n    maxDim = THMax(maxDim, inputs[i]->nDimension);\n  }\n\n  // When the user input dimension is -1 (i.e. -2 in C)\n  // Then we pick the maximum last dimension across all tensors.\n  if ( dimension + TH_INDEX_BASE == -1 )\n  {\n    cat_dimension = maxDim?(maxDim-1):0;\n  }\n\n  THArgCheck(numInputs > 0, 3, \"invalid number of inputs %d\", numInputs);\n  THArgCheck(cat_dimension >= 0, 4, \"invalid dimension %d\", dimension + TH_INDEX_BASE);\n\n  size = THLongStorage_newWithSize(maxDim);\n\n  for(i = 0; i < maxDim; i++)\n  {\n    // dimSize is either the size of the dim if it exists, either 1 if #dim > 0, otherwise 0\n    long dimSize = i < inputs[0]->nDimension ? inputs[0]->size[i] : THMin(inputs[0]->nDimension, 1);\n    if (i == cat_dimension)\n    {\n      for (j = 1; j < numInputs; j++)\n      {\n        // accumulate the size over the dimension we want to cat on.\n        // Empty tensors are allowed\n        dimSize += i < inputs[j]->nDimension ? inputs[j]->size[i] : THMin(inputs[j]->nDimension, 1);\n      }\n    }\n    else\n    {\n      for (j = 1; j < numInputs; j++)\n      {\n        long sz = (i < inputs[j]->nDimension ? 
inputs[j]->size[i] : THMin(inputs[j]->nDimension, 1));\n        // If it's a dimension we're not catting on\n        // Then fail if sizes are different AND > 0\n        if (dimSize != sz && dimSize && sz)\n        {\n          THLongStorage_free(size);\n          THError(\"inconsistent tensor sizes\");\n        }\n        else if(!dimSize)\n        {\n          dimSize = sz;\n        }\n      }\n    }\n    allEmpty = allEmpty && !dimSize;\n    size->data[i] = dimSize;\n  }\n\n  // Initiate catting and resizing\n  // If at least one of the input is not empty\n  if (!allEmpty)\n  {\n    THTensor_(resize)(result, size, NULL);\n\n    // Check contiguity of all inputs and result\n    for (i = 0; i < numInputs; i++) {\n      if(inputs[i]->nDimension) {\n        allContiguous = allContiguous && THTensor_(isContiguous)(inputs[i]);\n      }\n    }\n    allContiguous = allContiguous && THTensor_(isContiguous)(result);\n\n    // First path is for contiguous inputs along dim 1\n    // Second path for non-contiguous\n    if (cat_dimension == 0 && allContiguous)\n    {\n      real* result_data = result->storage->data + result->storageOffset;\n      offset = 0;\n      for (j = 0; j < numInputs; j++)\n      {\n        if (inputs[j]->nDimension)\n        {\n          THTensor* input0 = inputs[j];\n          real* input0_data = input0->storage->data + input0->storageOffset;\n          long input0_size = THTensor_(nElement)(input0);\n          memcpy(result_data + offset, input0_data, input0_size*sizeof(real));\n          offset += input0_size;\n        }\n      }\n    }\n    else\n    {\n      offset = 0;\n      for (j = 0; j < numInputs; j++)\n      {\n        if (inputs[j]->nDimension)\n        {\n          long dimSize = cat_dimension < inputs[j]->nDimension ? 
inputs[j]->size[cat_dimension] : 1;\n          THTensor *nt = THTensor_(newWithTensor)(result);\n          THTensor_(narrow)(nt, NULL, cat_dimension, offset, dimSize);\n          THTensor_(copy)(nt, inputs[j]);\n          THTensor_(free)(nt);\n          offset += dimSize;\n        }\n      }\n    }\n  }\n  THLongStorage_free(size);\n}\n\nint THTensor_(equal)(THTensor *ta, THTensor* tb)\n{\n  int equal = 1;\n  if(!THTensor_(isSameSizeAs)(ta, tb))\n    return 0;\n\n  if (THTensor_(isContiguous)(ta) && THTensor_(isContiguous)(tb)) {\n    real *tap = THTensor_(data)(ta);\n    real *tbp = THTensor_(data)(tb);\n    ptrdiff_t sz = THTensor_(nElement)(ta);\n    ptrdiff_t i;\n    for (i=0; i<sz; ++i){\n      if(tap[i] != tbp[i]) return 0;\n    }\n  } else {\n    // Short-circuit the apply function on inequality\n    TH_TENSOR_APPLY2(real, ta, real, tb,\n                     if (equal && *ta_data != *tb_data) {\n                        equal = 0;\n                        TH_TENSOR_APPLY_hasFinished = 1; break;\n                     })\n  }\n  return equal;\n}\n\n#define TENSOR_IMPLEMENT_LOGICAL(NAME,OP)\t\t\t\t\\\n  void THTensor_(NAME##Value)(THByteTensor *r_, THTensor* t, real value)\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    THByteTensor_resizeNd(r_, t->nDimension, t->size, NULL);\t\t\\\n    TH_TENSOR_APPLY2(unsigned char, r_, real, t,\t\t\t\\\n\t\t     *r__data = (*t_data OP value) ? 1 : 0;); \\\n  }\t\t\t\t\t\t\t\t\t\\\n  void THTensor_(NAME##ValueT)(THTensor* r_, THTensor* t, real value)\t\\\n  {\t\t\t\t\t\t\t\t\t\\\n    THTensor_(resizeNd)(r_, t->nDimension, t->size, NULL);\t\t\\\n    TH_TENSOR_APPLY2(real, r_, real, t,\t\t\t\t\t\\\n\t\t     *r__data = (*t_data OP value) ? 
1 : 0;); \\\n  }\t\t\t\t\t\t\t\t\t\\\n  void THTensor_(NAME##Tensor)(THByteTensor *r_, THTensor *ta, THTensor *tb) \\\n  {\t\t\t\t\t\t\t\t\t\\\n    THByteTensor_resizeNd(r_, ta->nDimension, ta->size, NULL);\t\t\\\n    TH_TENSOR_APPLY3(unsigned char, r_, real, ta, real, tb,\t\t\\\n\t\t     *r__data = (*ta_data OP *tb_data) ? 1 : 0;); \\\n  }\t\t\t\t\t\t\t\t\t\\\n  void THTensor_(NAME##TensorT)(THTensor *r_, THTensor *ta, THTensor *tb) \\\n  {\t\t\t\t\t\t\t\t\t\\\n    THTensor_(resizeNd)(r_, ta->nDimension, ta->size, NULL);\t\t\\\n    TH_TENSOR_APPLY3(real, r_, real, ta, real, tb,\t\t\t\\\n\t\t     *r__data = (*ta_data OP *tb_data) ? 1 : 0;); \\\n  }\t\t\t\t\t\t\t\t\t\\\n\n\nTENSOR_IMPLEMENT_LOGICAL(lt,<)\nTENSOR_IMPLEMENT_LOGICAL(gt,>)\nTENSOR_IMPLEMENT_LOGICAL(le,<=)\nTENSOR_IMPLEMENT_LOGICAL(ge,>=)\nTENSOR_IMPLEMENT_LOGICAL(eq,==)\nTENSOR_IMPLEMENT_LOGICAL(ne,!=)\n\n#define LAB_IMPLEMENT_BASIC_FUNCTION(NAME, CFUNC)             \\\n  void THTensor_(NAME)(THTensor *r_, THTensor *t)                \\\n  {                                                           \\\n    THTensor_(resizeAs)(r_, t);                               \\\n    TH_TENSOR_APPLY2(real, t, real, r_, *r__data = CFUNC(*t_data);); \\\n  }                                                           \\\n\n#if defined(TH_REAL_IS_LONG)\nLAB_IMPLEMENT_BASIC_FUNCTION(abs,labs)\nLAB_IMPLEMENT_BASIC_FUNCTION(neg,-)\n#endif /* long only part */\n\n#if defined(TH_REAL_IS_SHORT) || defined(TH_REAL_IS_INT)\nLAB_IMPLEMENT_BASIC_FUNCTION(abs,abs)\nLAB_IMPLEMENT_BASIC_FUNCTION(neg,-)\n#endif /* int only part */\n\n#if defined(TH_REAL_IS_BYTE)\n\n#define TENSOR_IMPLEMENT_LOGICAL_SUM(NAME, OP, INIT_VALUE) \\\n  int THTensor_(NAME)(THTensor *tensor) \\\n  { \\\n    THArgCheck(tensor->nDimension > 0, 1, \"empty Tensor\"); \\\n    int sum = INIT_VALUE;                               \\\n    TH_TENSOR_APPLY(real, tensor, sum = sum OP *tensor_data;); \\\n    return sum; \\\n  }\n\nTENSOR_IMPLEMENT_LOGICAL_SUM(logicalall, 
&&, 1)\nTENSOR_IMPLEMENT_LOGICAL_SUM(logicalany, ||, 0)\n\n#endif /* Byte only part */\n\n/* floating point only now */\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)\n\n#if defined (TH_REAL_IS_FLOAT)\n#define TH_MATH_NAME(fn) fn##f\n#else\n#define TH_MATH_NAME(fn) fn\n#endif\n\nLAB_IMPLEMENT_BASIC_FUNCTION(log,TH_MATH_NAME(log))\nLAB_IMPLEMENT_BASIC_FUNCTION(lgamma,TH_MATH_NAME(lgamma))\nLAB_IMPLEMENT_BASIC_FUNCTION(log1p,TH_MATH_NAME(log1p))\nLAB_IMPLEMENT_BASIC_FUNCTION(sigmoid,TH_MATH_NAME(TH_sigmoid))\nLAB_IMPLEMENT_BASIC_FUNCTION(exp,TH_MATH_NAME(exp))\nLAB_IMPLEMENT_BASIC_FUNCTION(cos,TH_MATH_NAME(cos))\nLAB_IMPLEMENT_BASIC_FUNCTION(acos,TH_MATH_NAME(acos))\nLAB_IMPLEMENT_BASIC_FUNCTION(cosh,TH_MATH_NAME(cosh))\nLAB_IMPLEMENT_BASIC_FUNCTION(sin,TH_MATH_NAME(sin))\nLAB_IMPLEMENT_BASIC_FUNCTION(asin,TH_MATH_NAME(asin))\nLAB_IMPLEMENT_BASIC_FUNCTION(sinh,TH_MATH_NAME(sinh))\nLAB_IMPLEMENT_BASIC_FUNCTION(tan,TH_MATH_NAME(tan))\nLAB_IMPLEMENT_BASIC_FUNCTION(atan,TH_MATH_NAME(atan))\nLAB_IMPLEMENT_BASIC_FUNCTION(tanh,TH_MATH_NAME(tanh))\nLAB_IMPLEMENT_BASIC_FUNCTION(sqrt,TH_MATH_NAME(sqrt))\nLAB_IMPLEMENT_BASIC_FUNCTION(rsqrt,TH_MATH_NAME(TH_rsqrt))\nLAB_IMPLEMENT_BASIC_FUNCTION(ceil,TH_MATH_NAME(ceil))\nLAB_IMPLEMENT_BASIC_FUNCTION(floor,TH_MATH_NAME(floor))\nLAB_IMPLEMENT_BASIC_FUNCTION(round,TH_MATH_NAME(round))\nLAB_IMPLEMENT_BASIC_FUNCTION(abs,TH_MATH_NAME(fabs))\nLAB_IMPLEMENT_BASIC_FUNCTION(trunc,TH_MATH_NAME(trunc))\nLAB_IMPLEMENT_BASIC_FUNCTION(frac,TH_MATH_NAME(TH_frac))\nLAB_IMPLEMENT_BASIC_FUNCTION(neg,-)\nLAB_IMPLEMENT_BASIC_FUNCTION(cinv, TH_MATH_NAME(1.0) / )\n\n\nvoid THTensor_(pow)(THTensor *r_, THTensor *t, real value)\n{\n  THTensor_(resizeAs)(r_, t);\n  if(value == 1){\n    THTensor_(copy)(r_, t);\n  }\n  else if(value == 2){\n    THTensor_(cmul)(r_, t, t);\n  }\n  else if(value == 3){\n    TH_TENSOR_APPLY2(real, t, real, r_, *r__data = *t_data * *t_data * *t_data;);\n  }\n  else if(value == 0.5){\n    THTensor_(sqrt)(r_, t);\n  
}\n  else if(value == -0.5){\n    THTensor_(rsqrt)(r_, t);\n  }\n  else if(value == -1){\n    THTensor_(cinv)(r_, t);\n  }\n  else if(value == -2){\n    TH_TENSOR_APPLY2(real, t, real, r_, *r__data = TH_MATH_NAME(1.0) / (*t_data * *t_data););\n  }\n  else{\n    TH_TENSOR_APPLY2(real, t, real, r_, *r__data = TH_MATH_NAME(pow)(*t_data, value););\n  }\n}\n\nvoid THTensor_(atan2)(THTensor *r_, THTensor *tx, THTensor *ty)\n{\n  THTensor_(resizeAs)(r_, tx);\n  TH_TENSOR_APPLY3(real, r_, real, tx, real, ty, *r__data = TH_MATH_NAME(atan2)(*tx_data,*ty_data););\n}\n\nvoid THTensor_(lerp)(THTensor *r_, THTensor *a, THTensor *b, real weight)\n{\n  THArgCheck(THTensor_(nElement)(a) == THTensor_(nElement)(b), 2, \"sizes do not match\");\n  THTensor_(resizeAs)(r_, a);\n  TH_TENSOR_APPLY3(real, r_, real, a, real, b, *r__data = TH_MATH_NAME(TH_lerp)(*a_data, *b_data, weight););\n}\n\nvoid THTensor_(mean)(THTensor *r_, THTensor *t, int dimension, int keepdim)\n{\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, \"invalid dimension %d\",\n      dimension + TH_INDEX_BASE);\n\n  THTensor_(sum)(r_, t, dimension, keepdim);\n  THTensor_(div)(r_, r_, t->size[dimension]);\n}\n\nvoid THTensor_(std)(THTensor *r_, THTensor *t, int dimension, int biased, int keepdim)\n{\n  THLongStorage *dim;\n\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, \"invalid dimension %d\",\n      dimension + TH_INDEX_BASE);\n\n  dim = THTensor_(newSizeOf)(t);\n  THLongStorage_set(dim, dimension, 1);\n  THTensor_(resize)(r_, dim, NULL);\n  THLongStorage_free(dim);\n\n  TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,\n                       accreal sum = 0;\n                       accreal sum2 = 0;\n                       long i;\n                       for(i = 0; i < t_size; i++)\n                       {\n                         real z = t_data[i*t_stride];\n                         sum += z;\n                         sum2 += z*z;\n                       }\n\n      
                 if(biased)\n                       {\n                         sum /= t_size;\n                         sum2 /= t_size;\n                         sum2 -= sum*sum;\n                         sum2 = (sum2 < 0 ? 0 : sum2);\n                         *r__data = (real)TH_MATH_NAME(sqrt)(sum2);\n                       }\n                       else\n                       {\n                         sum /= t_size;\n                         sum2 /= t_size-1;\n                         sum2 -= ((real)t_size)/((real)(t_size-1))*sum*sum;\n                         sum2 = (sum2 < 0 ? 0 : sum2);\n                         *r__data = (real)TH_MATH_NAME(sqrt)(sum2);\n                       });\n\n  if (!keepdim) {\n    THTensor_(squeeze1d)(r_, r_, dimension);\n  }\n}\n\nvoid THTensor_(var)(THTensor *r_, THTensor *t, int dimension, int biased, int keepdim)\n{\n  THLongStorage *dim;\n\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, \"invalid dimension %d\",\n      dimension + TH_INDEX_BASE);\n\n  dim = THTensor_(newSizeOf)(t);\n  THLongStorage_set(dim, dimension, 1);\n  THTensor_(resize)(r_, dim, NULL);\n  THLongStorage_free(dim);\n\n  TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,\n                       accreal sum = 0;\n                       accreal sum2 = 0;\n                       long i;\n                       for(i = 0; i < t_size; i++)\n                       {\n                         real z = t_data[i*t_stride];\n                         sum += z;\n                         sum2 += z*z;\n                       }\n\n                       if(biased)\n                       {\n                         sum /= t_size;\n                         sum2 /= t_size;\n                         sum2 -= sum*sum;\n                         sum2 = (sum2 < 0 ? 
0 : sum2);\n                         *r__data = sum2;\n                       }\n                       else\n                       {\n                         sum /= t_size;\n                         sum2 /= t_size-1;\n                         sum2 -= ((real)t_size)/((real)(t_size-1))*sum*sum;\n                         sum2 = (sum2 < 0 ? 0 : sum2);\n                         *r__data = (real)sum2;\n                       });\n\n  if (!keepdim) {\n    THTensor_(squeeze1d)(r_, r_, dimension);\n  }\n}\n\nvoid THTensor_(norm)(THTensor *r_, THTensor *t, real value, int dimension, int keepdim)\n{\n  THLongStorage *dim;\n\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 3, \"invalid dimension %d\",\n      dimension + TH_INDEX_BASE);\n\n  dim = THTensor_(newSizeOf)(t);\n  THLongStorage_set(dim, dimension, 1);\n  THTensor_(resize)(r_, dim, NULL);\n  THLongStorage_free(dim);\n\n  if(value == 0) {\n    TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,\n                         accreal sum = 0;\n                         long i;\n                         for(i = 0; i < t_size; i++)\n                           sum += t_data[i*t_stride] != 0.0;\n                         *r__data = sum;)\n  } else {\n    TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,\n                         accreal sum = 0;\n                         long i;\n                         for(i = 0; i < t_size; i++) {\n                           sum += TH_MATH_NAME(pow)(\n                             TH_MATH_NAME(fabs)(t_data[i*t_stride]), value);\n                         }\n                         *r__data = TH_MATH_NAME(pow)(sum, 1.0/value);)\n  }\n\n  if (!keepdim) {\n    THTensor_(squeeze1d)(r_, r_, dimension);\n  }\n}\n\naccreal THTensor_(normall)(THTensor *tensor, real value)\n{\n  accreal sum = 0;\n  if(value == 0) {\n    TH_TENSOR_APPLY(real, tensor, sum += *tensor_data != 0.0;);\n    return sum;\n  } else if(value == 1) {\n    TH_TENSOR_APPLY(real, tensor, sum += 
TH_MATH_NAME(fabs)(*tensor_data););\n    return sum;\n  } else if(value == 2) {\n    TH_TENSOR_APPLY(real, tensor, accreal z = *tensor_data; sum += z*z;);\n    return sqrt(sum);\n  } else {\n    TH_TENSOR_APPLY(real, tensor, sum += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(*tensor_data), value););\n    return TH_MATH_NAME(pow)(sum, 1.0/value);\n  }\n}\n\nvoid THTensor_(renorm)(THTensor *res, THTensor *src, real value, int dimension, real maxnorm)\n{\n  int i;\n  THTensor *rowR, *rowS;\n\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(src), 3, \"invalid dimension %d\",\n      dimension + TH_INDEX_BASE);\n  THArgCheck(value > 0, 2, \"non-positive-norm not supported\");\n  THArgCheck(THTensor_(nDimension)(src) > 1, 1, \"need at least 2 dimensions, got %d dimensions\",\n      THTensor_(nDimension)(src));\n\n  rowR = THTensor_(new)();\n  rowS = THTensor_(new)();\n\n  THTensor_(resizeAs)(res, src);\n\n  for (i=0; i<src->size[dimension]; i++)\n  {\n    real norm = 0;\n    real new_norm;\n\n    THTensor_(select)(rowS, src, dimension, i);\n    THTensor_(select)(rowR, res, dimension, i);\n    if (value == 1) {\n      TH_TENSOR_APPLY(real, rowS, norm += fabs(*rowS_data););\n    } else if (value == 2) {\n      TH_TENSOR_APPLY(real, rowS, accreal z = *rowS_data; norm += z*z;);\n    } else {\n      TH_TENSOR_APPLY(real, rowS, norm += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(*rowS_data), value););\n    }\n\n    norm = pow(norm, 1/value);\n\n    if (norm > maxnorm)\n    {\n      new_norm = maxnorm / (norm + 1e-7);\n\n      TH_TENSOR_APPLY2(\n        real, rowR, real, rowS,\n        *rowR_data = (*rowS_data) * new_norm;\n      )\n    }\n    else\n      THTensor_(copy)(rowR, rowS);\n  }\n\n  THTensor_(free)(rowR);\n  THTensor_(free)(rowS);\n}\n\naccreal THTensor_(dist)(THTensor *tensor, THTensor *src, real value)\n{\n  real sum = 0;\n  TH_TENSOR_APPLY2(real, tensor, real, src,\n                   sum += TH_MATH_NAME(pow)(\n                     
TH_MATH_NAME(fabs)(*tensor_data - *src_data), value););\n  return TH_MATH_NAME(pow)(sum, 1.0/value);\n}\n\naccreal THTensor_(meanall)(THTensor *tensor)\n{\n  THArgCheck(tensor->nDimension > 0, 1, \"empty Tensor\");\n  return THTensor_(sumall)(tensor)/THTensor_(nElement)(tensor);\n}\n\naccreal THTensor_(varall)(THTensor *tensor, int biased)\n{\n  accreal mean = THTensor_(meanall)(tensor);\n  accreal sum = 0;\n  TH_TENSOR_APPLY(real, tensor, sum += (*tensor_data - mean)*(*tensor_data - mean););\n  sum /= THTensor_(nElement)(tensor) - (biased ? 0 : 1);\n  return sum;\n}\n\naccreal THTensor_(stdall)(THTensor *tensor, int biased)\n{\n  return sqrt(THTensor_(varall)(tensor, biased));\n}\n\nvoid THTensor_(linspace)(THTensor *r_, real a, real b, long n)\n{\n  real i = 0;\n\n  THArgCheck(n > 1 || (n == 1 && (a == b)), 3, \"invalid number of points\");\n\n  if (THTensor_(nElement)(r_) != n) {\n    THTensor_(resize1d)(r_, n);\n  }\n\n  if(n == 1) {\n    THTensor_(set1d)(r_, 0, a);\n  } else {\n     TH_TENSOR_APPLY(real, r_,\n             *r__data = a + i*(b-a)/((real)(n-1));\n             i++;\n           );\n  }\n}\n\nvoid THTensor_(logspace)(THTensor *r_, real a, real b, long n)\n{\n  real i = 0;\n\n  THArgCheck(n > 1 || (n == 1 && (a == b)), 3, \"invalid number of points\");\n\n  if (THTensor_(nElement)(r_) != n) {\n    THTensor_(resize1d)(r_, n);\n  }\n\n  if(n == 1) {\n    THTensor_(set1d)(r_, 0, TH_MATH_NAME(pow)(10.0, a));\n  } else {\n    TH_TENSOR_APPLY(real, r_,\n        *r__data = TH_MATH_NAME(pow)(10.0, a + i*(b-a)/((real)(n-1)));\n        i++;\n        );\n  }\n}\n\nvoid THTensor_(rand)(THTensor *r_, THGenerator *_generator, THLongStorage *size)\n{\n  THTensor_(resize)(r_, size, NULL);\n  THTensor_(uniform)(r_, _generator, 0, 1);\n}\n\nvoid THTensor_(randn)(THTensor *r_, THGenerator *_generator, THLongStorage *size)\n{\n  THTensor_(resize)(r_, size, NULL);\n  THTensor_(normal)(r_, _generator, 0, 1);\n}\n\nvoid THTensor_(histc)(THTensor *hist, THTensor *tensor, 
long nbins, real minvalue, real maxvalue)\n{\n  real minval;\n  real maxval;\n  real *h_data;\n\n  THTensor_(resize1d)(hist, nbins);\n  THTensor_(zero)(hist);\n  minval = minvalue;\n  maxval = maxvalue;\n  if (minval == maxval)\n  {\n    minval = THTensor_(minall)(tensor);\n    maxval = THTensor_(maxall)(tensor);\n  }\n  if (minval == maxval)\n  {\n    minval = minval - 1;\n    maxval = maxval + 1;\n  }\n\n  h_data = THTensor_(data)(hist);\n\n  TH_TENSOR_APPLY(real, tensor,\n    if (*tensor_data >= minval && *tensor_data <= maxval) {\n      const int bin = (int)((*tensor_data-minval) / (maxval-minval) * nbins);\n      h_data[THMin(bin, nbins-1)] += 1;\n    }\n  );\n}\n\nvoid THTensor_(bhistc)(THTensor *hist, THTensor *tensor, long nbins, real minvalue, real maxvalue)\n{\n  THArgCheck(THTensor_(nDimension)(tensor) < 3, 2, \"invalid dimension %d, the input must be a 2d tensor\", THTensor_(nDimension)(tensor));\n\n  int dimension = 1;\n  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(tensor), 2, \"invalid dimension %d\",\n      dimension + TH_INDEX_BASE);\n\n  real minval;\n  real maxval;\n  real *h_data;\n\n  THTensor_(resize2d)(hist, tensor->size[0], nbins);\n  THTensor_(zero)(hist);\n\n  minval = minvalue;\n  maxval = maxvalue;\n  if (minval == maxval)\n  {\n    minval = THTensor_(minall)(tensor);\n    maxval = THTensor_(maxall)(tensor);\n  }\n  if (minval == maxval)\n  {\n    minval = minval - 1;\n    maxval = maxval + 1;\n  }\n\n  TH_TENSOR_DIM_APPLY2(real, tensor, real, hist, dimension, long i;\n                        for(i = 0; i < tensor_size; i++)\n                        {\n                          if(tensor_data[i*tensor_stride] >= minval && tensor_data[i*tensor_stride] <= maxval) {\n                            const int bin = (int)((tensor_data[i*tensor_stride]-minval) / (maxval-minval) * nbins);\n                            hist_data[THMin(bin, nbins-1)] += 1;\n                          }\n                        }\n  );\n}\n\n#undef 
TH_MATH_NAME\n#endif /* floating point only part */\n#undef IS_NONZERO\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THTensorMath.h",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THTensorMath.h\"\n#else\n\nTH_API void THTensor_(fill)(THTensor *r_, real value);\nTH_API void THTensor_(zero)(THTensor *r_);\n\nTH_API void THTensor_(maskedFill)(THTensor *tensor, THByteTensor *mask, real value);\nTH_API void THTensor_(maskedCopy)(THTensor *tensor, THByteTensor *mask, THTensor* src);\nTH_API void THTensor_(maskedSelect)(THTensor *tensor, THTensor* src, THByteTensor *mask);\n\nTH_API void THTensor_(nonzero)(THLongTensor *subscript, THTensor *tensor);\n\nTH_API void THTensor_(indexSelect)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index);\nTH_API void THTensor_(indexCopy)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src);\nTH_API void THTensor_(indexAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src);\nTH_API void THTensor_(indexFill)(THTensor *tensor, int dim, THLongTensor *index, real val);\n\nTH_API void THTensor_(gather)(THTensor *tensor, THTensor *src, int dim, THLongTensor *index);\nTH_API void THTensor_(scatter)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src);\nTH_API void THTensor_(scatterAdd)(THTensor *tensor, int dim, THLongTensor *index, THTensor *src);\nTH_API void THTensor_(scatterFill)(THTensor *tensor, int dim, THLongTensor *index, real val);\n\nTH_API accreal THTensor_(dot)(THTensor *t, THTensor *src);\n\nTH_API real THTensor_(minall)(THTensor *t);\nTH_API real THTensor_(maxall)(THTensor *t);\nTH_API real THTensor_(medianall)(THTensor *t);\nTH_API accreal THTensor_(sumall)(THTensor *t);\nTH_API accreal THTensor_(prodall)(THTensor *t);\n\nTH_API void THTensor_(neg)(THTensor *self, THTensor *src);\nTH_API void THTensor_(cinv)(THTensor *self, THTensor *src);\n\nTH_API void THTensor_(add)(THTensor *r_, THTensor *t, real value);\nTH_API void THTensor_(sub)(THTensor *self, THTensor *src, real value);\nTH_API void THTensor_(mul)(THTensor *r_, THTensor *t, real value);\nTH_API void THTensor_(div)(THTensor *r_, 
THTensor *t, real value);\nTH_API void THTensor_(lshift)(THTensor *r_, THTensor *t, real value);\nTH_API void THTensor_(rshift)(THTensor *r_, THTensor *t, real value);\nTH_API void THTensor_(fmod)(THTensor *r_, THTensor *t, real value);\nTH_API void THTensor_(remainder)(THTensor *r_, THTensor *t, real value);\nTH_API void THTensor_(clamp)(THTensor *r_, THTensor *t, real min_value, real max_value);\nTH_API void THTensor_(bitand)(THTensor *r_, THTensor *t, real value);\nTH_API void THTensor_(bitor)(THTensor *r_, THTensor *t, real value);\nTH_API void THTensor_(bitxor)(THTensor *r_, THTensor *t, real value);\n\nTH_API void THTensor_(cadd)(THTensor *r_, THTensor *t, real value, THTensor *src);\nTH_API void THTensor_(csub)(THTensor *self, THTensor *src1, real value, THTensor *src2);\nTH_API void THTensor_(cmul)(THTensor *r_, THTensor *t, THTensor *src);\nTH_API void THTensor_(cpow)(THTensor *r_, THTensor *t, THTensor *src);\nTH_API void THTensor_(cdiv)(THTensor *r_, THTensor *t, THTensor *src);\nTH_API void THTensor_(clshift)(THTensor *r_, THTensor *t, THTensor *src);\nTH_API void THTensor_(crshift)(THTensor *r_, THTensor *t, THTensor *src);\nTH_API void THTensor_(cfmod)(THTensor *r_, THTensor *t, THTensor *src);\nTH_API void THTensor_(cremainder)(THTensor *r_, THTensor *t, THTensor *src);\nTH_API void THTensor_(cbitand)(THTensor *r_, THTensor *t, THTensor *src);\nTH_API void THTensor_(cbitor)(THTensor *r_, THTensor *t, THTensor *src);\nTH_API void THTensor_(cbitxor)(THTensor *r_, THTensor *t, THTensor *src);\n\nTH_API void THTensor_(addcmul)(THTensor *r_, THTensor *t, real value, THTensor *src1, THTensor *src2);\nTH_API void THTensor_(addcdiv)(THTensor *r_, THTensor *t, real value, THTensor *src1, THTensor *src2);\n\nTH_API void THTensor_(addmv)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *mat,  THTensor *vec);\nTH_API void THTensor_(addmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *mat1, THTensor *mat2);\nTH_API void 
THTensor_(addr)(THTensor *r_,  real beta, THTensor *t, real alpha, THTensor *vec1, THTensor *vec2);\n\nTH_API void THTensor_(addbmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2);\nTH_API void THTensor_(baddbmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2);\n\nTH_API void THTensor_(match)(THTensor *r_, THTensor *m1, THTensor *m2, real gain);\n\nTH_API ptrdiff_t THTensor_(numel)(THTensor *t);\nTH_API void THTensor_(max)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim);\nTH_API void THTensor_(min)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim);\nTH_API void THTensor_(kthvalue)(THTensor *values_, THLongTensor *indices_, THTensor *t, long k, int dimension, int keepdim);\nTH_API void THTensor_(mode)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim);\nTH_API void THTensor_(median)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension, int keepdim);\nTH_API void THTensor_(sum)(THTensor *r_, THTensor *t, int dimension, int keepdim);\nTH_API void THTensor_(prod)(THTensor *r_, THTensor *t, int dimension, int keepdim);\nTH_API void THTensor_(cumsum)(THTensor *r_, THTensor *t, int dimension);\nTH_API void THTensor_(cumprod)(THTensor *r_, THTensor *t, int dimension);\nTH_API void THTensor_(sign)(THTensor *r_, THTensor *t);\nTH_API accreal THTensor_(trace)(THTensor *t);\nTH_API void THTensor_(cross)(THTensor *r_, THTensor *a, THTensor *b, int dimension);\n\nTH_API void THTensor_(cmax)(THTensor *r, THTensor *t, THTensor *src);\nTH_API void THTensor_(cmin)(THTensor *r, THTensor *t, THTensor *src);\nTH_API void THTensor_(cmaxValue)(THTensor *r, THTensor *t, real value);\nTH_API void THTensor_(cminValue)(THTensor *r, THTensor *t, real value);\n\nTH_API void THTensor_(zeros)(THTensor *r_, THLongStorage *size);\nTH_API void THTensor_(zerosLike)(THTensor *r_, THTensor *input);\nTH_API 
void THTensor_(ones)(THTensor *r_, THLongStorage *size);\nTH_API void THTensor_(onesLike)(THTensor *r_, THTensor *input);\nTH_API void THTensor_(diag)(THTensor *r_, THTensor *t, int k);\nTH_API void THTensor_(eye)(THTensor *r_, long n, long m);\nTH_API void THTensor_(arange)(THTensor *r_, accreal xmin, accreal xmax, accreal step);\nTH_API void THTensor_(range)(THTensor *r_, accreal xmin, accreal xmax, accreal step);\nTH_API void THTensor_(randperm)(THTensor *r_, THGenerator *_generator, long n);\n\nTH_API void THTensor_(reshape)(THTensor *r_, THTensor *t, THLongStorage *size);\nTH_API void THTensor_(sort)(THTensor *rt_, THLongTensor *ri_, THTensor *t, int dimension, int descendingOrder);\nTH_API void THTensor_(topk)(THTensor *rt_, THLongTensor *ri_, THTensor *t, long k, int dim, int dir, int sorted);\nTH_API void THTensor_(tril)(THTensor *r_, THTensor *t, long k);\nTH_API void THTensor_(triu)(THTensor *r_, THTensor *t, long k);\nTH_API void THTensor_(cat)(THTensor *r_, THTensor *ta, THTensor *tb, int dimension);\nTH_API void THTensor_(catArray)(THTensor *result, THTensor **inputs, int numInputs, int dimension);\n\nTH_API int THTensor_(equal)(THTensor *ta, THTensor *tb);\n\nTH_API void THTensor_(ltValue)(THByteTensor *r_, THTensor* t, real value);\nTH_API void THTensor_(leValue)(THByteTensor *r_, THTensor* t, real value);\nTH_API void THTensor_(gtValue)(THByteTensor *r_, THTensor* t, real value);\nTH_API void THTensor_(geValue)(THByteTensor *r_, THTensor* t, real value);\nTH_API void THTensor_(neValue)(THByteTensor *r_, THTensor* t, real value);\nTH_API void THTensor_(eqValue)(THByteTensor *r_, THTensor* t, real value);\n\nTH_API void THTensor_(ltValueT)(THTensor *r_, THTensor* t, real value);\nTH_API void THTensor_(leValueT)(THTensor *r_, THTensor* t, real value);\nTH_API void THTensor_(gtValueT)(THTensor *r_, THTensor* t, real value);\nTH_API void THTensor_(geValueT)(THTensor *r_, THTensor* t, real value);\nTH_API void THTensor_(neValueT)(THTensor *r_, THTensor* 
t, real value);\nTH_API void THTensor_(eqValueT)(THTensor *r_, THTensor* t, real value);\n\nTH_API void THTensor_(ltTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb);\nTH_API void THTensor_(leTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb);\nTH_API void THTensor_(gtTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb);\nTH_API void THTensor_(geTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb);\nTH_API void THTensor_(neTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb);\nTH_API void THTensor_(eqTensor)(THByteTensor *r_, THTensor *ta, THTensor *tb);\n\nTH_API void THTensor_(ltTensorT)(THTensor *r_, THTensor *ta, THTensor *tb);\nTH_API void THTensor_(leTensorT)(THTensor *r_, THTensor *ta, THTensor *tb);\nTH_API void THTensor_(gtTensorT)(THTensor *r_, THTensor *ta, THTensor *tb);\nTH_API void THTensor_(geTensorT)(THTensor *r_, THTensor *ta, THTensor *tb);\nTH_API void THTensor_(neTensorT)(THTensor *r_, THTensor *ta, THTensor *tb);\nTH_API void THTensor_(eqTensorT)(THTensor *r_, THTensor *ta, THTensor *tb);\n\n#if defined(TH_REAL_IS_SHORT) || defined(TH_REAL_IS_INT) || defined(TH_REAL_IS_LONG)\nTH_API void THTensor_(abs)(THTensor *r_, THTensor *t);\n#endif\n\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)\n\nTH_API void THTensor_(sigmoid)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(log)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(lgamma)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(log1p)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(exp)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(cos)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(acos)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(cosh)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(sin)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(asin)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(sinh)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(tan)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(atan)(THTensor *r_, THTensor 
*t);\nTH_API void THTensor_(atan2)(THTensor *r_, THTensor *tx, THTensor *ty);\nTH_API void THTensor_(tanh)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(pow)(THTensor *r_, THTensor *t, real value);\nTH_API void THTensor_(tpow)(THTensor *r_, real value, THTensor *t);\nTH_API void THTensor_(sqrt)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(rsqrt)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(ceil)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(floor)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(round)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(abs)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(trunc)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(frac)(THTensor *r_, THTensor *t);\nTH_API void THTensor_(lerp)(THTensor *r_, THTensor *a, THTensor *b, real weight);\n\nTH_API void THTensor_(mean)(THTensor *r_, THTensor *t, int dimension, int keepdim);\nTH_API void THTensor_(std)(THTensor *r_, THTensor *t, int dimension, int biased, int keepdim);\nTH_API void THTensor_(var)(THTensor *r_, THTensor *t, int dimension, int biased, int keepdim);\nTH_API void THTensor_(norm)(THTensor *r_, THTensor *t, real value, int dimension, int keepdim);\nTH_API void THTensor_(renorm)(THTensor *r_, THTensor *t, real value, int dimension, real maxnorm);\nTH_API accreal THTensor_(dist)(THTensor *a, THTensor *b, real value);\nTH_API void THTensor_(histc)(THTensor *hist, THTensor *tensor, long nbins, real minvalue, real maxvalue);\nTH_API void THTensor_(bhistc)(THTensor *hist, THTensor *tensor, long nbins, real minvalue, real maxvalue);\n\nTH_API accreal THTensor_(meanall)(THTensor *self);\nTH_API accreal THTensor_(varall)(THTensor *self, int biased);\nTH_API accreal THTensor_(stdall)(THTensor *self, int biased);\nTH_API accreal THTensor_(normall)(THTensor *t, real value);\n\nTH_API void THTensor_(linspace)(THTensor *r_, real a, real b, long n);\nTH_API void THTensor_(logspace)(THTensor *r_, real a, real b, long n);\nTH_API void 
THTensor_(rand)(THTensor *r_, THGenerator *_generator, THLongStorage *size);\nTH_API void THTensor_(randn)(THTensor *r_, THGenerator *_generator, THLongStorage *size);\n#endif\n\n#if defined(TH_REAL_IS_BYTE)\n\nTH_API int THTensor_(logicalall)(THTensor *self);\nTH_API int THTensor_(logicalany)(THTensor *self);\n\n#endif /* TH_REAL_IS_BYTE */\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THTensorRandom.c",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THTensorRandom.c\"\n#else\n\nvoid THTensor_(random)(THTensor *self, THGenerator *_generator)\n{\n#if defined(TH_REAL_IS_BYTE)\n  TH_TENSOR_APPLY(real, self, *self_data = (unsigned char)(THRandom_random(_generator) % (UCHAR_MAX+1)););\n#elif defined(TH_REAL_IS_CHAR)\n  TH_TENSOR_APPLY(real, self, *self_data = (char)(THRandom_random(_generator) % (CHAR_MAX+1)););\n#elif defined(TH_REAL_IS_SHORT)\n  TH_TENSOR_APPLY(real, self, *self_data = (short)(THRandom_random(_generator) % (SHRT_MAX+1)););\n#elif defined(TH_REAL_IS_INT)\n  TH_TENSOR_APPLY(real, self, *self_data = (int)(THRandom_random(_generator) % (INT_MAX+1UL)););\n#elif defined(TH_REAL_IS_LONG)\n  TH_TENSOR_APPLY(real, self, *self_data = (long)(THRandom_random(_generator) % (LONG_MAX+1UL)););\n#elif defined(TH_REAL_IS_FLOAT)\n  TH_TENSOR_APPLY(real, self, *self_data = (float)(THRandom_random(_generator) % ((1UL << FLT_MANT_DIG)+1)););\n#elif defined(TH_REAL_IS_DOUBLE)\n  TH_TENSOR_APPLY(real, self, *self_data = (double)(THRandom_random(_generator) % ((1ULL << DBL_MANT_DIG)+1)););\n#else\n#error \"Unknown type\"\n#endif\n}\n\nvoid THTensor_(clampedRandom)(THTensor *self, THGenerator *_generator, long min, long max) {\n  THArgCheck(max > min, 2, \"max must be greater than min\");\n  TH_TENSOR_APPLY(real, self, *self_data = (real)((THRandom_random(_generator) % (max - min)) + min);)\n}\n\nvoid THTensor_(cappedRandom)(THTensor *self, THGenerator *_generator, long max) {\n  THArgCheck(max > 0, 1, \"max must be positive\");\n  THTensor_(clampedRandom)(self, _generator, 0, max);\n}\n\nvoid THTensor_(geometric)(THTensor *self, THGenerator *_generator, double p)\n{\n  TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_geometric(_generator, p););\n}\n\nvoid THTensor_(bernoulli)(THTensor *self, THGenerator *_generator, double p)\n{\n  TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_bernoulli(_generator, p););\n}\n\nvoid 
THTensor_(bernoulli_FloatTensor)(THTensor *self, THGenerator *_generator, THFloatTensor *p)\n{\n  TH_TENSOR_APPLY2(real, self, float, p, *self_data = (real)THRandom_bernoulli(_generator, (double)*p_data););\n}\n\nvoid THTensor_(bernoulli_DoubleTensor)(THTensor *self, THGenerator *_generator, THDoubleTensor *p)\n{\n  TH_TENSOR_APPLY2(real, self, double, p, *self_data = (real)THRandom_bernoulli(_generator, (double)*p_data););\n}\n\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)\n\nvoid THTensor_(uniform)(THTensor *self, THGenerator *_generator, double a, double b)\n{\n  TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_uniform(_generator, a, b););\n}\n\nvoid THTensor_(normal)(THTensor *self, THGenerator *_generator, double mean, double stdv)\n{\n  TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_normal(_generator, mean, stdv););\n}\n\nvoid THTensor_(normal_means)(THTensor *self, THGenerator *gen, THTensor *means, double stddev)\n{\n  THTensor_(resizeAs)(self, means);\n  THTensor_(normal)(self, gen, 0, stddev);\n  THTensor_(cadd)(self, self, 1, means);\n}\n\nvoid THTensor_(normal_stddevs)(THTensor *self, THGenerator *gen, double mean, THTensor *stddevs)\n{\n  THTensor_(resizeAs)(self, stddevs);\n  THTensor_(normal)(self, gen, 0, 1);\n  THTensor_(cmul)(self, self, stddevs);\n  THTensor_(add)(self, self, mean);\n}\n\nvoid THTensor_(normal_means_stddevs)(THTensor *self, THGenerator *gen, THTensor *means, THTensor *stddevs)\n{\n  THTensor_(resizeAs)(self, means);\n  THTensor_(normal)(self, gen, 0, 1);\n  THTensor_(cmul)(self, self, stddevs);\n  THTensor_(cadd)(self, self, 1, means);\n}\n\nvoid THTensor_(exponential)(THTensor *self, THGenerator *_generator, double lambda)\n{\n  TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_exponential(_generator, lambda););\n}\n\nvoid THTensor_(cauchy)(THTensor *self, THGenerator *_generator, double median, double sigma)\n{\n  TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_cauchy(_generator, 
median, sigma););\n}\n\nvoid THTensor_(logNormal)(THTensor *self, THGenerator *_generator, double mean, double stdv)\n{\n  TH_TENSOR_APPLY(real, self, *self_data = (real)THRandom_logNormal(_generator, mean, stdv););\n}\n\n\nvoid THTensor_(multinomialAliasSetup)(THTensor *probs, THLongTensor *J, THTensor *q)\n{\n  long inputsize = THTensor_(nElement)(probs);\n  long i = 0;\n  THLongTensor *smaller = THLongTensor_newWithSize1d(inputsize);\n  THLongTensor *larger = THLongTensor_newWithSize1d(inputsize);\n  long small_c = 0;\n  long large_c = 0;\n  THLongTensor_resize1d(J, inputsize);\n  THTensor_(resize1d)(q, inputsize);\n  real *q_data = THTensor_(data)(q);\n  long *J_data = THLongTensor_data(J);\n      \n  for(i = 0; i < inputsize; i++)\n    {\n      THTensor_fastSet1d(J, i, 0L);\n      real val = THTensor_fastGet1d(probs, i);\n      THTensor_fastSet1d(q, i, inputsize*val);\n      \n      if (inputsize * val < 1.0)\n        {\n          THTensor_fastSet1d(smaller, small_c, i);\n          small_c += 1;\n        }\n      else\n        {\n          THTensor_fastSet1d(larger, large_c, i);\n          large_c += 1;\n        }\n    }\n\n  // Loop through and create little binary mixtures that\n  // appropriately allocate the larger outcomes over the\n  // overall uniform mixture.\n  long large, small;\n  while(small_c > 0 && large_c > 0)\n    {\n      large = THTensor_fastGet1d(larger, large_c-1);\n      small = THTensor_fastGet1d(smaller, small_c-1);\n      \n      THTensor_fastSet1d(J, small, large);\n      q_data[large * q->stride[0]] -= 1.0 - THTensor_fastGet1d(q, small);\n\n      if(q_data[large * q->stride[0]] < 1.0)\n        {\n          THTensor_fastSet1d(smaller, small_c-1, large);\n          large_c -= 1;\n        }\n      else\n        {\n          THTensor_fastSet1d(larger, large_c-1, large);\n          small_c -= 1;\n        }\n    }\n\n  real q_min = THTensor_fastGet1d(q, inputsize-1);\n  real q_max = q_min;\n  real q_temp;\n  for(i=0; i < inputsize; i++)\n   
 {\n      q_temp = THTensor_fastGet1d(q, i);\n      if(q_temp < q_min)\n        q_min = q_temp;\n      else if(q_temp > q_max)\n        q_max = q_temp;\n    }\n  THArgCheckWithCleanup((q_min > 0),\n                        THCleanup(THLongTensor_free(smaller); THLongTensor_free(larger);), 2,\n                        \"q_min is less than 0\");\n  \n  if(q_max > 1)\n    {\n      for(i=0; i < inputsize; i++)\n        {\n          q_data[i*q->stride[0]] /= q_max;\n        }\n    }\n  for(i=0; i<inputsize; i++)\n    {\n      // sometimes a large index isn't added to J. \n      // fix it by making the probability 1 so that J isn't indexed.\n      if(J_data[i] <= 0)\n        q_data[i] = 1.0;\n    }\n  THLongTensor_free(smaller);\n  THLongTensor_free(larger);\n}\nvoid THTensor_(multinomialAliasDraw)(THLongTensor *self, THGenerator *_generator, THLongTensor *J, THTensor *q)\n{\n  long K = THLongTensor_nElement(J);\n  long output_nelem = THLongTensor_nElement(self);\n  \n  int i = 0, _mask=0;\n  real _q;\n  long rand_ind, sample_idx, J_sample, kk_sample;\n  for(i=0; i< output_nelem; i++)\n    {\n      rand_ind = (long)THRandom_uniform(_generator, 0, K) ;\n      _q = THTensor_fastGet1d(q, rand_ind);\n\n      _mask = THRandom_bernoulli(_generator, _q);\n      \n      J_sample = THTensor_fastGet1d(J, rand_ind);\n\n      sample_idx = J_sample*(1 -_mask) + (rand_ind+1L) * _mask;\n\n      THTensor_fastSet1d(self, i, sample_idx-1L);\n    }\n}\nvoid THTensor_(multinomial)(THLongTensor *self, THGenerator *_generator, THTensor *prob_dist, int n_sample, int with_replacement)\n{\n  int start_dim = THTensor_(nDimension)(prob_dist);\n  long n_dist;\n  long n_categories;\n  THDoubleTensor* cum_dist;\n  int i,j,k;\n\n  if (start_dim == 1)\n  {\n    THTensor_(resize2d)(prob_dist, 1, THTensor_(size)(prob_dist, 0));\n  }\n\n  n_dist = THTensor_(size)(prob_dist, 0);\n  n_categories = THTensor_(size)(prob_dist, 1);\n\n  THArgCheck(n_sample > 0, 2, \"cannot sample n_sample < 0 samples\");\n\n  if 
(!with_replacement)\n  {\n    THArgCheck((!with_replacement) && (n_sample <= n_categories), 2, \\\n    \"cannot sample n_sample > prob_dist:size(1) samples without replacement\");\n  }\n\n  /* cumulative probability distribution vector */\n  cum_dist = THDoubleTensor_newWithSize1d(n_categories);\n\n  /* will contain multinomial samples (category indices to be returned) */\n  THLongTensor_resize2d(self, n_dist , n_sample);\n\n  for (i=0; i<n_dist; i++)\n  {\n    /* Get normalized cumulative distribution from prob distribution */\n    double sum = 0;\n    for (j=0; j<n_categories; j++)\n    {\n      sum += THStorage_(get)( \\\n        prob_dist->storage, \\\n        prob_dist->storageOffset+i*prob_dist->stride[0]+j*prob_dist->stride[1] \\\n      );\n      THDoubleStorage_set(\n        cum_dist->storage, \\\n        cum_dist->storageOffset+j*cum_dist->stride[0], \\\n        sum \\\n      );\n    }\n    THArgCheckWithCleanup((sum > 0), THCleanup(THDoubleTensor_free(cum_dist);), 2,\n                          \"invalid multinomial distribution (sum of probabilities <= 0)\");\n    /* normalize cumulative probability distribution so that last val is 1\n    i.e. 
doesn't assume original prob_dist row sums to one */\n    if ( (sum > 0) || ( ( sum < 1.00001) && (sum > 0.99999) ) )\n    {\n      for (j=0; j<n_categories; j++)\n      {\n        THDoubleTensor_data(cum_dist)[j*cum_dist->stride[0]] /= sum;\n      }\n    }\n\n    for (j=0; j<n_sample; j++)\n    {\n      /* sample a probability mass from a uniform distribution */\n      double uniform_sample = THRandom_uniform(_generator, 0, 1);\n      /* Do a binary search for the slot in which the prob falls\n      ie cum_dist[row][slot-1] < uniform_sample <= cum_dist[row][slot] */\n      int left_pointer = 0;\n      int right_pointer = n_categories;\n      int mid_pointer;\n      double cum_prob;\n      int sample_idx;\n      /* Make sure the last cumulative distribution bucket sums to 1 */\n      THDoubleTensor_data(cum_dist)[(n_categories-1)*cum_dist->stride[0]] = 1;\n\n      while(right_pointer - left_pointer > 0)\n      {\n          mid_pointer = left_pointer + (right_pointer - left_pointer) / 2;\n          cum_prob = THDoubleStorage_get( \\\n            cum_dist->storage, \\\n            cum_dist->storageOffset+mid_pointer*cum_dist->stride[0] \\\n          );\n          if (cum_prob < uniform_sample)\n          {\n            left_pointer = mid_pointer + 1;\n          }\n          else\n          {\n            right_pointer = mid_pointer;\n          }\n      }\n      sample_idx = left_pointer;\n\n       /* store in result tensor (will be incremented for lua compat by wrapper) */\n      THLongStorage_set( \\\n        self->storage, \\\n        self->storageOffset+i*self->stride[0]+j*self->stride[1], \\\n        sample_idx \\\n      );\n\n      /* Once a sample is drawn, it cannot be drawn again. 
ie sample without replacement */\n      if (!with_replacement)\n      {\n        /* update cumulative distribution so that sample cannot be drawn again */\n        double diff;\n        double new_val = 0;\n        double sum;\n\n        if (sample_idx != 0)\n        {\n          new_val = THDoubleStorage_get( \\\n            cum_dist->storage, \\\n            cum_dist->storageOffset+(sample_idx-1)*cum_dist->stride[0] \\\n          );\n        }\n        /* marginal cumulative mass (i.e. original probability) of sample */\n        diff = THDoubleStorage_get( \\\n          cum_dist->storage, \\\n          cum_dist->storageOffset+sample_idx*cum_dist->stride[0] \\\n        ) - new_val;\n        /* new sum of marginals is not one anymore... */\n        sum = 1.0 - diff;\n        for (k=0; k<n_categories; k++)\n        {\n          new_val = THDoubleStorage_get( \\\n            cum_dist->storage, \\\n            cum_dist->storageOffset+k*cum_dist->stride[0] \\\n          );\n          if (k >= sample_idx)\n          {\n            /* remove sampled probability mass from later cumulative probabilities */\n            new_val -= diff;\n          }\n          /* make total marginals sum to one */\n          new_val /= sum;\n          THDoubleStorage_set( \\\n            cum_dist->storage, \\\n            cum_dist->storageOffset+k*cum_dist->stride[0], \\\n            new_val \\\n          );\n        }\n      }\n    }\n  }\n\n  THDoubleTensor_free(cum_dist);\n\n  if (start_dim == 1)\n  {\n    THLongTensor_resize1d(self, n_sample);\n    THTensor_(resize1d)(prob_dist, n_categories);\n  }\n}\n\n#endif\n\n#if defined(TH_REAL_IS_BYTE)\nvoid THTensor_(getRNGState)(THGenerator *_generator, THTensor *self)\n{\n  static const size_t size = sizeof(THGenerator);\n  THGenerator *rng_state;\n  THTensor_(resize1d)(self, size);\n  THArgCheck(THTensor_(nElement)(self) == size, 1, \"RNG state is wrong size\");\n  THArgCheck(THTensor_(isContiguous)(self), 1, \"RNG state needs to be 
contiguous\");\n  rng_state = (THGenerator *)THTensor_(data)(self);\n  THGenerator_copy(rng_state, _generator);\n}\n\nvoid THTensor_(setRNGState)(THGenerator *_generator, THTensor *self)\n{\n  static const size_t size = sizeof(THGenerator);\n  THGenerator *rng_state;\n  THArgCheck(THTensor_(nElement)(self) == size, 1, \"RNG state is wrong size\");\n  THArgCheck(THTensor_(isContiguous)(self), 1, \"RNG state needs to be contiguous\");\n  rng_state = (THGenerator *)THTensor_(data)(self);\n  THArgCheck(THGenerator_isValid(rng_state), 1, \"Invalid RNG state\");\n  THGenerator_copy(_generator, rng_state);\n}\n#endif\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THTensorRandom.h",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THTensorRandom.h\"\n#else\n\nTH_API void THTensor_(random)(THTensor *self, THGenerator *_generator);\nTH_API void THTensor_(clampedRandom)(THTensor *self, THGenerator *_generator, long min, long max);\nTH_API void THTensor_(cappedRandom)(THTensor *self, THGenerator *_generator, long max);\nTH_API void THTensor_(geometric)(THTensor *self, THGenerator *_generator, double p);\nTH_API void THTensor_(bernoulli)(THTensor *self, THGenerator *_generator, double p);\nTH_API void THTensor_(bernoulli_FloatTensor)(THTensor *self, THGenerator *_generator, THFloatTensor *p);\nTH_API void THTensor_(bernoulli_DoubleTensor)(THTensor *self, THGenerator *_generator, THDoubleTensor *p);\n\n#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)\nTH_API void THTensor_(uniform)(THTensor *self, THGenerator *_generator, double a, double b);\nTH_API void THTensor_(normal)(THTensor *self, THGenerator *_generator, double mean, double stdv);\nTH_API void THTensor_(normal_means)(THTensor *self, THGenerator *gen, THTensor *means, double stddev);\nTH_API void THTensor_(normal_stddevs)(THTensor *self, THGenerator *gen, double mean, THTensor *stddevs);\nTH_API void THTensor_(normal_means_stddevs)(THTensor *self, THGenerator *gen, THTensor *means, THTensor *stddevs);\nTH_API void THTensor_(exponential)(THTensor *self, THGenerator *_generator, double lambda);\nTH_API void THTensor_(cauchy)(THTensor *self, THGenerator *_generator, double median, double sigma);\nTH_API void THTensor_(logNormal)(THTensor *self, THGenerator *_generator, double mean, double stdv);\nTH_API void THTensor_(multinomial)(THLongTensor *self, THGenerator *_generator, THTensor *prob_dist, int n_sample, int with_replacement);\nTH_API void THTensor_(multinomialAliasSetup)(THTensor *prob_dist, THLongTensor *J, THTensor *q);\nTH_API void THTensor_(multinomialAliasDraw)(THLongTensor *self, THGenerator *_generator, THLongTensor *J, THTensor 
*q);\n#endif\n\n#if defined(TH_REAL_IS_BYTE)\nTH_API void THTensor_(getRNGState)(THGenerator *_generator, THTensor *self);\nTH_API void THTensor_(setRNGState)(THGenerator *_generator, THTensor *self);\n#endif\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THVector.h",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THVector.h\"\n#else\n\nTH_API void THVector_(fill)(real *x, const real c, const ptrdiff_t n);\nTH_API void THVector_(cadd)(real *z, const real *x, const real *y, const real c, const ptrdiff_t n);\nTH_API void THVector_(adds)(real *y, const real *x, const real c, const ptrdiff_t n);\nTH_API void THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n);\nTH_API void THVector_(muls)(real *y, const real *x, const real c, const ptrdiff_t n);\nTH_API void THVector_(cdiv)(real *z, const real *x, const real *y, const ptrdiff_t n);\nTH_API void THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n);\nTH_API void THVector_(copy)(real *y, const real *x, const ptrdiff_t n);\n\n/* Initialize the dispatch pointers */\nTH_API void THVector_(vectorDispatchInit)(void);\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THVectorDefault.c",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THVectorDefault.c\"\n#else\n\nvoid THVector_(copy_DEFAULT)(real *x, const real *y, const ptrdiff_t n) {\n  ptrdiff_t i = 0;\n\n  for(; i <n-4; i+=4)\n  {\n    x[i] = y[i];\n    x[i+1] = y[i+1];\n    x[i+2] = y[i+2];\n    x[i+3] = y[i+3];\n  }\n\n  for(; i < n; i++)\n    x[i] = y[i];\n}\n\nvoid THVector_(fill_DEFAULT)(real *x, const real c, const ptrdiff_t n) {\n  ptrdiff_t i = 0;\n\n  for(; i <n-4; i+=4)\n  {\n    x[i] = c;\n    x[i+1] = c;\n    x[i+2] = c;\n    x[i+3] = c;\n  }\n\n  for(; i < n; i++)\n    x[i] = c;\n}\n\nvoid THVector_(cadd_DEFAULT)(real *z, const real *x, const real *y, const real c, const ptrdiff_t n)\n{\n  ptrdiff_t i = 0;\n\n  for(; i<n-4; i+=4)\n  {\n    z[i] = x[i] + c * y[i];\n    z[i+1] = x[i+1] + c * y[i+1];\n    z[i+2] = x[i+2] + c * y[i+2];\n    z[i+3] = x[i+3] + c * y[i+3];\n  }\n\n  for(; i<n; i++)\n    z[i] = x[i] + c * y[i];\n}\n\nvoid THVector_(adds_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)\n{\n  ptrdiff_t i = 0;\n\n  for(; i<n-4; i+=4)\n  {\n    y[i] = x[i] + c;\n    y[i+1] = x[i+1] + c;\n    y[i+2] = x[i+2] + c;\n    y[i+3] = x[i+3] + c;\n  }\n\n  for(; i<n; i++)\n    y[i] = x[i] + c;\n}\n\nvoid THVector_(cmul_DEFAULT)(real *z, const real *x, const real *y, const ptrdiff_t n)\n{\n  ptrdiff_t i = 0;\n\n  for(; i <n-4; i+=4)\n  {\n    z[i] = x[i] * y[i];\n    z[i+1] = x[i+1] * y[i+1];\n    z[i+2] = x[i+2] * y[i+2];\n    z[i+3] = x[i+3] * y[i+3];\n  }\n\n  for(; i < n; i++)\n    z[i] = x[i] * y[i];\n}\n\nvoid THVector_(muls_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)\n{\n  ptrdiff_t i = 0;\n\n  for(; i <n-4; i+=4)\n  {\n    y[i] = x[i] * c;\n    y[i+1] = x[i+1] * c;\n    y[i+2] = x[i+2] * c;\n    y[i+3] = x[i+3] * c;\n  }\n\n  for(; i < n; i++)\n    y[i] = x[i] * c;\n}\n\nvoid THVector_(cdiv_DEFAULT)(real *z, const real *x, const real *y, const ptrdiff_t n)\n{\n  ptrdiff_t i = 0;\n\n  for(; i<n-4; i+=4)\n  {\n    
z[i] = x[i] / y[i];\n    z[i+1] = x[i+1] / y[i+1];\n    z[i+2] = x[i+2] / y[i+2];\n    z[i+3] = x[i+3] / y[i+3];\n  }\n\n  for(; i < n; i++)\n    z[i] = x[i] / y[i];\n}\n\nvoid THVector_(divs_DEFAULT)(real *y, const real *x, const real c, const ptrdiff_t n)\n{\n  ptrdiff_t i = 0;\n\n  for(; i<n-4; i+=4)\n  {\n    y[i] = x[i] / c;\n    y[i+1] = x[i+1] / c;\n    y[i+2] = x[i+2] / c;\n    y[i+3] = x[i+3] / c;\n  }\n\n  for(; i < n; i++)\n    y[i] = x[i] / c;\n}\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/THVectorDispatch.c",
    "content": "#ifndef TH_GENERIC_FILE\n#define TH_GENERIC_FILE \"generic/THVectorDispatch.c\"\n#else\n\n/* For now there are only SIMD implementations for FLOAT and DOUBLE.\n * Hopefully in the future this can be made totally generic (e.g, there are SIMD implementations\n * for a lot of functions */\n/* Each function with multiple implementations has:\n * 1. A DISPATCHPTR which will be initialized to point to the best available implementation for the host\n * 2. A DISPATCHTABLE which holds pointers to each implementation of a function, and a value indicating\n *    which SIMD extension a given implementation uses\n * 3. A dispatch stub, which is what is actually called by clients, that simply wraps the dispatch pointer.\n */\n\nstatic void (*THVector_(fill_DISPATCHPTR))(real *, const real, const ptrdiff_t) = &THVector_(fill_DEFAULT);\nstatic FunctionDescription THVector_(fill_DISPATCHTABLE)[] = {\n  #if defined(__NEON__)\n    #if defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(fill_NEON), SIMDExtension_NEON),\n    #endif\n  #endif\n\n  #if defined(__PPC64__)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(fill_VSX), SIMDExtension_VSX),\n    #endif\n  #endif\n\n  #if defined(USE_AVX)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(fill_AVX), SIMDExtension_AVX),\n    #endif\n  #endif\n\n  #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \\\n          || defined(USE_SSE4_1) || defined(USE_SSE4_2)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(fill_SSE), SIMDExtension_SSE),\n    #endif\n  #endif\n  FUNCTION_IMPL(THVector_(fill_DEFAULT), SIMDExtension_DEFAULT)\n};\nvoid THVector_(fill)(real *x, const real c, const ptrdiff_t n) {\n  THVector_(fill_DISPATCHPTR)(x, c, n);\n}\n\nstatic void (*THVector_(cadd_DISPATCHPTR))(real *, const real *, const real *, const real, const ptrdiff_t) = 
&THVector_(cadd_DEFAULT);\nstatic FunctionDescription THVector_(cadd_DISPATCHTABLE)[] = {\n  #if defined(__NEON__)\n    #if defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(cadd_NEON), SIMDExtension_NEON),\n    #endif\n  #endif\n\n  #if defined(USE_AVX2)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(cadd_AVX2), SIMDExtension_AVX2),\n    #endif\n  #endif\n\n  #if defined(USE_AVX)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(cadd_AVX), SIMDExtension_AVX),\n    #endif\n  #endif\n\n  #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \\\n          || defined(USE_SSE4_1) || defined(USE_SSE4_2)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(cadd_SSE), SIMDExtension_SSE),\n    #endif\n  #endif\n\n  FUNCTION_IMPL(THVector_(cadd_DEFAULT), SIMDExtension_DEFAULT)\n};\nvoid THVector_(cadd)(real *z, const real *x, const real *y, const real c, const ptrdiff_t n) {\n  THVector_(cadd_DISPATCHPTR)(z, x, y, c, n);\n}\n\nstatic void (*THVector_(adds_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(adds_DEFAULT);\nstatic FunctionDescription THVector_(adds_DISPATCHTABLE)[] = {\n  #if defined(__NEON__)\n    #if defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(adds_NEON), SIMDExtension_NEON),\n    #endif\n  #endif\n\n  #if defined(__PPC64__)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(adds_VSX), SIMDExtension_VSX),\n    #endif\n  #endif\n\n  #if defined(USE_AVX)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(adds_AVX), SIMDExtension_AVX),\n    #endif\n  #endif\n\n  #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \\\n          || defined(USE_SSE4_1) || defined(USE_SSE4_2)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      
FUNCTION_IMPL(THVector_(adds_SSE), SIMDExtension_SSE),\n    #endif\n  #endif\n\n  FUNCTION_IMPL(THVector_(adds_DEFAULT), SIMDExtension_DEFAULT)\n};\n// Dispatch stubs that just call the pointers\nTH_API void THVector_(adds)(real *r_, const real *t, const real value, const ptrdiff_t n) {\n  THVector_(adds_DISPATCHPTR)(r_, t, value, n);\n}\n\nstatic void (*THVector_(cmul_DISPATCHPTR))(real *, const real *, const real *, const ptrdiff_t) = &THVector_(cmul_DEFAULT);\nstatic FunctionDescription THVector_(cmul_DISPATCHTABLE)[] = {\n  #if defined(__NEON__)\n    #if defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(cmul_NEON), SIMDExtension_NEON),\n    #endif\n  #endif\n\n  #if defined(USE_AVX)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(cmul_AVX), SIMDExtension_AVX),\n    #endif\n  #endif\n\n  #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \\\n          || defined(USE_SSE4_1) || defined(USE_SSE4_2)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(cmul_SSE), SIMDExtension_SSE),\n    #endif\n  #endif\n\n  FUNCTION_IMPL(THVector_(cmul_DEFAULT), SIMDExtension_DEFAULT)\n};\nvoid THVector_(cmul)(real *z, const real *x, const real *y, const ptrdiff_t n) {\n  THVector_(cmul_DISPATCHPTR)(z, x, y, n);\n}\n\nstatic void (*THVector_(muls_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(muls_DEFAULT);\nstatic FunctionDescription THVector_(muls_DISPATCHTABLE)[] = {\n  #if defined(__NEON__)\n    #if defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(muls_NEON), SIMDExtension_NEON),\n    #endif\n  #endif\n\n  #if defined(__PPC64__)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(muls_VSX), SIMDExtension_VSX),\n    #endif\n  #endif\n\n  #if defined(USE_AVX)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(muls_AVX), SIMDExtension_AVX),\n   
 #endif\n  #endif\n\n  #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \\\n          || defined(USE_SSE4_1) || defined(USE_SSE4_2)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(muls_SSE), SIMDExtension_SSE),\n    #endif\n  #endif\n\n  FUNCTION_IMPL(THVector_(muls_DEFAULT), SIMDExtension_DEFAULT)\n};\nvoid THVector_(muls)(real *y, const real *x, const real c, const ptrdiff_t n) {\n  THVector_(muls_DISPATCHPTR)(y, x, c, n);\n}\n\nstatic void (*THVector_(cdiv_DISPATCHPTR))(real *, const real *, const real *, const ptrdiff_t) = &THVector_(cdiv_DEFAULT);\nstatic FunctionDescription THVector_(cdiv_DISPATCHTABLE)[] = {\n  #if defined(__NEON__)\n    #if defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(cdiv_NEON), SIMDExtension_NEON),\n    #endif\n  #endif\n\n  #if defined(USE_AVX)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(cdiv_AVX), SIMDExtension_AVX),\n    #endif\n  #endif\n\n  #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \\\n          || defined(USE_SSE4_1) || defined(USE_SSE4_2)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(cdiv_SSE), SIMDExtension_SSE),\n    #endif\n  #endif\n\n  FUNCTION_IMPL(THVector_(cdiv_DEFAULT), SIMDExtension_DEFAULT)\n};\nvoid THVector_(cdiv)(real *z, const real *x, const real *y, const ptrdiff_t n) {\n  THVector_(cdiv_DISPATCHPTR)(z, x, y, n);\n}\n\nstatic void (*THVector_(divs_DISPATCHPTR))(real *, const real *, const real, const ptrdiff_t) = &THVector_(divs_DEFAULT);\nstatic FunctionDescription THVector_(divs_DISPATCHTABLE)[] = {\n  #if defined(__NEON__)\n    #if defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(divs_NEON), SIMDExtension_NEON),\n    #endif\n  #endif\n\n  #if defined(USE_AVX)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(divs_AVX), SIMDExtension_AVX),\n    #endif\n  
#endif\n\n  #if defined(USE_SSE2) || defined(USE_SSE3) || defined(USE_SSSE3) \\\n          || defined(USE_SSE4_1) || defined(USE_SSE4_2)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(divs_SSE), SIMDExtension_SSE),\n    #endif\n  #endif\n\n  FUNCTION_IMPL(THVector_(divs_DEFAULT), SIMDExtension_DEFAULT)\n};\nvoid THVector_(divs)(real *y, const real *x, const real c, const ptrdiff_t n) {\n  THVector_(divs_DISPATCHPTR)(y, x, c, n);\n}\n\nstatic void (*THVector_(copy_DISPATCHPTR))(real *, const real *, const ptrdiff_t) = &THVector_(copy_DEFAULT);\nstatic FunctionDescription THVector_(copy_DISPATCHTABLE)[] = {\n  #if defined(USE_AVX)\n    #if defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT)\n      FUNCTION_IMPL(THVector_(copy_AVX), SIMDExtension_AVX),\n    #endif\n  #endif\n\n  FUNCTION_IMPL(THVector_(copy_DEFAULT), SIMDExtension_DEFAULT)\n};\nvoid THVector_(copy)(real *y, const real *x, const ptrdiff_t n) {\n  THVector_(copy_DISPATCHPTR)(y, x, n);\n}\n\n/* This needs to be called in order to initialize the dispatch pointers at runtime.\n * This function simply checks what SIMD extensions are available, and then walks the dispatch table\n * to choose the best function.\n * NOTE: As implemented, it will initialize the dispatch pointer to the first supported function.\n *       This means that in the dispatch tables, implementations supporting more recent extensions\n *       need to come first\n */\nvoid THVector_(vectorDispatchInit)(void)\n{\n  uint32_t hostSimdExts = detectHostSIMDExtensions();\n  INIT_DISPATCH_PTR(fill);\n  INIT_DISPATCH_PTR(cadd);\n  INIT_DISPATCH_PTR(adds);\n  INIT_DISPATCH_PTR(cmul);\n  INIT_DISPATCH_PTR(muls);\n  INIT_DISPATCH_PTR(cdiv);\n  INIT_DISPATCH_PTR(divs);\n  INIT_DISPATCH_PTR(copy);\n}\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/simd/common_simd.h",
    "content": "#ifndef COMMON_SIMD_H\n#define COMMON_SIMD_H\n\n/* Weights */\n#define LOAD_WEIGHT(q, simd_type, inst_var) _m ## simd_type ## inst_var(*(q))\n\n#define DECLARE_WEIGHTS(simd_type) \\\n__ ## simd_type weight0; \\\n__ ## simd_type weight1; \\\n__ ## simd_type weight2; \\\n__ ## simd_type weight3; \\\n__ ## simd_type weight4;\n\n#define LOAD_WEIGHTS(k, simd_type, inst_var) \\\nweight0 = LOAD_WEIGHT(weight + 5 * 0 + k, simd_type, inst_var); \\\nweight1 = LOAD_WEIGHT(weight + 5 * 1 + k, simd_type, inst_var); \\\nweight2 = LOAD_WEIGHT(weight + 5 * 2 + k, simd_type, inst_var); \\\nweight3 = LOAD_WEIGHT(weight + 5 * 3 + k, simd_type, inst_var); \\\nweight4 = LOAD_WEIGHT(weight + 5 * 4 + k, simd_type, inst_var);\n\n/* Inputs declare */\n#define DECLARE_INPUT_0(i) \\\nfloat* input0 = image + i; \\\n\n#define DECLARE_INPUT_1() \\\nfloat* input1 = input0 + inputStride; \\\nfloat* input2 = input1 + inputStride; \\\nfloat* input3 = input2 + inputStride; \\\nfloat* input4 = input3 + inputStride;\n\n#define DECLARE_INPUT_2() \\\nDECLARE_INPUT_1() \\\nfloat* input5 = input4 + inputStride;\n\n#define DECLARE_INPUT_4() \\\nDECLARE_INPUT_2() \\\nfloat* input6 = input5 + inputStride; \\\nfloat* input7 = input6 + inputStride;\n\n#define DECLARE_INPUT_5() \\\nDECLARE_INPUT_4() \\\nfloat* input8 = input7 + inputStride;\n\n#define DECLARE_INPUT_6() \\\nDECLARE_INPUT_5() \\\nfloat* input9 = input8 + inputStride;\n\n#define DECLARE_INPUT_7() \\\nDECLARE_INPUT_6() \\\nfloat* inputA = input9 + inputStride;\n\n#define DECLARE_INPUT_8() \\\nDECLARE_INPUT_7() \\\nfloat* inputB = inputA + inputStride;\n\n\n/* Inputs increment */\n#define INC_INPUT_1()\\\ninput0++; \\\ninput1++; \\\ninput2++; \\\ninput3++; \\\ninput4++; \\\n\n#define INC_INPUT_2()\\\nINC_INPUT_1() \\\ninput5++;\n\n#define INC_INPUT_4()\\\nINC_INPUT_2() \\\ninput6++; \\\ninput7++;\n\n#define INC_INPUT_5()\\\nINC_INPUT_4() \\\ninput8++;\n\n#define INC_INPUT_6()\\\nINC_INPUT_5() \\\ninput9++;\n\n#define 
INC_INPUT_7()\\\nINC_INPUT_6() \\\ninputA++;\n\n#define INC_INPUT_8()\\\nINC_INPUT_7() \\\ninputB++;\n\n/* Outputs declare */\n#define DECLARE_OUTPUT_1() \\\nfloat* output0 = output;\n\n#define DECLARE_OUTPUT_2() \\\nDECLARE_OUTPUT_1() \\\nfloat* output1 = output0 + outputStride;\n\n#define DECLARE_OUTPUT_4() \\\nDECLARE_OUTPUT_2() \\\nfloat* output2 = output1 + outputStride; \\\nfloat* output3 = output2 + outputStride;\n\n#define DECLARE_OUTPUT_5() \\\nDECLARE_OUTPUT_4() \\\nfloat* output4 = output3 + outputStride;\n\n#define DECLARE_OUTPUT_6() \\\nDECLARE_OUTPUT_5() \\\nfloat* output5 = output4 + outputStride;\n\n#define DECLARE_OUTPUT_7() \\\nDECLARE_OUTPUT_6() \\\nfloat* output6 = output5 + outputStride;\n\n#define DECLARE_OUTPUT_8() \\\nDECLARE_OUTPUT_7() \\\nfloat* output7 = output6 + outputStride;\n\n/* Outputs increment */\n#define INC_OUTPUT_1(x) \\\noutput0 += x;\n\n#define INC_OUTPUT_2(x) \\\nINC_OUTPUT_1(x) \\\noutput1 += x;\n\n#define INC_OUTPUT_4(x) \\\nINC_OUTPUT_2(x) \\\noutput2 += x; \\\noutput3 += x;\n\n#define INC_OUTPUT_5(x) \\\nINC_OUTPUT_4(x) \\\noutput4 += x;\n\n#define INC_OUTPUT_6(x) \\\nINC_OUTPUT_5(x) \\\noutput5 += x;\n\n#define INC_OUTPUT_7(x) \\\nINC_OUTPUT_6(x) \\\noutput6 += x;\n\n#define INC_OUTPUT_8(x) \\\nINC_OUTPUT_7(x) \\\noutput7 += x;\n\n/* Image declare */\n#define DECLARE_IMAGE_1(simd_type) \\\n__ ## simd_type image0; \\\n__ ## simd_type image1; \\\n__ ## simd_type image2; \\\n__ ## simd_type image3; \\\n__ ## simd_type image4;\n\n#define DECLARE_IMAGE_2(simd_type) \\\nDECLARE_IMAGE_1(simd_type) \\\n__ ## simd_type image5;\n\n#define DECLARE_IMAGE_4(simd_type) \\\nDECLARE_IMAGE_2(simd_type) \\\n__ ## simd_type image6; \\\n__ ## simd_type image7;\n\n#define DECLARE_IMAGE_5(simd_type) \\\nDECLARE_IMAGE_4(simd_type) \\\n__ ## simd_type image8;\n\n#define DECLARE_IMAGE_6(simd_type) \\\nDECLARE_IMAGE_5(simd_type) \\\n__ ## simd_type image9;\n\n#define DECLARE_IMAGE_7(simd_type) \\\nDECLARE_IMAGE_6(simd_type) \\\n__ ## simd_type 
imageA;\n\n#define DECLARE_IMAGE_8(simd_type) \\\nDECLARE_IMAGE_7(simd_type) \\\n__ ## simd_type imageB;\n\n/* Sums declare */\n#define DECLARE_SUM_1(simd_type) \\\n__ ## simd_type sum0;\n\n#define DECLARE_SUM_2(simd_type) \\\nDECLARE_SUM_1(simd_type) \\\n__ ## simd_type sum1;\n\n#define DECLARE_SUM_4(simd_type) \\\nDECLARE_SUM_2(simd_type) \\\n__ ## simd_type sum2; \\\n__ ## simd_type sum3;\n\n#define DECLARE_SUM_5(simd_type) \\\nDECLARE_SUM_4(simd_type) \\\n__ ## simd_type sum4;\n\n#define DECLARE_SUM_6(simd_type) \\\nDECLARE_SUM_5(simd_type) \\\n__ ## simd_type sum5;\n\n#define DECLARE_SUM_7(simd_type) \\\nDECLARE_SUM_6(simd_type) \\\n__ ## simd_type sum6;\n\n#define DECLARE_SUM_8(simd_type) \\\nDECLARE_SUM_7(simd_type) \\\n__ ## simd_type sum7;\n\n/* Sums load */\n#define LOAD_SUM_1(simd_type) \\\nsum0 = _m ## simd_type ## _loadu_ps(output0);\n\n#define LOAD_SUM_2(simd_type) \\\nLOAD_SUM_1(simd_type) \\\nsum1 = _m ## simd_type ## _loadu_ps(output1);\n\n#define LOAD_SUM_4(simd_type) \\\nLOAD_SUM_2(simd_type) \\\nsum2 = _m ## simd_type ## _loadu_ps(output2); \\\nsum3 = _m ## simd_type ## _loadu_ps(output3);\n\n#define LOAD_SUM_5(simd_type) \\\nLOAD_SUM_4(simd_type) \\\nsum4 = _m ## simd_type ## _loadu_ps(output4);\n\n#define LOAD_SUM_6(simd_type) \\\nLOAD_SUM_5(simd_type) \\\nsum5 = _m ## simd_type ## _loadu_ps(output5);\n\n#define LOAD_SUM_7(simd_type) \\\nLOAD_SUM_6(simd_type) \\\nsum6 = _m ## simd_type ## _loadu_ps(output6);\n\n#define LOAD_SUM_8(simd_type) \\\nLOAD_SUM_7(simd_type) \\\nsum7 = _m ## simd_type ## _loadu_ps(output7);\n\n/* Sums store */\n#define STORE_SUM_1(simd_type) \\\n_m ## simd_type ## _storeu_ps(output0, sum0);\n\n#define STORE_SUM_2(simd_type) \\\nSTORE_SUM_1(simd_type) \\\n_m ## simd_type ## _storeu_ps(output1, sum1);\n\n#define STORE_SUM_4(simd_type) \\\nSTORE_SUM_2(simd_type) \\\n_m ## simd_type ## _storeu_ps(output2, sum2); \\\n_m ## simd_type ## _storeu_ps(output3, sum3);\n\n#define STORE_SUM_5(simd_type) \\\nSTORE_SUM_4(simd_type) 
\\\n_m ## simd_type ## _storeu_ps(output4, sum4);\n\n#define STORE_SUM_6(simd_type) \\\nSTORE_SUM_5(simd_type) \\\n_m ## simd_type ## _storeu_ps(output5, sum5);\n\n#define STORE_SUM_7(simd_type) \\\nSTORE_SUM_6(simd_type) \\\n_m ## simd_type ## _storeu_ps(output6, sum6);\n\n#define STORE_SUM_8(simd_type) \\\nSTORE_SUM_7(simd_type) \\\n_m ## simd_type ## _storeu_ps(output7, sum7);\n\n/* Convolution */\n#define CONVOLVE_1ROWS(simd_type) \\\nimage0 = _m ## simd_type ## _loadu_ps(input0); \\\nimage1 = _m ## simd_type ## _loadu_ps(input1); \\\nimage2 = _m ## simd_type ## _loadu_ps(input2); \\\nimage3 = _m ## simd_type ## _loadu_ps(input3); \\\nimage4 = _m ## simd_type ## _loadu_ps(input4); \\\n\\\nsum0 = _m ## simd_type ## _add_ps(sum0, _m ## simd_type ## _mul_ps(weight0, image0)); \\\nsum0 = _m ## simd_type ## _add_ps(sum0, _m ## simd_type ## _mul_ps(weight1, image1)); \\\nsum0 = _m ## simd_type ## _add_ps(sum0, _m ## simd_type ## _mul_ps(weight2, image2)); \\\nsum0 = _m ## simd_type ## _add_ps(sum0, _m ## simd_type ## _mul_ps(weight3, image3)); \\\nsum0 = _m ## simd_type ## _add_ps(sum0, _m ## simd_type ## _mul_ps(weight4, image4));\n\n#define CONVOLVE_2ROWS(simd_type) \\\nCONVOLVE_1ROWS(simd_type) \\\nimage5 = _m ## simd_type ## _loadu_ps(input5); \\\nsum1 = _m ## simd_type ## _add_ps(sum1, _m ## simd_type ## _mul_ps(weight0, image1)); \\\nsum1 = _m ## simd_type ## _add_ps(sum1, _m ## simd_type ## _mul_ps(weight1, image2)); \\\nsum1 = _m ## simd_type ## _add_ps(sum1, _m ## simd_type ## _mul_ps(weight2, image3)); \\\nsum1 = _m ## simd_type ## _add_ps(sum1, _m ## simd_type ## _mul_ps(weight3, image4)); \\\nsum1 = _m ## simd_type ## _add_ps(sum1, _m ## simd_type ## _mul_ps(weight4, image5));\n\n#define CONVOLVE_4ROWS(simd_type) \\\nCONVOLVE_2ROWS(simd_type) \\\nimage6 = _m ## simd_type ## _loadu_ps(input6); \\\nsum2 = _m ## simd_type ## _add_ps(sum2, _m ## simd_type ## _mul_ps(weight0, image2)); \\\nsum2 = _m ## simd_type ## _add_ps(sum2, _m ## simd_type ## 
_mul_ps(weight1, image3)); \\\nsum2 = _m ## simd_type ## _add_ps(sum2, _m ## simd_type ## _mul_ps(weight2, image4)); \\\nsum2 = _m ## simd_type ## _add_ps(sum2, _m ## simd_type ## _mul_ps(weight3, image5)); \\\nsum2 = _m ## simd_type ## _add_ps(sum2, _m ## simd_type ## _mul_ps(weight4, image6)); \\\n\\\nimage7 = _m ## simd_type ## _loadu_ps(input7); \\\nsum3 = _m ## simd_type ## _add_ps(sum3, _m ## simd_type ## _mul_ps(weight0, image3)); \\\nsum3 = _m ## simd_type ## _add_ps(sum3, _m ## simd_type ## _mul_ps(weight1, image4)); \\\nsum3 = _m ## simd_type ## _add_ps(sum3, _m ## simd_type ## _mul_ps(weight2, image5)); \\\nsum3 = _m ## simd_type ## _add_ps(sum3, _m ## simd_type ## _mul_ps(weight3, image6)); \\\nsum3 = _m ## simd_type ## _add_ps(sum3, _m ## simd_type ## _mul_ps(weight4, image7));\n\n#define CONVOLVE_5ROWS(simd_type) \\\nCONVOLVE_4ROWS(simd_type) \\\nimage8 = _m ## simd_type ## _loadu_ps(input8); \\\nsum4 = _m ## simd_type ## _add_ps(sum4, _m ## simd_type ## _mul_ps(weight0, image4)); \\\nsum4 = _m ## simd_type ## _add_ps(sum4, _m ## simd_type ## _mul_ps(weight1, image5)); \\\nsum4 = _m ## simd_type ## _add_ps(sum4, _m ## simd_type ## _mul_ps(weight2, image6)); \\\nsum4 = _m ## simd_type ## _add_ps(sum4, _m ## simd_type ## _mul_ps(weight3, image7)); \\\nsum4 = _m ## simd_type ## _add_ps(sum4, _m ## simd_type ## _mul_ps(weight4, image8));\n\n#define CONVOLVE_6ROWS(simd_type) \\\nCONVOLVE_5ROWS(simd_type) \\\nimage9 = _m ## simd_type ## _loadu_ps(input9); \\\nsum5 = _m ## simd_type ## _add_ps(sum5, _m ## simd_type ## _mul_ps(weight0, image5)); \\\nsum5 = _m ## simd_type ## _add_ps(sum5, _m ## simd_type ## _mul_ps(weight1, image6)); \\\nsum5 = _m ## simd_type ## _add_ps(sum5, _m ## simd_type ## _mul_ps(weight2, image7)); \\\nsum5 = _m ## simd_type ## _add_ps(sum5, _m ## simd_type ## _mul_ps(weight3, image8)); \\\nsum5 = _m ## simd_type ## _add_ps(sum5, _m ## simd_type ## _mul_ps(weight4, image9));\n\n#define CONVOLVE_7ROWS(simd_type) 
\\\nCONVOLVE_6ROWS(simd_type) \\\nimageA = _m ## simd_type ## _loadu_ps(inputA); \\\nsum6 = _m ## simd_type ## _add_ps(sum6, _m ## simd_type ## _mul_ps(weight0, image6)); \\\nsum6 = _m ## simd_type ## _add_ps(sum6, _m ## simd_type ## _mul_ps(weight1, image7)); \\\nsum6 = _m ## simd_type ## _add_ps(sum6, _m ## simd_type ## _mul_ps(weight2, image8)); \\\nsum6 = _m ## simd_type ## _add_ps(sum6, _m ## simd_type ## _mul_ps(weight3, image9)); \\\nsum6 = _m ## simd_type ## _add_ps(sum6, _m ## simd_type ## _mul_ps(weight4, imageA));\n\n#define CONVOLVE_8ROWS(simd_type) \\\nCONVOLVE_7ROWS(simd_type) \\\nimageB = _m ## simd_type ## _loadu_ps(inputB); \\\nsum7 = _m ## simd_type ## _add_ps(sum7, _m ## simd_type ## _mul_ps(weight0, image7)); \\\nsum7 = _m ## simd_type ## _add_ps(sum7, _m ## simd_type ## _mul_ps(weight1, image8)); \\\nsum7 = _m ## simd_type ## _add_ps(sum7, _m ## simd_type ## _mul_ps(weight2, image9)); \\\nsum7 = _m ## simd_type ## _add_ps(sum7, _m ## simd_type ## _mul_ps(weight3, imageA)); \\\nsum7 = _m ## simd_type ## _add_ps(sum7, _m ## simd_type ## _mul_ps(weight4, imageB));\n\n/* Convolution MEGA macro */\n#define DECLARE_SUMX(rows) DECLARE_SUM_ ## rows\n#define LOAD_SUMX(rows) LOAD_SUM_ ## rows\n#define DECLARE_INPUTX(rows) DECLARE_INPUT_ ## rows\n#define DECLARE_IMAGEX(rows) DECLARE_IMAGE_ ## rows\n#define CONVOLVEX(rows) CONVOLVE_ ## rows ## ROWS\n#define INC_INPUTX(rows) INC_INPUT_ ## rows\n#define STORE_SUMX(rows) STORE_SUM_ ## rows\n#define INC_OUTPUTX(rows) INC_OUTPUT_ ## rows\n\n#define CONVOLUTION_LOOP(rows, simd_type, simd_inst_prefex, simd_set, i) \\\nDECLARE_SUMX(rows)(simd_type) \\\nLOAD_SUMX(rows)(simd_inst_prefex) \\\nDECLARE_WEIGHTS(simd_type) \\\nDECLARE_INPUT_0(i) \\\nDECLARE_INPUTX(rows)() \\\nDECLARE_IMAGEX(rows)(simd_type) \\\n\\\nLOAD_WEIGHTS(0, simd_inst_prefex, simd_set) \\\nCONVOLVEX(rows)(simd_inst_prefex) \\\nINC_INPUTX(rows)() \\\n\\\nLOAD_WEIGHTS(1, simd_inst_prefex, simd_set) \\\nCONVOLVEX(rows)(simd_inst_prefex) 
\\\nINC_INPUTX(rows)() \\\n\\\nLOAD_WEIGHTS(2, simd_inst_prefex, simd_set) \\\nCONVOLVEX(rows)(simd_inst_prefex) \\\nINC_INPUTX(rows)() \\\n\\\nLOAD_WEIGHTS(3, simd_inst_prefex, simd_set) \\\nCONVOLVEX(rows)(simd_inst_prefex) \\\nINC_INPUTX(rows)() \\\n\\\nLOAD_WEIGHTS(4, simd_inst_prefex, simd_set) \\\nCONVOLVEX(rows)(simd_inst_prefex) \\\n\\\nSTORE_SUMX(rows)(simd_inst_prefex) \\\n\\\nINC_OUTPUTX(rows)(sizeof(__ ## simd_type) / sizeof(float))\n\n\n#define CONVOLVE_8COLS_XROWS(rows, i) \\\n{ \\\nCONVOLUTION_LOOP(rows, m256, m256, _set1_ps, i) \\\n}\n\n#define CONVOLVE_4COLS_XROWS(rows, i) \\\n{ \\\nCONVOLUTION_LOOP(rows, m128, m, _set_ps1, i) \\\n}\n\n#endif\n"
  },
  {
    "path": "lib/TH/generic/simd/convolve.c",
    "content": "#if defined(__AVX__)\n\n#ifdef _MSC_VER\n#include <intrin.h>\n\nstatic __inline int __get_cpuid (unsigned int __level, unsigned int *__eax,\n                                 unsigned int *__ebx, unsigned int *__ecx,\n                                 unsigned int *__edx) {\n  unsigned int cpui[4];\n  __cpuid(cpui, __level);\n  *__eax = cpui[0]; *__ebx = cpui[1]; *__ecx = cpui[2]; *__edx = cpui[3];\n  return 1;\n}\n\nstatic void xgetbv(unsigned int op, unsigned int* eax, unsigned int* edx) {\n  *eax = 0; *edx = 0;\n  if (op == 0)\n      *eax = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);\n}\n\n#else\n\n#if __i386__\n#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \\\n__asm(\"  pushl  %%ebx\\n\" \\\n\"  cpuid\\n\" \\\n\"  mov    %%ebx,%1\\n\" \\\n\"  popl   %%ebx\" \\\n: \"=a\"(__eax), \"=r\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n: \"0\"(__level))\n#else\n#define __cpuid(__level, __eax, __ebx, __ecx, __edx) \\\n__asm(\"cpuid\" : \"=a\"(__eax), \"=b\" (__ebx), \"=c\"(__ecx), \"=d\"(__edx) \\\n: \"0\"(__level))\n#endif\n\nstatic __inline int __get_cpuid (unsigned int __level, unsigned int *__eax,\n                                 unsigned int *__ebx, unsigned int *__ecx,\n                                 unsigned int *__edx) {\n  __cpuid(__level, *__eax, *__ebx, *__ecx, *__edx);\n  return 1;\n}\n\nstatic void xgetbv(unsigned int op, unsigned int* eax, unsigned int* edx) {\n  __asm__ __volatile__\n  (\".byte 0x0f, 0x01, 0xd0\": \"=a\" (*eax), \"=d\" (*edx) : \"c\" (op) : \"cc\");\n}\n\n#endif\n\nenum ECPUFeature\n{\n  kCPUFeature_SSE = 0x01,\n  kCPUFeature_SSE2 = 0x02,\n  kCPUFeature_SSE3 = 0x04,\n  kCPUFeature_SSE3_S = 0x08,\n  kCPUFeature_SSE4_1 = 0x10,\n  kCPUFeature_SSE4_2 = 0x20,\n  kCPUFeature_AVX = 0x40\n};\n\nstatic unsigned int checkCPUFeatures() {\n  unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;\n  unsigned int features = 0;\n  __get_cpuid(1, &eax, &ebx, &ecx, &edx);\n  if( (edx & (1 << 25)) != 0 ) {\n    features |= kCPUFeature_SSE;\n  }\n  
if( (edx & (1 << 26)) != 0 ) {\n    features |= kCPUFeature_SSE2;\n  }\n  if( (ecx & (1 << 0)) != 0 ) {\n    features |= kCPUFeature_SSE3;\n  }\n  if( (ecx & (1 << 9)) != 0 ) {\n    features |= kCPUFeature_SSE3_S;\n  }\n  if( (ecx & (1 << 19)) != 0 ) {\n    features |= kCPUFeature_SSE4_1;\n  }\n  if( (ecx & (1 << 20)) != 0 ) {\n    features |= kCPUFeature_SSE4_2;\n  }\n  if( (ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0 ) {\n    xgetbv(0, &eax, &edx);\n    if( (eax & 6) == 6 ) {\n      features |= kCPUFeature_AVX;\n    }\n  }\n  return features;\n}\n\n#include <stdio.h>\n\nstatic int haveCPUFeature(unsigned int feature) {\n  static unsigned int sCPUFeatures = 0;\n  static int sDetectedCPUFeatures = 0;\n  if (!sDetectedCPUFeatures) {\n    sDetectedCPUFeatures = 1;\n    sCPUFeatures = checkCPUFeatures();\n    if ((sCPUFeatures & kCPUFeature_AVX) != 0) {\n      printf(\"torch running avx\\n\");\n    } else {\n      printf(\"torch running sse \\n\");\n    }\n  }\n  return (sCPUFeatures & feature) != 0;\n}\n\n#endif\n\nvoid convolve_5x5_sse(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols);\nvoid convolve_5x5_avx(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols);\n\nvoid convolve_5x5(float* output, float* input, float* kernel, long outRows, long outCols, long inCols) {\n#if defined(__AVX__)\n  int avx = haveCPUFeature(kCPUFeature_AVX);\n  if (avx)\n  {\n    convolve_5x5_avx(output, input, kernel, outRows, outCols, outCols, inCols);\n  }\n  else\n#endif\n  {\n    convolve_5x5_sse(output, input, kernel, outRows, outCols, outCols, inCols);\n  }\n}\n"
  },
  {
    "path": "lib/TH/generic/simd/convolve.h",
    "content": "void convolve_5x5(float* output, float* input, float* kernel, long outRows, long outCols, long inCols);"
  },
  {
    "path": "lib/TH/generic/simd/convolve5x5_avx.c",
    "content": "#include <immintrin.h>\n#include \"common_simd.h\"\n\n#define CLEAR_AVX() _mm256_zeroupper()\n\nvoid convolve_5x5_1_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  long i = 0;\n  long alignedCount = count & 0xFFFFFFF8;\n  DECLARE_OUTPUT_1()\n  for (; i < alignedCount; i+=8) {\n    CONVOLVE_8COLS_XROWS(1, i)\n  }\n}\n\nvoid convolve_5x5_2_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  long i = 0;\n  long alignedCount = count & 0xFFFFFFF8;\n  DECLARE_OUTPUT_2()\n  for (; i < alignedCount; i+=8) {\n    CONVOLVE_8COLS_XROWS(2, i)\n  }\n}\n\nvoid convolve_5x5_4_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  long i = 0;\n  long alignedCount = count & 0xFFFFFFF8;\n  DECLARE_OUTPUT_4()\n  for (; i < alignedCount; i+=8) {\n    CONVOLVE_8COLS_XROWS(4, i)\n  }\n}\n\nvoid convolve_5x5_5_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  long i = 0;\n  long alignedCount = count & 0xFFFFFFF8;\n  DECLARE_OUTPUT_5()\n  for (; i < alignedCount; i+=8) {\n    CONVOLVE_8COLS_XROWS(5, i)\n  }\n}\n\nvoid convolve_5x5_6_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  long i = 0;\n  long alignedCount = count & 0xFFFFFFF8;\n  DECLARE_OUTPUT_6()\n  for (; i < alignedCount; i+=8) {\n    CONVOLVE_8COLS_XROWS(6, i)\n  }\n}\n\nvoid convolve_5x5_7_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  long i = 0;\n  long alignedCount = count & 0xFFFFFFF8;\n  DECLARE_OUTPUT_7()\n  for (; i < alignedCount; i+=8) {\n    CONVOLVE_8COLS_XROWS(7, i)\n  }\n}\n\nvoid convolve_5x5_8_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  long i = 0;\n  long alignedCount = count & 0xFFFFFFF8;\n  DECLARE_OUTPUT_8()\n  for (; i < 
alignedCount; i+=8) {\n    CONVOLVE_8COLS_XROWS(8, i)\n  }\n}\n\nvoid convolve_5x5_64x64_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  for(int i = 0; i < 60; i+=6)\n  {\n    DECLARE_OUTPUT_6()\n    CONVOLVE_8COLS_XROWS(6, 0)\n    CONVOLVE_8COLS_XROWS(6, 8)\n    CONVOLVE_8COLS_XROWS(6, 16)\n    CONVOLVE_8COLS_XROWS(6, 24)\n    CONVOLVE_8COLS_XROWS(6, 32)\n    CONVOLVE_8COLS_XROWS(6, 40)\n    CONVOLVE_8COLS_XROWS(6, 48)\n    CONVOLVE_8COLS_XROWS(6, 56)\n    output += outputStride * 6;\n    image += inputStride * 6;\n  }\n  DECLARE_OUTPUT_4()\n  CONVOLVE_8COLS_XROWS(4, 0)\n  CONVOLVE_8COLS_XROWS(4, 8)\n  CONVOLVE_8COLS_XROWS(4, 16)\n  CONVOLVE_8COLS_XROWS(4, 24)\n  CONVOLVE_8COLS_XROWS(4, 32)\n  CONVOLVE_8COLS_XROWS(4, 40)\n  CONVOLVE_8COLS_XROWS(4, 48)\n  CONVOLVE_8COLS_XROWS(4, 56)\n}\n\nvoid convolve_5x5_32x32_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  for(int i = 0; i < 30; i+=6)\n  {\n    DECLARE_OUTPUT_6()\n    CONVOLVE_8COLS_XROWS(6, 0)\n    CONVOLVE_8COLS_XROWS(6, 8)\n    CONVOLVE_8COLS_XROWS(6, 16)\n    CONVOLVE_8COLS_XROWS(6, 24)\n    output += outputStride * 6;\n    image += inputStride * 6;\n  }\n  DECLARE_OUTPUT_2()\n  CONVOLVE_8COLS_XROWS(2, 0)\n  CONVOLVE_8COLS_XROWS(2, 8)\n  CONVOLVE_8COLS_XROWS(2, 16)\n  CONVOLVE_8COLS_XROWS(2, 24)\n}\n\nvoid convolve_5x5_16x16_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  for(int i = 0; i < 12; i+=6)\n  {\n    DECLARE_OUTPUT_6()\n    CONVOLVE_8COLS_XROWS(6, 0)\n    CONVOLVE_8COLS_XROWS(6, 8)\n    output += outputStride * 6;\n    image += inputStride * 6;\n  }\n  DECLARE_OUTPUT_4()\n  CONVOLVE_8COLS_XROWS(4, 0)\n  CONVOLVE_8COLS_XROWS(4, 8)\n}\n\nvoid convolve_5x5_8x8_avx(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  DECLARE_OUTPUT_8()\n  CONVOLVE_8COLS_XROWS(8, 0)\n}\n\nvoid convolve_5x5_sse(float* 
output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols);\n\nvoid convolve_5x5_avx(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols) {\n  long ic = inCols;\n  long yy = 0;\n  float* t_ = input;\n  float* r_ = output;\n  float* k_ = kernel;\n\n  if((outRows == 64) && (outCols == 64)) {\n    convolve_5x5_64x64_avx(output, input, kernel, outRows, outStride, inCols);\n    return;\n  }\n\n  if((outRows == 32) && (outCols == 32)) {\n    convolve_5x5_32x32_avx(output, input, kernel, outRows, outStride, inCols);\n    return;\n  }\n\n  if((outRows == 16) && (outCols == 16)) {\n    convolve_5x5_16x16_avx(output, input, kernel, outRows, outStride, inCols);\n    return;\n  }\n\n  if((outRows == 8) && (outCols == 8)) {\n    convolve_5x5_8x8_avx(output, input, kernel, outRows, outStride, inCols);\n    return;\n  }\n\n  for(; yy < (outRows / 6 ) * 6; yy += 6) {\n    float *pi_ = t_ + yy*ic;\n    float *pw_ = k_;\n    float *pis_ = pi_;\n    convolve_5x5_6_avx(r_, pis_, pw_, outCols, outStride, ic);\n    r_ += (outStride * 6);\n  }\n\n  // more than 2 rows left to process and we ended up on a non-multiple of 4\n  if((yy < (outRows & 0xFFFFFFFE)) && ((yy % 4) != 0)) {\n    // process 2 rows to align on the next multiple of 4 rows (because we were a multiple of 6 after the previous loop)\n    float *pi_ = t_ + yy*ic;\n    float *pw_ = k_;\n    float *pis_ = pi_;\n    convolve_5x5_2_avx(r_, pis_, pw_, outCols, outStride, ic);\n    r_ += (outStride * 2);\n    yy += 2;\n  }\n\n  for(; yy < (outRows & 0xFFFFFFFC); yy += 4) {\n    float *pi_ = t_ + yy*ic;\n    float *pw_ = k_;\n    float *pis_ = pi_;\n    convolve_5x5_4_avx(r_, pis_, pw_, outCols, outStride, ic);\n    r_ += (outStride * 4);\n  }\n\n  for(; yy < (outRows & 0xFFFFFFFE); yy += 2) {\n    float *pi_ = t_ + yy*ic;\n    float *pw_ = k_;\n    float *pis_ = pi_;\n    convolve_5x5_2_avx(r_, pis_, pw_, outCols, outStride, ic);\n    r_ += 
(outStride * 2);\n  }\n\n  for(; yy < outRows; yy += 1) {\n    float *pi_ = t_ + yy*ic;\n    float *pw_ = k_;\n    float *pis_ = pi_;\n    convolve_5x5_1_avx(r_, pis_, pw_, outCols, outStride, ic);\n    r_ += (outStride * 1);\n  }\n\n  long procCols = outCols & 0xFFFFFFF8; // avx version processes 8 cols at a time\n  long remCols = outCols - procCols;\n\n  //process the rest using sse\n  if( remCols > 0) {\n    CLEAR_AVX();\n    convolve_5x5_sse(&output[procCols], &input[procCols], kernel, outRows, remCols, outStride, inCols);\n  }\n}"
  },
  {
    "path": "lib/TH/generic/simd/convolve5x5_sse.c",
    "content": "#include <smmintrin.h>\n#include \"common_simd.h\"\n\n\n/* SSE variants */\nvoid convolve_5x5_1_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  long i = 0;\n  long alignedCount4 = count & 0xFFFFFFFC;\n  DECLARE_OUTPUT_1()\n  for (; i < alignedCount4; i+=4) {\n    CONVOLVE_4COLS_XROWS(1, i)\n  }\n  for (; i < (count); i++) {\n    float output0 = output[i + outputStride * 0];\n    int row;\n    for (row = 0; row < 5; row++) {\n      int col;\n      for (col = 0; col < 5; col++) {\n        output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col];\n      }\n    }\n    output[i + outputStride * 0] = output0;\n  }\n}\n\nvoid convolve_5x5_2_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  long i = 0;\n  long alignedCount4 = count & 0xFFFFFFFC;\n  DECLARE_OUTPUT_2()\n  for (; i < alignedCount4; i+=4) {\n    CONVOLVE_4COLS_XROWS(2, i)\n  }\n  for (; i < (count); i++) {\n    float output0 = output[i + outputStride * 0];\n    float output1 = output[i + outputStride * 1];\n    int row;\n    for (row = 0; row < 5; row++) {\n      int col;\n      for (col = 0; col < 5; col++) {\n        output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col];\n        output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col];\n      }\n    }\n    output[i + outputStride * 0] = output0;\n    output[i + outputStride * 1] = output1;\n  }\n}\n\nvoid convolve_5x5_4_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  long i = 0;\n  long alignedCount4 = count & 0xFFFFFFFC;\n  DECLARE_OUTPUT_4()\n  for (; i < alignedCount4; i+=4) {\n    CONVOLVE_4COLS_XROWS(4, i)\n  }\n  for (; i < (count); i++) {\n    float output0 = output[i + outputStride * 0];\n    float output1 = output[i + outputStride * 1];\n    float output2 = output[i + outputStride * 2];\n    float output3 = output[i 
+ outputStride * 3];\n    int row;\n    for (row = 0; row < 5; row++) {\n      int col;\n      for (col = 0; col < 5; col++) {\n        output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col];\n        output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col];\n        output2 += weight[5 * row + col] * image[i + (row + 2) * inputStride + col];\n        output3 += weight[5 * row + col] * image[i + (row + 3) * inputStride + col];\n      }\n    }\n    output[i + outputStride * 0] = output0;\n    output[i + outputStride * 1] = output1;\n    output[i + outputStride * 2] = output2;\n    output[i + outputStride * 3] = output3;\n  }\n}\n\nvoid convolve_5x5_6_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  long i = 0;\n  long alignedCount4 = count & 0xFFFFFFFC;\n  DECLARE_OUTPUT_6()\n  for (; i < alignedCount4; i+=4) {\n    CONVOLVE_4COLS_XROWS(6, i)\n  }\n  for (; i<(count); i++) {\n    float output0 = output[i + outputStride * 0];\n    float output1 = output[i + outputStride * 1];\n    float output2 = output[i + outputStride * 2];\n    float output3 = output[i + outputStride * 3];\n    float output4 = output[i + outputStride * 4];\n    float output5 = output[i + outputStride * 5];\n    int row;\n    for (row = 0; row < 5; row++) {\n      int col;\n      for (col = 0; col < 5; col++) {\n        output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col];\n        output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col];\n        output2 += weight[5 * row + col] * image[i + (row + 2) * inputStride + col];\n        output3 += weight[5 * row + col] * image[i + (row + 3) * inputStride + col];\n        output4 += weight[5 * row + col] * image[i + (row + 4) * inputStride + col];\n        output5 += weight[5 * row + col] * image[i + (row + 5) * inputStride + col];\n      }\n    }\n    output[i + outputStride * 0] = output0;\n    output[i + outputStride * 
1] = output1;\n    output[i + outputStride * 2] = output2;\n    output[i + outputStride * 3] = output3;\n    output[i + outputStride * 4] = output4;\n    output[i + outputStride * 5] = output5;\n  }\n}\n\nvoid convolve_5x5_8_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  long i = 0;\n  long alignedCount4 = count & 0xFFFFFFFC;\n  DECLARE_OUTPUT_8()\n  for (; i < alignedCount4; i+=4) {\n    CONVOLVE_4COLS_XROWS(8, i)\n  }\n  for (; i<(count); i++) {\n    float output0 = output[i + outputStride * 0];\n    float output1 = output[i + outputStride * 1];\n    float output2 = output[i + outputStride * 2];\n    float output3 = output[i + outputStride * 3];\n    float output4 = output[i + outputStride * 4];\n    float output5 = output[i + outputStride * 5];\n    float output6 = output[i + outputStride * 6];\n    float output7 = output[i + outputStride * 7];\n    int row;\n    for (row = 0; row < 5; row++) {\n      int col;\n      for (col = 0; col < 5; col++) {\n        output0 += weight[5 * row + col] * image[i + (row + 0) * inputStride + col];\n        output1 += weight[5 * row + col] * image[i + (row + 1) * inputStride + col];\n        output2 += weight[5 * row + col] * image[i + (row + 2) * inputStride + col];\n        output3 += weight[5 * row + col] * image[i + (row + 3) * inputStride + col];\n        output4 += weight[5 * row + col] * image[i + (row + 4) * inputStride + col];\n        output5 += weight[5 * row + col] * image[i + (row + 5) * inputStride + col];\n        output6 += weight[5 * row + col] * image[i + (row + 6) * inputStride + col];\n        output7 += weight[5 * row + col] * image[i + (row + 7) * inputStride + col];\n      }\n    }\n    output[i + outputStride * 0] = output0;\n    output[i + outputStride * 1] = output1;\n    output[i + outputStride * 2] = output2;\n    output[i + outputStride * 3] = output3;\n    output[i + outputStride * 4] = output4;\n    output[i + outputStride * 5] = output5;\n    
output[i + outputStride * 6] = output6;\n    output[i + outputStride * 7] = output7;\n  }\n}\n\n#define UNROLL_SSE_CONVOLUTION 0\n#if (UNROLL_SSE_CONVOLUTION)\n\nvoid convolve_5x5_64x64_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  for(int i = 0; i < 60; i+=6)\n  {\n    DECLARE_OUTPUT_6()\n    CONVOLVE_4COLS_XROWS(6, 0)\n    CONVOLVE_4COLS_XROWS(6, 4)\n    CONVOLVE_4COLS_XROWS(6, 8)\n    CONVOLVE_4COLS_XROWS(6, 12)\n    CONVOLVE_4COLS_XROWS(6, 16)\n    CONVOLVE_4COLS_XROWS(6, 20)\n    CONVOLVE_4COLS_XROWS(6, 24)\n    CONVOLVE_4COLS_XROWS(6, 28)\n    CONVOLVE_4COLS_XROWS(6, 32)\n    CONVOLVE_4COLS_XROWS(6, 36)\n    CONVOLVE_4COLS_XROWS(6, 40)\n    CONVOLVE_4COLS_XROWS(6, 44)\n    CONVOLVE_4COLS_XROWS(6, 48)\n    CONVOLVE_4COLS_XROWS(6, 52)\n    CONVOLVE_4COLS_XROWS(6, 56)\n    CONVOLVE_4COLS_XROWS(6, 60)\n    output += outputStride * 6;\n    image += inputStride * 6;\n  }\n  DECLARE_OUTPUT_4()\n  CONVOLVE_4COLS_XROWS(4, 0)\n  CONVOLVE_4COLS_XROWS(4, 4)\n  CONVOLVE_4COLS_XROWS(4, 8)\n  CONVOLVE_4COLS_XROWS(4, 12)\n  CONVOLVE_4COLS_XROWS(4, 16)\n  CONVOLVE_4COLS_XROWS(4, 20)\n  CONVOLVE_4COLS_XROWS(4, 24)\n  CONVOLVE_4COLS_XROWS(4, 28)\n  CONVOLVE_4COLS_XROWS(4, 32)\n  CONVOLVE_4COLS_XROWS(4, 36)\n  CONVOLVE_4COLS_XROWS(4, 40)\n  CONVOLVE_4COLS_XROWS(4, 44)\n  CONVOLVE_4COLS_XROWS(4, 48)\n  CONVOLVE_4COLS_XROWS(4, 52)\n  CONVOLVE_4COLS_XROWS(4, 56)\n  CONVOLVE_4COLS_XROWS(4, 60)\n}\n\nvoid convolve_5x5_32x32_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  for(int i = 0; i < 30; i+=6)\n  {\n    DECLARE_OUTPUT_6()\n\n      CONVOLVE_4COLS_XROWS(6, 0)\n      CONVOLVE_4COLS_XROWS(6, 4)\n      CONVOLVE_4COLS_XROWS(6, 8)\n      CONVOLVE_4COLS_XROWS(6, 12)\n      CONVOLVE_4COLS_XROWS(6, 16)\n      CONVOLVE_4COLS_XROWS(6, 20)\n      CONVOLVE_4COLS_XROWS(6, 24)\n      CONVOLVE_4COLS_XROWS(6, 28)\n\n    output += outputStride * 6;\n    image += inputStride * 6;\n  }\n  
DECLARE_OUTPUT_2()\n  CONVOLVE_4COLS_XROWS(2, 0)\n  CONVOLVE_4COLS_XROWS(2, 4)\n  CONVOLVE_4COLS_XROWS(2, 8)\n  CONVOLVE_4COLS_XROWS(2, 12)\n  CONVOLVE_4COLS_XROWS(2, 16)\n  CONVOLVE_4COLS_XROWS(2, 20)\n  CONVOLVE_4COLS_XROWS(2, 24)\n  CONVOLVE_4COLS_XROWS(2, 28)\n}\n\nvoid convolve_5x5_16x16_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  for(int i = 0; i < 12; i+=6)\n  {\n    DECLARE_OUTPUT_6()\n    CONVOLVE_4COLS_XROWS(6, 0)\n    CONVOLVE_4COLS_XROWS(6, 4)\n    CONVOLVE_4COLS_XROWS(6, 8)\n    CONVOLVE_4COLS_XROWS(6, 12)\n    output += outputStride * 6;\n    image += inputStride * 6;\n  }\n  DECLARE_OUTPUT_4()\n  CONVOLVE_4COLS_XROWS(4, 0)\n  CONVOLVE_4COLS_XROWS(4, 4)\n  CONVOLVE_4COLS_XROWS(4, 8)\n  CONVOLVE_4COLS_XROWS(4, 12)\n}\n\nvoid convolve_5x5_8x8_sse(float* output, float* image, float* weight, long count, long outputStride, long inputStride) {\n  DECLARE_OUTPUT_8()\n  CONVOLVE_4COLS_XROWS(8, 0)\n  CONVOLVE_4COLS_XROWS(8, 4)\n}\n\n#endif\n\nvoid convolve_5x5_sse(float* output, float* input, float* kernel, long outRows, long outCols, long outStride, long inCols) {\n  long yy = 0;\n  float* t_ = input;\n  float* r_ = output;\n  float* k_ = kernel;\n#if (UNROLL_SSE_CONVOLUTION)\n  if((outRows == 64) && (outCols == 64)) {\n    convolve_5x5_64x64_sse(output, input, kernel, outRows, outStride, inCols);\n    return;\n  }\n\n  if((outRows == 32) && (outCols == 32)) {\n    convolve_5x5_32x32_sse(output, input, kernel, outRows, outStride, inCols);\n    return;\n  }\n\n  if((outRows == 16) && (outCols == 16)) {\n    convolve_5x5_16x16_sse(output, input, kernel, outRows, outStride, inCols);\n    return;\n  }\n\n  if((outRows == 8) && (outCols == 8)) {\n    convolve_5x5_8x8_sse(output, input, kernel, outRows, outStride, inCols);\n    return;\n  }\n#endif\n  for(; yy < (outRows / 6 ) * 6; yy += 6) {\n    float *pi_ = t_ + yy*inCols;\n    float *pw_ = k_;\n    float *pis_ = pi_;\n    convolve_5x5_6_sse(r_, pis_, 
pw_, outCols, outStride, inCols);\n    r_ += (outStride * 6);\n  }\n  // more than 2 rows left to process and we ended up on a non-multiple of 4\n  if((yy < (outRows & 0xFFFFFFFE)) && ((yy % 4) != 0)) {\n    // process 2 rows to align on the next multiple of 4 rows (because we were a multiple of 6 after the previous loop)\n    float *pi_ = t_ + yy*inCols;\n    float *pw_ = k_;\n    float *pis_ = pi_;\n    convolve_5x5_2_sse(r_, pis_, pw_, outCols, outStride, inCols);\n    r_ += (outStride * 2);\n    yy += 2;\n  }\n\n  for(; yy < (outRows & 0xFFFFFFFC); yy += 4) {\n    float *pi_ = t_ + yy*inCols;\n    float *pw_ = k_;\n    float *pis_ = pi_;\n    convolve_5x5_4_sse(r_, pis_, pw_, outCols, outStride, inCols);\n    r_ += (outStride * 4);\n  }\n\n  for(; yy < (outRows & 0xFFFFFFFE); yy += 2) {\n    float *pi_ = t_ + yy*inCols;\n    float *pw_ = k_;\n    float *pis_ = pi_;\n    convolve_5x5_2_sse(r_, pis_, pw_, outCols, outStride, inCols);\n    r_ += (outStride * 2);\n  }\n\n  for(; yy < outRows; yy += 1) {\n    float *pi_ = t_ + yy*inCols;\n    float *pw_ = k_;\n    float *pis_ = pi_;\n    convolve_5x5_1_sse(r_, pis_, pw_, outCols, outStride, inCols);\n    r_ += (outStride * 1);\n  }\n}\n"
  },
  {
    "path": "lib/TH/generic/simd/simd.h",
    "content": "#ifndef TH_SIMD_INC\n#define TH_SIMD_INC\n\n#include <stdint.h>\n#include <stdlib.h>\n#if defined(_MSC_VER)\n#include <intrin.h>\n#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)\n#include <cpuid.h>\n#endif\n\n// Can be found on Intel ISA Reference for CPUID\n#define CPUID_AVX2_BIT 0x20       // Bit 5 of EBX for EAX=0x7\n#define CPUID_AVX_BIT  0x10000000 // Bit 28 of ECX for EAX=0x1\n#define CPUID_SSE_BIT  0x2000000  // bit 25 of EDX for EAX=0x1\n\n// Helper macros for initialization\n#define FUNCTION_IMPL(NAME, EXT) \\\n    { .function=(void *)NAME,    \\\n      .supportedSimdExt=EXT      \\\n    }\n\n#define INIT_DISPATCH_PTR(OP)    \\\n  do {                           \\\n    int i;                       \\\n    for (i = 0; i < sizeof(THVector_(OP ## _DISPATCHTABLE)) / sizeof(FunctionDescription); ++i) { \\\n      THVector_(OP ## _DISPATCHPTR) = THVector_(OP ## _DISPATCHTABLE)[i].function;                     \\\n      if (THVector_(OP ## _DISPATCHTABLE)[i].supportedSimdExt & hostSimdExts) {                       \\\n        break;                                                                                     \\\n      }                                                                                            \\\n    }                                                                                              \\\n  } while(0)\n\n\ntypedef struct FunctionDescription\n{\n  void *function;\n  uint32_t supportedSimdExt;\n} FunctionDescription;\n\n\nenum SIMDExtensions\n{\n#if defined(__NEON__)\n  SIMDExtension_NEON    = 0x1,\n#elif defined(__PPC64__)\n  SIMDExtension_VSX     = 0x1,\n#else\n  SIMDExtension_AVX2    = 0x1,\n  SIMDExtension_AVX     = 0x2,\n  SIMDExtension_SSE     = 0x4,\n#endif\n  SIMDExtension_DEFAULT = 0x0\n};\n\n\n#if defined(__arm__) || defined(__aarch64__) // incl. 
armel, armhf, arm64\n\n #if defined(__NEON__)\n\nstatic inline uint32_t detectHostSIMDExtensions()\n{\n  return SIMDExtension_NEON;\n}\n\n #else //ARM without NEON\n\nstatic inline uint32_t detectHostSIMDExtensions()\n{\n  return SIMDExtension_DEFAULT;\n}\n\n #endif\n\n#elif defined(__PPC64__)\n\n #if defined(__VSX__)\n\nstatic inline uint32_t detectHostSIMDExtensions()\n{\n  uint32_t hostSimdExts = SIMDExtension_DEFAULT;\n  char *evar;\n\n  evar = getenv(\"TH_NO_VSX\");\n  if (evar == NULL || strncmp(evar, \"1\", 2) != 0)\n    hostSimdExts = SIMDExtension_VSX;\n  return hostSimdExts;\n}\n\n #else //PPC64 without VSX\n\nstatic inline uint32_t detectHostSIMDExtensions()\n{\n  return SIMDExtension_DEFAULT;\n}\n\n #endif\n\n#else   // x86\nstatic inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)\n{\n#if defined(_MSC_VER)\n  uint32_t cpuInfo[4];\n  __cpuid(cpuInfo, *eax);\n  *eax = cpuInfo[0];\n  *ebx = cpuInfo[1];\n  *ecx = cpuInfo[2];\n  *edx = cpuInfo[3];\n#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)\n  uint32_t level = *eax;\n  __get_cpuid (level, eax, ebx, ecx, edx);\n#else\n  uint32_t a = *eax, b, c = *ecx, d;\n  asm volatile ( \"cpuid\\n\\t\"\n\t\t : \"+a\"(a), \"=b\"(b), \"+c\"(c), \"=d\"(d) );\n  *eax = a;\n  *ebx = b;\n  *ecx = c;\n  *edx = d;\n#endif\n}\n\nstatic inline uint32_t detectHostSIMDExtensions()\n{\n  uint32_t eax, ebx, ecx, edx;\n  uint32_t hostSimdExts = 0x0;\n  int TH_NO_AVX = 1, TH_NO_AVX2 = 1, TH_NO_SSE = 1;\n  char *evar;\n\n  evar = getenv(\"TH_NO_AVX2\");\n  if (evar == NULL || strncmp(evar, \"1\", 2) != 0)\n    TH_NO_AVX2 = 0;\n\n  // Check for AVX2. 
Requires separate CPUID\n  eax = 0x7;\n  ecx = 0x0;\n  cpuid(&eax, &ebx, &ecx, &edx);\n  if ((ebx & CPUID_AVX2_BIT) && TH_NO_AVX2 == 0) {\n    hostSimdExts |= SIMDExtension_AVX2;\n  }\n\n  // Detect and enable AVX and SSE\n  eax = 0x1;\n  cpuid(&eax, &ebx, &ecx, &edx);\n\n  evar = getenv(\"TH_NO_AVX\");\n  if (evar == NULL || strncmp(evar, \"1\", 2) != 0)\n    TH_NO_AVX = 0;\n  if (ecx & CPUID_AVX_BIT && TH_NO_AVX == 0) {\n    hostSimdExts |= SIMDExtension_AVX;\n  }\n\n  evar = getenv(\"TH_NO_SSE\");\n  if (evar == NULL || strncmp(evar, \"1\", 2) != 0)\n    TH_NO_SSE = 0;\n  if (edx & CPUID_SSE_BIT && TH_NO_SSE == 0) {\n    hostSimdExts |= SIMDExtension_SSE;\n  }\n\n  return hostSimdExts;\n}\n\n#endif // end SIMD extension detection code\n\n#endif\n"
  },
  {
    "path": "lib/TH/vector/AVX.c",
    "content": "#if defined(__AVX__)\n#ifndef _MSC_VER\n#include <x86intrin.h>\n#else\n#include <intrin.h>\n#endif\n\n#include \"AVX.h\"\n\nvoid THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n) {\n  ptrdiff_t i;\n  ptrdiff_t off;\n  for (i=0; i<=((n)-8); i+=8) {\n    _mm256_storeu_pd(y+i, _mm256_loadu_pd(x+i));\n    _mm256_storeu_pd(y+i+4, _mm256_loadu_pd(x+i+4));\n  }\n  off = (n) - ((n)%8);\n  for (i=0; i<((n)%8); i++) {\n    y[off+i] = x[off+i];\n  }\n}\n\nvoid THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  ptrdiff_t off;\n  __m256d YMM0 = _mm256_set_pd(c, c, c, c);\n  for (i=0; i<=((n)-16); i+=16) {\n    _mm256_storeu_pd((x)+i  , YMM0);\n    _mm256_storeu_pd((x)+i+4, YMM0);\n    _mm256_storeu_pd((x)+i+8, YMM0);\n    _mm256_storeu_pd((x)+i+12, YMM0);\n  }\n  off = (n) - ((n)%16);\n  for (i=0; i<((n)%16); i++) {\n    x[off+i] = c;\n  }\n}\n\nvoid THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m256d YMM0, YMM1, YMM2, YMM3;\n  for (i=0; i<=((n)-8); i+=8) {\n    YMM0 = _mm256_loadu_pd(x+i);\n    YMM1 = _mm256_loadu_pd(x+i+4);\n    YMM2 = _mm256_loadu_pd(y+i);\n    YMM3 = _mm256_loadu_pd(y+i+4);\n    YMM2 = _mm256_div_pd(YMM0, YMM2);\n    YMM3 = _mm256_div_pd(YMM1, YMM3);\n    _mm256_storeu_pd(z+i, YMM2);\n    _mm256_storeu_pd(z+i+4, YMM3);\n  }\n  for (; i<(n); i++) {\n    z[i] = x[i] / y[i];\n  }\n}\n\nvoid THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m256d YMM15 = _mm256_set_pd(c, c, c, c);\n  __m256d YMM0, YMM1;\n  for (i=0; i<=((n)-8); i+=8) {\n    YMM0 = _mm256_loadu_pd(x+i);\n    YMM1 = _mm256_loadu_pd(x+i+4);\n    YMM0 = _mm256_div_pd(YMM0, YMM15);\n    YMM1 = _mm256_div_pd(YMM1, YMM15);\n    _mm256_storeu_pd(y+i, YMM0);\n    _mm256_storeu_pd(y+i+4, YMM1);\n  }\n  for (; i<(n); i++) {\n    y[i] = x[i] / c;\n  }\n}\n\nvoid THDoubleVector_cmul_AVX(double *z, const 
double *x, const double *y, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m256d YMM0, YMM1, YMM2, YMM3;\n  for (i=0; i<=((n)-8); i+=8) {\n    YMM0 = _mm256_loadu_pd(x+i);\n    YMM1 = _mm256_loadu_pd(x+i+4);\n    YMM2 = _mm256_loadu_pd(y+i);\n    YMM3 = _mm256_loadu_pd(y+i+4);\n    YMM2 = _mm256_mul_pd(YMM0, YMM2);\n    YMM3 = _mm256_mul_pd(YMM1, YMM3);\n    _mm256_storeu_pd(z+i, YMM2);\n    _mm256_storeu_pd(z+i+4, YMM3);\n  }\n  for (; i<n; i++) {\n    z[i] = x[i] * y[i];\n  }\n}\n\nvoid THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m256d YMM15 = _mm256_set_pd(c, c, c, c);\n  __m256d YMM0, YMM1;\n  for (i=0; i<=((n)-8); i+=8) {\n    YMM0 = _mm256_loadu_pd(x+i);\n    YMM1 = _mm256_loadu_pd(x+i+4);\n    YMM0 = _mm256_mul_pd(YMM0, YMM15);\n    YMM1 = _mm256_mul_pd(YMM1, YMM15);\n    _mm256_storeu_pd(y+i, YMM0);\n    _mm256_storeu_pd(y+i+4, YMM1);\n  }\n  for (; i<n; i++) {\n    y[i] = x[i] * c;\n  }\n}\n\nvoid THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m256d YMM15 = _mm256_set_pd(c, c, c, c);\n  __m256d YMM0, YMM1, YMM2, YMM3;\n  for (i=0; i<=((n)-4); i+=4) {\n    YMM0 = _mm256_loadu_pd(y+i);\n    YMM1 = _mm256_loadu_pd(x+i);\n    YMM2 = _mm256_mul_pd(YMM0, YMM15);\n    YMM3 = _mm256_add_pd(YMM1, YMM2);\n    _mm256_storeu_pd(z+i, YMM3);\n  }\n  for (; i<(n); i++) {\n    z[i] = x[i] + y[i] * c;\n  }\n}\n\nvoid THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m256d YMM15 = _mm256_set_pd(c, c, c, c);\n  __m256d YMM0, YMM1;\n  for (i=0; i<=((n)-8); i+=8) {\n    YMM0 = _mm256_loadu_pd(x+i);\n    YMM1 = _mm256_loadu_pd(x+i+4);\n    YMM0 = _mm256_add_pd(YMM0, YMM15);\n    YMM1 = _mm256_add_pd(YMM1, YMM15);\n    _mm256_storeu_pd(y+i, YMM0);\n    _mm256_storeu_pd(y+i+4, YMM1);\n  }\n  for (; i<(n); i++) {\n    y[i] = x[i] + c;\n  }\n}\n\nvoid THFloatVector_copy_AVX(float *y, 
const float *x, const ptrdiff_t n) {\n  ptrdiff_t i;\n  ptrdiff_t off;\n  for (i=0; i<=((n)-16); i+=16) {\n    _mm256_storeu_ps(y+i, _mm256_loadu_ps(x+i));\n    _mm256_storeu_ps(y+i+8, _mm256_loadu_ps(x+i+8));\n  }\n  off = (n) - ((n)%16);\n  for (i=0; i<((n)%16); i++) {\n    y[off+i] = x[off+i];\n  }\n}\n\nvoid THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  ptrdiff_t off;\n  __m256 YMM0 = _mm256_set_ps(c, c, c, c, c, c, c, c);\n  for (i=0; i<=((n)-32); i+=32) {\n    _mm256_storeu_ps((x)+i  , YMM0);\n    _mm256_storeu_ps((x)+i+8, YMM0);\n    _mm256_storeu_ps((x)+i+16, YMM0);\n    _mm256_storeu_ps((x)+i+24, YMM0);\n  }\n  off = (n) - ((n)%32);\n  for (i=0; i<((n)%32); i++) {\n    x[off+i] = c;\n  }\n}\n\nvoid THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m256 YMM0, YMM1, YMM2, YMM3;\n  for (i=0; i<=((n)-16); i+=16) {\n    YMM0 = _mm256_loadu_ps(x+i);\n    YMM1 = _mm256_loadu_ps(x+i+8);\n    YMM2 = _mm256_loadu_ps(y+i);\n    YMM3 = _mm256_loadu_ps(y+i+8);\n    YMM2 = _mm256_div_ps(YMM0, YMM2);\n    YMM3 = _mm256_div_ps(YMM1, YMM3);\n    _mm256_storeu_ps(z+i, YMM2);\n    _mm256_storeu_ps(z+i+8, YMM3);\n  }\n  for (; i<(n); i++) {\n    z[i] = x[i] / y[i];\n  }\n}\n\nvoid THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);\n  __m256 YMM0, YMM1;\n  for (i=0; i<=((n)-16); i+=16) {\n    YMM0 = _mm256_loadu_ps(x+i);\n    YMM1 = _mm256_loadu_ps(x+i+8);\n    YMM0 = _mm256_div_ps(YMM0, YMM15);\n    YMM1 = _mm256_div_ps(YMM1, YMM15);\n    _mm256_storeu_ps(y+i, YMM0);\n    _mm256_storeu_ps(y+i+8, YMM1);\n  }\n  for (; i<(n); i++) {\n    y[i] = x[i] / c;\n  }\n}\n\nvoid THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m256 YMM0, YMM1, YMM2, YMM3;\n  for (i=0; i<=((n)-16); i+=16) {\n    YMM0 = _mm256_loadu_ps(x+i);\n    
YMM1 = _mm256_loadu_ps(x+i+8);\n    YMM2 = _mm256_loadu_ps(y+i);\n    YMM3 = _mm256_loadu_ps(y+i+8);\n    YMM2 = _mm256_mul_ps(YMM0, YMM2);\n    YMM3 = _mm256_mul_ps(YMM1, YMM3);\n    _mm256_storeu_ps(z+i, YMM2);\n    _mm256_storeu_ps(z+i+8, YMM3);\n  }\n  for (; i<n; i++) {\n    z[i] = x[i] * y[i];\n  }\n}\n\nvoid THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);\n  __m256 YMM0, YMM1;\n  for (i=0; i<=((n)-16); i+=16) {\n    YMM0 = _mm256_loadu_ps(x+i);\n    YMM1 = _mm256_loadu_ps(x+i+8);\n    YMM0 = _mm256_mul_ps(YMM0, YMM15);\n    YMM1 = _mm256_mul_ps(YMM1, YMM15);\n    _mm256_storeu_ps(y+i, YMM0);\n    _mm256_storeu_ps(y+i+8, YMM1);\n  }\n  for (; i<n; i++) {\n    y[i] = x[i] * c;\n  }\n}\n\nvoid THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);\n  __m256 YMM0, YMM1, YMM2, YMM3;\n  for (i=0; i<=((n)-8); i+=8) {\n    YMM0 = _mm256_loadu_ps(y+i);\n    YMM1 = _mm256_loadu_ps(x+i);\n    YMM2 = _mm256_mul_ps(YMM0, YMM15);\n    YMM3 = _mm256_add_ps(YMM1, YMM2);\n    _mm256_storeu_ps(z+i, YMM3);\n  }\n  for (; i<(n); i++) {\n    z[i] = x[i] + y[i] * c;\n  }\n}\n\nvoid THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);\n  __m256 YMM0, YMM1;\n  for (i=0; i<=((n)-16); i+=16) {\n    YMM0 = _mm256_loadu_ps(x+i);\n    YMM1 = _mm256_loadu_ps(x+i+8);\n    YMM0 = _mm256_add_ps(YMM0, YMM15);\n    YMM1 = _mm256_add_ps(YMM1, YMM15);\n    _mm256_storeu_ps(y+i, YMM0);\n    _mm256_storeu_ps(y+i+8, YMM1);\n  }\n  for (; i<(n); i++) {\n    y[i] = x[i] + c;\n  }\n}\n\n#endif // defined(__AVX__)\n"
  },
  {
    "path": "lib/TH/vector/AVX.h",
    "content": "#ifndef TH_AVX_H\n#define TH_AVX_H\n\n#include <stddef.h>\n\nvoid THDoubleVector_copy_AVX(double *y, const double *x, const ptrdiff_t n);\nvoid THDoubleVector_fill_AVX(double *x, const double c, const ptrdiff_t n);\nvoid THDoubleVector_cdiv_AVX(double *z, const double *x, const double *y, const ptrdiff_t n);\nvoid THDoubleVector_divs_AVX(double *y, const double *x, const double c, const ptrdiff_t n);\nvoid THDoubleVector_cmul_AVX(double *z, const double *x, const double *y, const ptrdiff_t n);\nvoid THDoubleVector_muls_AVX(double *y, const double *x, const double c, const ptrdiff_t n);\nvoid THDoubleVector_cadd_AVX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);\nvoid THDoubleVector_adds_AVX(double *y, const double *x, const double c, const ptrdiff_t n);\nvoid THFloatVector_copy_AVX(float *y, const float *x, const ptrdiff_t n);\nvoid THFloatVector_fill_AVX(float *x, const float c, const ptrdiff_t n);\nvoid THFloatVector_cdiv_AVX(float *z, const float *x, const float *y, const ptrdiff_t n);\nvoid THFloatVector_divs_AVX(float *y, const float *x, const float c, const ptrdiff_t n);\nvoid THFloatVector_cmul_AVX(float *z, const float *x, const float *y, const ptrdiff_t n);\nvoid THFloatVector_muls_AVX(float *y, const float *x, const float c, const ptrdiff_t n);\nvoid THFloatVector_cadd_AVX(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);\nvoid THFloatVector_adds_AVX(float *y, const float *x, const float c, const ptrdiff_t n);\n\n#endif\n"
  },
  {
    "path": "lib/TH/vector/AVX2.c",
    "content": "#if defined(__AVX2__)\n#ifndef _MSC_VER\n#include <x86intrin.h>\n#else\n#include <intrin.h>\n#endif\n#include \"AVX2.h\"\n\nvoid THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m256d YMM15 = _mm256_set_pd(c, c, c, c);\n  __m256d YMM0, YMM1, YMM2, YMM3;\n  for (i=0; i<=((n)-8); i+=8) {\n    YMM0 = _mm256_loadu_pd(y+i);\n    YMM1 = _mm256_loadu_pd(y+i+4);\n    YMM2 = _mm256_loadu_pd(x+i);\n    YMM3 = _mm256_loadu_pd(x+i+4);\n    YMM2 = _mm256_fmadd_pd(YMM0, YMM15, YMM2);\n    YMM3 = _mm256_fmadd_pd(YMM1, YMM15, YMM3);\n    _mm256_storeu_pd(z+i, YMM2);\n    _mm256_storeu_pd(z+i+4, YMM3);\n  }\n  for (; i<(n); i++) {\n    z[i] = x[i] + y[i] * c;\n  }\n}\n\nvoid THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m256 YMM15 = _mm256_set_ps(c, c, c, c, c, c, c, c);\n  __m256 YMM0, YMM1, YMM2, YMM3;\n  for (i=0; i<=((n)-16); i+=16) {\n    YMM0 = _mm256_loadu_ps(y+i);\n    YMM1 = _mm256_loadu_ps(y+i+8);\n    YMM2 = _mm256_loadu_ps(x+i);\n    YMM3 = _mm256_loadu_ps(x+i+8);\n    YMM2 = _mm256_fmadd_ps(YMM0, YMM15, YMM2);\n    YMM3 = _mm256_fmadd_ps(YMM1, YMM15, YMM3);\n    _mm256_storeu_ps(z+i, YMM2);\n    _mm256_storeu_ps(z+i+8, YMM3);\n  }\n  for (; i<(n); i++) {\n    z[i] = x[i] + y[i] * c;\n  }\n}\n\n#endif // defined(__AVX2__)\n"
  },
  {
    "path": "lib/TH/vector/AVX2.h",
    "content": "#ifndef TH_AVX2_H\n#define TH_AVX2_H\n\n#include <stddef.h>\n\nvoid THDoubleVector_cadd_AVX2(double *z, const double *x, const double *y, const double c, const ptrdiff_t n);\nvoid THFloatVector_cadd_AVX2(float *z, const float *x, const float *y, const float c, const ptrdiff_t n);\n\n#endif\n"
  },
  {
    "path": "lib/TH/vector/NEON.c",
    "content": "static void THFloatVector_fill_NEON(float *x, const float c, const ptrdiff_t n) {\n  long i = 0;\n\n  for(; i < n-4; i += 4)\n  {\n    x[i] = c;\n    x[i+1] = c;\n    x[i+2] = c;\n    x[i+3] = c;\n  }\n\n  for(; i < n; i++)\n    x[i] = c;\n\n}\n\nstatic void THFloatVector_cmul_NEON(float *z, const float *x, const float* y, const ptrdiff_t n) {\n  long i = 0;\n\n  for(; i < n-4; i += 4)\n  {\n    z[i] = x[i] * y[i];\n    z[i+1] = x[i+1] * y[i+1];\n    z[i+2] = x[i+2] * y[i+2];\n    z[i+3] = x[i+3] * y[i+3];\n  }\n\n  for(; i < n; i++)\n    z[i] = x[i] * y[i];\n}\n\nstatic void THFloatVector_muls_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {\n  long i = 0;\n\n  for(; i < n-4; i += 4)\n  {\n    y[i] = x[i] * c;\n    y[i+1] = x[i+1] * c;\n    y[i+2] = x[i+2] * c;\n    y[i+3] = x[i+3] * c;\n  }\n\n  for(; i < n; i++)\n    y[i] = x[i] * c;\n}\n\nstatic void THFloatVector_cadd_NEON(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {\n  long i = 0;\n\n  for(;i < n-4; i += 4)\n  {\n    z[i] = x[i] + c * y[i];\n    z[i+1] = x[i+1] + c * y[i+1];\n    z[i+2] = x[i+2] + c * y[i+2];\n    z[i+3] = x[i+3] + c * y[i+3];\n  }\n\n  for(; i < n; i++)\n    z[i] = x[i] + c * y[i];\n}\n\nstatic void THFloatVector_adds_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {\n  long i = 0;\n\n  for(;i < n-4; i += 4)\n  {\n    y[i] = x[i] + c;\n    y[i+1] = x[i+1] + c;\n    y[i+2] = x[i+2] + c;\n    y[i+3] = x[i+3] + c;\n  }\n\n  for(; i < n; i++)\n    y[i] = x[i] + c;\n}\n\nstatic void THFloatVector_cdiv_NEON(float *z, const float *x, const float *y, const ptrdiff_t n) {\n  long i = 0;\n\n  for(;i < n-4; i += 4)\n  {\n    z[i] = x[i] / y[i];\n    z[i+1] = x[i+1] / y[i+1];\n    z[i+2] = x[i+2] / y[i+2];\n    z[i+3] = x[i+3] / y[i+3];\n  }\n\n  for(; i < n; i++)\n    z[i] = x[i] / y[i];\n}\n\nstatic void THFloatVector_divs_NEON(float *y, const float *x, const float c, const ptrdiff_t n) {\n  long i = 0;\n\n  for(;i < 
n-4; i += 4)\n  {\n    y[i] = x[i] / c;\n    y[i+1] = x[i+1] / c;\n    y[i+2] = x[i+2] / c;\n    y[i+3] = x[i+3] / c;\n  }\n\n  for(; i < n; i++)\n    y[i] = x[i] / c;\n}\n"
  },
  {
    "path": "lib/TH/vector/SSE.c",
    "content": "#ifndef _MSC_VER\n#include <x86intrin.h>\n#else\n#include <intrin.h>\n#endif\n\nstatic void THDoubleVector_fill_SSE(double *x, const double c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  ptrdiff_t off;\n  __m128d XMM0 = _mm_set1_pd(c);\n  for (i=0; i<=((n)-8); i+=8) {\n    _mm_storeu_pd((x)+i  , XMM0);\n    _mm_storeu_pd((x)+i+2, XMM0);\n    _mm_storeu_pd((x)+i+4, XMM0);\n    _mm_storeu_pd((x)+i+6, XMM0);\n  }\n  off = (n) - ((n)%8);\n  for (i=0; i<((n)%8); i++) {\n    x[off+i] = c;\n  }\n}\n\nstatic void THDoubleVector_cadd_SSE(double *z, const double *x, const double *y, const double c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m128d XMM7 = _mm_set1_pd(c);\n  __m128d XMM0, XMM2;\n  for (i=0; i<=((n)-2); i+=2) {\n    XMM0 = _mm_loadu_pd((x)+i);\n    XMM2 = _mm_loadu_pd((y)+i);\n    XMM2 = _mm_mul_pd(XMM2, XMM7);\n    XMM2 = _mm_add_pd(XMM0, XMM2);\n    _mm_storeu_pd((z)+i, XMM2);\n  }\n  for (; i<(n); i++) {\n    z[i] = x[i] + c * y[i];\n  }\n}\n\nstatic void THDoubleVector_adds_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m128d XMM7 = _mm_set1_pd(c);\n  __m128d XMM0, XMM2;\n  for (i=0; i<=((n)-4); i+=4) {\n    XMM0 = _mm_loadu_pd((x)+i);\n    XMM2 = _mm_loadu_pd((x)+i+2);\n    XMM0 = _mm_add_pd(XMM0, XMM7);\n    XMM2 = _mm_add_pd(XMM2, XMM7);\n    _mm_storeu_pd((y)+i, XMM0);\n    _mm_storeu_pd((y)+i+2, XMM2);\n  }\n  for (; i<(n); i++) {\n    y[i] = x[i] + c;\n  }\n}\n\nstatic void THDoubleVector_cmul_SSE(double *z, const double *x, const double *y, const ptrdiff_t n) {\n  ptrdiff_t i;\n  for (i=0; i<=((n)-8); i+=8) {\n    __m128d XMM0 = _mm_loadu_pd((x)+i  );\n    __m128d XMM1 = _mm_loadu_pd((x)+i+2);\n    __m128d XMM2 = _mm_loadu_pd((x)+i+4);\n    __m128d XMM3 = _mm_loadu_pd((x)+i+6);\n    __m128d XMM4 = _mm_loadu_pd((y)+i  );\n    __m128d XMM5 = _mm_loadu_pd((y)+i+2);\n    __m128d XMM6 = _mm_loadu_pd((y)+i+4);\n    __m128d XMM7 = _mm_loadu_pd((y)+i+6);\n    XMM4 = _mm_mul_pd(XMM4, XMM0);\n    XMM5 = 
_mm_mul_pd(XMM5, XMM1);\n    XMM6 = _mm_mul_pd(XMM6, XMM2);\n    XMM7 = _mm_mul_pd(XMM7, XMM3);\n    _mm_storeu_pd((z)+i  , XMM4);\n    _mm_storeu_pd((z)+i+2, XMM5);\n    _mm_storeu_pd((z)+i+4, XMM6);\n    _mm_storeu_pd((z)+i+6, XMM7);\n  }\n  for (; i<(n); i++) {\n    z[i] = x[i] * y[i];\n  }\n}\n\nstatic void THDoubleVector_muls_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m128d XMM15 = _mm_set1_pd(c);\n  for (i=0; i<=((n)-8); i+=8) {\n    __m128d XMM0 = _mm_loadu_pd((x)+i  );\n    __m128d XMM1 = _mm_loadu_pd((x)+i+2);\n    __m128d XMM2 = _mm_loadu_pd((x)+i+4);\n    __m128d XMM3 = _mm_loadu_pd((x)+i+6);\n    __m128d XMM4 = _mm_mul_pd(XMM15, XMM0);\n    __m128d XMM5 = _mm_mul_pd(XMM15, XMM1);\n    __m128d XMM6 = _mm_mul_pd(XMM15, XMM2);\n    __m128d XMM7 = _mm_mul_pd(XMM15, XMM3);\n    _mm_storeu_pd((y)+i  , XMM4);\n    _mm_storeu_pd((y)+i+2, XMM5);\n    _mm_storeu_pd((y)+i+4, XMM6);\n    _mm_storeu_pd((y)+i+6, XMM7);\n  }\n  for (; i<(n); i++) {\n    y[i] = x[i] * c;\n  }\n}\n\nstatic void THDoubleVector_cdiv_SSE(double *z, const double *x, const double *y, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m128d XMM0, XMM1, XMM2, XMM3;\n  for (i=0; i<=((n)-4); i+=4) {\n    XMM0 = _mm_loadu_pd(x+i);\n    XMM1 = _mm_loadu_pd(x+i+2);\n    XMM2 = _mm_loadu_pd(y+i);\n    XMM3 = _mm_loadu_pd(y+i+2);\n    XMM2 = _mm_div_pd(XMM0, XMM2);\n    XMM3 = _mm_div_pd(XMM1, XMM3);\n    _mm_storeu_pd(z+i, XMM2);\n    _mm_storeu_pd(z+i+2, XMM3);\n  }\n  for (; i<(n); i++) {\n    z[i] = x[i] / y[i];\n  }\n}\n\nstatic void THDoubleVector_divs_SSE(double *y, const double *x, const double c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m128d XMM7 = _mm_set1_pd(c);\n  __m128d XMM0, XMM1;\n  for (i=0; i<=((n)-4); i+=4) {\n    XMM0 = _mm_loadu_pd(x+i);\n    XMM1 = _mm_loadu_pd(x+i+2);\n    XMM0 = _mm_div_pd(XMM0, XMM7);\n    XMM1 = _mm_div_pd(XMM1, XMM7);\n    _mm_storeu_pd(y+i, XMM0);\n    _mm_storeu_pd(y+i+2, XMM1);\n  }\n  for (; i<(n); i++) {\n    
y[i] = x[i] / c;\n  }\n}\n\nstatic void THFloatVector_fill_SSE(float *x, const float c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m128 XMM0 = _mm_set_ps1(c);\n  ptrdiff_t off;\n  for (i=0; i<=((n)-16); i+=16) {\n    _mm_storeu_ps((x)+i  ,  XMM0);\n    _mm_storeu_ps((x)+i+4,  XMM0);\n    _mm_storeu_ps((x)+i+8,  XMM0);\n    _mm_storeu_ps((x)+i+12, XMM0);\n  }\n  off = (n) - ((n)%16);\n  for (i=0; i<((n)%16); i++) {\n    x[off+i] = c;\n  }\n}\n\n\nstatic void THFloatVector_cadd_SSE(float *z, const float *x, const float *y, const float c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m128 XMM7 = _mm_set_ps1(c);\n  __m128 XMM0, XMM2;\n  for (i=0; i<=((n)-4); i+=4) {\n    XMM0 = _mm_loadu_ps((x)+i);\n    XMM2 = _mm_loadu_ps((y)+i);\n    XMM2 = _mm_mul_ps(XMM2, XMM7);\n    XMM2 = _mm_add_ps(XMM0, XMM2);\n    _mm_storeu_ps((z)+i, XMM2);\n  }\n  for (; i<(n); i++) {\n    z[i] = x[i] + c * y[i];\n  }\n}\n\nstatic void THFloatVector_adds_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m128 XMM7 = _mm_set1_ps(c);\n  __m128 XMM0, XMM2;\n  for (i=0; i<=((n)-8); i+=8) {\n    XMM0 = _mm_loadu_ps((x)+i);\n    XMM2 = _mm_loadu_ps((x)+i+4);\n    XMM0 = _mm_add_ps(XMM0, XMM7);\n    XMM2 = _mm_add_ps(XMM2, XMM7);\n    _mm_storeu_ps((y)+i, XMM0);\n    _mm_storeu_ps((y)+i+4, XMM2);\n  }\n  for (; i<(n); i++) {\n    y[i] = x[i] + c;\n  }\n}\n\nstatic void THFloatVector_cmul_SSE(float *z, const float *x, const float *y, const ptrdiff_t n) {\n  ptrdiff_t i;\n  for (i=0; i<=((n)-16); i+=16) {\n    __m128 XMM0 = _mm_loadu_ps((x)+i   );\n    __m128 XMM1 = _mm_loadu_ps((x)+i+ 4);\n    __m128 XMM2 = _mm_loadu_ps((x)+i+ 8);\n    __m128 XMM3 = _mm_loadu_ps((x)+i+12);\n    __m128 XMM4 = _mm_loadu_ps((y)+i   );\n    __m128 XMM5 = _mm_loadu_ps((y)+i+ 4);\n    __m128 XMM6 = _mm_loadu_ps((y)+i+ 8);\n    __m128 XMM7 = _mm_loadu_ps((y)+i+12);\n    XMM4 = _mm_mul_ps(XMM4, XMM0);\n    XMM5 = _mm_mul_ps(XMM5, XMM1);\n    XMM6 = _mm_mul_ps(XMM6, XMM2);\n    XMM7 = 
_mm_mul_ps(XMM7, XMM3);\n    _mm_storeu_ps((z)+i   , XMM4);\n    _mm_storeu_ps((z)+i+ 4, XMM5);\n    _mm_storeu_ps((z)+i+ 8, XMM6);\n    _mm_storeu_ps((z)+i+12, XMM7);\n  }\n  for (; i<(n); i++) {\n    z[i] = x[i] * y[i];\n  }\n}\n\nstatic void THFloatVector_muls_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m128 XMM15 = _mm_set_ps1(c);\n  for (i=0; i<=((n)-16); i+=16) {\n    __m128 XMM0 = _mm_loadu_ps((x)+i   );\n    __m128 XMM1 = _mm_loadu_ps((x)+i+ 4);\n    __m128 XMM2 = _mm_loadu_ps((x)+i+ 8);\n    __m128 XMM3 = _mm_loadu_ps((x)+i+12);\n    __m128 XMM4 = _mm_mul_ps(XMM15, XMM0);\n    __m128 XMM5 = _mm_mul_ps(XMM15, XMM1);\n    __m128 XMM6 = _mm_mul_ps(XMM15, XMM2);\n    __m128 XMM7 = _mm_mul_ps(XMM15, XMM3);\n    _mm_storeu_ps((y)+i   , XMM4);\n    _mm_storeu_ps((y)+i+ 4, XMM5);\n    _mm_storeu_ps((y)+i+ 8, XMM6);\n    _mm_storeu_ps((y)+i+12, XMM7);\n  }\n  for (; i<(n); i++) {\n    y[i] = x[i] * c;\n  }\n}\n\nstatic void THFloatVector_cdiv_SSE(float *z, const float *x, const float *y, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m128 XMM0, XMM1, XMM2, XMM3;\n  for (i=0; i<=((n)-8); i+=8) {\n    XMM0 = _mm_loadu_ps(x+i);\n    XMM1 = _mm_loadu_ps(x+i+4);\n    XMM2 = _mm_loadu_ps(y+i);\n    XMM3 = _mm_loadu_ps(y+i+4);\n    XMM2 = _mm_div_ps(XMM0, XMM2);\n    XMM3 = _mm_div_ps(XMM1, XMM3);\n    _mm_storeu_ps(z+i, XMM2);\n    _mm_storeu_ps(z+i+4, XMM3);\n  }\n  for (; i<(n); i++) {\n    z[i] = x[i] / y[i];\n  }\n}\n\nstatic void THFloatVector_divs_SSE(float *y, const float *x, const float c, const ptrdiff_t n) {\n  ptrdiff_t i;\n  __m128 XMM7 = _mm_set1_ps(c);\n  __m128 XMM0, XMM1;\n  for (i=0; i<=((n)-8); i+=8) {\n    XMM0 = _mm_loadu_ps(x+i);\n    XMM1 = _mm_loadu_ps(x+i+4);\n    XMM0 = _mm_div_ps(XMM0, XMM7);\n    XMM1 = _mm_div_ps(XMM1, XMM7);\n    _mm_storeu_ps(y+i, XMM0);\n    _mm_storeu_ps(y+i+4, XMM1);\n  }\n  for (; i<(n); i++) {\n    y[i] = x[i] / c;\n  }\n}\n"
  },
  {
    "path": "lib/TH/vector/VSX.c",
    "content": "#ifdef __PPC64__\n#include <altivec.h>\n#include <stddef.h>\n\n\n//--------------------------------------------------------------------------------------------------\n// THDoubleVector_fill_VSX:\n//--------------------------------------------------------------------------------------------------\nstatic void THDoubleVector_fill_VSX(double *x, const double c, const ptrdiff_t n)\n{\n    ptrdiff_t i;\n\n    double val[2] = {c, c};\n    vector double fp64vec2 = vec_xl(0, val);\n\n    for (i = 0; i <= n-128; i += 128)\n    {\n        vec_xst(fp64vec2, 0, x+(i    ));\n        vec_xst(fp64vec2, 0, x+(i+2  ));\n        vec_xst(fp64vec2, 0, x+(i+4  ));\n        vec_xst(fp64vec2, 0, x+(i+6  ));\n        vec_xst(fp64vec2, 0, x+(i+8  ));\n        vec_xst(fp64vec2, 0, x+(i+10 ));\n        vec_xst(fp64vec2, 0, x+(i+12 ));\n        vec_xst(fp64vec2, 0, x+(i+14 ));\n        vec_xst(fp64vec2, 0, x+(i+16 ));\n        vec_xst(fp64vec2, 0, x+(i+18 ));\n        vec_xst(fp64vec2, 0, x+(i+20 ));\n        vec_xst(fp64vec2, 0, x+(i+22 ));\n        vec_xst(fp64vec2, 0, x+(i+24 ));\n        vec_xst(fp64vec2, 0, x+(i+26 ));\n        vec_xst(fp64vec2, 0, x+(i+28 ));\n        vec_xst(fp64vec2, 0, x+(i+30 ));\n        vec_xst(fp64vec2, 0, x+(i+32 ));\n        vec_xst(fp64vec2, 0, x+(i+34 ));\n        vec_xst(fp64vec2, 0, x+(i+36 ));\n        vec_xst(fp64vec2, 0, x+(i+38 ));\n        vec_xst(fp64vec2, 0, x+(i+40 ));\n        vec_xst(fp64vec2, 0, x+(i+42 ));\n        vec_xst(fp64vec2, 0, x+(i+44 ));\n        vec_xst(fp64vec2, 0, x+(i+46 ));\n        vec_xst(fp64vec2, 0, x+(i+48 ));\n        vec_xst(fp64vec2, 0, x+(i+50 ));\n        vec_xst(fp64vec2, 0, x+(i+52 ));\n        vec_xst(fp64vec2, 0, x+(i+54 ));\n        vec_xst(fp64vec2, 0, x+(i+56 ));\n        vec_xst(fp64vec2, 0, x+(i+58 ));\n        vec_xst(fp64vec2, 0, x+(i+60 ));\n        vec_xst(fp64vec2, 0, x+(i+62 ));\n        vec_xst(fp64vec2, 0, x+(i+64 ));\n        vec_xst(fp64vec2, 0, x+(i+66 ));\n        vec_xst(fp64vec2, 0, 
x+(i+68 ));\n        vec_xst(fp64vec2, 0, x+(i+70 ));\n        vec_xst(fp64vec2, 0, x+(i+72 ));\n        vec_xst(fp64vec2, 0, x+(i+74 ));\n        vec_xst(fp64vec2, 0, x+(i+76 ));\n        vec_xst(fp64vec2, 0, x+(i+78 ));\n        vec_xst(fp64vec2, 0, x+(i+80 ));\n        vec_xst(fp64vec2, 0, x+(i+82 ));\n        vec_xst(fp64vec2, 0, x+(i+84 ));\n        vec_xst(fp64vec2, 0, x+(i+86 ));\n        vec_xst(fp64vec2, 0, x+(i+88 ));\n        vec_xst(fp64vec2, 0, x+(i+90 ));\n        vec_xst(fp64vec2, 0, x+(i+92 ));\n        vec_xst(fp64vec2, 0, x+(i+94 ));\n        vec_xst(fp64vec2, 0, x+(i+96 ));\n        vec_xst(fp64vec2, 0, x+(i+98 ));\n        vec_xst(fp64vec2, 0, x+(i+100));\n        vec_xst(fp64vec2, 0, x+(i+102));\n        vec_xst(fp64vec2, 0, x+(i+104));\n        vec_xst(fp64vec2, 0, x+(i+106));\n        vec_xst(fp64vec2, 0, x+(i+108));\n        vec_xst(fp64vec2, 0, x+(i+110));\n        vec_xst(fp64vec2, 0, x+(i+112));\n        vec_xst(fp64vec2, 0, x+(i+114));\n        vec_xst(fp64vec2, 0, x+(i+116));\n        vec_xst(fp64vec2, 0, x+(i+118));\n        vec_xst(fp64vec2, 0, x+(i+120));\n        vec_xst(fp64vec2, 0, x+(i+122));\n        vec_xst(fp64vec2, 0, x+(i+124));\n        vec_xst(fp64vec2, 0, x+(i+126));\n    }\n    for (; i <= n-16; i += 16)\n    {\n        vec_xst(fp64vec2, 0, x+(i    ));\n        vec_xst(fp64vec2, 0, x+(i+2  ));\n        vec_xst(fp64vec2, 0, x+(i+4  ));\n        vec_xst(fp64vec2, 0, x+(i+6  ));\n        vec_xst(fp64vec2, 0, x+(i+8  ));\n        vec_xst(fp64vec2, 0, x+(i+10 ));\n        vec_xst(fp64vec2, 0, x+(i+12 ));\n        vec_xst(fp64vec2, 0, x+(i+14 ));\n    }\n    for (; i <= n-2; i += 2)\n        vec_xst(fp64vec2, 0, x+(i    ));\n    for (; i < n; i++)\n        x[i] = c;\n}\n\n\n//--------------------------------------------------------------------------------------------------\n// THDoubleVector_cadd_VSX:\n//--------------------------------------------------------------------------------------------------\nstatic void 
THDoubleVector_cadd_VSX(double *z, const double *x, const double *y, const double c, const ptrdiff_t n)\n{\n    ptrdiff_t i;\n\n    double val[2] = {c, c};\n    vector double c_fp64vec2 = vec_xl(0, val);\n\n    vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2;\n    vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2;\n    vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2;\n    vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2;\n\n\n    for (i = 0; i <= n-24; i += 24)\n    {\n        y0_fp64vec2  = vec_xl(0, y+(i   ));\n        y1_fp64vec2  = vec_xl(0, y+(i+2 ));\n        y2_fp64vec2  = vec_xl(0, y+(i+4 ));\n        y3_fp64vec2  = vec_xl(0, y+(i+6 ));\n        y4_fp64vec2  = vec_xl(0, y+(i+8 ));\n        y5_fp64vec2  = vec_xl(0, y+(i+10));\n        y6_fp64vec2  = vec_xl(0, y+(i+12));\n        y7_fp64vec2  = vec_xl(0, y+(i+14));\n        y8_fp64vec2  = vec_xl(0, y+(i+16));\n        y9_fp64vec2  = vec_xl(0, y+(i+18));\n        y10_fp64vec2 = vec_xl(0, y+(i+20));\n        y11_fp64vec2 = vec_xl(0, y+(i+22));\n\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        x1_fp64vec2  = vec_xl(0, x+(i+2 ));\n        x2_fp64vec2  = vec_xl(0, x+(i+4 ));\n        x3_fp64vec2  = vec_xl(0, x+(i+6 ));\n        x4_fp64vec2  = vec_xl(0, x+(i+8 ));\n        x5_fp64vec2  = vec_xl(0, x+(i+10));\n        x6_fp64vec2  = vec_xl(0, x+(i+12));\n        x7_fp64vec2  = vec_xl(0, x+(i+14));\n        x8_fp64vec2  = vec_xl(0, x+(i+16));\n        x9_fp64vec2  = vec_xl(0, x+(i+18));\n        x10_fp64vec2 = vec_xl(0, x+(i+20));\n        x11_fp64vec2 = vec_xl(0, x+(i+22));\n\n        y0_fp64vec2  = vec_madd(y0_fp64vec2, c_fp64vec2,  x0_fp64vec2);\n        y1_fp64vec2  = vec_madd(y1_fp64vec2, c_fp64vec2, x1_fp64vec2);\n        y2_fp64vec2  = vec_madd(y2_fp64vec2, c_fp64vec2, x2_fp64vec2);\n        y3_fp64vec2  = vec_madd(y3_fp64vec2, 
c_fp64vec2, x3_fp64vec2);\n        y4_fp64vec2  = vec_madd(y4_fp64vec2, c_fp64vec2, x4_fp64vec2);\n        y5_fp64vec2  = vec_madd(y5_fp64vec2, c_fp64vec2, x5_fp64vec2);\n        y6_fp64vec2  = vec_madd(y6_fp64vec2, c_fp64vec2, x6_fp64vec2);\n        y7_fp64vec2  = vec_madd(y7_fp64vec2, c_fp64vec2, x7_fp64vec2);\n        y8_fp64vec2  = vec_madd(y8_fp64vec2, c_fp64vec2, x8_fp64vec2);\n        y9_fp64vec2  = vec_madd(y9_fp64vec2, c_fp64vec2, x9_fp64vec2);\n        y10_fp64vec2 = vec_madd(y10_fp64vec2, c_fp64vec2,x10_fp64vec2);\n        y11_fp64vec2 = vec_madd(y11_fp64vec2, c_fp64vec2,x11_fp64vec2);\n\n        vec_xst(y0_fp64vec2,  0, z+(i   ));\n        vec_xst(y1_fp64vec2,  0, z+(i+2 ));\n        vec_xst(y2_fp64vec2,  0, z+(i+4 ));\n        vec_xst(y3_fp64vec2,  0, z+(i+6 ));\n        vec_xst(y4_fp64vec2,  0, z+(i+8 ));\n        vec_xst(y5_fp64vec2,  0, z+(i+10));\n        vec_xst(y6_fp64vec2,  0, z+(i+12));\n        vec_xst(y7_fp64vec2,  0, z+(i+14));\n        vec_xst(y8_fp64vec2,  0, z+(i+16));\n        vec_xst(y9_fp64vec2,  0, z+(i+18));\n        vec_xst(y10_fp64vec2, 0, z+(i+20));\n        vec_xst(y11_fp64vec2, 0, z+(i+22));\n    }\n    for (; i <= n-8; i += 8)\n    {\n        y0_fp64vec2  = vec_xl(0, y+(i   ));\n        y1_fp64vec2  = vec_xl(0, y+(i+2 ));\n        y2_fp64vec2  = vec_xl(0, y+(i+4 ));\n        y3_fp64vec2  = vec_xl(0, y+(i+6 ));\n\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        x1_fp64vec2  = vec_xl(0, x+(i+2 ));\n        x2_fp64vec2  = vec_xl(0, x+(i+4 ));\n        x3_fp64vec2  = vec_xl(0, x+(i+6 ));\n\n        y0_fp64vec2  = vec_madd(y0_fp64vec2, c_fp64vec2, x0_fp64vec2);\n        y1_fp64vec2  = vec_madd(y1_fp64vec2, c_fp64vec2, x1_fp64vec2);\n        y2_fp64vec2  = vec_madd(y2_fp64vec2, c_fp64vec2, x2_fp64vec2);\n        y3_fp64vec2  = vec_madd(y3_fp64vec2, c_fp64vec2, x3_fp64vec2);\n\n        vec_xst(y0_fp64vec2,  0, z+(i   ));\n        vec_xst(y1_fp64vec2,  0, z+(i+2 ));\n        vec_xst(y2_fp64vec2,  0, z+(i+4 ));\n        
vec_xst(y3_fp64vec2,  0, z+(i+6 ));\n    }\n    for (; i <= n-2; i += 2)\n    {\n        y0_fp64vec2  = vec_xl(0, y+(i   ));\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        y0_fp64vec2  = vec_madd(y0_fp64vec2, c_fp64vec2, x0_fp64vec2);\n        vec_xst(y0_fp64vec2,  0, z+(i   ));\n    }\n    for (; i < n; i++)\n        z[i] = x[i] + c* y[i];\n}\n\n\n//--------------------------------------------------------------------------------------------------\n// THDoubleVector_adds_VSX:\n//--------------------------------------------------------------------------------------------------\nstatic void THDoubleVector_adds_VSX(double *y, const double *x, const double c, const ptrdiff_t n)\n{\n    ptrdiff_t i;\n\n    double val[2] = {c, c};\n    vector double c_fp64vec2 = vec_xl(0, val);\n\n    vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2;\n    vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2;\n    vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2;\n    vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2;\n\n\n    for (i = 0; i <= n-24; i += 24)\n    {\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        x1_fp64vec2  = vec_xl(0, x+(i+2 ));\n        x2_fp64vec2  = vec_xl(0, x+(i+4 ));\n        x3_fp64vec2  = vec_xl(0, x+(i+6 ));\n        x4_fp64vec2  = vec_xl(0, x+(i+8 ));\n        x5_fp64vec2  = vec_xl(0, x+(i+10));\n        x6_fp64vec2  = vec_xl(0, x+(i+12));\n        x7_fp64vec2  = vec_xl(0, x+(i+14));\n        x8_fp64vec2  = vec_xl(0, x+(i+16));\n        x9_fp64vec2  = vec_xl(0, x+(i+18));\n        x10_fp64vec2 = vec_xl(0, x+(i+20));\n        x11_fp64vec2 = vec_xl(0, x+(i+22));\n\n        y0_fp64vec2  = vec_add(x0_fp64vec2,  c_fp64vec2);\n        y1_fp64vec2  = vec_add(x1_fp64vec2,  c_fp64vec2);\n        y2_fp64vec2  = vec_add(x2_fp64vec2,  c_fp64vec2);\n        y3_fp64vec2  = vec_add(x3_fp64vec2,  
c_fp64vec2);\n        y4_fp64vec2  = vec_add(x4_fp64vec2,  c_fp64vec2);\n        y5_fp64vec2  = vec_add(x5_fp64vec2,  c_fp64vec2);\n        y6_fp64vec2  = vec_add(x6_fp64vec2,  c_fp64vec2);\n        y7_fp64vec2  = vec_add(x7_fp64vec2,  c_fp64vec2);\n        y8_fp64vec2  = vec_add(x8_fp64vec2,  c_fp64vec2);\n        y9_fp64vec2  = vec_add(x9_fp64vec2,  c_fp64vec2);\n        y10_fp64vec2 = vec_add(x10_fp64vec2, c_fp64vec2);\n        y11_fp64vec2 = vec_add(x11_fp64vec2, c_fp64vec2);\n        \n\n        vec_xst(y0_fp64vec2,  0, y+(i   ));\n        vec_xst(y1_fp64vec2,  0, y+(i+2 ));\n        vec_xst(y2_fp64vec2,  0, y+(i+4 ));\n        vec_xst(y3_fp64vec2,  0, y+(i+6 ));\n        vec_xst(y4_fp64vec2,  0, y+(i+8 ));\n        vec_xst(y5_fp64vec2,  0, y+(i+10));\n        vec_xst(y6_fp64vec2,  0, y+(i+12));\n        vec_xst(y7_fp64vec2,  0, y+(i+14));\n        vec_xst(y8_fp64vec2,  0, y+(i+16));\n        vec_xst(y9_fp64vec2,  0, y+(i+18));\n        vec_xst(y10_fp64vec2, 0, y+(i+20));\n        vec_xst(y11_fp64vec2, 0, y+(i+22));\n    }\n    for (; i <= n-8; i += 8)\n    {\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        x1_fp64vec2  = vec_xl(0, x+(i+2 ));\n        x2_fp64vec2  = vec_xl(0, x+(i+4 ));\n        x3_fp64vec2  = vec_xl(0, x+(i+6 ));\n\n        y0_fp64vec2  = vec_add(x0_fp64vec2,  c_fp64vec2);\n        y1_fp64vec2  = vec_add(x1_fp64vec2,  c_fp64vec2);\n        y2_fp64vec2  = vec_add(x2_fp64vec2,  c_fp64vec2);\n        y3_fp64vec2  = vec_add(x3_fp64vec2,  c_fp64vec2);\n\n        vec_xst(y0_fp64vec2,  0, y+(i   ));\n        vec_xst(y1_fp64vec2,  0, y+(i+2 ));\n        vec_xst(y2_fp64vec2,  0, y+(i+4 ));\n        vec_xst(y3_fp64vec2,  0, y+(i+6 ));\n    }\n    for (; i <= n-2; i += 2)\n    {\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        y0_fp64vec2  = vec_add(x0_fp64vec2,  c_fp64vec2);\n        vec_xst(y0_fp64vec2,  0, y+(i   ));\n    }\n    for (; i < n; i++)\n        y[i] = x[i] 
+c;\n}\n\n\n//--------------------------------------------------------------------------------------------------\n// THDoubleVector_cmul_VSX:\n//--------------------------------------------------------------------------------------------------\nstatic void THDoubleVector_cmul_VSX(double *z, const double *x, const double *y, const ptrdiff_t n)\n{\n    ptrdiff_t i;\n\n    vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2;\n    vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2;\n    vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2;\n    vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2;\n\n\n    for (i = 0; i <= n-24; i += 24)\n    {\n        y0_fp64vec2  = vec_xl(0, y+(i   ));\n        y1_fp64vec2  = vec_xl(0, y+(i+2 ));\n        y2_fp64vec2  = vec_xl(0, y+(i+4 ));\n        y3_fp64vec2  = vec_xl(0, y+(i+6 ));\n        y4_fp64vec2  = vec_xl(0, y+(i+8 ));\n        y5_fp64vec2  = vec_xl(0, y+(i+10));\n        y6_fp64vec2  = vec_xl(0, y+(i+12));\n        y7_fp64vec2  = vec_xl(0, y+(i+14));\n        y8_fp64vec2  = vec_xl(0, y+(i+16));\n        y9_fp64vec2  = vec_xl(0, y+(i+18));\n        y10_fp64vec2 = vec_xl(0, y+(i+20));\n        y11_fp64vec2 = vec_xl(0, y+(i+22));\n\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        x1_fp64vec2  = vec_xl(0, x+(i+2 ));\n        x2_fp64vec2  = vec_xl(0, x+(i+4 ));\n        x3_fp64vec2  = vec_xl(0, x+(i+6 ));\n        x4_fp64vec2  = vec_xl(0, x+(i+8 ));\n        x5_fp64vec2  = vec_xl(0, x+(i+10));\n        x6_fp64vec2  = vec_xl(0, x+(i+12));\n        x7_fp64vec2  = vec_xl(0, x+(i+14));\n        x8_fp64vec2  = vec_xl(0, x+(i+16));\n        x9_fp64vec2  = vec_xl(0, x+(i+18));\n        x10_fp64vec2 = vec_xl(0, x+(i+20));\n        x11_fp64vec2 = vec_xl(0, x+(i+22));\n\n        y0_fp64vec2  = vec_mul(y0_fp64vec2,  x0_fp64vec2);\n        y1_fp64vec2  = vec_mul(y1_fp64vec2,  
x1_fp64vec2);\n        y2_fp64vec2  = vec_mul(y2_fp64vec2,  x2_fp64vec2);\n        y3_fp64vec2  = vec_mul(y3_fp64vec2,  x3_fp64vec2);\n        y4_fp64vec2  = vec_mul(y4_fp64vec2,  x4_fp64vec2);\n        y5_fp64vec2  = vec_mul(y5_fp64vec2,  x5_fp64vec2);\n        y6_fp64vec2  = vec_mul(y6_fp64vec2,  x6_fp64vec2);\n        y7_fp64vec2  = vec_mul(y7_fp64vec2,  x7_fp64vec2);\n        y8_fp64vec2  = vec_mul(y8_fp64vec2,  x8_fp64vec2);\n        y9_fp64vec2  = vec_mul(y9_fp64vec2,  x9_fp64vec2);\n        y10_fp64vec2 = vec_mul(y10_fp64vec2, x10_fp64vec2);\n        y11_fp64vec2 = vec_mul(y11_fp64vec2, x11_fp64vec2);\n\n        vec_xst(y0_fp64vec2,  0, z+(i   ));\n        vec_xst(y1_fp64vec2,  0, z+(i+2 ));\n        vec_xst(y2_fp64vec2,  0, z+(i+4 ));\n        vec_xst(y3_fp64vec2,  0, z+(i+6 ));\n        vec_xst(y4_fp64vec2,  0, z+(i+8 ));\n        vec_xst(y5_fp64vec2,  0, z+(i+10));\n        vec_xst(y6_fp64vec2,  0, z+(i+12));\n        vec_xst(y7_fp64vec2,  0, z+(i+14));\n        vec_xst(y8_fp64vec2,  0, z+(i+16));\n        vec_xst(y9_fp64vec2,  0, z+(i+18));\n        vec_xst(y10_fp64vec2, 0, z+(i+20));\n        vec_xst(y11_fp64vec2, 0, z+(i+22));\n    }\n    for (; i <= n-8; i += 8)\n    {\n        y0_fp64vec2  = vec_xl(0, y+(i   ));\n        y1_fp64vec2  = vec_xl(0, y+(i+2 ));\n        y2_fp64vec2  = vec_xl(0, y+(i+4 ));\n        y3_fp64vec2  = vec_xl(0, y+(i+6 ));\n\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        x1_fp64vec2  = vec_xl(0, x+(i+2 ));\n        x2_fp64vec2  = vec_xl(0, x+(i+4 ));\n        x3_fp64vec2  = vec_xl(0, x+(i+6 ));\n\n        y0_fp64vec2  = vec_mul(y0_fp64vec2,  x0_fp64vec2);\n        y1_fp64vec2  = vec_mul(y1_fp64vec2,  x1_fp64vec2);\n        y2_fp64vec2  = vec_mul(y2_fp64vec2,  x2_fp64vec2);\n        y3_fp64vec2  = vec_mul(y3_fp64vec2,  x3_fp64vec2);\n\n        vec_xst(y0_fp64vec2,  0, z+(i   ));\n        vec_xst(y1_fp64vec2,  0, z+(i+2 ));\n        vec_xst(y2_fp64vec2,  0, z+(i+4 ));\n        vec_xst(y3_fp64vec2,  0, z+(i+6 ));\n    }\n    
for (; i <= n-2; i += 2)\n    {\n        y0_fp64vec2  = vec_xl(0, y+(i   ));\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        y0_fp64vec2  = vec_mul(y0_fp64vec2,  x0_fp64vec2);\n        vec_xst(y0_fp64vec2,  0, z+(i   ));\n    }\n    for (; i < n; i++)\n        z[i] = x[i] * y[i];\n}\n\n\n//--------------------------------------------------------------------------------------------------\n// THDoubleVector_muls_VSX:\n//--------------------------------------------------------------------------------------------------\nstatic void THDoubleVector_muls_VSX(double *y, const double *x, const double c, const ptrdiff_t n)\n{\n    ptrdiff_t i;\n\n    double val[2] = {c, c};\n    vector double c_fp64vec2 = vec_xl(0, val);\n\n    vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2;\n    vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2;\n    vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2;\n    vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2;\n\n\n    for (i = 0; i <= n-24; i += 24)\n    {\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        x1_fp64vec2  = vec_xl(0, x+(i+2 ));\n        x2_fp64vec2  = vec_xl(0, x+(i+4 ));\n        x3_fp64vec2  = vec_xl(0, x+(i+6 ));\n        x4_fp64vec2  = vec_xl(0, x+(i+8 ));\n        x5_fp64vec2  = vec_xl(0, x+(i+10));\n        x6_fp64vec2  = vec_xl(0, x+(i+12));\n        x7_fp64vec2  = vec_xl(0, x+(i+14));\n        x8_fp64vec2  = vec_xl(0, x+(i+16));\n        x9_fp64vec2  = vec_xl(0, x+(i+18));\n        x10_fp64vec2 = vec_xl(0, x+(i+20));\n        x11_fp64vec2 = vec_xl(0, x+(i+22));\n\n        y0_fp64vec2  = vec_mul(x0_fp64vec2,  c_fp64vec2);\n        y1_fp64vec2  = vec_mul(x1_fp64vec2,  c_fp64vec2);\n        y2_fp64vec2  = vec_mul(x2_fp64vec2,  c_fp64vec2);\n        y3_fp64vec2  = vec_mul(x3_fp64vec2,  c_fp64vec2);\n        y4_fp64vec2  = vec_mul(x4_fp64vec2,  
c_fp64vec2);\n        y5_fp64vec2  = vec_mul(x5_fp64vec2,  c_fp64vec2);\n        y6_fp64vec2  = vec_mul(x6_fp64vec2,  c_fp64vec2);\n        y7_fp64vec2  = vec_mul(x7_fp64vec2,  c_fp64vec2);\n        y8_fp64vec2  = vec_mul(x8_fp64vec2,  c_fp64vec2);\n        y9_fp64vec2  = vec_mul(x9_fp64vec2,  c_fp64vec2);\n        y10_fp64vec2 = vec_mul(x10_fp64vec2, c_fp64vec2);\n        y11_fp64vec2 = vec_mul(x11_fp64vec2, c_fp64vec2);\n        \n\n        vec_xst(y0_fp64vec2,  0, y+(i   ));\n        vec_xst(y1_fp64vec2,  0, y+(i+2 ));\n        vec_xst(y2_fp64vec2,  0, y+(i+4 ));\n        vec_xst(y3_fp64vec2,  0, y+(i+6 ));\n        vec_xst(y4_fp64vec2,  0, y+(i+8 ));\n        vec_xst(y5_fp64vec2,  0, y+(i+10));\n        vec_xst(y6_fp64vec2,  0, y+(i+12));\n        vec_xst(y7_fp64vec2,  0, y+(i+14));\n        vec_xst(y8_fp64vec2,  0, y+(i+16));\n        vec_xst(y9_fp64vec2,  0, y+(i+18));\n        vec_xst(y10_fp64vec2, 0, y+(i+20));\n        vec_xst(y11_fp64vec2, 0, y+(i+22));\n    }\n    for (; i <= n-8; i += 8)\n    {\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        x1_fp64vec2  = vec_xl(0, x+(i+2 ));\n        x2_fp64vec2  = vec_xl(0, x+(i+4 ));\n        x3_fp64vec2  = vec_xl(0, x+(i+6 ));\n\n        y0_fp64vec2  = vec_mul(x0_fp64vec2,  c_fp64vec2);\n        y1_fp64vec2  = vec_mul(x1_fp64vec2,  c_fp64vec2);\n        y2_fp64vec2  = vec_mul(x2_fp64vec2,  c_fp64vec2);\n        y3_fp64vec2  = vec_mul(x3_fp64vec2,  c_fp64vec2);\n\n        vec_xst(y0_fp64vec2,  0, y+(i   ));\n        vec_xst(y1_fp64vec2,  0, y+(i+2 ));\n        vec_xst(y2_fp64vec2,  0, y+(i+4 ));\n        vec_xst(y3_fp64vec2,  0, y+(i+6 ));\n    }\n    for (; i <= n-2; i += 2)\n    {\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        y0_fp64vec2  = vec_mul(x0_fp64vec2,  c_fp64vec2);\n        vec_xst(y0_fp64vec2,  0, y+(i   ));\n    }\n    for (; i < n; i++)\n        y[i] = c * x[i];\n}\n\n\n//--------------------------------------------------------------------------------------------------\n// 
THDoubleVector_cdiv_VSX:\n//--------------------------------------------------------------------------------------------------\nstatic void THDoubleVector_cdiv_VSX(double *z, const double *x, const double *y, const ptrdiff_t n)\n{\n    ptrdiff_t i;\n\n    vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2;\n    vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2;\n    vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2;\n    vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2;\n\n\n    for (i = 0; i <= n-24; i += 24)\n    {\n        y0_fp64vec2  = vec_xl(0, y+(i   ));\n        y1_fp64vec2  = vec_xl(0, y+(i+2 ));\n        y2_fp64vec2  = vec_xl(0, y+(i+4 ));\n        y3_fp64vec2  = vec_xl(0, y+(i+6 ));\n        y4_fp64vec2  = vec_xl(0, y+(i+8 ));\n        y5_fp64vec2  = vec_xl(0, y+(i+10));\n        y6_fp64vec2  = vec_xl(0, y+(i+12));\n        y7_fp64vec2  = vec_xl(0, y+(i+14));\n        y8_fp64vec2  = vec_xl(0, y+(i+16));\n        y9_fp64vec2  = vec_xl(0, y+(i+18));\n        y10_fp64vec2 = vec_xl(0, y+(i+20));\n        y11_fp64vec2 = vec_xl(0, y+(i+22));\n\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        x1_fp64vec2  = vec_xl(0, x+(i+2 ));\n        x2_fp64vec2  = vec_xl(0, x+(i+4 ));\n        x3_fp64vec2  = vec_xl(0, x+(i+6 ));\n        x4_fp64vec2  = vec_xl(0, x+(i+8 ));\n        x5_fp64vec2  = vec_xl(0, x+(i+10));\n        x6_fp64vec2  = vec_xl(0, x+(i+12));\n        x7_fp64vec2  = vec_xl(0, x+(i+14));\n        x8_fp64vec2  = vec_xl(0, x+(i+16));\n        x9_fp64vec2  = vec_xl(0, x+(i+18));\n        x10_fp64vec2 = vec_xl(0, x+(i+20));\n        x11_fp64vec2 = vec_xl(0, x+(i+22));\n\n        y0_fp64vec2  = vec_div(x0_fp64vec2,  y0_fp64vec2);\n        y1_fp64vec2  = vec_div(x1_fp64vec2,  y1_fp64vec2);\n        y2_fp64vec2  = vec_div(x2_fp64vec2,  y2_fp64vec2);\n        y3_fp64vec2  = vec_div(x3_fp64vec2, 
 y3_fp64vec2);\n        y4_fp64vec2  = vec_div(x4_fp64vec2,  y4_fp64vec2);\n        y5_fp64vec2  = vec_div(x5_fp64vec2,  y5_fp64vec2);\n        y6_fp64vec2  = vec_div(x6_fp64vec2,  y6_fp64vec2);\n        y7_fp64vec2  = vec_div(x7_fp64vec2,  y7_fp64vec2);\n        y8_fp64vec2  = vec_div(x8_fp64vec2,  y8_fp64vec2);\n        y9_fp64vec2  = vec_div(x9_fp64vec2,  y9_fp64vec2);\n        y10_fp64vec2 = vec_div(x10_fp64vec2, y10_fp64vec2);\n        y11_fp64vec2 = vec_div(x11_fp64vec2, y11_fp64vec2);\n\n        vec_xst(y0_fp64vec2,  0, z+(i   ));\n        vec_xst(y1_fp64vec2,  0, z+(i+2 ));\n        vec_xst(y2_fp64vec2,  0, z+(i+4 ));\n        vec_xst(y3_fp64vec2,  0, z+(i+6 ));\n        vec_xst(y4_fp64vec2,  0, z+(i+8 ));\n        vec_xst(y5_fp64vec2,  0, z+(i+10));\n        vec_xst(y6_fp64vec2,  0, z+(i+12));\n        vec_xst(y7_fp64vec2,  0, z+(i+14));\n        vec_xst(y8_fp64vec2,  0, z+(i+16));\n        vec_xst(y9_fp64vec2,  0, z+(i+18));\n        vec_xst(y10_fp64vec2, 0, z+(i+20));\n        vec_xst(y11_fp64vec2, 0, z+(i+22));\n    }\n    for (; i <= n-8; i += 8)\n    {\n        y0_fp64vec2  = vec_xl(0, y+(i   ));\n        y1_fp64vec2  = vec_xl(0, y+(i+2 ));\n        y2_fp64vec2  = vec_xl(0, y+(i+4 ));\n        y3_fp64vec2  = vec_xl(0, y+(i+6 ));\n\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        x1_fp64vec2  = vec_xl(0, x+(i+2 ));\n        x2_fp64vec2  = vec_xl(0, x+(i+4 ));\n        x3_fp64vec2  = vec_xl(0, x+(i+6 ));\n\n        y0_fp64vec2  = vec_div(x0_fp64vec2,  y0_fp64vec2);\n        y1_fp64vec2  = vec_div(x1_fp64vec2,  y1_fp64vec2);\n        y2_fp64vec2  = vec_div(x2_fp64vec2,  y2_fp64vec2);\n        y3_fp64vec2  = vec_div(x3_fp64vec2,  y3_fp64vec2);\n\n        vec_xst(y0_fp64vec2,  0, z+(i   ));\n        vec_xst(y1_fp64vec2,  0, z+(i+2 ));\n        vec_xst(y2_fp64vec2,  0, z+(i+4 ));\n        vec_xst(y3_fp64vec2,  0, z+(i+6 ));\n    }\n    for (; i <= n-2; i += 2)\n    {\n        y0_fp64vec2  = vec_xl(0, y+(i   ));\n        x0_fp64vec2  = vec_xl(0, x+(i   
));\n        y0_fp64vec2  = vec_div(x0_fp64vec2,  y0_fp64vec2);\n        vec_xst(y0_fp64vec2,  0, z+(i   ));\n    }\n    for (; i < n; i++)\n        z[i] = x[i] / y[i];\n}\n\n\n//--------------------------------------------------------------------------------------------------\n// THDoubleVector_divs_VSX:\n//--------------------------------------------------------------------------------------------------\nstatic void THDoubleVector_divs_VSX(double *y, const double *x, const double c, const ptrdiff_t n)\n{\n    ptrdiff_t i;\n\n    double val[2] = {c, c};\n    vector double c_fp64vec2 = vec_xl(0, val);\n\n    vector double y0_fp64vec2, y1_fp64vec2, y2_fp64vec2, y3_fp64vec2, y4_fp64vec2, y5_fp64vec2, y6_fp64vec2, y7_fp64vec2;\n    vector double y8_fp64vec2, y9_fp64vec2, y10_fp64vec2, y11_fp64vec2;\n    vector double x0_fp64vec2, x1_fp64vec2, x2_fp64vec2, x3_fp64vec2, x4_fp64vec2, x5_fp64vec2, x6_fp64vec2, x7_fp64vec2;\n    vector double x8_fp64vec2, x9_fp64vec2, x10_fp64vec2, x11_fp64vec2;\n\n\n    for (i = 0; i <= n-24; i += 24)\n    {\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        x1_fp64vec2  = vec_xl(0, x+(i+2 ));\n        x2_fp64vec2  = vec_xl(0, x+(i+4 ));\n        x3_fp64vec2  = vec_xl(0, x+(i+6 ));\n        x4_fp64vec2  = vec_xl(0, x+(i+8 ));\n        x5_fp64vec2  = vec_xl(0, x+(i+10));\n        x6_fp64vec2  = vec_xl(0, x+(i+12));\n        x7_fp64vec2  = vec_xl(0, x+(i+14));\n        x8_fp64vec2  = vec_xl(0, x+(i+16));\n        x9_fp64vec2  = vec_xl(0, x+(i+18));\n        x10_fp64vec2 = vec_xl(0, x+(i+20));\n        x11_fp64vec2 = vec_xl(0, x+(i+22));\n\n        y0_fp64vec2  = vec_div(x0_fp64vec2,  c_fp64vec2);\n        y1_fp64vec2  = vec_div(x1_fp64vec2,  c_fp64vec2);\n        y2_fp64vec2  = vec_div(x2_fp64vec2,  c_fp64vec2);\n        y3_fp64vec2  = vec_div(x3_fp64vec2,  c_fp64vec2);\n        y4_fp64vec2  = vec_div(x4_fp64vec2,  c_fp64vec2);\n        y5_fp64vec2  = vec_div(x5_fp64vec2,  c_fp64vec2);\n        y6_fp64vec2  = vec_div(x6_fp64vec2,  
c_fp64vec2);\n        y7_fp64vec2  = vec_div(x7_fp64vec2,  c_fp64vec2);\n        y8_fp64vec2  = vec_div(x8_fp64vec2,  c_fp64vec2);\n        y9_fp64vec2  = vec_div(x9_fp64vec2,  c_fp64vec2);\n        y10_fp64vec2 = vec_div(x10_fp64vec2, c_fp64vec2);\n        y11_fp64vec2 = vec_div(x11_fp64vec2, c_fp64vec2);\n        \n\n        vec_xst(y0_fp64vec2,  0, y+(i   ));\n        vec_xst(y1_fp64vec2,  0, y+(i+2 ));\n        vec_xst(y2_fp64vec2,  0, y+(i+4 ));\n        vec_xst(y3_fp64vec2,  0, y+(i+6 ));\n        vec_xst(y4_fp64vec2,  0, y+(i+8 ));\n        vec_xst(y5_fp64vec2,  0, y+(i+10));\n        vec_xst(y6_fp64vec2,  0, y+(i+12));\n        vec_xst(y7_fp64vec2,  0, y+(i+14));\n        vec_xst(y8_fp64vec2,  0, y+(i+16));\n        vec_xst(y9_fp64vec2,  0, y+(i+18));\n        vec_xst(y10_fp64vec2, 0, y+(i+20));\n        vec_xst(y11_fp64vec2, 0, y+(i+22));\n    }\n    for (; i <= n-8; i += 8)\n    {\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        x1_fp64vec2  = vec_xl(0, x+(i+2 ));\n        x2_fp64vec2  = vec_xl(0, x+(i+4 ));\n        x3_fp64vec2  = vec_xl(0, x+(i+6 ));\n\n        y0_fp64vec2  = vec_div(x0_fp64vec2,  c_fp64vec2);\n        y1_fp64vec2  = vec_div(x1_fp64vec2,  c_fp64vec2);\n        y2_fp64vec2  = vec_div(x2_fp64vec2,  c_fp64vec2);\n        y3_fp64vec2  = vec_div(x3_fp64vec2,  c_fp64vec2);\n\n        vec_xst(y0_fp64vec2,  0, y+(i   ));\n        vec_xst(y1_fp64vec2,  0, y+(i+2 ));\n        vec_xst(y2_fp64vec2,  0, y+(i+4 ));\n        vec_xst(y3_fp64vec2,  0, y+(i+6 ));\n\n        vec_xst(y0_fp64vec2,  0, y+(i   ));\n        vec_xst(y1_fp64vec2,  0, y+(i+2 ));\n        vec_xst(y2_fp64vec2,  0, y+(i+4 ));\n        vec_xst(y3_fp64vec2,  0, y+(i+6 ));\n    }\n    for (; i <= n-2; i += 2)\n    {\n        x0_fp64vec2  = vec_xl(0, x+(i   ));\n        y0_fp64vec2  = vec_div(x0_fp64vec2,  c_fp64vec2);\n        vec_xst(y0_fp64vec2,  0, y+(i   ));\n    }\n    for (; i < n; i++)\n        y[i] = x[i] / 
c;\n}\n\n\n//--------------------------------------------------------------------------------------------------\n// THFloatVector_fill_VSX:\n//--------------------------------------------------------------------------------------------------\nstatic void THFloatVector_fill_VSX(float *x, const float c, const ptrdiff_t n)\n{\n    ptrdiff_t i;\n\n    float val[4] = {c, c, c, c};\n    vector float fp32vec4 = vec_xl(0, val);\n\n    for (i = 0; i <= n-256; i += 256)\n    {\n        vec_xst(fp32vec4, 0, x+(i    ));\n        vec_xst(fp32vec4, 0, x+(i+4  ));\n        vec_xst(fp32vec4, 0, x+(i+8  ));\n        vec_xst(fp32vec4, 0, x+(i+12 ));\n        vec_xst(fp32vec4, 0, x+(i+16 ));\n        vec_xst(fp32vec4, 0, x+(i+20 ));\n        vec_xst(fp32vec4, 0, x+(i+24 ));\n        vec_xst(fp32vec4, 0, x+(i+28 ));\n        vec_xst(fp32vec4, 0, x+(i+32 ));\n        vec_xst(fp32vec4, 0, x+(i+36 ));\n        vec_xst(fp32vec4, 0, x+(i+40 ));\n        vec_xst(fp32vec4, 0, x+(i+44 ));\n        vec_xst(fp32vec4, 0, x+(i+48 ));\n        vec_xst(fp32vec4, 0, x+(i+52 ));\n        vec_xst(fp32vec4, 0, x+(i+56 ));\n        vec_xst(fp32vec4, 0, x+(i+60 ));\n        vec_xst(fp32vec4, 0, x+(i+64 ));\n        vec_xst(fp32vec4, 0, x+(i+68 ));\n        vec_xst(fp32vec4, 0, x+(i+72 ));\n        vec_xst(fp32vec4, 0, x+(i+76 ));\n        vec_xst(fp32vec4, 0, x+(i+80 ));\n        vec_xst(fp32vec4, 0, x+(i+84 ));\n        vec_xst(fp32vec4, 0, x+(i+88 ));\n        vec_xst(fp32vec4, 0, x+(i+92 ));\n        vec_xst(fp32vec4, 0, x+(i+96 ));\n        vec_xst(fp32vec4, 0, x+(i+100));\n        vec_xst(fp32vec4, 0, x+(i+104));\n        vec_xst(fp32vec4, 0, x+(i+108));\n        vec_xst(fp32vec4, 0, x+(i+112));\n        vec_xst(fp32vec4, 0, x+(i+116));\n        vec_xst(fp32vec4, 0, x+(i+120));\n        vec_xst(fp32vec4, 0, x+(i+124));\n        vec_xst(fp32vec4, 0, x+(i+128));\n        vec_xst(fp32vec4, 0, x+(i+132));\n        vec_xst(fp32vec4, 0, x+(i+136));\n        vec_xst(fp32vec4, 0, x+(i+140));\n        
vec_xst(fp32vec4, 0, x+(i+144));\n        vec_xst(fp32vec4, 0, x+(i+148));\n        vec_xst(fp32vec4, 0, x+(i+152));\n        vec_xst(fp32vec4, 0, x+(i+156));\n        vec_xst(fp32vec4, 0, x+(i+160));\n        vec_xst(fp32vec4, 0, x+(i+164));\n        vec_xst(fp32vec4, 0, x+(i+168));\n        vec_xst(fp32vec4, 0, x+(i+172));\n        vec_xst(fp32vec4, 0, x+(i+176));\n        vec_xst(fp32vec4, 0, x+(i+180));\n        vec_xst(fp32vec4, 0, x+(i+184));\n        vec_xst(fp32vec4, 0, x+(i+188));\n        vec_xst(fp32vec4, 0, x+(i+192));\n        vec_xst(fp32vec4, 0, x+(i+196));\n        vec_xst(fp32vec4, 0, x+(i+200));\n        vec_xst(fp32vec4, 0, x+(i+204));\n        vec_xst(fp32vec4, 0, x+(i+208));\n        vec_xst(fp32vec4, 0, x+(i+212));\n        vec_xst(fp32vec4, 0, x+(i+216));\n        vec_xst(fp32vec4, 0, x+(i+220));\n        vec_xst(fp32vec4, 0, x+(i+224));\n        vec_xst(fp32vec4, 0, x+(i+228));\n        vec_xst(fp32vec4, 0, x+(i+232));\n        vec_xst(fp32vec4, 0, x+(i+236));\n        vec_xst(fp32vec4, 0, x+(i+240));\n        vec_xst(fp32vec4, 0, x+(i+244));\n        vec_xst(fp32vec4, 0, x+(i+248));\n        vec_xst(fp32vec4, 0, x+(i+252));\n    }\n    for (; i <= n-32; i += 32)\n    {\n        vec_xst(fp32vec4, 0, x+(i    ));\n        vec_xst(fp32vec4, 0, x+(i+4  ));\n        vec_xst(fp32vec4, 0, x+(i+8  ));\n        vec_xst(fp32vec4, 0, x+(i+12 ));\n        vec_xst(fp32vec4, 0, x+(i+16 ));\n        vec_xst(fp32vec4, 0, x+(i+20 ));\n        vec_xst(fp32vec4, 0, x+(i+24 ));\n        vec_xst(fp32vec4, 0, x+(i+28 ));\n    }\n    for (; i <= n-4; i += 4)\n        vec_xst(fp32vec4, 0, x+(i    ));\n    for (; i < n; i++)\n        x[i] = c;\n}\n\n\n//--------------------------------------------------------------------------------------------------\n// THFloatVector_cadd_VSX:\n//--------------------------------------------------------------------------------------------------\nstatic void THFloatVector_cadd_VSX(float *z, const float *x, const float *y, const float 
c, const ptrdiff_t n)\n{\n    ptrdiff_t i;\n\n    float val[4] = {c, c, c, c};\n    vector float c_fp32vec4 = vec_xl(0, val);\n\n    vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4;\n    vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4;\n    vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, x6_fp32vec4, x7_fp32vec4;\n    vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4;\n\n\n    for (i = 0; i <= n-48; i += 48)\n    {\n        y0_fp32vec4  = vec_xl(0, y+(i   ));\n        y1_fp32vec4  = vec_xl(0, y+(i+4 ));\n        y2_fp32vec4  = vec_xl(0, y+(i+8 ));\n        y3_fp32vec4  = vec_xl(0, y+(i+12));\n        y4_fp32vec4  = vec_xl(0, y+(i+16 ));\n        y5_fp32vec4  = vec_xl(0, y+(i+20));\n        y6_fp32vec4  = vec_xl(0, y+(i+24));\n        y7_fp32vec4  = vec_xl(0, y+(i+28));\n        y8_fp32vec4  = vec_xl(0, y+(i+32));\n        y9_fp32vec4  = vec_xl(0, y+(i+36));\n        y10_fp32vec4 = vec_xl(0, y+(i+40));\n        y11_fp32vec4 = vec_xl(0, y+(i+44));\n\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        x1_fp32vec4  = vec_xl(0, x+(i+4 ));\n        x2_fp32vec4  = vec_xl(0, x+(i+8 ));\n        x3_fp32vec4  = vec_xl(0, x+(i+12 ));\n        x4_fp32vec4  = vec_xl(0, x+(i+16 ));\n        x5_fp32vec4  = vec_xl(0, x+(i+20));\n        x6_fp32vec4  = vec_xl(0, x+(i+24));\n        x7_fp32vec4  = vec_xl(0, x+(i+28));\n        x8_fp32vec4  = vec_xl(0, x+(i+32));\n        x9_fp32vec4  = vec_xl(0, x+(i+36));\n        x10_fp32vec4 = vec_xl(0, x+(i+40));\n        x11_fp32vec4 = vec_xl(0, x+(i+44));\n\n        y0_fp32vec4  = vec_madd(y0_fp32vec4, c_fp32vec4,  x0_fp32vec4);\n        y1_fp32vec4  = vec_madd(y1_fp32vec4, c_fp32vec4, x1_fp32vec4);\n        y2_fp32vec4  = vec_madd(y2_fp32vec4, c_fp32vec4, x2_fp32vec4);\n        y3_fp32vec4  = vec_madd(y3_fp32vec4, c_fp32vec4, x3_fp32vec4);\n        y4_fp32vec4  = vec_madd(y4_fp32vec4, c_fp32vec4, 
x4_fp32vec4);\n        y5_fp32vec4  = vec_madd(y5_fp32vec4, c_fp32vec4, x5_fp32vec4);\n        y6_fp32vec4  = vec_madd(y6_fp32vec4, c_fp32vec4, x6_fp32vec4);\n        y7_fp32vec4  = vec_madd(y7_fp32vec4, c_fp32vec4, x7_fp32vec4);\n        y8_fp32vec4  = vec_madd(y8_fp32vec4, c_fp32vec4, x8_fp32vec4);\n        y9_fp32vec4  = vec_madd(y9_fp32vec4, c_fp32vec4, x9_fp32vec4);\n        y10_fp32vec4 = vec_madd(y10_fp32vec4, c_fp32vec4, x10_fp32vec4);\n        y11_fp32vec4 = vec_madd(y11_fp32vec4, c_fp32vec4, x11_fp32vec4);\n\n        vec_xst(y0_fp32vec4,  0, z+(i   ));\n        vec_xst(y1_fp32vec4,  0, z+(i+4 ));\n        vec_xst(y2_fp32vec4,  0, z+(i+8 ));\n        vec_xst(y3_fp32vec4,  0, z+(i+12 ));\n        vec_xst(y4_fp32vec4,  0, z+(i+16 ));\n        vec_xst(y5_fp32vec4,  0, z+(i+20));\n        vec_xst(y6_fp32vec4,  0, z+(i+24));\n        vec_xst(y7_fp32vec4,  0, z+(i+28));\n        vec_xst(y8_fp32vec4,  0, z+(i+32));\n        vec_xst(y9_fp32vec4,  0, z+(i+36));\n        vec_xst(y10_fp32vec4, 0, z+(i+40));\n        vec_xst(y11_fp32vec4, 0, z+(i+44));\n    }\n    for (; i <= n-16; i += 16)\n    {\n        y0_fp32vec4  = vec_xl(0, y+(i   ));\n        y1_fp32vec4  = vec_xl(0, y+(i+4 ));\n        y2_fp32vec4  = vec_xl(0, y+(i+8 ));\n        y3_fp32vec4  = vec_xl(0, y+(i+12 ));\n\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        x1_fp32vec4  = vec_xl(0, x+(i+4 ));\n        x2_fp32vec4  = vec_xl(0, x+(i+8 ));\n        x3_fp32vec4  = vec_xl(0, x+(i+12 ));\n\n        y0_fp32vec4  = vec_madd(y0_fp32vec4, c_fp32vec4, x0_fp32vec4);\n        y1_fp32vec4  = vec_madd(y1_fp32vec4, c_fp32vec4, x1_fp32vec4);\n        y2_fp32vec4  = vec_madd(y2_fp32vec4, c_fp32vec4, x2_fp32vec4);\n        y3_fp32vec4  = vec_madd(y3_fp32vec4, c_fp32vec4, x3_fp32vec4);\n\n        vec_xst(y0_fp32vec4,  0, z+(i   ));\n        vec_xst(y1_fp32vec4,  0, z+(i+4 ));\n        vec_xst(y2_fp32vec4,  0, z+(i+8 ));\n        vec_xst(y3_fp32vec4,  0, z+(i+12 ));\n    }\n    for (; i <= n-4; i += 4)\n    {\n      
  y0_fp32vec4  = vec_xl(0, y+(i   ));\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        y0_fp32vec4  = vec_madd(y0_fp32vec4, c_fp32vec4, x0_fp32vec4);\n        vec_xst(y0_fp32vec4,  0, z+(i   ));\n    }\n    for (; i < n; i++)\n        z[i] = x[i] + c* y[i];\n}\n\n\n//--------------------------------------------------------------------------------------------------\n// THFloatVector_adds_VSX:\n//--------------------------------------------------------------------------------------------------\nstatic void THFloatVector_adds_VSX(float *y, const float *x, const float c, const ptrdiff_t n)\n{\n    ptrdiff_t i;\n    float val[4] = {c, c, c, c};\n    vector float c_fp32vec4 = vec_xl(0, val);\n\n    vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4;\n    vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4;\n    vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, x6_fp32vec4, x7_fp32vec4;\n    vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4;\n\n\n    for (i = 0; i <= n-48; i += 48)\n    {\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        x1_fp32vec4  = vec_xl(0, x+(i+4 ));\n        x2_fp32vec4  = vec_xl(0, x+(i+8 ));\n        x3_fp32vec4  = vec_xl(0, x+(i+12));\n        x4_fp32vec4  = vec_xl(0, x+(i+16));\n        x5_fp32vec4  = vec_xl(0, x+(i+20));\n        x6_fp32vec4  = vec_xl(0, x+(i+24));\n        x7_fp32vec4  = vec_xl(0, x+(i+28));\n        x8_fp32vec4  = vec_xl(0, x+(i+32));\n        x9_fp32vec4  = vec_xl(0, x+(i+36));\n        x10_fp32vec4 = vec_xl(0, x+(i+40));\n        x11_fp32vec4 = vec_xl(0, x+(i+44));\n\n        y0_fp32vec4  = vec_add(x0_fp32vec4,  c_fp32vec4);\n        y1_fp32vec4  = vec_add(x1_fp32vec4,  c_fp32vec4);\n        y2_fp32vec4  = vec_add(x2_fp32vec4,  c_fp32vec4);\n        y3_fp32vec4  = vec_add(x3_fp32vec4,  c_fp32vec4);\n        y4_fp32vec4  = vec_add(x4_fp32vec4,  c_fp32vec4);\n        y5_fp32vec4  = 
vec_add(x5_fp32vec4,  c_fp32vec4);\n        y6_fp32vec4  = vec_add(x6_fp32vec4,  c_fp32vec4);\n        y7_fp32vec4  = vec_add(x7_fp32vec4,  c_fp32vec4);\n        y8_fp32vec4  = vec_add(x8_fp32vec4,  c_fp32vec4);\n        y9_fp32vec4  = vec_add(x9_fp32vec4,  c_fp32vec4);\n        y10_fp32vec4 = vec_add(x10_fp32vec4, c_fp32vec4);\n        y11_fp32vec4 = vec_add(x11_fp32vec4, c_fp32vec4);\n\n        vec_xst(y0_fp32vec4,  0, y+(i   ));\n        vec_xst(y1_fp32vec4,  0, y+(i+4 ));\n        vec_xst(y2_fp32vec4,  0, y+(i+8 ));\n        vec_xst(y3_fp32vec4,  0, y+(i+12));\n        vec_xst(y4_fp32vec4,  0, y+(i+16));\n        vec_xst(y5_fp32vec4,  0, y+(i+20));\n        vec_xst(y6_fp32vec4,  0, y+(i+24));\n        vec_xst(y7_fp32vec4,  0, y+(i+28));\n        vec_xst(y8_fp32vec4,  0, y+(i+32));\n        vec_xst(y9_fp32vec4,  0, y+(i+36));\n        vec_xst(y10_fp32vec4, 0, y+(i+40));\n        vec_xst(y11_fp32vec4, 0, y+(i+44));\n    }\n    for (; i <= n-16; i += 16)\n    {\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        x1_fp32vec4  = vec_xl(0, x+(i+4 ));\n        x2_fp32vec4  = vec_xl(0, x+(i+8 ));\n        x3_fp32vec4  = vec_xl(0, x+(i+12));\n\n        y0_fp32vec4  = vec_add(x0_fp32vec4,  c_fp32vec4);\n        y1_fp32vec4  = vec_add(x1_fp32vec4,  c_fp32vec4);\n        y2_fp32vec4  = vec_add(x2_fp32vec4,  c_fp32vec4);\n        y3_fp32vec4  = vec_add(x3_fp32vec4,  c_fp32vec4);\n\n        vec_xst(y0_fp32vec4,  0, y+(i   ));\n        vec_xst(y1_fp32vec4,  0, y+(i+4 ));\n        vec_xst(y2_fp32vec4,  0, y+(i+8 ));\n        vec_xst(y3_fp32vec4,  0, y+(i+12));\n    }\n    for (; i <= n-4; i += 4)\n    {\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        y0_fp32vec4  = vec_add(x0_fp32vec4,  c_fp32vec4);\n        vec_xst(y0_fp32vec4,  0, y+(i   ));\n    }\n    for (; i < n; i++)\n        y[i] = c + x[i];\n}\n\n\n//--------------------------------------------------------------------------------------------------\n// 
THFloatVector_cmul_VSX:\n//--------------------------------------------------------------------------------------------------\nstatic void THFloatVector_cmul_VSX(float *z, const float *y, const float *x, const ptrdiff_t n)\n{\n    ptrdiff_t i;\n\n    vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4;\n    vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4;\n    vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, x6_fp32vec4, x7_fp32vec4;\n    vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4;\n\n\n    for (i = 0; i <= n-48; i += 48)\n    {\n        y0_fp32vec4  = vec_xl(0, y+(i   ));\n        y1_fp32vec4  = vec_xl(0, y+(i+4 ));\n        y2_fp32vec4  = vec_xl(0, y+(i+8 ));\n        y3_fp32vec4  = vec_xl(0, y+(i+12 ));\n        y4_fp32vec4  = vec_xl(0, y+(i+16 ));\n        y5_fp32vec4  = vec_xl(0, y+(i+20));\n        y6_fp32vec4  = vec_xl(0, y+(i+24));\n        y7_fp32vec4  = vec_xl(0, y+(i+28));\n        y8_fp32vec4  = vec_xl(0, y+(i+32));\n        y9_fp32vec4  = vec_xl(0, y+(i+36));\n        y10_fp32vec4 = vec_xl(0, y+(i+40));\n        y11_fp32vec4 = vec_xl(0, y+(i+44));\n\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        x1_fp32vec4  = vec_xl(0, x+(i+4 ));\n        x2_fp32vec4  = vec_xl(0, x+(i+8 ));\n        x3_fp32vec4  = vec_xl(0, x+(i+12 ));\n        x4_fp32vec4  = vec_xl(0, x+(i+16 ));\n        x5_fp32vec4  = vec_xl(0, x+(i+20));\n        x6_fp32vec4  = vec_xl(0, x+(i+24));\n        x7_fp32vec4  = vec_xl(0, x+(i+28));\n        x8_fp32vec4  = vec_xl(0, x+(i+32));\n        x9_fp32vec4  = vec_xl(0, x+(i+36));\n        x10_fp32vec4 = vec_xl(0, x+(i+40));\n        x11_fp32vec4 = vec_xl(0, x+(i+44));\n\n        y0_fp32vec4  = vec_mul(y0_fp32vec4,  x0_fp32vec4);\n        y1_fp32vec4  = vec_mul(y1_fp32vec4,  x1_fp32vec4);\n        y2_fp32vec4  = vec_mul(y2_fp32vec4,  x2_fp32vec4);\n        y3_fp32vec4  = vec_mul(y3_fp32vec4,  
x3_fp32vec4);\n        y4_fp32vec4  = vec_mul(y4_fp32vec4,  x4_fp32vec4);\n        y5_fp32vec4  = vec_mul(y5_fp32vec4,  x5_fp32vec4);\n        y6_fp32vec4  = vec_mul(y6_fp32vec4,  x6_fp32vec4);\n        y7_fp32vec4  = vec_mul(y7_fp32vec4,  x7_fp32vec4);\n        y8_fp32vec4  = vec_mul(y8_fp32vec4,  x8_fp32vec4);\n        y9_fp32vec4  = vec_mul(y9_fp32vec4,  x9_fp32vec4);\n        y10_fp32vec4 = vec_mul(y10_fp32vec4, x10_fp32vec4);\n        y11_fp32vec4 = vec_mul(y11_fp32vec4, x11_fp32vec4);\n\n        vec_xst(y0_fp32vec4,  0, z+(i   ));\n        vec_xst(y1_fp32vec4,  0, z+(i+4 ));\n        vec_xst(y2_fp32vec4,  0, z+(i+8 ));\n        vec_xst(y3_fp32vec4,  0, z+(i+12 ));\n        vec_xst(y4_fp32vec4,  0, z+(i+16 ));\n        vec_xst(y5_fp32vec4,  0, z+(i+20));\n        vec_xst(y6_fp32vec4,  0, z+(i+24));\n        vec_xst(y7_fp32vec4,  0, z+(i+28));\n        vec_xst(y8_fp32vec4,  0, z+(i+32));\n        vec_xst(y9_fp32vec4,  0, z+(i+36));\n        vec_xst(y10_fp32vec4, 0, z+(i+40));\n        vec_xst(y11_fp32vec4, 0, z+(i+44));\n    }\n    for (; i <= n-16; i += 16)\n    {\n        y0_fp32vec4  = vec_xl(0, y+(i   ));\n        y1_fp32vec4  = vec_xl(0, y+(i+4 ));\n        y2_fp32vec4  = vec_xl(0, y+(i+8 ));\n        y3_fp32vec4  = vec_xl(0, y+(i+12 ));\n\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        x1_fp32vec4  = vec_xl(0, x+(i+4 ));\n        x2_fp32vec4  = vec_xl(0, x+(i+8 ));\n        x3_fp32vec4  = vec_xl(0, x+(i+12 ));\n\n        y0_fp32vec4  = vec_mul(y0_fp32vec4,  x0_fp32vec4);\n        y1_fp32vec4  = vec_mul(y1_fp32vec4,  x1_fp32vec4);\n        y2_fp32vec4  = vec_mul(y2_fp32vec4,  x2_fp32vec4);\n        y3_fp32vec4  = vec_mul(y3_fp32vec4,  x3_fp32vec4);\n\n        vec_xst(y0_fp32vec4,  0, z+(i   ));\n        vec_xst(y1_fp32vec4,  0, z+(i+4 ));\n        vec_xst(y2_fp32vec4,  0, z+(i+8 ));\n        vec_xst(y3_fp32vec4,  0, z+(i+12 ));\n    }\n    for (; i <= n-4; i += 4)\n    {\n        y0_fp32vec4  = vec_xl(0, y+(i   ));\n        x0_fp32vec4  = vec_xl(0, 
x+(i   ));\n        y0_fp32vec4  = vec_mul(y0_fp32vec4,  x0_fp32vec4);\n        vec_xst(y0_fp32vec4,  0, z+(i   ));\n    }\n    for (; i < n; i++)\n        z[i] = y[i] * x[i];\n}\n\n\n//--------------------------------------------------------------------------------------------------\n// THFloatVector_muls_VSX:\n//--------------------------------------------------------------------------------------------------\nstatic void THFloatVector_muls_VSX(float *y, const float *x, const float c, const ptrdiff_t n)\n{\n    ptrdiff_t i;\n    float val[4] = {c, c, c, c};\n    vector float c_fp32vec4 = vec_xl(0, val);\n\n    vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4;\n    vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4;\n    vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, x6_fp32vec4, x7_fp32vec4;\n    vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4;\n\n\n    for (i = 0; i <= n-48; i += 48)\n    {\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        x1_fp32vec4  = vec_xl(0, x+(i+4 ));\n        x2_fp32vec4  = vec_xl(0, x+(i+8 ));\n        x3_fp32vec4  = vec_xl(0, x+(i+12));\n        x4_fp32vec4  = vec_xl(0, x+(i+16));\n        x5_fp32vec4  = vec_xl(0, x+(i+20));\n        x6_fp32vec4  = vec_xl(0, x+(i+24));\n        x7_fp32vec4  = vec_xl(0, x+(i+28));\n        x8_fp32vec4  = vec_xl(0, x+(i+32));\n        x9_fp32vec4  = vec_xl(0, x+(i+36));\n        x10_fp32vec4 = vec_xl(0, x+(i+40));\n        x11_fp32vec4 = vec_xl(0, x+(i+44));\n\n        y0_fp32vec4  = vec_mul(x0_fp32vec4,  c_fp32vec4);\n        y1_fp32vec4  = vec_mul(x1_fp32vec4,  c_fp32vec4);\n        y2_fp32vec4  = vec_mul(x2_fp32vec4,  c_fp32vec4);\n        y3_fp32vec4  = vec_mul(x3_fp32vec4,  c_fp32vec4);\n        y4_fp32vec4  = vec_mul(x4_fp32vec4,  c_fp32vec4);\n        y5_fp32vec4  = vec_mul(x5_fp32vec4,  c_fp32vec4);\n        y6_fp32vec4  = vec_mul(x6_fp32vec4,  
c_fp32vec4);\n        y7_fp32vec4  = vec_mul(x7_fp32vec4,  c_fp32vec4);\n        y8_fp32vec4  = vec_mul(x8_fp32vec4,  c_fp32vec4);\n        y9_fp32vec4  = vec_mul(x9_fp32vec4,  c_fp32vec4);\n        y10_fp32vec4 = vec_mul(x10_fp32vec4, c_fp32vec4);\n        y11_fp32vec4 = vec_mul(x11_fp32vec4, c_fp32vec4);\n\n        vec_xst(y0_fp32vec4,  0, y+(i   ));\n        vec_xst(y1_fp32vec4,  0, y+(i+4 ));\n        vec_xst(y2_fp32vec4,  0, y+(i+8 ));\n        vec_xst(y3_fp32vec4,  0, y+(i+12));\n        vec_xst(y4_fp32vec4,  0, y+(i+16));\n        vec_xst(y5_fp32vec4,  0, y+(i+20));\n        vec_xst(y6_fp32vec4,  0, y+(i+24));\n        vec_xst(y7_fp32vec4,  0, y+(i+28));\n        vec_xst(y8_fp32vec4,  0, y+(i+32));\n        vec_xst(y9_fp32vec4,  0, y+(i+36));\n        vec_xst(y10_fp32vec4, 0, y+(i+40));\n        vec_xst(y11_fp32vec4, 0, y+(i+44));\n    }\n    for (; i <= n-16; i += 16)\n    {\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        x1_fp32vec4  = vec_xl(0, x+(i+4 ));\n        x2_fp32vec4  = vec_xl(0, x+(i+8 ));\n        x3_fp32vec4  = vec_xl(0, x+(i+12));\n\n        y0_fp32vec4  = vec_mul(x0_fp32vec4,  c_fp32vec4);\n        y1_fp32vec4  = vec_mul(x1_fp32vec4,  c_fp32vec4);\n        y2_fp32vec4  = vec_mul(x2_fp32vec4,  c_fp32vec4);\n        y3_fp32vec4  = vec_mul(x3_fp32vec4,  c_fp32vec4);\n\n        vec_xst(y0_fp32vec4,  0, y+(i   ));\n        vec_xst(y1_fp32vec4,  0, y+(i+4 ));\n        vec_xst(y2_fp32vec4,  0, y+(i+8 ));\n        vec_xst(y3_fp32vec4,  0, y+(i+12));\n    }\n    for (; i <= n-4; i += 4)\n    {\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        y0_fp32vec4  = vec_mul(x0_fp32vec4,  c_fp32vec4);\n        vec_xst(y0_fp32vec4,  0, y+(i   ));\n    }\n    for (; i < n; i++)\n        y[i] = c * x[i];\n}\n\n\n//--------------------------------------------------------------------------------------------------\n// THFloatVector_cdiv_VSX:\n//--------------------------------------------------------------------------------------------------\nstatic void 
THFloatVector_cdiv_VSX(float *z, const float *x, const float *y, const ptrdiff_t n)\n{\n    ptrdiff_t i;\n\n    vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4;\n    vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4;\n    vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, x6_fp32vec4, x7_fp32vec4;\n    vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4;\n\n\n    for (i = 0; i <= n-48; i += 48)\n    {\n        y0_fp32vec4  = vec_xl(0, y+(i   ));\n        y1_fp32vec4  = vec_xl(0, y+(i+4));\n        y2_fp32vec4  = vec_xl(0, y+(i+8));\n        y3_fp32vec4  = vec_xl(0, y+(i+12));\n        y4_fp32vec4  = vec_xl(0, y+(i+16));\n        y5_fp32vec4  = vec_xl(0, y+(i+20));\n        y6_fp32vec4  = vec_xl(0, y+(i+24));\n        y7_fp32vec4  = vec_xl(0, y+(i+28));\n        y8_fp32vec4  = vec_xl(0, y+(i+32));\n        y9_fp32vec4  = vec_xl(0, y+(i+36));\n        y10_fp32vec4 = vec_xl(0, y+(i+40));\n        y11_fp32vec4 = vec_xl(0, y+(i+44));\n\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        x1_fp32vec4  = vec_xl(0, x+(i+4 ));\n        x2_fp32vec4  = vec_xl(0, x+(i+8 ));\n        x3_fp32vec4  = vec_xl(0, x+(i+12 ));\n        x4_fp32vec4  = vec_xl(0, x+(i+16 ));\n        x5_fp32vec4  = vec_xl(0, x+(i+20));\n        x6_fp32vec4  = vec_xl(0, x+(i+24));\n        x7_fp32vec4  = vec_xl(0, x+(i+28));\n        x8_fp32vec4  = vec_xl(0, x+(i+32));\n        x9_fp32vec4  = vec_xl(0, x+(i+36));\n        x10_fp32vec4 = vec_xl(0, x+(i+40));\n        x11_fp32vec4 = vec_xl(0, x+(i+44));\n\n        y0_fp32vec4  = vec_div(x0_fp32vec4,  y0_fp32vec4);\n        y1_fp32vec4  = vec_div(x1_fp32vec4,  y1_fp32vec4);\n        y2_fp32vec4  = vec_div(x2_fp32vec4,  y2_fp32vec4);\n        y3_fp32vec4  = vec_div(x3_fp32vec4,  y3_fp32vec4);\n        y4_fp32vec4  = vec_div(x4_fp32vec4,  y4_fp32vec4);\n        y5_fp32vec4  = vec_div(x5_fp32vec4,  y5_fp32vec4);\n        
y6_fp32vec4  = vec_div(x6_fp32vec4,  y6_fp32vec4);\n        y7_fp32vec4  = vec_div(x7_fp32vec4,  y7_fp32vec4);\n        y8_fp32vec4  = vec_div(x8_fp32vec4,  y8_fp32vec4);\n        y9_fp32vec4  = vec_div(x9_fp32vec4,  y9_fp32vec4);\n        y10_fp32vec4 = vec_div(x10_fp32vec4, y10_fp32vec4);\n        y11_fp32vec4 = vec_div(x11_fp32vec4, y11_fp32vec4);\n\n        vec_xst(y0_fp32vec4,  0, z+(i   ));\n        vec_xst(y1_fp32vec4,  0, z+(i+4 ));\n        vec_xst(y2_fp32vec4,  0, z+(i+8 ));\n        vec_xst(y3_fp32vec4,  0, z+(i+12 ));\n        vec_xst(y4_fp32vec4,  0, z+(i+16 ));\n        vec_xst(y5_fp32vec4,  0, z+(i+20));\n        vec_xst(y6_fp32vec4,  0, z+(i+24));\n        vec_xst(y7_fp32vec4,  0, z+(i+28));\n        vec_xst(y8_fp32vec4,  0, z+(i+32));\n        vec_xst(y9_fp32vec4,  0, z+(i+36));\n        vec_xst(y10_fp32vec4, 0, z+(i+40));\n        vec_xst(y11_fp32vec4, 0, z+(i+44));\n    }\n    for (; i <= n-16; i += 16)\n    {\n        y0_fp32vec4  = vec_xl(0, y+(i   ));\n        y1_fp32vec4  = vec_xl(0, y+(i+4 ));\n        y2_fp32vec4  = vec_xl(0, y+(i+8 ));\n        y3_fp32vec4  = vec_xl(0, y+(i+12 ));\n\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        x1_fp32vec4  = vec_xl(0, x+(i+4 ));\n        x2_fp32vec4  = vec_xl(0, x+(i+8 ));\n        x3_fp32vec4  = vec_xl(0, x+(i+12 ));\n\n        y0_fp32vec4  = vec_div(x0_fp32vec4,  y0_fp32vec4);\n        y1_fp32vec4  = vec_div(x1_fp32vec4,  y1_fp32vec4);\n        y2_fp32vec4  = vec_div(x2_fp32vec4,  y2_fp32vec4);\n        y3_fp32vec4  = vec_div(x3_fp32vec4,  y3_fp32vec4);\n\n        vec_xst(y0_fp32vec4,  0, z+(i   ));\n        vec_xst(y1_fp32vec4,  0, z+(i+4 ));\n        vec_xst(y2_fp32vec4,  0, z+(i+8 ));\n        vec_xst(y3_fp32vec4,  0, z+(i+12 ));\n    }\n    for (; i <= n-4; i += 4)\n    {\n        y0_fp32vec4  = vec_xl(0, y+(i   ));\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        y0_fp32vec4  = vec_div(x0_fp32vec4,  y0_fp32vec4);\n        vec_xst(y0_fp32vec4,  0, z+(i   ));\n    }\n    for (; i < n; 
i++)\n        z[i] = x[i] / y[i];\n}\n\n\n//--------------------------------------------------------------------------------------------------\n// THFloatVector_divs_VSX:\n//   Divides every element of x by the scalar c: y[i] = x[i] / c for i in [0, n).\n//   c is replicated into one 4-float vector; elements are then processed in passes of\n//   48, 16 and 4, with a scalar loop for the last 0-3. Each pass stores exactly the\n//   range it loaded, so no write lands at or beyond y+n.\n//--------------------------------------------------------------------------------------------------\nstatic void THFloatVector_divs_VSX(float *y, const float*x, const float c, const ptrdiff_t n)\n{\n    ptrdiff_t i;\n\n    float val[4] = {c, c, c, c};\n    vector float c_fp32vec4 = vec_xl(0, val);\n\n    vector float y0_fp32vec4, y1_fp32vec4, y2_fp32vec4, y3_fp32vec4, y4_fp32vec4, y5_fp32vec4, y6_fp32vec4, y7_fp32vec4;\n    vector float y8_fp32vec4, y9_fp32vec4, y10_fp32vec4, y11_fp32vec4;\n    vector float x0_fp32vec4, x1_fp32vec4, x2_fp32vec4, x3_fp32vec4, x4_fp32vec4, x5_fp32vec4, x6_fp32vec4, x7_fp32vec4;\n    vector float x8_fp32vec4, x9_fp32vec4, x10_fp32vec4, x11_fp32vec4;\n\n\n    for (i = 0; i <= n-48; i += 48)\n    {\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        x1_fp32vec4  = vec_xl(0, x+(i+4 ));\n        x2_fp32vec4  = vec_xl(0, x+(i+8 ));\n        x3_fp32vec4  = vec_xl(0, x+(i+12 ));\n        x4_fp32vec4  = vec_xl(0, x+(i+16 ));\n        x5_fp32vec4  = vec_xl(0, x+(i+20));\n        x6_fp32vec4  = vec_xl(0, x+(i+24));\n        x7_fp32vec4  = vec_xl(0, x+(i+28));\n        x8_fp32vec4  = vec_xl(0, x+(i+32));\n        x9_fp32vec4  = vec_xl(0, x+(i+36));\n        x10_fp32vec4 = vec_xl(0, x+(i+40));\n        x11_fp32vec4 = vec_xl(0, x+(i+44));\n\n        y0_fp32vec4  = vec_div(x0_fp32vec4,  c_fp32vec4);\n        y1_fp32vec4  = vec_div(x1_fp32vec4,  c_fp32vec4);\n        y2_fp32vec4  = vec_div(x2_fp32vec4,  c_fp32vec4);\n        y3_fp32vec4  = vec_div(x3_fp32vec4,  c_fp32vec4);\n        y4_fp32vec4  = vec_div(x4_fp32vec4,  c_fp32vec4);\n        y5_fp32vec4  = vec_div(x5_fp32vec4,  c_fp32vec4);\n        y6_fp32vec4  = vec_div(x6_fp32vec4,  c_fp32vec4);\n        y7_fp32vec4  = vec_div(x7_fp32vec4,  c_fp32vec4);\n        y8_fp32vec4  = vec_div(x8_fp32vec4,  c_fp32vec4);\n        
y9_fp32vec4  = vec_div(x9_fp32vec4,  c_fp32vec4);\n        y10_fp32vec4 = vec_div(x10_fp32vec4, c_fp32vec4);\n        y11_fp32vec4 = vec_div(x11_fp32vec4, c_fp32vec4);\n\n        vec_xst(y0_fp32vec4,  0, y+(i   ));\n        vec_xst(y1_fp32vec4,  0, y+(i+4 ));\n        vec_xst(y2_fp32vec4,  0, y+(i+8 ));\n        vec_xst(y3_fp32vec4,  0, y+(i+12 ));\n        vec_xst(y4_fp32vec4,  0, y+(i+16 ));\n        vec_xst(y5_fp32vec4,  0, y+(i+20));\n        vec_xst(y6_fp32vec4,  0, y+(i+24));\n        vec_xst(y7_fp32vec4,  0, y+(i+28));\n        vec_xst(y8_fp32vec4,  0, y+(i+32));\n        vec_xst(y9_fp32vec4,  0, y+(i+36));\n        vec_xst(y10_fp32vec4, 0, y+(i+40));\n        vec_xst(y11_fp32vec4, 0, y+(i+44));\n    }\n    for (; i <= n-16; i += 16)\n    {\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        x1_fp32vec4  = vec_xl(0, x+(i+4 ));\n        x2_fp32vec4  = vec_xl(0, x+(i+8 ));\n        x3_fp32vec4  = vec_xl(0, x+(i+12 ));\n\n        y0_fp32vec4  = vec_div(x0_fp32vec4,  c_fp32vec4);\n        y1_fp32vec4  = vec_div(x1_fp32vec4,  c_fp32vec4);\n        y2_fp32vec4  = vec_div(x2_fp32vec4,  c_fp32vec4);\n        y3_fp32vec4  = vec_div(x3_fp32vec4,  c_fp32vec4);\n\n        // Store each of the four results once, inside [i, i+16) only, so the last\n        // 16-wide chunk cannot write past y+n or (when y aliases x) clobber unread inputs.\n        vec_xst(y0_fp32vec4,  0, y+(i   ));\n        vec_xst(y1_fp32vec4,  0, y+(i+4 ));\n        vec_xst(y2_fp32vec4,  0, y+(i+8 ));\n        vec_xst(y3_fp32vec4,  0, y+(i+12 ));\n    }\n    for (; i <= n-4; i += 4)\n    {\n        x0_fp32vec4  = vec_xl(0, x+(i   ));\n        y0_fp32vec4  = vec_div(x0_fp32vec4,  c_fp32vec4);\n        vec_xst(y0_fp32vec4,  0, y+(i   ));\n    }\n    for (; i < n; i++)\n        y[i] = x[i] / c;\n}\n\n\n//------------------------------------------------\n//\n// Testing for correctness and performance\n//\n// If you want to run these tests, compile this\n// file with -DRUN_VSX_TESTS on a 
Power machine,\n// and then run the executable that is generated.\n//\n//------------------------------------------------\n//\n// Example passing run (from a Power8 machine):\n//\n//    $ gcc VSX.c -O2 -D RUN_VSX_TESTS -o vsxtest\n//    $ ./vsxtest\n//\n//\tTODO\n//\n//\n//    Finished running all tests. All tests PASSED.\n//\n//------------------------------------------------\n#ifdef RUN_VSX_TESTS\n\n#include <stdio.h>\n#include <stdlib.h>\n#include <time.h>\n#include <assert.h>\n#include <math.h>\n\n#define VSX_PERF_NUM_TEST_ELEMENTS 100000000\n#define VSX_FUNC_NUM_TEST_ELEMENTS 2507\n\n\n//--------------------------------------------------------------------------------------------------\n// Standard implementations:\n//--------------------------------------------------------------------------------------------------\nstatic void standardDouble_fill(double *x, const double c, const ptrdiff_t n)\n{\n    for (ptrdiff_t i = 0; i < n; i++)\n        x[i] = c;\n}\n\nstatic void standardFloat_fill(float *x, const float c, const ptrdiff_t n)\n{\n    for (ptrdiff_t i = 0; i < n; i++)\n        x[i] = c;\n}\n\nstatic void standardDouble_cadd(double *z, const double *x,  const double *y, const double c, const ptrdiff_t n)\n{\n  for (ptrdiff_t i = 0; i < n; i++)\n    z[i] = x[i] + c * y[i];\n}\n\nstatic void standardFloat_cadd(float *z, const float *x, const float *y, const float c, const ptrdiff_t n)\n{\n  for (ptrdiff_t i = 0; i < n; i++)\n    z[i] = x[i] + c * y[i];\n}\n\nstatic void standardDouble_adds(double *y, const double *x, const double c, const ptrdiff_t n)\n{\n  for (ptrdiff_t i = 0; i < n; i++)\n    y[i] = c + x[i];\n}\n\nstatic void standardFloat_adds(float *y, const float *x, const float c, const ptrdiff_t n)\n{\n  for (ptrdiff_t i = 0; i < n; i++)\n    y[i] = c + x[i];\n}\n\nstatic void standardDouble_cmul(double *z, const double *x,  const double *y, const ptrdiff_t n)\n{\n  for (ptrdiff_t i = 0; i < n; i++)\n    z[i] = x[i] * y[i];\n}\n\nstatic void 
standardFloat_cmul(float *z, const float *x, const float *y, const ptrdiff_t n)\n{\n  for (ptrdiff_t i = 0; i < n; i++)\n    z[i] = x[i] * y[i];\n}\n\nstatic void standardDouble_muls(double *y, const double *x, const double c, const ptrdiff_t n)\n{\n  for (ptrdiff_t i = 0; i < n; i++)\n    y[i] = c * x[i];\n}\n\nstatic void standardFloat_muls(float *y, const float *x, const float c, const ptrdiff_t n)\n{\n  for (ptrdiff_t i = 0; i < n; i++)\n    y[i] = c * x[i];\n}\n\nstatic void standardDouble_cdiv(double *z, const double *x,  const double *y, const ptrdiff_t n)\n{\n  for (ptrdiff_t i = 0; i < n; i++)\n    z[i] = x[i] / y[i];\n}\n\nstatic void standardFloat_cdiv(float *z, const float *x, const float *y, const ptrdiff_t n)\n{\n  for (ptrdiff_t i = 0; i < n; i++)\n    z[i] = x[i] / y[i];\n}\n\nstatic void standardDouble_divs(double *y, const double *x, const double c, const ptrdiff_t n)\n{\n  for (ptrdiff_t i = 0; i < n; i++)\n    y[i] = x[i] / c;\n}\n\nstatic void standardFloat_divs(float *y, const float *x, const float c, const ptrdiff_t n)\n{\n  for (ptrdiff_t i = 0; i < n; i++)\n    y[i] = x[i] / c;\n}\n\ndouble randDouble()\n{\n    return (double)(rand()%100)/(double)(rand()%100) * (rand()%2 ? -1.0 : 1.0);\n}\n\nint near(double a, double b)\n{\n    int aClass = fpclassify(a);\n    int bClass = fpclassify(b);\n\n    if(aClass != bClass)             // i.e. is it NAN, infinite, or finite...?\n        return 0;\n\n    if(aClass == FP_INFINITE)       // if it is infinite, the sign must be the same, i.e. positive infinity is not near negative infinity\n        return (signbit(a) == signbit(b));\n    else if(aClass == FP_NORMAL)    // if it is a normal number then check the magnitude of the difference between the numbers\n        return fabs(a - b) < 0.001;\n    else                            // if both number are of the same class as each other and are of any other class (i.e. 
such as NAN), then they are near to each other.\n        return 1;\n}\n\n\n//--------------------------------------------------------------------------------------------------\n// Standard tests:\n//--------------------------------------------------------------------------------------------------\nvoid test_THDoubleVector_fill_VSX()\n{\n    clock_t start, end;\n    double elapsedSeconds_optimized, elapsedSeconds_standard;\n\n    double *x_standard  = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *x_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n\n    double yVal0 = 17.2;\n    double yVal1 = 8.2;\n    double yVal2 = 5.1;\n    double yVal3 = -0.9;\n\n    //-------------------------------------------------\n    // Performance Test\n    //-------------------------------------------------\n    start = clock();\n    standardDouble_fill(x_standard, yVal0, VSX_PERF_NUM_TEST_ELEMENTS  );\n    standardDouble_fill(x_standard, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    standardDouble_fill(x_standard, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    standardDouble_fill(x_standard, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"standardDouble_fill() test took %.5lf seconds\\n\", elapsedSeconds_standard);\n\n    start = clock();\n    THDoubleVector_fill_VSX(x_optimized, yVal0, VSX_PERF_NUM_TEST_ELEMENTS  );\n    THDoubleVector_fill_VSX(x_optimized, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    THDoubleVector_fill_VSX(x_optimized, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    THDoubleVector_fill_VSX(x_optimized, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"THDoubleVector_fill_VSX() test took %.5lf seconds\\n\", elapsedSeconds_optimized);\n\n\n    //-------------------------------------------------\n    // Correctness Test\n    
//-------------------------------------------------\n    yVal0 += 1.0;\n    yVal1 += 1.0;\n    yVal2 += 1.0;\n    yVal3 -= 1.0;\n\n    standardDouble_fill(    x_standard,  yVal0, VSX_FUNC_NUM_TEST_ELEMENTS);\n    THDoubleVector_fill_VSX(x_optimized, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS);\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n        assert(x_optimized[i] == yVal0);\n\n    standardDouble_fill(    x_standard+1,  yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    THDoubleVector_fill_VSX(x_optimized+1, yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    standardDouble_fill(    x_standard+2,  yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    THDoubleVector_fill_VSX(x_optimized+2, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    standardDouble_fill(    x_standard+3,  yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    THDoubleVector_fill_VSX(x_optimized+3, yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    standardDouble_fill(    x_standard+517,  yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    THDoubleVector_fill_VSX(x_optimized+517, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    int r = rand() % 258;\n    standardDouble_fill(    x_standard+517+r,  yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    THDoubleVector_fill_VSX(x_optimized+517+r, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n        assert(x_optimized[i] == x_standard[i]);\n    printf(\"All assertions PASSED for THDoubleVector_fill_VSX() test.\\n\\n\");\n\n\n    free(x_standard);\n    free(x_optimized);\n}\n\n\nvoid test_THFloatVector_fill_VSX()\n{\n    clock_t start, end;\n    double elapsedSeconds_optimized, elapsedSeconds_standard;\n\n    float *x_standard  = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *x_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n\n    float yVal0 = 17.2;\n    float yVal1 = 8.2;\n    float yVal2 = 5.1;\n    float yVal3 = -0.9;\n\n    //-------------------------------------------------\n    // Performance 
Test\n    //-------------------------------------------------\n    start = clock();\n    standardFloat_fill(x_standard, yVal0, VSX_PERF_NUM_TEST_ELEMENTS  );\n    standardFloat_fill(x_standard, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    standardFloat_fill(x_standard, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    standardFloat_fill(x_standard, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"standardFloat_fill() test took %.5lf seconds\\n\", elapsedSeconds_standard);\n\n    start = clock();\n    THFloatVector_fill_VSX(x_optimized, yVal0, VSX_PERF_NUM_TEST_ELEMENTS  );\n    THFloatVector_fill_VSX(x_optimized, yVal1, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    THFloatVector_fill_VSX(x_optimized, yVal2, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    THFloatVector_fill_VSX(x_optimized, yVal3, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"THFloatVector_fill_VSX() test took %.5lf seconds\\n\", elapsedSeconds_optimized);\n\n\n    //-------------------------------------------------\n    // Correctness Test\n    //-------------------------------------------------\n    yVal0 += 1.0;\n    yVal1 += 1.0;\n    yVal2 += 1.0;\n    yVal3 -= 1.0;\n\n    standardFloat_fill(    x_standard,  yVal0, VSX_FUNC_NUM_TEST_ELEMENTS);\n    THFloatVector_fill_VSX(x_optimized, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS);\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n        assert(x_optimized[i] == yVal0);\n\n    standardFloat_fill(    x_standard+1,  yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    THFloatVector_fill_VSX(x_optimized+1, yVal1, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    standardFloat_fill(    x_standard+2,  yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    THFloatVector_fill_VSX(x_optimized+2, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    standardFloat_fill(    x_standard+3,  yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    
THFloatVector_fill_VSX(x_optimized+3, yVal3, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    standardFloat_fill(    x_standard+517,  yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    THFloatVector_fill_VSX(x_optimized+517, yVal0, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    int r = rand() % 258;\n    standardFloat_fill(    x_standard+517+r,  yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    THFloatVector_fill_VSX(x_optimized+517+r, yVal2, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n        assert(x_optimized[i] == x_standard[i]);\n    printf(\"All assertions PASSED for THFloatVector_fill_VSX() test.\\n\\n\");\n\n\n    free(x_standard);\n    free(x_optimized);\n}\n\n\nvoid test_THDoubleVector_cadd_VSX()\n{\n    clock_t start, end;\n    double elapsedSeconds_optimized, elapsedSeconds_standard;\n\n    double *z_standard  = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *z_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *x           = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *y           = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double c            = randDouble();\n\n    // Initialize randomly\n    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)\n    {\n        x[i] = randDouble();\n        y[i] = randDouble();\n    }\n\n\n    //-------------------------------------------------\n    // Performance Test\n    //-------------------------------------------------\n    start = clock();\n    standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    standardDouble_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"standardDouble_cadd() 
test took %.5lf seconds\\n\", elapsedSeconds_standard);\n\n    start = clock();\n    THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    THDoubleVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"THDoubleVector_cadd_VSX() test took %.5lf seconds\\n\", elapsedSeconds_optimized);\n\n\n    //-------------------------------------------------\n    // Correctness Test\n    //-------------------------------------------------\n    standardDouble_cadd(    z_standard+1,  x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    THDoubleVector_cadd_VSX(z_optimized+1, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    standardDouble_cadd(    z_standard+2,  x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    THDoubleVector_cadd_VSX(z_optimized+2, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    standardDouble_cadd(    z_standard+3,  x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    THDoubleVector_cadd_VSX(z_optimized+3, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    standardDouble_cadd(    z_standard+517,  x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    THDoubleVector_cadd_VSX(z_optimized+517, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    int r = rand() % 258;\n    standardDouble_cadd(    z_standard+517+r,  x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    THDoubleVector_cadd_VSX(z_optimized+517+r, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n    {\n        if(!near(z_optimized[i], z_standard[i]))\n            printf(\"%d %f %f\\n\", i, z_optimized[i], z_standard[i]);\n        assert(near(z_optimized[i], z_standard[i]));\n    }\n    printf(\"All assertions PASSED for THDoubleVector_cadd_VSX() test.\\n\\n\");\n\n\n    
free(z_standard);\n    free(z_optimized);\n    free(x);\n    free(y);\n}\n\nvoid test_THFloatVector_cadd_VSX()\n{\n    clock_t start, end;\n    double elapsedSeconds_optimized, elapsedSeconds_standard;\n\n    float *z_standard  = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *z_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *x           = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *y           = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float c            = (float)randDouble();\n\n    // Initialize randomly\n    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)\n    {\n        x[i] = (float)randDouble();\n        y[i] = (float)randDouble();\n    }\n\n\n    //-------------------------------------------------\n    // Performance Test\n    //-------------------------------------------------\n    start = clock();\n    standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    standardFloat_cadd(z_standard, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"standardFloat_cadd() test took %.5lf seconds\\n\", elapsedSeconds_standard);\n\n    start = clock();\n    THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    THFloatVector_cadd_VSX(z_optimized, x, y, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"THFloatVector_cadd_VSX() test took %.5lf seconds\\n\", elapsedSeconds_optimized);\n\n\n    
//-------------------------------------------------\n    // Correctness Test\n    //-------------------------------------------------\n    standardFloat_cadd(    z_standard+1,  x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    THFloatVector_cadd_VSX(z_optimized+1, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    standardFloat_cadd(    z_standard+2,  x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    THFloatVector_cadd_VSX(z_optimized+2, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    standardFloat_cadd(    z_standard+3,  x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    THFloatVector_cadd_VSX(z_optimized+3, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    standardFloat_cadd(    z_standard+517,  x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    THFloatVector_cadd_VSX(z_optimized+517, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    int r = rand() % 258;\n    standardFloat_cadd(    z_standard+517+r,  x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    THFloatVector_cadd_VSX(z_optimized+517+r, x, y, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n    {\n        if(!near(z_optimized[i], z_standard[i]))\n            printf(\"%d %f %f\\n\", i, z_optimized[i], z_standard[i]);\n        assert(near(z_optimized[i], z_standard[i]));\n    }\n    printf(\"All assertions PASSED for THFloatVector_cadd_VSX() test.\\n\\n\");\n\n\n    free(z_standard);\n    free(z_optimized);\n    free(x);\n    free(y);\n}\n\nvoid test_THDoubleVector_adds_VSX()\n{\n    clock_t start, end;\n    double elapsedSeconds_optimized, elapsedSeconds_standard;\n\n    double *y_standard  = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *y_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *x           = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double c            = randDouble();\n\n    // Initialize randomly\n    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)\n        x[i] = randDouble();\n\n    
//-------------------------------------------------\n    // Performance Test\n    //-------------------------------------------------\n    start = clock();\n    standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    standardDouble_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"standardDouble_adds() test took %.5lf seconds\\n\", elapsedSeconds_standard);\n\n    start = clock();\n    THDoubleVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    THDoubleVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    THDoubleVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    THDoubleVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"THDoubleVector_adds_VSX() test took %.5lf seconds\\n\", elapsedSeconds_optimized);\n\n\n    //-------------------------------------------------\n    // Correctness Test\n    //-------------------------------------------------\n    standardDouble_adds(    y_standard+1,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    THDoubleVector_adds_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    standardDouble_adds(    y_standard+2,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    THDoubleVector_adds_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    standardDouble_adds(    y_standard+3,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    THDoubleVector_adds_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    standardDouble_adds(    y_standard+517,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    THDoubleVector_adds_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    int r = rand() % 258;\n   
 standardDouble_adds(    y_standard+517+r,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    THDoubleVector_adds_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n    {\n        if(!near(y_optimized[i], y_standard[i]))\n            printf(\"%d %f %f\\n\", i, y_optimized[i], y_standard[i]);\n        assert(near(y_optimized[i], y_standard[i]));\n    }\n    printf(\"All assertions PASSED for THDoubleVector_adds_VSX() test.\\n\\n\");\n\n\n    free(y_standard);\n    free(y_optimized);\n    free(x);\n}\n\n\nvoid test_THFloatVector_adds_VSX()\n{\n    clock_t start, end;\n    double elapsedSeconds_optimized, elapsedSeconds_standard;\n\n    float *y_standard  = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *y_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *x           = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float c            = (float)randDouble();\n\n    // Initialize randomly\n    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)\n        x[i] = (float)randDouble();\n\n\n    //-------------------------------------------------\n    // Performance Test\n    //-------------------------------------------------\n    start = clock();\n    standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    standardFloat_adds(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"standardFloat_adds() test took %.5lf seconds\\n\", elapsedSeconds_standard);\n\n    start = clock();\n    THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    
THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    THFloatVector_adds_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"THFloatVector_adds_VSX() test took %.5lf seconds\\n\", elapsedSeconds_optimized);\n\n\n    //-------------------------------------------------\n    // Correctness Test\n    //-------------------------------------------------\n    standardFloat_adds(    y_standard+1,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    THFloatVector_adds_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    standardFloat_adds(    y_standard+2,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    THFloatVector_adds_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    standardFloat_adds(    y_standard+3,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    THFloatVector_adds_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    standardFloat_adds(    y_standard+517,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    THFloatVector_adds_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    int r = rand() % 258;\n    standardFloat_adds(    y_standard+517+r,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    THFloatVector_adds_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n    {\n        if(!near(y_optimized[i], y_standard[i]))\n            printf(\"%d %f %f\\n\", i, y_optimized[i], y_standard[i]);\n        assert(near(y_optimized[i], y_standard[i]));\n    }\n    printf(\"All assertions PASSED for THFloatVector_adds_VSX() test.\\n\\n\");\n\n\n    free(y_standard);\n    free(y_optimized);\n    free(x);\n}\n\n\nvoid test_THDoubleVector_cmul_VSX()\n{\n    clock_t start, end;\n    double elapsedSeconds_optimized, elapsedSeconds_standard;\n\n    double *z_standard  = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *z_optimized = 
(double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *x           = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *y           = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n\n    // Initialize randomly\n    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)\n    {\n        x[i] = randDouble();\n        y[i] = randDouble();\n    }\n\n\n    //-------------------------------------------------\n    // Performance Test\n    //-------------------------------------------------\n    start = clock();\n    standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS  );\n    standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    standardDouble_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"standardDouble_cmul() test took %.5lf seconds\\n\", elapsedSeconds_standard);\n\n    start = clock();\n    THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS  );\n    THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    THDoubleVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"THDoubleVector_cmul_VSX() test took %.5lf seconds\\n\", elapsedSeconds_optimized);\n\n\n    //-------------------------------------------------\n    // Correctness Test\n    //-------------------------------------------------\n    standardDouble_cmul(    z_standard+1,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    THDoubleVector_cmul_VSX(z_optimized+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    standardDouble_cmul(    z_standard+2,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    
THDoubleVector_cmul_VSX(z_optimized+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    standardDouble_cmul(    z_standard+3,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    THDoubleVector_cmul_VSX(z_optimized+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    standardDouble_cmul(    z_standard+517,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    THDoubleVector_cmul_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    int r = rand() % 258;\n    standardDouble_cmul(    z_standard+517+r,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    THDoubleVector_cmul_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n    {\n        if(!near(z_optimized[i], z_standard[i]))\n            printf(\"%d %f %f\\n\", i, z_optimized[i], z_standard[i]);\n        assert(near(z_optimized[i], z_standard[i]));\n    }\n    printf(\"All assertions PASSED for THDoubleVector_cmul_VSX() test.\\n\\n\");\n\n\n    free(z_standard);\n    free(z_optimized);\n    free(x);\n    free(y);\n}\n\nvoid test_THFloatVector_cmul_VSX()\n{\n    clock_t start, end;\n    double elapsedSeconds_optimized, elapsedSeconds_standard;\n\n    float *z_standard  = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *z_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *x           = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *y           = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n\n    // Initialize randomly\n    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)\n    {\n        x[i] = (float)randDouble();\n        y[i] = (float)randDouble();\n    }\n\n\n    //-------------------------------------------------\n    // Performance Test\n    //-------------------------------------------------\n    start = clock();\n    standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS  );\n    standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    
standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    standardFloat_cmul(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"standardFloat_cmul() test took %.5lf seconds\\n\", elapsedSeconds_standard);\n\n    start = clock();\n    THFloatVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS  );\n    THFloatVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    THFloatVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    THFloatVector_cmul_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"THFloatVector_cmul_VSX() test took %.5lf seconds\\n\", elapsedSeconds_optimized);\n\n\n    //-------------------------------------------------\n    // Correctness Test\n    //-------------------------------------------------\n    standardFloat_cmul(    z_standard+1,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    THFloatVector_cmul_VSX(z_optimized+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    standardFloat_cmul(    z_standard+2,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    THFloatVector_cmul_VSX(z_optimized+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    standardFloat_cmul(    z_standard+3,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    THFloatVector_cmul_VSX(z_optimized+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    standardFloat_cmul(    z_standard+517,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    THFloatVector_cmul_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    int r = rand() % 258;\n    standardFloat_cmul(    z_standard+517+r,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    THFloatVector_cmul_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n    {\n        if(!near(z_optimized[i], z_standard[i]))\n            printf(\"%d 
%f %f\\n\", i, z_optimized[i], z_standard[i]);\n        assert(near(z_optimized[i], z_standard[i]));\n    }\n    printf(\"All assertions PASSED for THFloatVector_cmul_VSX() test.\\n\\n\");\n\n\n    free(z_standard);\n    free(z_optimized);\n    free(x);\n    free(y);\n}\n\nvoid test_THDoubleVector_muls_VSX()\n{\n    clock_t start, end;\n    double elapsedSeconds_optimized, elapsedSeconds_standard;\n\n    double *y_standard  = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *y_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *x           = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double c            = randDouble();\n\n    // Initialize randomly\n    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)\n    {\n        x[i] = randDouble();\n    }\n\n\n    //-------------------------------------------------\n    // Performance Test\n    //-------------------------------------------------\n    start = clock();\n    standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    standardDouble_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"standardDouble_muls() test took %.5lf seconds\\n\", elapsedSeconds_standard);\n\n    start = clock();\n    THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    THDoubleVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"THDoubleVector_muls_VSX() test took %.5lf seconds\\n\", 
elapsedSeconds_optimized);\n\n\n    //-------------------------------------------------\n    // Correctness Test\n    //-------------------------------------------------\n    standardDouble_muls(    y_standard+1,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    THDoubleVector_muls_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    standardDouble_muls(    y_standard+2,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    THDoubleVector_muls_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    standardDouble_muls(    y_standard+3,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    THDoubleVector_muls_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    standardDouble_muls(    y_standard+517,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    THDoubleVector_muls_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    int r = rand() % 258;\n    standardDouble_muls(    y_standard+517+r,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    THDoubleVector_muls_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n    {\n        if(!near(y_optimized[i], y_standard[i]))\n            printf(\"%d %f %f\\n\", i, y_optimized[i], y_standard[i]);\n        assert(near(y_optimized[i], y_standard[i]));\n    }\n    printf(\"All assertions PASSED for THDoubleVector_muls_VSX() test.\\n\\n\");\n\n\n    free(y_standard);\n    free(y_optimized);\n    free(x);\n}\n\nvoid test_THFloatVector_muls_VSX()\n{\n    clock_t start, end;\n    double elapsedSeconds_optimized, elapsedSeconds_standard;\n\n    float *y_standard  = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *y_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *x           = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float c           = (float)randDouble();\n\n    // Initialize randomly\n    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)\n    {\n        x[i] = 
(float)randDouble();\n    }\n\n\n    //-------------------------------------------------\n    // Performance Test\n    //-------------------------------------------------\n    start = clock();\n    standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    standardFloat_muls(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"standardFloat_muls() test took %.5lf seconds\\n\", elapsedSeconds_standard);\n\n    start = clock();\n    THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    THFloatVector_muls_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"THFloatVector_muls_VSX() test took %.5lf seconds\\n\", elapsedSeconds_optimized);\n\n\n    //-------------------------------------------------\n    // Correctness Test\n    //-------------------------------------------------\n    standardFloat_muls(    y_standard+1,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    THFloatVector_muls_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    standardFloat_muls(    y_standard+2,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    THFloatVector_muls_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    standardFloat_muls(    y_standard+3,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    THFloatVector_muls_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    standardFloat_muls(    y_standard+517,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    THFloatVector_muls_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    int r 
= rand() % 258;\n    standardFloat_muls(    y_standard+517+r,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    THFloatVector_muls_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n    {\n        if(!near(y_optimized[i], y_standard[i]))\n            printf(\"%d %f %f\\n\", i, y_optimized[i], y_standard[i]);\n        assert(near(y_optimized[i], y_standard[i]));\n    }\n    printf(\"All assertions PASSED for THFloatVector_muls_VSX() test.\\n\\n\");\n\n\n    free(y_standard);\n    free(y_optimized);\n    free(x);\n}\n\n\n\nvoid test_THDoubleVector_cdiv_VSX()\n{\n    clock_t start, end;\n    double elapsedSeconds_optimized, elapsedSeconds_standard;\n\n    double *z_standard  = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *z_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *x           = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *y           = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n\n    // Initialize randomly\n    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)\n    {\n        x[i] = randDouble();\n        y[i] = randDouble();\n    }\n\n\n    //-------------------------------------------------\n    // Performance Test\n    //-------------------------------------------------\n    start = clock();\n    standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS  );\n    standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    standardDouble_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"standardDouble_cdiv() test took %.5lf seconds\\n\", elapsedSeconds_standard);\n\n    start = clock();\n    THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS  
);\n    THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    THDoubleVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"THDoubleVector_cdiv_VSX() test took %.5lf seconds\\n\", elapsedSeconds_optimized);\n\n\n    //-------------------------------------------------\n    // Correctness Test\n    //-------------------------------------------------\n    standardDouble_cdiv(    z_standard+1,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    THDoubleVector_cdiv_VSX(z_optimized+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    standardDouble_cdiv(    z_standard+2,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    THDoubleVector_cdiv_VSX(z_optimized+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    standardDouble_cdiv(    z_standard+3,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    THDoubleVector_cdiv_VSX(z_optimized+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    standardDouble_cdiv(    z_standard+517,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    THDoubleVector_cdiv_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    int r = rand() % 258;\n    standardDouble_cdiv(    z_standard+517+r,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    THDoubleVector_cdiv_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n    {\n        if(!near(z_optimized[i], z_standard[i]))\n            printf(\"%d %f %f\\n\", i, z_optimized[i], z_standard[i]);\n        assert(near(z_optimized[i], z_standard[i]));\n    }\n    printf(\"All assertions PASSED for THDoubleVector_cdiv_VSX() test.\\n\\n\");\n\n\n    free(z_standard);\n    free(z_optimized);\n    free(x);\n    free(y);\n}\n\nvoid test_THFloatVector_cdiv_VSX()\n{\n    clock_t start, end;\n    double elapsedSeconds_optimized, elapsedSeconds_standard;\n\n    float 
*z_standard  = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *z_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *x           = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *y           = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n\n    // Initialize randomly\n    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)\n    {\n        x[i] = (float)randDouble();\n        y[i] = (float)randDouble();\n    }\n\n\n    //-------------------------------------------------\n    // Performance Test\n    //-------------------------------------------------\n    start = clock();\n    standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS  );\n    standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    standardFloat_cdiv(z_standard, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"standardFloat_cdiv() test took %.5lf seconds\\n\", elapsedSeconds_standard);\n\n    start = clock();\n    THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS  );\n    THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    THFloatVector_cdiv_VSX(z_optimized, x, y, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"THFloatVector_cdiv_VSX() test took %.5lf seconds\\n\", elapsedSeconds_optimized);\n\n\n    //-------------------------------------------------\n    // Correctness Test\n    //-------------------------------------------------\n    standardFloat_cdiv(    z_standard+1,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    THFloatVector_cdiv_VSX(z_optimized+1, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    standardFloat_cdiv(  
  z_standard+2,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    THFloatVector_cdiv_VSX(z_optimized+2, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    standardFloat_cdiv(    z_standard+3,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    THFloatVector_cdiv_VSX(z_optimized+3, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    standardFloat_cdiv(    z_standard+517,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    THFloatVector_cdiv_VSX(z_optimized+517, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    int r = rand() % 258;\n    standardFloat_cdiv(    z_standard+517+r,  x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    THFloatVector_cdiv_VSX(z_optimized+517+r, x, y, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n    {\n        if(!near(z_optimized[i], z_standard[i]))\n            printf(\"%d %f %f\\n\", i, z_optimized[i], z_standard[i]);\n        assert(near(z_optimized[i], z_standard[i]));\n    }\n    printf(\"All assertions PASSED for THFloatVector_cdiv_VSX() test.\\n\\n\");\n\n\n    free(z_standard);\n    free(z_optimized);\n    free(x);\n    free(y);\n}\n\nvoid test_THDoubleVector_divs_VSX()\n{\n    clock_t start, end;\n    double elapsedSeconds_optimized, elapsedSeconds_standard;\n\n    double *y_standard  = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *y_optimized = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double *x           = (double *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(double));\n    double c            = randDouble();\n\n    // Initialize randomly\n    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)\n    {\n        x[i] = randDouble();\n    }\n\n\n    //-------------------------------------------------\n    // Performance Test\n    //-------------------------------------------------\n    start = clock();\n    standardDouble_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    standardDouble_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    standardDouble_divs(y_standard, x, 
c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    standardDouble_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"standardDouble_divs() test took %.5lf seconds\\n\", elapsedSeconds_standard);\n\n    start = clock();\n    THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    THDoubleVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"THDoubleVector_divs_VSX() test took %.5lf seconds\\n\", elapsedSeconds_optimized);\n\n\n    //-------------------------------------------------\n    // Correctness Test\n    //-------------------------------------------------\n    standardDouble_divs(    y_standard+1,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    THDoubleVector_divs_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    standardDouble_divs(    y_standard+2,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    THDoubleVector_divs_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    standardDouble_divs(    y_standard+3,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    THDoubleVector_divs_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    standardDouble_divs(    y_standard+517,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    THDoubleVector_divs_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    int r = rand() % 258;\n    standardDouble_divs(    y_standard+517+r,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    THDoubleVector_divs_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n    {\n        if(!near(y_optimized[i], y_standard[i]))\n            printf(\"%d %f %f\\n\", i, 
y_optimized[i], y_standard[i]);\n        assert(near(y_optimized[i], y_standard[i]));\n    }\n    printf(\"All assertions PASSED for THDoubleVector_divs_VSX() test.\\n\\n\");\n\n\n    free(y_standard);\n    free(y_optimized);\n    free(x);\n}\n\nvoid test_THFloatVector_divs_VSX()\n{\n    clock_t start, end;\n    double elapsedSeconds_optimized, elapsedSeconds_standard;\n\n    float *y_standard  = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *y_optimized = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float *x           = (float *)malloc(VSX_PERF_NUM_TEST_ELEMENTS*sizeof(float));\n    float c            = (float)randDouble();\n\n    // Initialize randomly\n    for(int i = 0; i < VSX_PERF_NUM_TEST_ELEMENTS; i++)\n    {\n        x[i] = (float)randDouble();\n    }\n\n\n    //-------------------------------------------------\n    // Performance Test\n    //-------------------------------------------------\n    start = clock();\n    standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    standardFloat_divs(y_standard, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_standard = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"standardFloat_divs() test took %.5lf seconds\\n\", elapsedSeconds_standard);\n\n    start = clock();\n    THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS  );\n    THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-1);\n    THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-2);\n    THFloatVector_divs_VSX(y_optimized, x, c, VSX_PERF_NUM_TEST_ELEMENTS-3);\n    end = clock();\n\n    elapsedSeconds_optimized = (double)(end - start) / CLOCKS_PER_SEC;\n    printf(\"THFloatVector_divs_VSX() test took %.5lf seconds\\n\", elapsedSeconds_optimized);\n\n\n    
//-------------------------------------------------\n    // Correctness Test\n    //-------------------------------------------------\n    standardFloat_divs(    y_standard+1,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    THFloatVector_divs_VSX(y_optimized+1, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-2);\n    standardFloat_divs(    y_standard+2,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    THFloatVector_divs_VSX(y_optimized+2, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-4);\n    standardFloat_divs(    y_standard+3,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    THFloatVector_divs_VSX(y_optimized+3, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-6);\n    standardFloat_divs(    y_standard+517,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    THFloatVector_divs_VSX(y_optimized+517, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-1029);\n    int r = rand() % 258;\n    standardFloat_divs(    y_standard+517+r,  x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n    THFloatVector_divs_VSX(y_optimized+517+r, x, c, VSX_FUNC_NUM_TEST_ELEMENTS-(1029+r+100));\n\n    for(int i = 0; i < VSX_FUNC_NUM_TEST_ELEMENTS; i++)\n    {\n        if(!near(y_optimized[i], y_standard[i]))\n            printf(\"%d %f %f\\n\", i, y_optimized[i], y_standard[i]);\n        assert(near(y_optimized[i], y_standard[i]));\n    }\n    printf(\"All assertions PASSED for THFloatVector_divs_VSX() test.\\n\\n\");\n\n\n    free(y_standard);\n    free(y_optimized);\n    free(x);\n}\n\n\n//--------------------------------------------------------------------------------------------------\n// Run tests:\n//--------------------------------------------------------------------------------------------------\nint main()\n{\n    printf(\"\\n\");\n\n\n    // First test utility functions\n\n    assert(!near(0.1, -0.1));\n    assert(!near(0.1f, -0.1f));\n    assert(!near(9, 10));\n    assert(near(0.1, 0.1000001));\n    assert(near(0.1f, 0.1000001f));\n    assert(near(100.764, 100.764));\n    assert(!near(NAN, 0.0));\n    assert(!near(-9.5, NAN));\n    assert(!near(NAN, 100));\n    
assert(!near(-0.0, NAN));\n    assert(near(NAN, NAN));\n    assert(near(INFINITY, INFINITY));\n    assert(near(-INFINITY, -INFINITY));\n    assert(!near(INFINITY, NAN));\n    assert(!near(0, INFINITY));\n    assert(!near(-999.4324, INFINITY));\n    assert(!near(INFINITY, 982374.1));\n    assert(!near(-INFINITY, INFINITY));\n\n\n\n    // Then test each vectorized function\n\n    test_THDoubleVector_fill_VSX();\n    test_THFloatVector_fill_VSX();\n\n    test_THDoubleVector_cadd_VSX();\n    test_THFloatVector_cadd_VSX();\n\n    test_THDoubleVector_adds_VSX();\n    test_THFloatVector_adds_VSX();\n\n    test_THDoubleVector_cmul_VSX();\n    test_THFloatVector_cmul_VSX();\n\n    test_THDoubleVector_muls_VSX();\n    test_THFloatVector_muls_VSX();\n\n    test_THDoubleVector_cdiv_VSX();\n    test_THFloatVector_cdiv_VSX();\n\n    test_THDoubleVector_divs_VSX();\n    test_THFloatVector_divs_VSX();\n\n\n\n    printf(\"Finished running all tests. All tests PASSED.\\n\");\n    return 0;\n}\n\n\n#endif  // defined RUN_VSX_TESTS\n\n#endif  // defined __PPC64__\n\n"
  },
  {
    "path": "lib/luaT/CMakeLists.txt",
    "content": "# avoid some cmake warnings\nIF(POLICY CMP0026)\n CMAKE_POLICY(SET CMP0026 OLD)\nENDIF()\n\nINCLUDE_DIRECTORIES(${LUA_INCDIR})\nIF(LUALIB)\n  LINK_DIRECTORIES(${LUA_LIBDIR}) # note: must be done before defining target\nENDIF()\n\nADD_LIBRARY(luaT SHARED luaT.h luaT.c)\n\nIF (BUILD_STATIC OR \"$ENV{STATIC_TH}\" STREQUAL \"YES\")\n  ADD_LIBRARY(luaT_static STATIC luaT.h luaT.c)\n  SET_TARGET_PROPERTIES(luaT_static PROPERTIES\n    COMPILE_FLAGS \"-fPIC\")\n  SET_TARGET_PROPERTIES(luaT_static PROPERTIES\n    PREFIX \"lib\" IMPORT_PREFIX \"lib\" OUTPUT_NAME \"luaT\")\nENDIF()\n\nSET_TARGET_PROPERTIES(luaT PROPERTIES\n  VERSION   0\n  SOVERSION 0)\n\nIF(APPLE)\n  SET_TARGET_PROPERTIES(luaT PROPERTIES\n    LINK_FLAGS \"-undefined dynamic_lookup\")\nENDIF()\n\nIF(LUALIB)\n  TARGET_LINK_LIBRARIES(luaT ${LUALIB}) # must be done after ;)\nENDIF()\n\nINSTALL(TARGETS luaT\n  EXPORT torch-exports\n  RUNTIME DESTINATION \"${Torch_INSTALL_BIN_SUBDIR}\"\n  LIBRARY DESTINATION \"${Torch_INSTALL_LIB_SUBDIR}\"\n  ARCHIVE DESTINATION \"${Torch_INSTALL_LIB_SUBDIR}\")\n\nINSTALL(FILES luaT.h\n          DESTINATION \"${Torch_INSTALL_INCLUDE_SUBDIR}\")\n\n# Create luaT.cmake\nGET_TARGET_PROPERTY(LUAT_OUTPUT_NAME luaT LOCATION)\nGET_FILENAME_COMPONENT(LUAT_OUTPUT_NAME ${LUAT_OUTPUT_NAME} NAME)\nSET(LUAT_LIBRARIES \"${Torch_INSTALL_LIB}/${LUAT_OUTPUT_NAME}\")\nSET(LUAT_INCLUDE_DIR \"${Torch_INSTALL_INCLUDE}\")\nCONFIGURE_FILE(luaTConfig.cmake.in \"${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/luaTConfig.cmake\")\nINSTALL(FILES \"${CMAKE_CURRENT_BINARY_DIR}/cmake-exports/luaTConfig.cmake\"\n  DESTINATION \"${Torch_INSTALL_CMAKE_SUBDIR}\")\n"
  },
  {
    "path": "lib/luaT/README.md",
    "content": "<a name=\"luat.dok\"></a>\n# Lua Torch C API #\n\nluaT provides an API to interface Lua and C in Torch packages. It defines a\nconcept of _classes_ to Lua for Torch, and provides a mechanism to easily\nhandle these Lua classes from C.\n\nIt additionally provides few functions that `luaL` should have defined, and\ndefines several functions similar to `luaL` ones for better type error printing when using\n`luaT` classes.\n\n<a name=\"luat.memory.dok\"></a>\n## Memory functions ##\n\nClassical memory allocation functions which generate a Lua error in case of\nproblem.\n\n<a name=\"luaT_alloc\"></a>\n### void* luaT_alloc(lua_State *L, long size) ###\n\nAllocates `size` bytes, and return a pointer on the allocated\nmemory. A Lua error will be generated if running out of memory.\n\n<a name=\"luaT_realloc\"></a>\n### void* luaT_realloc(lua_State *L, void *ptr, long size) ###\n\nRealloc `ptr` to `size` bytes. `ptr` must have been previously\nallocated with [luaT_alloc](#luaT_alloc) or\n[luaT_realloc](#luaT_realloc), or the C `malloc` or `realloc`\nfunctions. A Lua error will be generated if running out of memory.\n\n<a name=\"luaT_free\"></a>\n### void luaT_free(lua_State *L, void *ptr) ###\n\nFree memory allocated at address `ptr`. The memory must have been\npreviously allocated with [luaT_alloc](#luaT_alloc) or\n[luaT_realloc](#luaT_realloc), or the C `malloc` or `realloc`\nfunctions.\n\n<a name=\"luat.classcreate\"></a>\n## Class creation and basic handling ##\n\nA `luaT` class is basically either a Lua _table_ or _userdata_ with\nan appropriate _metatable_. This appropriate metatable is created with\n[luaT_newmetatable](#luaT_newmetatable). Contrary to luaL userdata\nfunctions, luaT mechanism handles inheritance. If the class inherit from\nanother class, then the metatable will itself have a metatable\ncorresponding to the _parent metatable_: the metatables are cascaded\naccording to the class inheritance. 
Multiple inheritance is not supported.\n\n<a name=\"luat.operatoroverloading\"></a>\n### Operator overloading ###\n\nThe metatable of a `luaT` object contains `Lua` operators like\n`__index`, `__newindex`, `__tostring`, `__add`\n(etc...). These operators will respectively look for `__index__`,\n`__newindex__`, `__tostring__`, `__add__` (etc...) in the\nmetatable. If found, the corresponding function or value will be returned,\nelse a Lua error will be raised.\n\nIf one wants to provide `__index__` or `__newindex__` in the\nmetaclass, these operators must follow a particular scheme:\n\n  * `__index__` must either return a value _and_ `true` or return `false` only. In the first case, it means `__index__` was able to handle the given argument (e.g., the type was correct). The second case means it was not able to do anything, so `__index` in the root metatable can then try to see if the metaclass contains the required value.\n\n  * `__newindex__` must return either `true` or `false`. As for `__index__`, `true` means it could handle the argument and `false` means it could not. If not, the root metatable `__newindex` will then raise an error if the object was a userdata, or apply a rawset if the object was a Lua table.\n\nOther metaclass operators like `__tostring__`, `__add__`, etc... do not have any particular constraint.\n\n<a name=\"luat_newlocalmetatable\"></a>\n### const char* luaT_newlocalmetatable(lua_State *L, const char *tname, const char *parenttname, lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory, int moduleidx) ###\n\nThis function creates a new metatable, which is the Lua way to define a new\nobject class. As for `luaL_newmetatable`, the metatable is registered in\nthe Lua registry table, with the key `tname`. In addition, `tname` is\nalso registered in the Lua registry, with the metatable as key (the\ntypename of a given object can thus be easily retrieved).\n\nThe class name `tname` must be of the form `modulename.classname`. 
If not\nNULL, `parenttname` must be a valid typename corresponding to the parent\nclass of the new class.\n\nIf `constructor` is not NULL, a function `new` will be added to the\nmetatable, pointing to this given function.\n\nA \"constructor table\" will be created by `luaT_newlocalmetatable`: it will\ncontain all the class methods, and be callable, calling the `constructor`, if\na `constructor` has been passed. The constructor table is either stored into\n`modulename.classname` (that is in the global namespace) if `moduleidx <=\n0` or in the table at index `moduleidx` in the stack (if `moduleidx > 0`).\n\nIf not NULL, `destructor` will be called when garbage collecting the object.\n\nIf not NULL, `factory` must be a Lua C function creating an empty object\ninstance of the class. This function is used in Torch for serialization.\n\nNote that classes can be partly defined in C and partly defined in Lua:\nonce the metatable is created in C, it can be filled up with additional\nmethods in Lua.\n\nThe return value is the value returned by [luaT_typenameid](#luat_typenameid).\n\n<a name=\"luat_newmetatable\"></a>\n### const char* luaT_newmetatable(lua_State *L, const char *tname, const char *parenttname, lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory) ###\n\nSame as [luaT_newlocalmetatable](#luat_newlocalmetatable), but where the\nconstructor table is assigned in the global namespace (`moduleidx = 0`).\n\n<a name=\"luat_pushmetatable\"></a>\n### int luaT_pushmetatable(lua_State *L, const char *tname) ###\n\nPushes the metatable with type name `tname` on the stack, if `tname` is a\nvalid Torch class name (previously registered with luaT_newmetatable).\n\nOn success, returns 1. 
If `tname` is invalid, nothing is pushed and it\nreturns 0.\n\n<a name=\"luat_typenameid\"></a>\n### const char* luaT_typenameid(lua_State *L, const char *tname) ###\n\nIf `tname` is a valid Torch class name, then returns a unique string (the\ncontents will be the same as `tname`) pointing to the string registered\nin the Lua registry. This string is thus valid as long as Lua is\nrunning. The returned string shall not be freed.\n\nIf `tname` is an invalid class name, returns NULL.\n\n<a name=\"luat_typename\"></a>\n### const char* luaT_typename(lua_State *L, int ud) ###\n\nReturns the typename of the object at index `ud` on the stack. If it is\nnot a valid Torch object, returns NULL.\n\n<a name=\"luat_pushudata\"></a>\n### void luaT_pushudata(lua_State *L, void *udata, const char *tname) ###\n\nGiven a C structure `udata`, push a userdata object on the stack with\nmetatable corresponding to `tname`. Obviously, `tname` must be a valid\nTorch name registered with [luaT_newmetatable](#luat_newmetatable).\n\n<a name=\"luat_toudata\"></a>\n### void *luaT_toudata(lua_State *L, int ud, const char *tname) ###\n\nReturns a pointer to the original C structure previously pushed on the\nstack with [luaT_pushudata](#luat_pushudata), if the object at index\n`ud` is a valid Torch class name. 
Returns NULL otherwise.\n\n<a name=\"luat_isudata\"></a>\n### int luaT_isudata(lua_State *L, int ud, const char *tname) ###\n\nReturns 1 if the object at index `ud` on the stack is a Torch object of class `tname`.\nReturns 0 otherwise.\n\n<a name=\"luat_getfield\"></a>\n### Checking fields of a table ###\n\nThese functions check that the table at the given index `ud` on the Lua\nstack has a field named `field`, and that it is of the specified type.\nThese functions raise a Lua error on failure.\n\n<a name=\"luat_getfieldcheckudata\"></a>\n## void *luaT_getfieldcheckudata(lua_State *L, int ud, const char *field, const char *tname) ##\n\nChecks that the field named `field` of the table at index `ud` is a\nTorch object of class `tname`.  Returns the pointer of the C structure\npreviously pushed on the stack with [luaT_pushudata](#luat_pushudata) on\nsuccess. The function raises a Lua error on failure.\n\n<a name=\"luat_getfieldchecklightudata\"></a>\n## void *luaT_getfieldchecklightudata(lua_State *L, int ud, const char *field) ##\n\nChecks that the field named `field` of the table at index `ud` is a\nlightuserdata.  Returns the lightuserdata pointer on success. The function\nraises a Lua error on failure.\n\n<a name=\"luat_getfieldcheckint\"></a>\n## int luaT_getfieldcheckint(lua_State *L, int ud, const char *field) ##\n\nChecks that the field named `field` of the table at index `ud` is an\nint. Returns the int value on success. The function raises a Lua\nerror on failure.\n\n<a name=\"luat_getfieldcheckstring\"></a>\n## const char* luaT_getfieldcheckstring(lua_State *L, int ud, const char *field) ##\n\nChecks that the field named `field` of the table at index `ud` is a\nstring. Returns a pointer to the string on success. 
The function raises a\nLua error on failure.\n\n<a name=\"luat_getfieldcheckboolean\"></a>\n## int luaT_getfieldcheckboolean(lua_State *L, int ud, const char *field) ##\n\nChecks that the field named `field` of the table at index `ud` is a\nboolean. On success, returns 1 if the boolean is `true`, 0 if it is\n`false`. The function raises a Lua error on failure.\n\n<a name=\"luat_getfieldchecktable\"></a>\n## void luaT_getfieldchecktable(lua_State *L, int ud, const char *field) ##\n\nChecks that the field named `field` of the table at index `ud` is a\ntable. On success, pushes the table on the stack. The function raises a Lua\nerror on failure.\n\n<a name=\"luat_typerror\"></a>\n### int luaT_typerror(lua_State *L, int ud, const char *tname) ###\n\nRaises a `luaL_argerror` (and returns its value), claiming that the\nobject at index `ud` on the stack is not of type `tname`. Note that\nthis function does not check the type, it only raises an error.\n\n<a name=\"luat_checkboolean\"></a>\n### int luaT_checkboolean(lua_State *L, int ud) ###\n\nChecks that the value at index `ud` is a boolean. On success, returns 1\nif the boolean is `true`, 0 if it is `false`. The function raises a Lua\nerror on failure.\n\n<a name=\"luat_optboolean\"></a>\n### int luaT_optboolean(lua_State *L, int ud, int def) ###\n\nChecks that the value at index `ud` is a boolean. On success, returns 1\nif the boolean is `true`, 0 if it is `false`. If there is no value at\nindex `ud` (or the value is nil), returns `def`. In any other case, raises an error.\n\n<a name=\"luat_registeratname\"></a>\n### void luaT_registeratname(lua_State *L, const struct luaL_Reg *methods, const char *name) ###\n\nThis function assumes a table is on the stack. It creates a table field\n`name` in the table (if this field does not exist yet), and fills this\ntable field with `methods`.\n\n<a name=\"luat_classrootname\"></a>\n### const char *luaT_classrootname(const char *tname) ###\n\nAssuming `tname` is of the form `A.b.c`, returns 'c'. 
The returned value\nshall not be freed. It is a pointer inside the `tname` string.\n\n<a name=\"luat_classmodulename\"></a>\n### int luaT_classmodulename(const char *tname, char *parent_name) ###\n\nAlias to `luaT_fullparentname` for ensuring backwards compatibility;\nuse of `luaT_fullparentname` is preferred.\n\n<a name=\"luat_fullparentname\"></a>\n### int luaT_fullparentname(const char *tname, char *parent_name) ###\n\nReturns a 0-1 valued integer indicating whether `tname` has a parent module.\nAssuming `tname` is of the form `A.b.c`, sets `parent_name` to `A.b`.\n\n<a name=\"luat_outerparentname\"></a>\n### int luaT_outerparentname(const char *tname, char *parent_name) ###\n\nReturns a 0-1 valued integer indicating whether `tname` has a parent module.\nAssuming `tname` is of the form `A.b.c`, sets `parent_name` to `A`.\n\n<a name=\"luat_innerparentname\"></a>\n### int luaT_innerparentname(const char *tname, char *parent_name) ###\n\nReturns a 0-1 valued integer indicating whether `tname` has a parent module.\nAssuming `tname` is of the form `A.b.c`, sets `parent_name` to `b`.\n\n<a name=\"luat_stackdump\"></a>\n### void luaT_stackdump(lua_State *L) ###\n\nThis function prints out the state of the Lua stack. It is useful for debugging\npurposes.\n\n"
  },
  {
    "path": "lib/luaT/luaT.c",
    "content": "#include <stdlib.h>\n#include <string.h>\n#include <stdint.h>\n\n#include \"luaT.h\"\n\nvoid* luaT_alloc(lua_State *L, ptrdiff_t size)\n{\n  void *ptr;\n\n  if(size == 0)\n    return NULL;\n\n  if(size < 0)\n    luaL_error(L, \"$ Torch: invalid memory size -- maybe an overflow?\");\n\n  ptr = malloc(size);\n  if(!ptr)\n    luaL_error(L, \"$ Torch: not enough memory: you tried to allocate %dGB. Buy new RAM!\", size/1073741824);\n\n  return ptr;\n}\n\nvoid* luaT_realloc(lua_State *L, void *ptr, ptrdiff_t size)\n{\n  if(!ptr)\n    return(luaT_alloc(L, size));\n\n  if(size == 0)\n  {\n    luaT_free(L, ptr);\n    return NULL;\n  }\n\n  if(size < 0)\n    luaL_error(L, \"$ Torch: invalid memory size -- maybe an overflow?\");\n\n  ptr = realloc(ptr, size);\n  if(!ptr)\n    luaL_error(L, \"$ Torch: not enough memory: you tried to reallocate %dGB. Buy new RAM!\", size/1073741824);\n  return ptr;\n}\n\nvoid luaT_free(lua_State *L, void *ptr)\n{\n  free(ptr);\n}\n\nvoid luaT_setfuncs(lua_State *L, const luaL_Reg *l, int nup)\n{\n#if LUA_VERSION_NUM == 501\n  luaL_checkstack(L, nup+1, \"too many upvalues\");\n  for (; l->name != NULL; l++) {  /* fill the table with given functions */\n    int i;\n    lua_pushstring(L, l->name);\n    for (i = 0; i < nup; i++)  /* copy upvalues to the top */\n      lua_pushvalue(L, -(nup+1));\n    lua_pushcclosure(L, l->func, nup);  /* closure with those upvalues */\n    lua_settable(L, -(nup + 3));\n  }\n  lua_pop(L, nup);  /* remove upvalues */\n#else\n  luaL_setfuncs(L, l, nup);\n#endif\n}\n\nvoid luaT_stackdump(lua_State *L)\n{\n  int i;\n  const char *tname = NULL;\n  int top = lua_gettop(L);\n  for(i = 1; i <= top; i++)\n  {\n    int t = lua_type(L, i);\n    printf(\"%3d. \", i);\n    switch(t)\n    {\n      case LUA_TSTRING:\n        printf(\"'%s'\", lua_tostring(L,i));\n        break;\n      case LUA_TBOOLEAN:\n        printf(lua_toboolean(L, i) ? 
\"true\" : \"false\");\n        break;\n      case LUA_TNUMBER:\n        printf(\"%g\", lua_tonumber(L,i));\n        break;\n      case LUA_TUSERDATA:\n        tname = luaT_typename(L, i);\n        printf(\"userdata %p [%s]\", lua_topointer(L, i), (tname ? tname : \"not a Torch object\"));\n        break;\n      case 10:\n        tname = luaT_typename(L, i);\n        printf(\"cdata %p [%s]\", lua_topointer(L, i), (tname ? tname : \"not a Torch object\"));\n        break;\n      case LUA_TTABLE:\n        lua_pushvalue(L, i);\n        lua_rawget(L, LUA_REGISTRYINDEX);\n        if(lua_isstring(L, -1))\n          tname = lua_tostring(L, -1); /*luaT_typenameid(L, lua_tostring(L, -1)); */\n        else\n          tname = NULL;\n        lua_pop(L, 1);\n        if(tname)\n          printf(\"metatable [%s]\", tname);\n        else\n        {\n          tname = luaT_typename(L, i);\n          printf(\"table %p [%s]\", lua_topointer(L, i), (tname ? tname : \"not a Torch object\"));\n        }\n        break;\n      default:\n        printf(\"Lua object type: %s\", lua_typename(L,t));\n        break;\n    }\n    printf(\"\\n\");\n  }\n  printf(\"---------------------------------------------\\n\");\n}\n\n/* metatable operator methods */\nstatic int luaT_mt__index(lua_State *L);\nstatic int luaT_mt__newindex(lua_State *L);\nstatic int luaT_mt__tostring(lua_State *L);\nstatic int luaT_mt__add(lua_State *L);\nstatic int luaT_mt__sub(lua_State *L);\nstatic int luaT_mt__mul(lua_State *L);\nstatic int luaT_mt__div(lua_State *L);\nstatic int luaT_mt__mod(lua_State *L);\nstatic int luaT_mt__pow(lua_State *L);\nstatic int luaT_mt__unm(lua_State *L);\nstatic int luaT_mt__concat(lua_State *L);\nstatic int luaT_mt__len(lua_State *L);\nstatic int luaT_mt__eq(lua_State *L);\nstatic int luaT_mt__lt(lua_State *L);\nstatic int luaT_mt__le(lua_State *L);\nstatic int luaT_mt__call(lua_State *L);\n\n/* Constructor-metatable methods */\nstatic int luaT_cmt__call(lua_State *L);\nstatic int 
luaT_cmt__newindex(lua_State *L);\n\nconst char* luaT_newmetatable(lua_State *L, const char *tname, const char *parent_tname,\n                              lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory)\n{\n  return luaT_newlocalmetatable(L, tname, parent_tname,\n                                constructor, destructor, factory, 0);\n}\n\nconst char* luaT_newlocalmetatable(lua_State *L, const char *tname, const char *parent_tname,\n                                   lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory, int moduleidx)\n{\n  lua_pushcfunction(L, luaT_lua_newmetatable);\n  lua_pushstring(L, tname);\n  (parent_tname ? (void)lua_pushstring(L, parent_tname) : lua_pushnil(L));\n  (constructor ? lua_pushcfunction(L, constructor) : lua_pushnil(L));\n  (destructor ? lua_pushcfunction(L, destructor) : lua_pushnil(L));\n  (factory ? lua_pushcfunction(L, factory) : lua_pushnil(L));\n  (moduleidx > 0 ? lua_pushvalue(L, moduleidx) : lua_pushnil(L));\n  lua_call(L, 6, 1);\n  return luaT_typenameid(L, tname);\n}\n\nint luaT_pushmetatable(lua_State *L, const char *tname)\n{\n  lua_getfield(L, LUA_REGISTRYINDEX, tname);\n  if(lua_isnil(L, -1))\n  {\n    lua_pop(L, 1);\n    return 0;\n  }\n  return 1;\n}\n\nconst char *luaT_typenameid(lua_State *L, const char *tname)\n{\n  if(luaT_pushmetatable(L, tname))\n  {\n    const char *tnameid = NULL;\n    lua_rawget(L, LUA_REGISTRYINDEX);\n    if(lua_isstring(L, -1))\n      tnameid = lua_tostring(L, -1);\n    lua_pop(L, 1); /* the string/nil */\n    return tnameid;\n  }\n  return NULL;\n}\n\nstatic const char cdataname[] = \"\"\n  \"local ok, ffi = pcall(require, 'ffi')\\n\"\n  \"if ok then\\n\"\n  \"  local id2name = {}\\n\"\n  \"  return function(cdata, name)\\n\"\n  \"    local id\\n\"\n  \"    if jit then\\n\"\n  \"      id = tonumber(ffi.typeof(cdata))\\n\"\n  \"    else\\n\"\n  \"      id = tostring(ffi.typeof(cdata))\\n\"\n  \"    end\\n\"\n  \"    if id then\\n\"\n  
\"      if name then\\n\"\n  \"        id2name[id] = name\\n\"\n  \"        return name\\n\"\n  \"      else\\n\"\n  \"        return rawget(id2name, id)\\n\"\n  \"      end\\n\"\n  \"    end\\n\"\n  \"    return nil\\n\"\n  \"  end\\n\"\n  \"else\\n\"\n  \"  return function() end\\n\"\n  \"end\\n\";\n\nstatic const char* luaT_cdataname(lua_State *L, int ud, const char *tname)\n{\n  lua_pushstring(L, \"__cdataname\");\n  lua_rawget(L, LUA_REGISTRYINDEX);\n  if(lua_isnil(L,-1))\n  {\n    lua_pop(L, 1);\n\n    if(luaL_dostring(L, cdataname)) /* did something go wrong? */\n      luaL_error(L, \"internal error (could not load cdataname): %s\", lua_tostring(L, -1));\n\n    lua_pushstring(L, \"__cdataname\");\n    lua_pushvalue(L, -2);\n    lua_rawset(L, LUA_REGISTRYINDEX);\n  }\n  if(!lua_isfunction(L, -1)) /* should not happen */\n    luaL_error(L, \"internal error (cdataname is not a function)\");\n\n  lua_pushvalue(L, ud);\n  if(tname)\n    lua_pushstring(L, tname);\n  if(lua_pcall(L, (tname ? 
2 : 1), 1, 0))\n    luaL_error(L, \"internal error (cdataname): %s\", lua_tostring(L, -1));\n\n  tname = lua_tostring(L, -1);\n  lua_pop(L, 1);\n\n  return tname;\n}\n\nstatic void* CDATA_MT_KEY = &CDATA_MT_KEY;\nstatic const char cdatamt[] = \"\"\n  \"local ok, ffi = pcall(require, 'ffi')\\n\"\n  \"if ok and not jit then\\n\"\n  \"  return ffi.debug().cdata_mt\\n\"\n  \"else\\n\"\n  \"  return {}\\n\"\n  \"end\\n\";\n\nstatic int luaT_iscdata(lua_State *L, int ud)\n{\n  int type = lua_type(L, ud);\n  if(type == 10)\n    return 1;\n  if(type != LUA_TUSERDATA)\n    return 0;\n  if(!lua_getmetatable(L, ud))\n    return 0;\n\n  lua_pushlightuserdata(L, CDATA_MT_KEY);\n  lua_rawget(L, LUA_REGISTRYINDEX);\n  if (lua_isnil(L, -1))\n  {\n    // initialize cdata metatable\n    lua_pop(L, 1);\n    if(luaL_dostring(L, cdatamt))\n      luaL_error(L, \"internal error (could not load cdata mt): %s\", lua_tostring(L, -1));\n\n    lua_pushlightuserdata(L, CDATA_MT_KEY);\n    lua_pushvalue(L, -2);\n    lua_rawset(L, LUA_REGISTRYINDEX);\n  }\n\n  int iscdata = lua_rawequal(L, -1, -2);\n  lua_pop(L, 2);\n  return iscdata;\n}\n\nconst char* luaT_typename(lua_State *L, int ud)\n{\n  if(luaT_iscdata(L, ud))\n    return luaT_cdataname(L, ud, NULL);\n  else if(lua_getmetatable(L, ud))\n  {\n    const char *tname = NULL;\n    lua_rawget(L, LUA_REGISTRYINDEX);\n    if(lua_isstring(L, -1))\n      tname = lua_tostring(L, -1);\n    lua_pop(L, 1); /* the string/nil */\n    return tname;\n  }\n  return NULL;\n}\n\nvoid luaT_pushudata(lua_State *L, void *udata, const char *tname)\n{\n  if(udata)\n  {\n    void **udata_p = lua_newuserdata(L, sizeof(void*));\n    *udata_p = udata;\n    if(!luaT_pushmetatable(L, tname))\n      luaL_error(L, \"Torch internal problem: cannot find metatable for type <%s>\", tname);\n    lua_setmetatable(L, -2);\n  }\n  else\n    lua_pushnil(L);\n}\n\nvoid *luaT_toudata(lua_State *L, int ud, const char *tname)\n{\n  void **p = lua_touserdata(L, ud);\n  if(p != NULL) /* 
value is a userdata? */\n  {\n    if(!luaT_pushmetatable(L, tname))\n      luaL_error(L, \"Torch internal problem: cannot find metatable for type <%s>\", tname);\n\n    /* initialize the table we want to get the metatable on */\n    /* note that we have to be careful with indices, as we just inserted stuff */\n    lua_pushvalue(L, (ud < 0 ? ud - 1 : ud));\n    while(lua_getmetatable(L, -1)) /* get the next metatable */\n    {\n      lua_remove(L, -2); /* remove the previous metatable [or object, if first time] */\n      if(lua_rawequal(L, -1, -2))\n      {\n        lua_pop(L, 2);  /* remove the two metatables */\n        return *p;\n      }\n    }\n    lua_pop(L, 2); /* remove the two metatables */\n  }\n  return NULL;\n}\n\nint luaT_isudata(lua_State *L, int ud, const char *tname)\n{\n  if(luaT_toudata(L, ud, tname))\n    return 1;\n  else\n    return 0;\n}\n\nvoid *luaT_checkudata(lua_State *L, int ud, const char *tname)\n{\n  void *p = luaT_toudata(L, ud, tname);\n  if(!p)\n    luaT_typerror(L, ud, tname);\n  return p;\n}\n\nvoid luaT_pushlong(lua_State *L, long n)\n{\n#if LUA_VERSION_NUM >= 503\n  /* Only push the value as an integer if it fits in lua_Integer,\n   or if the lua_Number representation will be even worse */\n  if (sizeof(lua_Integer) >= sizeof(long) || sizeof(lua_Number) <= sizeof(lua_Integer)) {\n    lua_pushinteger(L, n);\n  } else {\n    lua_pushnumber(L, (lua_Number)n);\n  }\n#else\n  lua_pushnumber(L, (lua_Number)n);\n#endif\n}\n\nlong luaT_checklong(lua_State *L, int idx)\n{\n#if LUA_VERSION_NUM >= 503\n  if (sizeof(lua_Integer) >= sizeof(long) || sizeof(lua_Number) <= sizeof(lua_Integer)) {\n    return (long)luaL_checkinteger(L, idx);\n  } else {\n    return (long)luaL_checknumber(L, idx);\n  }\n#else\n  return (long)luaL_checknumber(L, idx);\n#endif\n}\n\nlong luaT_tolong(lua_State *L, int idx)\n{\n#if LUA_VERSION_NUM == 503\n  if (sizeof(lua_Integer) >= sizeof(long) || sizeof(lua_Number) <= sizeof(lua_Integer)) {\n    return 
(long)lua_tointeger(L, idx);\n  } else {\n    return (long)lua_tonumber(L, idx);\n  }\n#else\n  return (long)lua_tonumber(L, idx);\n#endif\n}\n\nvoid luaT_pushinteger(lua_State *L, ptrdiff_t n)\n{\n#if LUA_VERSION_NUM >= 503\n  /* Only push the value as an integer if it fits in lua_Integer,\n   or if the lua_Number representation will be even worse */\n  if (sizeof(lua_Integer) >= sizeof(ptrdiff_t) || sizeof(lua_Number) <= sizeof(lua_Integer)) {\n    lua_pushinteger(L, n);\n  } else {\n    lua_pushnumber(L, (lua_Number)n);\n  }\n#else\n  lua_pushnumber(L, (lua_Number)n);\n#endif\n}\n\nptrdiff_t luaT_checkinteger(lua_State *L, int idx)\n{\n#if LUA_VERSION_NUM >= 503\n  if (sizeof(lua_Integer) >= sizeof(ptrdiff_t) || sizeof(lua_Number) <= sizeof(lua_Integer)) {\n    return (ptrdiff_t)luaL_checkinteger(L, idx);\n  } else {\n    return (ptrdiff_t)luaL_checknumber(L, idx);\n  }\n#else\n  return (ptrdiff_t)luaL_checknumber(L, idx);\n#endif\n}\n\nvoid *luaT_getfieldcheckudata(lua_State *L, int ud, const char *field, const char *tname)\n{\n  void *p;\n  lua_getfield(L, ud, field);\n  if(lua_isnil(L, -1))\n    luaL_error(L, \"bad argument #%d (field %s does not exist)\", ud, field);\n  p = luaT_toudata(L, -1, tname);\n  if(!p)\n    luaL_error(L, \"bad argument #%d (field %s is not a %s)\", ud, field, tname);\n  return p;\n}\n\nvoid *luaT_getfieldchecklightudata(lua_State *L, int ud, const char *field)\n{\n  void *p;\n  lua_getfield(L, ud, field);\n  if(lua_isnil(L, -1))\n    luaL_error(L, \"bad argument #%d (field %s does not exist)\", ud, field);\n\n  if(!lua_islightuserdata(L, -1))\n    luaL_error(L, \"bad argument #%d (field %s is not a light userdata)\", ud, field);\n\n  p = lua_touserdata(L, -1);\n\n  return p;\n}\n\ndouble luaT_getfieldchecknumber(lua_State *L, int ud, const char *field)\n{\n  lua_getfield(L, ud, field);\n  if(lua_isnil(L, -1))\n    luaL_error(L, \"bad argument #%d (field %s does not exist)\", ud, field);\n  if(!lua_isnumber(L, -1))\n    luaL_error(L, 
\"bad argument #%d (field %s is not a number)\", ud, field);\n  return lua_tonumber(L, -1);\n}\n\nint luaT_getfieldcheckint(lua_State *L, int ud, const char *field)\n{\n  lua_getfield(L, ud, field);\n  if(lua_isnil(L, -1))\n    luaL_error(L, \"bad argument #%d (field %s does not exist)\", ud, field);\n  if(!lua_isnumber(L, -1))\n    luaL_error(L, \"bad argument #%d (field %s is not a number)\", ud, field);\n  return (int)lua_tonumber(L, -1);\n}\n\nconst char* luaT_getfieldcheckstring(lua_State *L, int ud, const char *field)\n{\n  lua_getfield(L, ud, field);\n  if(lua_isnil(L, -1))\n    luaL_error(L, \"bad argument #%d (field %s does not exist)\", ud, field);\n  if(!lua_isstring(L, -1))\n    luaL_error(L, \"bad argument #%d (field %s is not a string)\", ud, field);\n  return lua_tostring(L, -1);\n}\n\nint luaT_getfieldcheckboolean(lua_State *L, int ud, const char *field)\n{\n  lua_getfield(L, ud, field);\n  if(lua_isnil(L, -1))\n    luaL_error(L, \"bad argument #%d (field %s does not exist)\", ud, field);\n  if(!lua_isboolean(L, -1))\n    luaL_error(L, \"bad argument #%d (field %s is not a boolean)\", ud, field);\n  return lua_toboolean(L, -1);\n}\n\nvoid luaT_getfieldchecktable(lua_State *L, int ud, const char *field)\n{\n  lua_getfield(L, ud, field);\n  if(lua_isnil(L, -1))\n    luaL_error(L, \"bad argument #%d (field %s does not exist)\", ud, field);\n  if(!lua_istable(L, -1))\n    luaL_error(L, \"bad argument #%d (field %s is not a table)\", ud, field);\n}\n\n/**** type checks as in luaL ****/\nint luaT_typerror(lua_State *L, int ud, const char *tname)\n{\n  const char *msg;\n  const char *tnameud = luaT_typename(L, ud);\n\n  if(!tnameud)\n    tnameud = lua_typename(L, ud);\n\n  msg = lua_pushfstring(L, \"%s expected, got %s\",\n                        tname,\n                        (tnameud ? 
tnameud : \"unknown object\"));\n\n  return luaL_argerror(L, ud, msg);\n}\n\nint luaT_checkboolean(lua_State *L, int ud)\n{\n  if(!lua_isboolean(L, ud))\n    luaT_typerror(L, ud, lua_typename(L, LUA_TBOOLEAN));\n  return lua_toboolean(L, ud);\n}\n\nint luaT_optboolean(lua_State *L, int ud, int def)\n{\n  if(lua_isnoneornil(L,ud))\n    return def;\n\n  return luaT_checkboolean(L, ud);\n}\n\nvoid luaT_registeratname(lua_State *L, const struct luaL_Reg *methods, const char *name)\n{\n  int idx = lua_gettop(L);\n\n  luaL_checktype(L, idx, LUA_TTABLE);\n  lua_pushstring(L, name);\n  lua_rawget(L, idx);\n\n  if(lua_isnil(L, -1))\n  {\n    lua_pop(L, 1);\n    lua_pushstring(L, name);\n    lua_newtable(L);\n    lua_rawset(L, idx);\n\n    lua_pushstring(L, name);\n    lua_rawget(L, idx);\n  }\n\n  luaT_setfuncs(L, methods, 0);\n  lua_pop(L, 1);\n}\n\n\n/* returns the name of the class itself (sans nesting) */\nconst char* luaT_classrootname(const char *tname)\n{\n  int idx;\n  int sz = strlen(tname);\n\n  for(idx = sz-1; idx >= 0 ; idx--)\n  {\n    if(tname[idx] == '.')\n      return tname+idx+1;\n  }\n  return tname;\n}\n\n/* parent_name must be a buffer at least as big as tname.\n * If class has a parent, returns true; and, sets\n * parent name to that of full parent hierarchy (e.g.\n * given class `A.b.c`, sets parent_name to `A.b`)\n */\nint luaT_fullparentname(const char *tname, char *parent_name)\n{\n  int sz = strlen(tname);\n  int idx;\n  for(idx = sz-1; idx > 0 ; idx--)\n    if(tname[idx] == '.' 
|| tname[idx] == '\\0') break;\n\n  if (idx > 0) strncpy(parent_name, tname, idx);\n  parent_name[idx] = '\\0';\n  return tname[idx] == '.';\n}\n\n/* alias for ensuring backwards compatibilty;\n * use of luaT_fullparentname is preferred.\n */\nint luaT_classmodulename(const char *tname, char *parent_name)\n{\n  return luaT_fullparentname(tname, parent_name);\n}\n\n/* parent_name must be a buffer at least as big as tname.\n * If class has a parent, returns true; and, sets\n * parent name to that of outermost parent (e.g.\n * given class `A.b.c`, sets parent_name to `A`)\n */\nint luaT_outerparentname(const char *tname, char *parent_name)\n{\n  char chars[] = {'.', '\\0'};\n  size_t idx;\n  idx = strcspn(tname, chars);\n  strncpy(parent_name, tname, idx);\n  parent_name[idx] = '\\0';\n  return tname[idx] == '.';\n}\n\n/* parent_name must be a buffer at least as big as tname.\n * If class has a parent, returns true; and, sets parent\n * name to that of innermost parent (e.g. given class\n * `A.b.c`, sets parent_name to `b`). In the comments\n * below, the inner parent name is abbreviated as IPN.\n */\nint luaT_innerparentname(const char *tname, char *parent_name)\n{\n  int sz = strlen(tname);\n  int tail, head;\n  for(tail = sz-1; tail >= 0 ; tail--) // tail points to\n    if(tname[tail] == '.') break;      // just past IPN\n\n  if (tail == 0) return 0;\n\n  for(head = tail-1; head >= 0; head--) // head points to\n    if(tname[head] == '.') break;       // just before IPN\n\n  head += 1; // update head to start of IPN\n  tail -= head; // update tail to strlen(IPN)\n  strncpy(parent_name, tname+head, tail);\n  parent_name[tail] = '\\0';\n  return 1;\n}\n\n/* Method for pushing a class's immediate parent to the\n * stack (e.g. 
given class `A.b.c`, pushes `b` to the stack)\n */\nvoid luaT_getinnerparent(lua_State *L, const char *tname)\n{\n  /* Local variables */\n  char term[256];\n  char chars[] = {'.', '\\0'};\n  const char *tname_full = tname; // used for error case\n\n  /* Get outermost table from Lua */\n  int n = strcspn(tname, chars);\n  strncpy(term, tname, n);\n  term[n] = '\\0';\n  lua_getglobal(L, term);\n  tname  += n + 1;\n\n  /* Traverse hierarchy down to last table*/\n  n = strcspn(tname, chars);\n  while(n < strlen(tname))\n  {\n    /* Check that current parent is a table (i.e. a module) */\n    if(!lua_istable(L, -1)){\n      strncpy(term, tname_full, tname - tname_full - 1);\n      term[tname - tname_full] = '\\0';\n      luaL_error(L, \"while creating metatable %s: bad argument #1 (%s is an invalid module name)\", tname_full, term);\n    }\n    strncpy(term, tname, n);\n    term[n] = '\\0';\n    lua_getfield(L, -1, term);\n    lua_remove(L, -2);\n    tname += n + 1;\n    n = strcspn(tname, chars); // prepare for next\n  }\n\n  /* Check that resulting parent is a table (i.e. 
a module) */\n  if(!lua_istable(L, -1)){\n    strncpy(term, tname_full, tname - tname_full - 1);\n    term[tname - tname_full] = '\\0';\n    luaL_error(L, \"while creating metatable %s: bad argument #1 (%s is an invalid module name)\", tname_full, term);\n  }\n}\n\n\nint luaT_lua_newmetatable(lua_State *L)\n{\n  /* Local Variables */\n  const char* tname = luaL_checkstring(L, 1);\n  char parent_name[256];\n  int is_in_module = 0;\n\n  /* Argument Checking */\n  lua_settop(L, 6);\n  luaL_argcheck(L, lua_isnoneornil(L, 2) || lua_isstring(L, 2), 2, \"parent class name or nil expected\");\n  luaL_argcheck(L, lua_isnoneornil(L, 3) || lua_isfunction(L, 3), 3, \"constructor function or nil expected\");\n  luaL_argcheck(L, lua_isnoneornil(L, 4) || lua_isfunction(L, 4), 4, \"destructor function or nil expected\");\n  luaL_argcheck(L, lua_isnoneornil(L, 5) || lua_isfunction(L, 5), 5, \"factory function or nil expected\");\n  luaL_argcheck(L, lua_isnoneornil(L, 6) || lua_istable(L, 6), 6, \"module table or nil expected\");\n\n  /* Push immediate parent module to stack */\n  if(lua_isnoneornil(L, 6)) {\n    lua_pop(L, 1); /* remove the nil */\n    is_in_module = luaT_fullparentname(tname, parent_name);\n    if (is_in_module)\n      luaT_getinnerparent(L, tname);\n    else\n      lua_pushglobaltable(L);\n  }\n\n  if(!lua_istable(L, -1))\n    luaL_error(L, \"while creating metatable %s: bad argument #1 (%s is an invalid module name)\", tname, parent_name);\n\n  /* we first create the new metaclass if we have to */\n  if(!luaT_pushmetatable(L, tname))\n  {\n    /* create the metatable */\n    lua_newtable(L);\n\n    /* registry[name] = metatable */\n    lua_pushvalue(L, -1);\n    lua_setfield(L, LUA_REGISTRYINDEX, tname);\n\n    /* registry[metatable] = tname */\n    lua_pushvalue(L, -1);\n    lua_pushstring(L, tname);\n    lua_rawset(L, LUA_REGISTRYINDEX);\n\n    /* __index handling */\n    lua_pushcfunction(L, luaT_mt__index);\n    lua_setfield(L, -2, \"__index\");\n\n    /* 
__newindex handling */\n    lua_pushcfunction(L, luaT_mt__newindex);\n    lua_setfield(L, -2, \"__newindex\");\n\n    /* __typename contains the typename */\n    lua_pushstring(L, tname);\n    lua_setfield(L, -2, \"__typename\");\n\n    /* __metatable is self */\n    lua_pushvalue(L, -1);\n    lua_setfield(L, -2, \"__metatable\");\n\n    /* by default, __version equals 1 */\n    lua_pushnumber(L, 1);\n    lua_setfield(L, -2, \"__version\");\n\n    /* assign default operator functions */\n    lua_pushcfunction(L, luaT_mt__tostring);\n    lua_setfield(L, -2, \"__tostring\");\n\n    lua_pushcfunction(L, luaT_mt__add);\n    lua_setfield(L, -2, \"__add\");\n\n    lua_pushcfunction(L, luaT_mt__sub);\n    lua_setfield(L, -2, \"__sub\");\n\n    lua_pushcfunction(L, luaT_mt__mul);\n    lua_setfield(L, -2, \"__mul\");\n\n    lua_pushcfunction(L, luaT_mt__div);\n    lua_setfield(L, -2, \"__div\");\n\n    lua_pushcfunction(L, luaT_mt__mod);\n    lua_setfield(L, -2, \"__mod\");\n\n    lua_pushcfunction(L, luaT_mt__pow);\n    lua_setfield(L, -2, \"__pow\");\n\n    lua_pushcfunction(L, luaT_mt__unm);\n    lua_setfield(L, -2, \"__unm\");\n\n    lua_pushcfunction(L, luaT_mt__concat);\n    lua_setfield(L, -2, \"__concat\");\n\n    lua_pushcfunction(L, luaT_mt__len);\n    lua_setfield(L, -2, \"__len\");\n\n    lua_pushcfunction(L, luaT_mt__eq);\n    lua_setfield(L, -2, \"__eq\");\n\n    lua_pushcfunction(L, luaT_mt__lt);\n    lua_setfield(L, -2, \"__lt\");\n\n    lua_pushcfunction(L, luaT_mt__le);\n    lua_setfield(L, -2, \"__le\");\n\n    lua_pushcfunction(L, luaT_mt__call);\n    lua_setfield(L, -2, \"__call\");\n  }\n\n  /* we assign the parent class if necessary */\n  if(!lua_isnoneornil(L, 2))\n  {\n    if(lua_getmetatable(L, -1))\n      luaL_error(L, \"class %s has been already assigned a parent class\\n\", tname);\n    else\n    {\n      const char* parent_tname = luaL_checkstring(L, 2);\n      if(!luaT_pushmetatable(L, parent_tname))\n        luaL_error(L, \"bad argument #2 
(invalid parent class name %s)\", parent_tname);\n      lua_setmetatable(L, -2);\n    }\n  }\n\n  /* register the destructor function  */\n  if(!lua_isnoneornil(L, 4))\n  {\n    /* does it exists already? */\n    lua_pushstring(L, \"__gc\");\n    lua_rawget(L, -2);\n\n    if(lua_isnil(L, -1))\n    {\n      lua_pop(L, 1); /* pop nil */\n      lua_pushstring(L, \"__gc\");\n      lua_pushvalue(L, 4);\n      lua_rawset(L, -3);\n    }\n    else\n      luaL_error(L, \"%s has been already assigned a destructor\", tname);\n  }\n\n  /* register the factory function  */\n  if(!lua_isnoneornil(L, 5))\n  {\n    /* does it exists already? */\n    lua_pushstring(L, \"__factory\");\n    lua_rawget(L, -2);\n\n    if(lua_isnil(L, -1))\n    {\n      lua_pop(L, 1); /* pop nil */\n      lua_pushstring(L, \"__factory\");\n      lua_pushvalue(L, 5);\n      lua_rawset(L, -3);\n    }\n    else\n      luaL_error(L, \"%s has been already assigned a factory\", tname);\n  }\n\n  /******** Constructor table and metatable ********/\n  lua_pushstring(L, \"__constructor\");\n  lua_rawget(L, -2);\n  if(lua_isnil(L, -1))\n  {\n    lua_pop(L, 1);                        /* pop nil */\n    lua_newtable(L);                      /* fancy table */\n    lua_newtable(L);                      /* fancy metatable */\n\n    lua_pushvalue(L, -3);                 /* metatable */\n    lua_setfield(L, -2, \"__index\");       /* so we can get the methods */\n\n    lua_pushcfunction(L, luaT_cmt__newindex);\n    lua_setfield(L, -2, \"__newindex\");    /* so we add new methods */\n\n    lua_pushcfunction(L, luaT_cmt__call);\n    lua_setfield(L, -2, \"__call\");        /* so we can create, we are here for only that */\n\n    lua_pushvalue(L, -3);\n    lua_setfield(L, -2, \"__metatable\");   /* redirect to metatable with methods */\n\n    lua_setmetatable(L, -2);              /* constructor metatable is ... 
this fancy metatable */\n\n    /* set metatable[__constructor] = constructor-metatable */\n    lua_pushstring(L, \"__constructor\");\n    lua_pushvalue(L, -2);\n    lua_rawset(L, -4);\n  }\n\n  /* register the constructor function  */\n  if(!lua_isnoneornil(L, 3))\n  {\n    /* get constructor metatable */\n    lua_getmetatable(L, -1);\n\n    /* does it exists already? */\n    lua_pushstring(L, \"__new\");\n    lua_rawget(L, -2);\n\n    if(lua_isnil(L, -1))\n    {\n      lua_pop(L, 1); /* pop nil */\n      lua_pushstring(L, \"__new\");\n      lua_pushvalue(L, 3);\n      lua_rawset(L, -3);\n\n      /* set \"new\" in the metatable too */\n      lua_pushstring(L, \"new\");\n      lua_pushvalue(L, 3);\n      lua_rawset(L, -5);\n    }\n    else\n      luaL_error(L, \"%s has been already assigned a constructor\", tname);\n\n    /* pop constructor metatable */\n    lua_pop(L, 1);\n  }\n\n  /* module.name = constructor metatable */\n  lua_setfield(L, 6, luaT_classrootname(tname));\n\n  return 1; /* returns the metatable */\n}\n\n/* Lua only utility functions */\n\n/* add any custom type, provided the object has a metatable */\nint luaT_lua_metatype(lua_State *L)\n{\n  if( (lua_gettop(L) != 2) && (lua_gettop(L) != 3) )\n    luaL_error(L, \"expecting: string table [ctype]\");\n\n  luaL_checkstring(L, 1);\n  luaL_checktype(L, 2, LUA_TTABLE);\n\n  if(lua_gettop(L) == 3)\n  {\n    if(!luaT_cdataname(L, 3, lua_tostring(L, 1)))\n      luaL_error(L, \"could not register cdata type -- missing ffi library?\");\n  }\n\n  /* registry[name] = metatable */\n  lua_pushvalue(L, 1);\n  lua_pushvalue(L, 2);\n  lua_rawset(L, LUA_REGISTRYINDEX);\n\n  /* registry[metatable] = tname */\n  lua_pushvalue(L, 2);\n  lua_pushvalue(L, 1);\n  lua_rawset(L, LUA_REGISTRYINDEX);\n\n  return 0;\n}\n\n/* return a userdata from a C pointer */\n/* you are better to know what you are doing */\nint luaT_lua_pushudata(lua_State *L)\n{\n  void *udata = NULL;\n  const char *tname = luaL_checkstring(L, 2);\n\n  
if(lua_type(L, 1) == 10)\n    udata = *((void**)lua_topointer(L, 1));\n  else if(luaT_iscdata(L, 1))\n    udata = ((void**)lua_topointer(L, 1))[4];\n  else if(lua_isnumber(L, 1))\n    udata = (void*)(uintptr_t)lua_tonumber(L, 1);\n  else\n    luaL_argerror(L, 1, \"expecting number or cdata\");\n\n  luaT_pushudata(L, udata, tname);\n\n  return 1;\n}\n\nint luaT_lua_factory(lua_State *L)\n{\n  const char* tname = luaL_checkstring(L, 1);\n  if(luaT_pushmetatable(L, tname) && !lua_isnil(L, -1))\n  {\n    lua_pushstring(L, \"__factory\");\n    lua_rawget(L, -2);\n  }\n  else\n  {\n    lua_pushnil(L);\n  }\n  return 1;\n}\n\nint luaT_lua_getconstructortable(lua_State *L)\n{\n  const char* tname = luaL_checkstring(L, 1);\n  if(luaT_pushmetatable(L, tname))\n  {\n    lua_pushstring(L, \"__constructor\");\n    lua_rawget(L, -2);\n    return 1;\n  }\n  return 0;\n}\n\n\nint luaT_lua_typename(lua_State *L)\n{\n  const char* tname = NULL;\n  luaL_checkany(L, 1);\n  if((tname = luaT_typename(L, 1)))\n  {\n    lua_pushstring(L, tname);\n    return 1;\n  }\n  return 0;\n}\n\nint luaT_lua_isequal(lua_State *L)\n{\n  if(lua_isuserdata(L, 1) && lua_isuserdata(L, 2))\n  {\n    void **u1, **u2;\n    luaL_argcheck(L, luaT_typename(L, 1), 1, \"Torch object expected\");\n    luaL_argcheck(L, luaT_typename(L, 2), 2, \"Torch object expected\");\n\n    u1 = lua_touserdata(L, 1);\n    u2 = lua_touserdata(L, 2);\n    if(*u1 == *u2)\n      lua_pushboolean(L, 1);\n    else\n      lua_pushboolean(L, 0);\n  }\n  else if(lua_istable(L, 1) && lua_istable(L, 2))\n    lua_pushboolean(L, lua_rawequal(L, 1, 2));\n  else\n    lua_pushboolean(L, 0);\n  return 1;\n}\n\nstatic void luaT_pushpointer(lua_State *L, const void *ptr)\n{\n#if LUA_VERSION_NUM >= 503\n  // this assumes that lua_Integer is a ptrdiff_t\n  if (sizeof(void *) > sizeof(lua_Integer))\n    luaL_error(L, \"Pointer value can't be represented as a Lua integer (an overflow would occur)\");\n  lua_pushinteger(L, (uintptr_t)(ptr));\n#else\n  
// 2^53 - this assumes that lua_Number is a double\n  if ((uintptr_t)ptr > 9007199254740992LLU)\n    luaL_error(L, \"Pointer value can't be represented as a Lua number (an overflow would occur)\");\n  lua_pushnumber(L, (uintptr_t)(ptr));\n#endif\n}\n\nint luaT_lua_pointer(lua_State *L)\n{\n  if(lua_type(L, 1) == 10) /* luajit cdata */\n  {\n    /* we want the pointer holded by cdata */\n    /* not the pointer on the cdata object */\n    const void* ptr = *((void**)lua_topointer(L, 1));\n    luaT_pushpointer(L, ptr);\n    return 1;\n  }\n  else if (luaT_iscdata(L, 1)) /* luaffi cdata */\n  {\n    void** ptr = (void**)lua_touserdata(L, 1);\n    luaT_pushpointer(L, ptr[4]);\n    return 1;\n  }\n  else if(lua_isuserdata(L, 1))\n  {\n    void **ptr;\n    luaL_argcheck(L, luaT_typename(L, 1), 1, \"Torch object expected\");\n    ptr = lua_touserdata(L, 1);\n    luaT_pushpointer(L, *ptr);\n    return 1;\n  }\n  else if(lua_istable(L, 1) || lua_isthread(L, 1) || lua_isfunction(L, 1))\n  {\n    const void* ptr = lua_topointer(L, 1);\n    luaT_pushpointer(L, ptr);\n    return 1;\n  }\n  else if(lua_isstring(L, 1))\n  {\n    const char* ptr = lua_tostring(L, 1);\n    luaT_pushpointer(L, ptr);\n    return 1;\n  }\n  else\n    luaL_error(L, \"Torch object, table, thread, cdata or function expected\");\n\n  return 0;\n}\n\nint luaT_lua_setenv(lua_State *L)\n{\n  if(!lua_isfunction(L, 1) && !lua_isuserdata(L, 1))\n    luaL_typerror(L, 1, \"function or userdata\");\n  luaL_checktype(L, 2, LUA_TTABLE);\n  lua_setuservalue(L, 1);\n  return 0;\n}\n\nint luaT_lua_getenv(lua_State *L)\n{\n  if(!lua_isfunction(L, 1) && !lua_isuserdata(L, 1))\n    luaL_typerror(L, 1, \"function or userdata\");\n  lua_getuservalue(L, 1);\n  if (lua_isnil(L, -1))\n    lua_newtable(L);\n  return 1;\n}\n\nint luaT_lua_getmetatable(lua_State *L)\n{\n  const char *tname = luaL_checkstring(L, 1);\n  if(luaT_pushmetatable(L, tname))\n    return 1;\n  return 0;\n}\n\nint luaT_lua_version(lua_State *L)\n{\n  
luaL_checkany(L, 1);\n\n  if(luaT_iscdata(L, 1))\n  {\n    const char *tname = luaT_cdataname(L, 1, NULL);\n    if(tname)\n    {\n      luaT_pushmetatable(L, tname);\n      lua_pushstring(L, \"__version\");\n      lua_rawget(L, -2);\n      return 1;\n    }\n    return 0;\n  }\n  else if(lua_getmetatable(L, 1))\n  {\n    lua_pushstring(L, \"__version\");\n    lua_rawget(L, -2);\n    return 1;\n  }\n  return 0;\n}\n\nint luaT_lua_setmetatable(lua_State *L)\n{\n  const char *tname = luaL_checkstring(L, 2);\n  luaL_checktype(L, 1, LUA_TTABLE);\n\n  if(!luaT_pushmetatable(L, tname))\n    luaL_error(L, \"unknown typename %s\\n\", tname);\n  lua_setmetatable(L, 1);\n\n  return 1;\n}\n\n/* metatable operator methods */\nstatic int luaT_mt__index(lua_State *L)\n{\n  if(!lua_getmetatable(L, 1))\n    luaL_error(L, \"critical internal indexing error: no metatable found\");\n\n  if(!lua_istable(L, -1))\n    luaL_error(L, \"critical internal indexing error: not a metatable\");\n\n  /* test for __index__ method first */\n  lua_getfield(L, -1, \"__index__\");\n  if(!lua_isnil(L, -1))\n  {\n    int result;\n\n    if(!lua_isfunction(L, -1))\n      luaL_error(L, \"critical internal indexing error: __index__ is not a function\");\n\n    lua_pushvalue(L, 1);\n    lua_pushvalue(L, 2);\n\n    lua_call(L, 2, LUA_MULTRET); /* DEBUG: risque: faut vraiment retourner 1 ou 2 valeurs... */\n\n    result = lua_toboolean(L, -1);\n    lua_pop(L, 1);\n\n    if(result)\n      return 1;\n\n    /* on the stack: 1. the object 2. the value 3. 
the metatable */\n    /* apparently, __index wants only one element returned */\n    /* return lua_gettop(L)-3; */\n\n  }\n  else\n    lua_pop(L, 1); /* remove nil __index__ on the stack */\n\n  lua_pushvalue(L, 2);\n  lua_gettable(L, -2);\n\n  return 1;\n}\n\nstatic int luaT_mt__newindex(lua_State *L)\n{\n  if(!lua_getmetatable(L, 1))\n    luaL_error(L, \"critical internal indexing error: no metatable found\");\n\n  if(!lua_istable(L, -1))\n    luaL_error(L, \"critical internal indexing error: not a metatable\");\n\n  /* test for __newindex__ method first */\n  lua_getfield(L, -1, \"__newindex__\");\n  if(!lua_isnil(L, -1))\n  {\n    int result;\n\n    if(!lua_isfunction(L, -1))\n      luaL_error(L, \"critical internal indexing error: __newindex__ is not a function\");\n\n    lua_pushvalue(L, 1);\n    lua_pushvalue(L, 2);\n    lua_pushvalue(L, 3);\n\n    lua_call(L, 3, 1); /* DEBUG: risque: faut vraiment retourner qqch */\n\n    result = lua_toboolean(L, -1);\n    lua_pop(L, 1);\n\n    if(result)\n      return 0;\n  }\n  else\n    lua_pop(L, 1); /* remove nil __newindex__ on the stack */\n\n  lua_pop(L, 1);    /* pop the metatable */\n  if(lua_istable(L, 1))\n    lua_rawset(L, 1);\n  else\n    luaL_error(L, \"the class %s cannot be indexed\", luaT_typename(L, 1));\n\n  return 0;\n}\n\n\n#define MT_UNI_OPERATOR_GET_HANDLER(NAME)                               \\\n    if(!lua_getmetatable(L, 1))                                         \\\n      luaL_error(L, \"internal error in __\" #NAME \": no metatable\");\n\n#define MT_BIN_OPERATOR_GET_HANDLER(NAME)                               \\\n    if(!lua_getmetatable(L, 1) && !lua_getmetatable(L,2) )              \\\n      luaL_error(L, \"internal error in __\" #NAME                        \\\n              \": no metatable in both operands\");\n\n#define MT_DECLARE_OPERATOR_BODY(NAME, NIL_BEHAVIOR)                    \\\n                                                                        \\\n    lua_getfield(L, -1, 
\"__\" #NAME \"__\");                               \\\n    if(lua_isnil(L, -1))                                                \\\n    {                                                                   \\\n      NIL_BEHAVIOR;                                                     \\\n    }                                                                   \\\n    else                                                                \\\n    {                                                                   \\\n      if(lua_isfunction(L, -1))                                         \\\n      {                                                                 \\\n        lua_insert(L, 1); /* insert function */                         \\\n        lua_pop(L, 1); /* remove metatable */                           \\\n        lua_call(L, lua_gettop(L)-1, LUA_MULTRET);                      \\\n          /* we return the result of the call */                        \\\n        return lua_gettop(L);                                           \\\n      }                                                                 \\\n      /* we return the thing the user left in __tostring__ */           \\\n    }                                                                   \\\n    return 0;                                                           \\\n\n/* note: check dans metatable pour ca, donc necessaire */\n#define MT_DECLARE_OPERATOR(NAME, NIL_BEHAVIOR)                         \\\n  int luaT_mt__##NAME(lua_State *L)                                     \\\n  {                                                                     \\\n    MT_UNI_OPERATOR_GET_HANDLER(NAME)                                   \\\n    MT_DECLARE_OPERATOR_BODY(NAME,NIL_BEHAVIOR)                         \\\n  }\n\n#define MT_DECLARE_BIN_OPERATOR(NAME, NIL_BEHAVIOR)                     \\\n  int luaT_mt__##NAME(lua_State *L)                                     \\\n  {                                                 
                    \\\n    MT_BIN_OPERATOR_GET_HANDLER(NAME)                                   \\\n    MT_DECLARE_OPERATOR_BODY(NAME,NIL_BEHAVIOR)                         \\\n  }\n\n\n#define BIN_OPERATOR_ERROR(NAME)                                        \\\n    luaL_error(L, \"both %s and %s have no \" #NAME \" operator\",          \\\n            luaT_typename(L, 1), luaT_typename(L,2))\n\nMT_DECLARE_BIN_OPERATOR(add,    BIN_OPERATOR_ERROR(addition) )\nMT_DECLARE_BIN_OPERATOR(sub,    BIN_OPERATOR_ERROR(substraction) )\nMT_DECLARE_BIN_OPERATOR(mul,    BIN_OPERATOR_ERROR(multiplication) )\nMT_DECLARE_BIN_OPERATOR(div,    BIN_OPERATOR_ERROR(division) )\nMT_DECLARE_BIN_OPERATOR(mod,    BIN_OPERATOR_ERROR(modulo) )\nMT_DECLARE_BIN_OPERATOR(pow,    BIN_OPERATOR_ERROR(power) )\nMT_DECLARE_BIN_OPERATOR(concat, BIN_OPERATOR_ERROR(concat) )\nMT_DECLARE_BIN_OPERATOR(eq,\n                    lua_settop(L, 2);\n                    lua_pushcfunction(L, luaT_lua_isequal);\n                    lua_insert(L, 1);\n                    lua_call(L, 2, 1);\n                    return 1;)\nMT_DECLARE_BIN_OPERATOR(lt, BIN_OPERATOR_ERROR(less-than) )\nMT_DECLARE_BIN_OPERATOR(le, BIN_OPERATOR_ERROR(less-equal) )\n\nMT_DECLARE_OPERATOR(tostring,\n                    lua_pushstring(L, luaT_typename(L, 1));\n                    return 1;)\nMT_DECLARE_OPERATOR(call, luaL_error(L, \"%s has no call operator\", luaT_typename(L, 1)))\nMT_DECLARE_OPERATOR(unm, luaL_error(L, \"%s has no negation operator\", luaT_typename(L, 1)))\nMT_DECLARE_OPERATOR(len, luaL_error(L, \"%s has no length operator\", luaT_typename(L, 1)))\n\n\n/* constructor metatable methods */\nint luaT_cmt__call(lua_State *L)\n{\n  if(!lua_istable(L, 1))\n    luaL_error(L, \"internal error in __call: not a constructor table\");\n\n  if(!lua_getmetatable(L, 1))\n    luaL_error(L, \"internal error in __call: no metatable available\");\n\n  lua_pushstring(L, \"__new\");\n  lua_rawget(L, -2);\n\n  if(lua_isnil(L, -1))\n    
luaL_error(L, \"no constructor available\");\n\n  lua_remove(L, 1); /* remove constructor atable */\n  lua_insert(L, 1); /* insert constructor */\n  lua_pop(L, 1);    /* remove fancy metatable */\n\n  lua_call(L, lua_gettop(L)-1, LUA_MULTRET);\n  return lua_gettop(L);\n}\n\nint luaT_cmt__newindex(lua_State *L)\n{\n  if(!lua_istable(L, 1))\n    luaL_error(L, \"internal error in __newindex: not a constructor table\");\n\n  if(!lua_getmetatable(L, 1))\n    luaL_error(L, \"internal error in __newindex: no metatable available\");\n\n  lua_pushstring(L, \"__metatable\");\n  lua_rawget(L, -2);\n\n  if(!lua_istable(L, -1))\n    luaL_error(L, \"internal error in __newindex: no metaclass available\");\n\n  lua_insert(L, 2);\n  lua_pop(L, 1); /* remove the metatable over the constructor table */\n\n  lua_rawset(L, -3);\n\n  return 0;\n}\n\n/******************** deprecated functions ********************/\nint luaT_pushmetaclass(lua_State *L, const char *tname)\n{\n  return luaT_pushmetatable(L, tname);\n}\n\nconst char* luaT_id(lua_State *L, int ud)\n{\n  return luaT_typename(L, ud);\n}\n\nconst char* luaT_id2typename(lua_State *L, const char *id)\n{\n  return id;\n}\n\nconst char* luaT_typename2id(lua_State *L, const char *tname)\n{\n  return luaT_typenameid(L, tname);\n}\n\nint luaT_getmetaclass(lua_State *L, int index)\n{\n  return lua_getmetatable(L, index);\n}\n\nconst char* luaT_checktypename2id(lua_State *L, const char *tname)\n{\n  const char* id = luaT_typenameid(L, tname);\n  if(!id)\n    luaL_error(L, \"unknown class <%s>\", tname);\n  return id;\n}\n\nvoid luaT_registeratid(lua_State *L, const struct luaL_Reg *methods, const char *id)\n{\n  luaT_registeratname(L, methods, id);\n}\n\n/**************************************************************/\n"
  },
  {
    "path": "lib/luaT/luaT.h",
    "content": "#ifndef LUAT_UTILS_INC\n#define LUAT_UTILS_INC\n\n#ifdef __cplusplus\nextern \"C\" {\n#endif\n#include <lua.h>\n#include <lauxlib.h>\n#ifdef __cplusplus\n}\n#endif\n\n#ifndef LUA_EXTERNC\n# ifdef __cplusplus\n#  define LUA_EXTERNC extern \"C\"\n# else\n#  define LUA_EXTERNC extern\n# endif\n#endif\n\n#if (defined(_MSC_VER) || defined(__MINGW32__))\n# define DLL_EXPORT __declspec(dllexport)\n# define DLL_IMPORT __declspec(dllimport)\n# ifdef luaT_EXPORTS\n#  define LUAT_API LUA_EXTERNC DLL_EXPORT\n# else\n#  define LUAT_API LUA_EXTERNC DLL_IMPORT\n# endif\n#else\n# define DLL_EXPORT\n# define DLL_IMPORT\n# define LUAT_API LUA_EXTERNC\n#endif\n\n#if LUA_VERSION_NUM == 501\n# define lua_pushglobaltable(L) lua_pushvalue(L, LUA_GLOBALSINDEX)\n# define lua_setuservalue lua_setfenv\n# define lua_getuservalue lua_getfenv\n#else\n# define lua_objlen lua_rawlen\nstatic int luaL_typerror(lua_State *L, int narg, const char *tname)\n{\n  return luaL_error(L, \"%s expected, got %s\", tname, luaL_typename(L, narg));\n}\n#endif\n\n\n/* C functions */\n\nLUAT_API void* luaT_alloc(lua_State *L, ptrdiff_t size);\nLUAT_API void* luaT_realloc(lua_State *L, void *ptr, ptrdiff_t size);\nLUAT_API void luaT_free(lua_State *L, void *ptr);\n\nLUAT_API void luaT_setfuncs(lua_State *L, const luaL_Reg *l, int nup);\n\nLUAT_API const char* luaT_newlocalmetatable(lua_State *L, const char *tname, const char *parent_tname,\n                                            lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory, int moduleidx);\n\nLUAT_API const char* luaT_newmetatable(lua_State *L, const char *tname, const char *parenttname,\n                                       lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory);\n\nLUAT_API int luaT_pushmetatable(lua_State *L, const char *tname);\n\nLUAT_API const char* luaT_typenameid(lua_State *L, const char *tname);\nLUAT_API const char* luaT_typename(lua_State *L, int ud);\n\nLUAT_API 
void luaT_pushudata(lua_State *L, void *udata, const char *tname);\nLUAT_API void *luaT_toudata(lua_State *L, int ud, const char *tname);\nLUAT_API int luaT_isudata(lua_State *L, int ud, const char *tname);\nLUAT_API void *luaT_checkudata(lua_State *L, int ud, const char *tname);\n\nLUAT_API void luaT_pushlong(lua_State *L, long n);\nLUAT_API long luaT_checklong(lua_State *L, int idx);\nLUAT_API long luaT_tolong(lua_State *L, int idx);\n\nLUAT_API void luaT_pushinteger(lua_State *L, ptrdiff_t n);\nLUAT_API ptrdiff_t luaT_checkinteger(lua_State *L, int idx);\n\nLUAT_API void *luaT_getfieldcheckudata(lua_State *L, int ud, const char *field, const char *tname);\nLUAT_API void *luaT_getfieldchecklightudata(lua_State *L, int ud, const char *field);\nLUAT_API double luaT_getfieldchecknumber(lua_State *L, int ud, const char *field);\nLUAT_API int luaT_getfieldcheckint(lua_State *L, int ud, const char *field);\nLUAT_API const char* luaT_getfieldcheckstring(lua_State *L, int ud, const char *field);\nLUAT_API int luaT_getfieldcheckboolean(lua_State *L, int ud, const char *field);\nLUAT_API void luaT_getfieldchecktable(lua_State *L, int ud, const char *field);\n\nLUAT_API int luaT_typerror(lua_State *L, int ud, const char *tname);\n\nLUAT_API int luaT_checkboolean(lua_State *L, int ud);\nLUAT_API int luaT_optboolean(lua_State *L, int ud, int def);\n\nLUAT_API void luaT_registeratname(lua_State *L, const struct luaL_Reg *methods, const char *name);\n\n/* utility functions */\nLUAT_API const char *luaT_classrootname(const char *tname);\nLUAT_API int luaT_classmodulename(const char *tname, char *module_name);\n\n/* debug */\nLUAT_API void luaT_stackdump(lua_State *L);\n\n/* Lua functions */\nLUAT_API int luaT_lua_newmetatable(lua_State *L);\nLUAT_API int luaT_lua_factory(lua_State *L);\nLUAT_API int luaT_lua_getconstructortable(lua_State *L);\nLUAT_API int luaT_lua_typename(lua_State *L);\nLUAT_API int luaT_lua_isequal(lua_State *L);\nLUAT_API int luaT_lua_pointer(lua_State 
*L);\nLUAT_API int luaT_lua_setenv(lua_State *L);\nLUAT_API int luaT_lua_getenv(lua_State *L);\nLUAT_API int luaT_lua_getmetatable(lua_State *L);\nLUAT_API int luaT_lua_version(lua_State *L);\nLUAT_API int luaT_lua_setmetatable(lua_State *L);\nLUAT_API int luaT_lua_metatype(lua_State *L);\nLUAT_API int luaT_lua_pushudata(lua_State *L);\n\n/* deprecated functions */\n/* ids have been replaced by string names to identify classes */\n/* comments show what function (that you should use) they call now */\n#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))\n#define LUAT_DEPRECATED  __attribute__((__deprecated__))\n#elif (defined(_MSC_VER) || defined(__MINGW32__))\n#define LUAT_DEPRECATED __declspec(deprecated)\n#else\n#define LUAT_DEPRECATED\n#endif\n\nLUAT_API LUAT_DEPRECATED int luaT_pushmetaclass(lua_State *L, const char *tname); /* same as luaT_pushmetatable */\nLUAT_API LUAT_DEPRECATED const char* luaT_id(lua_State *L, int ud); /* same as luaT_typename */\nLUAT_API LUAT_DEPRECATED const char* luaT_id2typename(lua_State *L, const char *id); /*  same as luaT_typenameid */\nLUAT_API LUAT_DEPRECATED const char* luaT_typename2id(lua_State *L, const char*); /* same as luaT_typenameid */\nLUAT_API LUAT_DEPRECATED int luaT_getmetaclass(lua_State *L, int index); /* same as luaT_getmetatable */\nLUAT_API LUAT_DEPRECATED const char* luaT_checktypename2id(lua_State *L, const char *tname);  /* same as luaT_typenameid */\nLUAT_API LUAT_DEPRECATED void luaT_registeratid(lua_State *L, const struct luaL_Reg *methods, const char *id); /* same as luaT_registeratname */\n\n#endif\n"
  },
  {
    "path": "lib/luaT/luaTConfig.cmake.in",
    "content": "# Find the luaT includes and library\n#\n# LUAT_INCLUDE_DIR -- where to find the includes\n# LUAT_LIBRARIES -- list of libraries to link against\n# LUAT_FOUND -- set to 1 if found\n\nSET(LUAT_FOUND 1)\nSET(LUAT_INCLUDE_DIR \"@LUAT_INCLUDE_DIR@\")\nSET(LUAT_LIBRARIES \"@LUAT_LIBRARIES@\")\n"
  },
  {
    "path": "mkdocs.yml",
    "content": "site_name: torch7\ntheme : simplex\nrepo_url : https://github.com/torch/torch7\nuse_directory_urls : false\nmarkdown_extensions: [extra]\ndocs_dir : doc\npages:\n- [index.md, Home]\n- [tensor.md, Tensor Library, Tensor]\n- [maths.md, Tensor Library, Tensor Math]\n- [storage.md, Tensor Library, Storage]\n- [file.md, File I/O Library, File Interface]\n- [diskfile.md, File I/O Library, Disk File]\n- [memoryfile.md, File I/O Library, Memory File]\n- [pipefile.md, File I/O Library, Pipe File]\n- [serialization.md, File I/O Library, Serialization]\n- [utility.md, Useful Utilities, Class]\n- [timer.md, Useful Utilities, Timer]\n- [tester.md, Useful Utilities, Tester]\n- [cmdline.md, Useful Utilities, CmdLine]\n- [random.md, Useful Utilities, Random]\n"
  },
  {
    "path": "paths.lua.in",
    "content": "local paths = {}\n\npaths.install_prefix = [[@Torch_INSTALL_PREFIX@]]\npaths.install_bin = [[@Torch_INSTALL_BIN@]]\npaths.install_man = [[@Torch_INSTALL_MAN@]]\npaths.install_lib = [[@Torch_INSTALL_LIB@]]\npaths.install_share = [[@Torch_INSTALL_SHARE@]]\npaths.install_include = [[@Torch_INSTALL_INCLUDE@]]\npaths.install_cmake = [[@Torch_INSTALL_CMAKE@]]\n\nreturn paths\n"
  },
  {
    "path": "random.lua",
    "content": "local wrap = require 'cwrap'\n\nrequire 'torchcwrap'\n\nlocal interface = wrap.CInterface.new()\n\ninterface:print(\n   [[\n#include \"luaT.h\"\n#include \"TH.h\"\n\nextern void torch_Generator_init(lua_State *L);\nextern void torch_Generator_new(lua_State *L);\n   ]])\n\nfor _,name in ipairs({\"seed\", \"initialSeed\"}) do\n   interface:wrap(name,\n                  string.format(\"THRandom_%s\",name),\n                  {{name='Generator', default=true},\n                   {name=\"long\", creturned=true}})\nend\n\ninterface:wrap('manualSeed',\n               'THRandom_manualSeed',\n               {{name='Generator', default=true},\n                {name=\"long\"}})\n\ninterface:wrap('getRNGState',\n                'THByteTensor_getRNGState',\n                {{name='Generator', default=true},\n                 {name='ByteTensor',default=true,returned=true,method={default='nil'}}\n                 })\n\ninterface:wrap('setRNGState',\n                'THByteTensor_setRNGState',\n                {{name='Generator', default=true},\n                 {name='ByteTensor',default=true,returned=true,method={default='nil'}}\n                 })\n\ninterface:register(\"random__\")\n                \ninterface:print(\n   [[\nvoid torch_random_init(lua_State *L)\n{\n  torch_Generator_init(L);\n  torch_Generator_new(L);\n  lua_setfield(L, -2, \"_gen\");\n  luaT_setfuncs(L, random__, 0);\n}\n]])\n\ninterface:tofile(arg[1])\n"
  },
  {
    "path": "rocks/torch-scm-1.rockspec",
    "content": "package = \"torch\"\nversion = \"scm-1\"\n\nsource = {\n   url = \"git://github.com/torch/torch7.git\",\n}\n\ndescription = {\n   summary = \"Torch7\",\n   detailed = [[\n   ]],\n   homepage = \"https://github.com/torch/torch7\",\n   license = \"BSD\"\n}\n\ndependencies = {\n   \"lua >= 5.1\",\n   \"paths >= 1.0\",\n   \"cwrap >= 1.0\"\n}\n\nbuild = {\n   type = \"command\",\n   build_command = [[\ncmake -E make_directory build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DLUA=$(LUA) -DLUALIB=$(LUALIB) -DLUA_BINDIR=\"$(LUA_BINDIR)\" -DLUA_INCDIR=\"$(LUA_INCDIR)\" -DLUA_LIBDIR=\"$(LUA_LIBDIR)\" -DLUADIR=\"$(LUADIR)\" -DLIBDIR=\"$(LIBDIR)\" -DCMAKE_INSTALL_PREFIX=\"$(PREFIX)\" && $(MAKE) -j$(getconf _NPROCESSORS_ONLN)\n]],\n\t platforms = {\n      windows = {\n           build_command = [[\ncmake -E make_directory build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DLUA=$(LUA) -DLUALIB=$(LUALIB) -DLUA_BINDIR=\"$(LUA_BINDIR)\" -DLUA_INCDIR=\"$(LUA_INCDIR)\" -DLUA_LIBDIR=\"$(LUA_LIBDIR)\" -DLUADIR=\"$(LUADIR)\" -DLIBDIR=\"$(LIBDIR)\" -DCMAKE_INSTALL_PREFIX=\"$(PREFIX)\" && $(MAKE)\n]]\n      }\n   },\n   install_command = \"cd build && $(MAKE) install\"\n}\n\n"
  },
  {
    "path": "test/longSize.lua",
    "content": "require 'torch'\n\nlocal tester = torch.Tester()\nlocal tests = torch.TestSuite()\n\nlocal tensor = torch.rand(2,3)\n\nfunction tests.diskFileLongSize8()\n  f = torch.DiskFile('tensor8.bin','w')\n  f:binary()\n  f:longSize(8)\n  f:writeObject(tensor)\n  f:close()\n  f = torch.DiskFile('tensor8.bin','r')\n  f:binary()\n  f:longSize(8)\n  tensor2 = f:readObject()\n  f:close()\n  tester:assert(tensor:norm()==tensor2:norm())\n  os.remove('tensor8.bin')\nend\n\nfunction tests.diskFileLongSize4()\n  f = torch.DiskFile('tensor4.bin','w')\n  f:binary()\n  f:longSize(4)\n  f:writeObject(tensor)\n  f:close()\n  f = torch.DiskFile('tensor4.bin','r')\n  f:binary()\n  f:longSize(4)\n  tensor2 = f:readObject()\n  f:close()\n  tester:assert(tensor:norm()==tensor2:norm())\n  os.remove('tensor4.bin')\nend\n\nfunction tests.memoryFileLongSize8()\n  f = torch.MemoryFile()\n  f:binary()\n  f:longSize(8)\n  f:writeObject(tensor)\n  f:seek(1)\n  tensor2 = f:readObject()\n  f:close()\n  tester:assert(tensor:norm()==tensor2:norm())\nend\n\nfunction tests.memoryFileLongSize4()\n  f = torch.MemoryFile()\n  f:binary()\n  f:longSize(4)\n  f:writeObject(tensor)\n  f:seek(1)\n  tensor2 = f:readObject()\n  f:close()\n  tester:assert(tensor:norm()==tensor2:norm())\nend\n\ntester:add(tests)\ntester:run()\n"
  },
  {
    "path": "test/test.lua",
    "content": "--require 'torch'\n\nlocal mytester\nlocal torchtest = torch.TestSuite()\nlocal msize = 100\nlocal precision\n\n-- Lua 5.2 compatibility\nlocal loadstring = loadstring or load\nlocal unpack = unpack or table.unpack\n\nlocal function maxdiff(x,y)\n   local d = x-y\n   if x:type() == 'torch.DoubleTensor' or x:type() == 'torch.FloatTensor' then\n      return d:abs():max()\n   else\n      local dd = torch.Tensor():resize(d:size()):copy(d)\n      return dd:abs():max()\n   end\nend\n\n-- workarounds for non-existent functions\nfunction torch.HalfTensor:__sub(other)\n   return (self:real() - other:real()):half()\nend\n\nfunction torch.HalfTensor:mean(dim)\n   return self:real():mean(dim):half()\nend\n\nfunction torch.HalfTensor:abs()\n   return self:real():abs():half()\nend\n\nfunction torch.HalfTensor:max()\n   return self:real():max()\nend\n\nfunction torch.HalfTensor:add(a, b)\n   return (self:real():add(a, b:real())):half()\nend\n\nfunction torch.HalfTensor:reshape(a, b)\n   return (self:real():reshape(a, b)):half()\nend\n\nfunction torch.HalfTensor:fill(a)\n   return self:real():fill(a):half()\nend\n\nfunction torchtest.dot()\n   local types = {\n      ['torch.DoubleTensor'] = 1e-8, -- for ddot\n      ['torch.FloatTensor']  = 1e-4, -- for sdot\n   }\n   for tname, prec in pairs(types) do\n      local v1 = torch.randn(100):type(tname)\n      local v2 = torch.randn(100):type(tname)\n\n      local res1 = torch.dot(v1,v2)\n\n      local res2 = 0\n      for i = 1,v1:size(1) do\n         res2 = res2 + v1[i] * v2[i]\n      end\n\n      local err = math.abs(res1-res2)\n\n      mytester:assertlt(err, prec, 'error in torch.dot (' .. tname .. 
')')\n   end\nend\n\nlocal genericSingleOpTest = [[\n   -- [res] torch.functionname([res,] x)\n   -- contiguous\n   local m1 = torch.randn(100,100)\n   local res1 = torch.functionname(m1[{ 4,{} }])\n   local res2 = res1:clone():zero()\n   for i = 1,res1:size(1) do\n      res2[i] = math.functionname(m1[4][i])\n   end\n   local err = res1:clone():zero()\n   -- find absolute error\n   for i = 1, res1:size(1) do\n      err[i] = math.abs(res1[i] - res2[i])\n   end\n   -- find maximum element of error\n   local maxerrc = 0\n   for i = 1, err:size(1) do\n      if err[i] > maxerrc then\n         maxerrc = err[i]\n      end\n   end\n\n   -- non-contiguous\n   local m1 = torch.randn(100,100)\n   local res1 = torch.functionname(m1[{ {}, 4 }])\n   local res2 = res1:clone():zero()\n   for i = 1,res1:size(1) do\n      res2[i] = math.functionname(m1[i][4])\n   end\n   local err = res1:clone():zero()\n   -- find absolute error\n   for i = 1, res1:size(1) do\n      err[i] = math.abs(res1[i] - res2[i])\n   end\n   -- find maximum element of error\n   local maxerrnc = 0\n   for i = 1, err:size(1) do\n      if err[i] > maxerrnc then\n         maxerrnc = err[i]\n      end\n   end\n   return maxerrc, maxerrnc\n--]]\n\nfunction torchtest.sin()\n   local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'sin'))\n   local maxerrc, maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.sinh()\n   local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'sinh'))\n   local maxerrc, maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.asin()\n   local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'asin'))\n   local maxerrc, 
maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.cos()\n   local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'cos'))\n   local maxerrc, maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.cosh()\n   local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'cosh'))\n   local maxerrc, maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.acos()\n   local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'acos'))\n   local maxerrc, maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.tan()\n   local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'tan'))\n   local maxerrc, maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.tanh()\n   local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'tanh'))\n   local maxerrc, maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.atan()\n   local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'atan'))\n   local maxerrc, maxerrnc = f()\n   
mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.log()\n   local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'log'))\n   local maxerrc, maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.sqrt()\n   local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'sqrt'))\n   local maxerrc, maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.rsqrt()\n   local function TH_rsqrt(x)\n      return 1 / math.sqrt(x)\n   end\n\n   local f\n   local t = genericSingleOpTest:gsub('functionname', 'rsqrt'):gsub('math.rsqrt', 'TH_rsqrt')\n   local env = { TH_rsqrt=TH_rsqrt, torch=torch, math=math }\n   if not setfenv then -- Lua 5.2\n      f = load(t, 'test', 't', env)\n   else\n      f = loadstring(t)\n      setfenv(f, env)\n   end\n\n   local maxerrc, maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.sigmoid()\n   -- can't use genericSingleOpTest, since `math.sigmoid` doesn't exist, have to use\n   -- `torch.sigmoid` instead\n   local inputValues = {-1000,-1,0,0.5,1,2,1000}\n   local expectedOutput = {0.0000, 0.2689, 0.5, 0.6225, 0.7311, 0.8808, 1.000}\n\n   local precision_4dps = 0.0002\n\n   -- float\n   local inputFT = torch.FloatTensor(inputValues)\n   local expectedFT = torch.FloatTensor(expectedOutput)\n   mytester:assertlt((torch.sigmoid(inputFT) - expectedFT):abs():max(), precision_4dps, 'error in 
torch.sigmoid - single')\n   mytester:assertlt((inputFT - torch.FloatTensor(inputValues)):abs():max(), precision_4dps, 'error in torch.sigmoid - single')\n   local sigmoidFT = torch.FloatTensor(inputValues):sigmoid()\n   mytester:assertlt((sigmoidFT - expectedFT):abs():max(), precision_4dps, 'error in torch.sigmoid - single')\n\n   -- double\n   local inputDT = torch.DoubleTensor(inputValues)\n   local expectedDT = torch.DoubleTensor(expectedOutput)\n   mytester:assertlt((torch.sigmoid(inputDT) - expectedDT):abs():max(), precision_4dps, 'error in torch.sigmoid - double')\n   mytester:assertlt((inputDT - torch.DoubleTensor(inputValues)):abs():max(), precision_4dps, 'error in torch.sigmoid - double')\n   local sigmoidDT = torch.DoubleTensor(inputValues):sigmoid()\n   mytester:assertlt((sigmoidDT - expectedDT):abs():max(), precision_4dps, 'error in torch.sigmoid - double')\nend\n\nfunction torchtest.exp()\n   local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'exp'))\n   local maxerrc, maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.floor()\n   local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'floor'))\n   local maxerrc, maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.ceil()\n   local f = loadstring(string.gsub(genericSingleOpTest, 'functionname', 'ceil'))\n   local maxerrc, maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.frac()\n   local function TH_frac(x)\n      return math.fmod(x, 1)\n   end\n\n   local f\n   local t = 
genericSingleOpTest:gsub('functionname', 'frac'):gsub('math.frac', 'TH_frac')\n   local env = { TH_frac=TH_frac, torch=torch, math=math }\n   if not setfenv then -- Lua 5.2\n      f = load(t, 'test', 't', env)\n   else\n      f = loadstring(t)\n      setfenv(f, env)\n   end\n\n   local maxerrc, maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.trunc()\n   local function TH_trunc(x)\n      return x - math.fmod(x, 1)\n   end\n\n   local f\n   local t = genericSingleOpTest:gsub('functionname', 'trunc'):gsub('math.trunc', 'TH_trunc')\n   local env = { TH_trunc=TH_trunc, torch=torch, math=math }\n   if not setfenv then -- Lua 5.2\n      f = load(t, 'test', 't', env)\n   else\n      f = loadstring(t)\n      setfenv(f, env)\n   end\n\n   local maxerrc, maxerrnc = f()\n   mytester:assertlt(maxerrc, precision, 'error in torch.functionname - contiguous')\n   mytester:assertlt(maxerrnc, precision, 'error in torch.functionname - non-contiguous')\nend\n\nfunction torchtest.round()\n   -- [res] torch.round([res,] x)\n   -- contiguous\n   local m1 = torch.randn(100,100)\n   local res1 = torch.round(m1[{ 4,{} }])\n   local res2 = res1:clone():zero()\n   for i = 1,res1:size(1) do\n      res2[i] = math.floor(m1[4][i]+0.5)\n   end\n   local err = res1:clone():zero()\n   -- find absolute error\n   for i = 1, res1:size(1) do\n      err[i] = math.abs(res1[i] - res2[i])\n   end\n   -- find maximum element of error\n   local maxerrc = 0\n   for i = 1, err:size(1) do\n      if err[i] > maxerrc then\n         maxerrc = err[i]\n      end\n   end\n   mytester:assertlt(maxerrc, precision, 'error in torch.round - contiguous')\n\n   -- non-contiguous\n   local m1 = torch.randn(100,100)\n   local res1 = torch.round(m1[{ {}, 4 }])\n   local res2 = res1:clone():zero()\n   for i = 1,res1:size(1) do\n      res2[i] = 
math.floor(m1[i][4]+0.5)\n   end\n   local err = res1:clone():zero()\n   -- find absolute error\n   for i = 1, res1:size(1) do\n      err[i] = math.abs(res1[i] - res2[i])\n   end\n   -- find maximum element of error\n   local maxerrnc = 0\n   for i = 1, err:size(1) do\n      if err[i] > maxerrnc then\n         maxerrnc = err[i]\n      end\n   end\n   mytester:assertlt(maxerrnc, precision, 'error in torch.round - non-contiguous')\nend\n\nfunction torchtest.max()  -- torch.max([resval, resind,] x [,dim])\n\n   -- TH_TENSOR_BASE\n   local m1 = torch.Tensor(8,2):fill(3):select(2, 1)\n   local resval, resind = torch.max(m1, 1)\n   mytester:assert(resind[1] == 1)\n\n   -- torch.max( x )\n   -- contiguous\n   local m1 = torch.randn(100,100)\n   local res1 = torch.max(m1)\n   local res2 = m1[1][1]\n   for i = 1,m1:size(1) do\n      for j = 1,m1:size(2) do\n         if m1[i][j] > res2 then\n            res2 = m1[i][j]\n         end\n      end\n   end\n   local err = res1 - res2\n   mytester:assertlt(err, precision, 'error in torch.max - contiguous')\n\n   -- non-contiguous\n   local m1 = torch.randn(10,10,10)\n   local m2 = m1[{{}, 4, {}}]\n   local res1 = torch.max(m2)\n   local res2 = m2[1][1]\n   for i = 1,m2:size(1) do\n      for j = 1,m2:size(2) do\n         if m2[i][j] > res2 then\n            res2 = m2[i][j]\n         end\n      end\n   end\n   local err = res1 - res2\n   mytester:assertlt(err, precision, 'error in torch.max - non-contiguous')\n\n   -- torch.max([resval, resind,] x ,dim])\n   function lua_max(t, dim)\n      assert(t:nDimension() == 2)\n      max_val = t:narrow(dim, 1, 1):clone()\n      max_ind = t:narrow(dim, 1, 1):clone():long():fill(1)\n      other = 3 - dim\n      for i = 1, t:size(other) do\n         for j = 1, t:size(dim) do\n            val = t:select(other, i):select(dim, j)\n            max = max_val:select(other, i):select(dim, 1)\n            if val > max then\n               max_val:select(other, i):fill(val)\n               
max_ind:select(other, i):fill(j)\n            end\n         end\n      end\n      return max_val, max_ind\n   end\n\n   local m1 = torch.randn(100,100)\n   for dim = 1,2 do\n      local res1val, res1ind = torch.max(m1, dim)\n      local res2val, res2ind = lua_max(m1, dim)\n      mytester:asserteq((res1val-res2val):abs():max(), 0, 'error in torch.max')\n      mytester:asserteq((res1ind-res2ind):abs():max(), 0, 'error in torch.max')\n   end\n\n   -- NaNs\n   for index in pairs{1, 5, 100} do\n      local m1 = torch.randn(100)\n      m1[index] = 0/0\n      local res1val, res1ind = torch.max(m1, 1)\n      mytester:assert(res1val[1] ~= res1val[1], 'error in torch.max (value) - NaNs')\n      mytester:assert(res1ind[1] == index, 'error in torch.max (index) - NaNs')\n      local res1val = torch.max(m1)\n      mytester:assert(res1val ~= res1val, 'error in torch.max - NaNs')\n   end\n\n   -- dim == nDim -1\n   local a = torch.Tensor({{1,2},{3,4}}):select(2, 1)\n   local aval, aind = torch.max(a, 1)\n   mytester:assert(aval[1] == 3)\n   mytester:assert(aind[1] == 2)\n\n   local b = torch.Tensor({{{1,2},{3,4}},{{5,6},{7,8}}}):select(3, 1)\n   local bval, bind = torch.max(b, 2)\n   mytester:assert(bval[1][1] == 3)\n   mytester:assert(bind[1][1] == 2)\n   mytester:assert(bval[2][1] == 7)\n   mytester:assert(bind[2][1] == 2)\nend\n\nfunction torchtest.min()  -- torch.min([resval, resind,] x [,dim])\n   -- torch.min( x )\n   -- contiguous\n   local m1 = torch.randn(100,100)\n   local res1 = torch.min(m1)\n   local res2 = m1[1][1]\n   for i = 1,m1:size(1) do\n      for j = 1,m1:size(2) do\n         if m1[i][j] < res2 then\n            res2 = m1[i][j]\n         end\n      end\n   end\n   local err = res1 - res2\n   mytester:assertlt(err, precision, 'error in torch.min - contiguous')\n   -- non-contiguous\n   local m1 = torch.randn(10,10,10)\n   local m2 = m1[{{}, 4, {}}]\n   local res1 = torch.min(m2)\n   local res2 = m2[1][1]\n   for i = 1,m2:size(1) do\n      for j = 1,m2:size(2) 
do\n         if m2[i][j] < res2 then\n            res2 = m2[i][j]\n         end\n      end\n   end\n   local err = res1 - res2\n   mytester:assertlt(err, precision, 'error in torch.min - non-contiguous')\n\n   -- torch.min([resval, resind,] x, dim)\n   function lua_min(t, dim)\n      assert(t:nDimension() == 2)\n      max_val = t:narrow(dim, 1, 1):clone()\n      max_ind = t:narrow(dim, 1, 1):clone():long():fill(1)\n      other = 3 - dim\n      for i = 1, t:size(other) do\n         for j = 1, t:size(dim) do\n            val = t:select(other, i):select(dim, j)\n            max = max_val:select(other, i):select(dim, 1)\n            if val < max then\n               max_val:select(other, i):fill(val)\n               max_ind:select(other, i):fill(j)\n            end\n         end\n      end\n      return max_val, max_ind\n   end\n\n   local m1 = torch.randn(100,100)\n   for dim = 1,2 do\n      local res1val, res1ind = torch.min(m1, dim)\n      local res2val, res2ind = lua_min(m1, dim)\n      mytester:asserteq((res1val-res2val):abs():max(), 0, 'error in torch.max')\n      mytester:asserteq((res1ind-res2ind):abs():max(), 0, 'error in torch.max')\n   end\n\n   -- NaNs\n   for index in pairs{1, 5, 100} do\n      local m1 = torch.randn(100)\n      m1[index] = 0/0\n      local res1val, res1ind = torch.min(m1, 1)\n      mytester:assert(res1val[1] ~= res1val[1], 'error in torch.min (value) - NaNs')\n      mytester:assert(res1ind[1] == index, 'error in torch.min (index) - NaNs')\n      local res1val = torch.min(m1)\n      mytester:assert(res1val ~= res1val, 'error in torch.min - NaNs')\n   end\n\n   -- TH_TENSOR_BASE\n   local m1 = torch.Tensor(4):fill(3)\n   local resval, resind = torch.min(m1, 1)\n   mytester:assert(resind[1] == 1)\nend\n\nfunction torchtest.cmax()\n  -- Two tensors.\n  local a = torch.rand(msize, msize)\n  local b = torch.rand(msize, msize)\n  local c = torch.cmax(a, b)\n  local expected_c = torch.zeros(msize, msize)\n  expected_c:map2(a, b, function(_, a, 
b) return math.max(a, b) end)\n  mytester:assertTensorEq(expected_c, c, 0,\n                          'error in torch.cmax(tensor, tensor)')\n\n  -- Tensor and scalar.\n  local v = torch.uniform()\n  c = torch.cmax(a, v)\n  expected_c:map(a, function(_, a) return math.max(a, v) end)\n  mytester:assertTensorEq(expected_c, c, 0,\n                          'error in torch.cmax(tensor, scalar).')\nend\n\nfunction torchtest.cmin()\n  -- Two tensors.\n  local a = torch.rand(msize, msize)\n  local b = torch.rand(msize, msize)\n  local c = torch.cmin(a, b)\n  local expected_c = torch.zeros(msize, msize)\n  expected_c:map2(a, b, function(_, a, b) return math.min(a, b) end)\n  mytester:assertTensorEq(expected_c, c, 0,\n                          'error in torch.cmin(tensor, tensor)')\n\n  -- Tensor and scalar.\n  local v = torch.uniform()\n  c = torch.cmin(a, v)\n  expected_c:map(a, function(_, a) return math.min(a, v) end)\n  mytester:assertTensorEq(expected_c, c, 0,\n                          'error in torch.cmin(tensor, scalar).')\nend\n\nfunction torchtest.lerp()\n   local function TH_lerp(a, b, weight)\n      return a + weight * (b-a);\n   end\n\n   local a = torch.rand(msize, msize)\n   local b = torch.rand(msize, msize)\n   local w = math.random()\n   local result = torch.lerp(a, b, w)\n   local expected = a:new()\n   expected:map2(a, b, function(_, a, b) return TH_lerp(a, b, w) end)\n   mytester:assertTensorEq(expected, result, precision, 'error in torch.lerp(tensor, tensor, weight)')\n\n   local a = (math.random()*2-1) * 100000\n   local b = (math.random()*2-1) * 100000\n   local w = math.random()\n   local result = torch.lerp(a, b, w)\n   local expected = TH_lerp(a, b, w)\n   mytester:assertalmosteq(expected, result, precision, 'error in torch.lerp(scalar, scalar, weight)')\nend\n\nfor i, v in ipairs{{10}, {5, 5}} do\n   torchtest['allAndAny' .. 
i] =\n      function ()\n           local x = torch.ones(unpack(v)):byte()\n           mytester:assert(x:all(), 'error in all()')\n           mytester:assert(x:any(), 'error in any()')\n\n           x[3] = 0\n           mytester:assert(not x:all(), 'error in all()')\n           mytester:assert(x:any(), 'error in any()')\n\n           x:zero()\n           mytester:assert(not x:all(), 'error in all()')\n           mytester:assert(not x:any(), 'error in any()')\n\n           x:fill(2)\n           mytester:assert(x:all(), 'error in all()')\n           mytester:assert(x:any(), 'error in any()')\n       end\nend\n\nfunction torchtest.mv()\n   local m1 = torch.randn(100,100)\n   local v1 = torch.randn(100)\n\n   local res1 = torch.mv(m1,v1)\n\n   local res2 = res1:clone():zero()\n   for i = 1,m1:size(1) do\n      for j = 1,m1:size(2) do\n         res2[i] = res2[i] + m1[i][j] * v1[j]\n      end\n   end\n\n   local err = (res1-res2):abs():max()\n\n   mytester:assertlt(err, precision, 'error in torch.mv')\nend\n\nfunction torchtest.fill()\n   local types = {\n      'torch.ByteTensor',\n      'torch.CharTensor',\n      'torch.ShortTensor',\n      'torch.IntTensor',\n      'torch.FloatTensor',\n      'torch.DoubleTensor',\n      'torch.LongTensor',\n   }\n\n   for k,t in ipairs(types) do\n      -- [res] torch.fill([res,] tensor, value)\n      local m1 = torch.ones(100,100):type(t)\n      local res1 = m1:clone()\n      res1[{ 3,{} }]:fill(2)\n\n      local res2 = m1:clone()\n      for i = 1,m1:size(1) do\n\t res2[{ 3,i }] = 2\n      end\n\n      local err = (res1-res2):double():abs():max()\n\n      mytester:assertlt(err, precision, 'error in torch.fill - contiguous')\n\n      local m1 = torch.ones(100,100):type(t)\n      local res1 = m1:clone()\n      res1[{ {},3 }]:fill(2)\n\n      local res2 = m1:clone()\n      for i = 1,m1:size(1) do\n\t res2[{ i,3 }] = 2\n      end\n\n      local err = (res1-res2):double():abs():max()\n\n      mytester:assertlt(err, precision, 'error in 
torch.fill - non contiguous')\n   end\nend\n\nfunction torchtest.add()\n   local types = {\n      'torch.ByteTensor',\n      'torch.CharTensor',\n      'torch.ShortTensor',\n      'torch.IntTensor',\n      'torch.FloatTensor',\n      'torch.DoubleTensor',\n      'torch.LongTensor',\n   }\n\n   for k,t in ipairs(types) do\n       -- [res] torch.add([res,] tensor1, tensor2)\n       local m1 = torch.randn(100,100):type(t)\n       local v1 = torch.randn(100):type(t)\n\n       local res1 = torch.add(m1[{ 4,{} }],v1)\n\n       local res2 = res1:clone():zero()\n       for i = 1,m1:size(2) do\n           res2[i] = m1[4][i] + v1[i]\n       end\n\n       local err = (res1-res2):double():abs():max()\n\n       mytester:assertlt(err, precision, 'error in torch.add - contiguous' .. ' ' .. t)\n\n       local m1 = torch.randn(100,100):type(t)\n       local v1 = torch.randn(100):type(t)\n\n       local res1 = torch.add(m1[{ {},4 }],v1)\n\n       local res2 = res1:clone():zero()\n       for i = 1,m1:size(1) do\n           res2[i] = m1[i][4] + v1[i]\n       end\n\n       local err = (res1-res2):double():abs():max()\n\n       mytester:assertlt(err, precision, 'error in torch.add - non contiguous' .. ' ' .. t)\n\n       -- [res] torch.add([res,] tensor, value)\n       local m1 = torch.randn(10,10):type(t)\n       local res1 = m1:clone()\n       res1[{ 3,{} }]:add(2)\n\n       local res2 = m1:clone()\n       for i = 1,m1:size(1) do\n           res2[{ 3,i }] = res2[{ 3,i }] + 2\n       end\n\n       local err = (res1-res2):double():abs():max()\n\n       mytester:assertlt(err, precision, 'error in torch.add - scalar, contiguous' .. ' ' .. 
t)\n\n       local m1 = torch.randn(10,10)\n       local res1 = m1:clone()\n       res1[{ {},3 }]:add(2)\n\n       local res2 = m1:clone()\n       for i = 1,m1:size(1) do\n           res2[{ i,3 }] = res2[{ i,3 }] + 2\n       end\n\n       local err = (res1-res2):abs():max()\n\n       mytester:assertlt(err, precision, 'error in torch.add - scalar, non contiguous' .. ' ' .. t)\n\n       -- [res] torch.add([res,] tensor1, value, tensor2)\n   end\nend\n\nfunction torchtest.csub()\n   local rngState = torch.getRNGState()\n   torch.manualSeed(123)\n\n   local a = torch.randn(100,90)\n   local b = a:clone():normal()\n\n   local res_add = torch.add(a, -1, b)\n   local res_csub = a:clone()\n   res_csub:csub(b)\n\n   mytester:assertlt((res_add - res_csub):abs():max(), 0.00001)\n\n   local _ = torch.setRNGState(rngState)\nend\n\nfunction torchtest.csub_scalar()\n   local rngState = torch.getRNGState()\n   torch.manualSeed(123)\n\n   local a = torch.randn(100,100)\n\n   local scalar = 123.5\n   local res_add = torch.add(a, -scalar)\n   local res_csub = a:clone()\n   res_csub:csub(scalar)\n\n   mytester:assertlt((res_add - res_csub):abs():max(), 0.00001)\n\n   local _ = torch.setRNGState(rngState)\nend\n\nfunction torchtest.neg()\n   local rngState = torch.getRNGState()\n   torch.manualSeed(123)\n\n   local a = torch.randn(100,90)\n   local zeros = torch.Tensor():resizeAs(a):zero()\n\n   local res_add = torch.add(zeros, -1, a)\n   local res_neg = a:clone()\n   res_neg:neg()\n\n   mytester:assertlt((res_add - res_neg):abs():max(), 0.00001)\n\n   local _ = torch.setRNGState(rngState)\nend\n\nfunction torchtest.cinv()\n   local rngState = torch.getRNGState()\n   torch.manualSeed(123)\n\n   local a = torch.randn(100,89)\n   local zeros = torch.Tensor():resizeAs(a):zero()\n\n   local res_pow = torch.pow(a, -1)\n   local res_inv = a:clone()\n   res_inv:cinv()\n\n   mytester:assertlt((res_pow - res_inv):abs():max(), 0.00001)\n\n   local _ = torch.setRNGState(rngState)\nend\n\nfunction 
torchtest.mul()\n   local types = {\n      'torch.ByteTensor',\n      'torch.CharTensor',\n      'torch.ShortTensor',\n      'torch.IntTensor',\n      'torch.FloatTensor',\n      'torch.DoubleTensor',\n      'torch.LongTensor',\n   }\n\n   for k,t in ipairs(types) do\n       local m1 = torch.randn(10,10):type(t)\n       local res1 = m1:clone()\n\n       res1[{ {},3 }]:mul(2)\n\n       local res2 = m1:clone()\n       for i = 1,m1:size(1) do\n           res2[{ i,3 }] = res2[{ i,3 }] * 2\n       end\n\n       local err = (res1-res2):double():abs():max()\n\n       mytester:assertlt(err, precision, 'error in torch.mul - scalar, non contiguous' .. ' ' .. t)\n   end\nend\n\nfunction torchtest.div()\n    local types = {\n        'torch.ByteTensor',\n        'torch.CharTensor',\n        'torch.ShortTensor',\n        'torch.IntTensor',\n        'torch.FloatTensor',\n        'torch.DoubleTensor',\n        'torch.LongTensor',\n    }\n\n    for k,t in ipairs(types) do\n\n        local m1 = torch.Tensor(10,10):uniform(0,10):type(t)\n        local res1 = m1:clone()\n\n        res1[{ {},3 }]:div(2)\n\n        local res2 = m1:clone()\n        for i = 1,m1:size(1) do\n            local ok = pcall(function() res2[{ i,3 }] = res2[{ i,3 }] / 2 end)\n            if not ok then\n               res2[{ i,3 }] = torch.floor(res2[{ i,3 }] / 2)\n            end\n        end\n\n        local err = (res1-res2):double():abs():max()\n\n        mytester:assertlt(err, precision, 'error in torch.div - scalar, non contiguous' .. ' ' .. 
t)\n    end\nend\n\nfunction torchtest.lshift()\n   local m1 = torch.LongTensor(10,10):random(0,100)\n   local res1 = m1:clone()\n\n   local q = 2\n   local f = math.pow(2, q)\n   res1[{ {},3 }]:lshift(q)\n\n   local res2 = m1:clone()\n   for i = 1,m1:size(1) do\n      res2[{ i,3 }] = res2[{ i,3 }] * f\n   end\n\n   local err = (res1-res2):abs():max()\n\n   mytester:assertlt(err, precision, 'error in torch.lshift - scalar, non contiguous')\n\n   local m1 = torch.LongTensor(10,10):random(0,100)\n   local res1 = m1:clone()\n\n   local q = 2\n   res1:lshift(q)\n\n   local res2 = m1:clone()\n   for i = 1,m1:size(1) do\n      for j = 1,m1:size(1) do\n         res2[{ i,j }] = res2[{ i,j }] * f\n      end\n   end\n\n   local err = (res1-res2):abs():max()\n\n   mytester:assertlt(err, precision, 'error in torch.lshift - scalar, contiguous')\nend\n\nfunction torchtest.rshift()\n   local m1 = torch.LongTensor(10,10):random(0,100)\n   local res1 = m1:clone()\n\n   local q = 2\n   local f = math.pow(2, q)\n   res1[{ {},3 }]:rshift(q)\n\n   local res2 = m1:clone()\n   for i = 1,m1:size(1) do\n      res2[{ i,3 }] = math.floor(res2[{ i,3 }] / f)\n   end\n\n   local err = (res1-res2):abs():max()\n\n   mytester:assertlt(err, precision, 'error in torch.rshift - scalar, non contiguous')\n\n   local m1 = torch.LongTensor(10,10):random(0,100)\n   local res1 = m1:clone()\n\n   local q = 2\n   res1:rshift(q)\n\n   local res2 = m1:clone()\n   for i = 1,m1:size(1) do\n      for j = 1,m1:size(1) do\n         res2[{ i,j }] = math.floor(res2[{ i,j }] / f)\n      end\n   end\n\n   local err = (res1-res2):abs():max()\n\n   mytester:assertlt(err, precision, 'error in torch.rshift - scalar, contiguous')\nend\n\nfunction torchtest.fmod()\n   local m1 = torch.Tensor(10,10):uniform(-10, 10)\n   local res1 = m1:clone()\n\n   local q = 2.1\n   res1[{ {},3 }]:fmod(q)\n\n   local res2 = m1:clone()\n   for i = 1,m1:size(1) do\n      res2[{ i,3 }] = math.fmod(res2[{ i,3 }], q)\n   end\n\n   local err = 
(res1-res2):abs():max()\n\n   mytester:assertlt(err, precision, 'error in torch.fmod - scalar, non contiguous')\nend\n\nfunction torchtest.remainder()\n   local m1 = torch.Tensor(10, 10):uniform(-10, 10)\n   local res1 = m1:clone()\n\n   local q = 2.1\n   res1[{ {},3 }]:remainder(q)\n\n   local res2 = m1:clone()\n   for i = 1,m1:size(1) do\n      res2[{ i,3 }] = res2[{ i,3 }] % q\n   end\n\n   local err = (res1-res2):abs():max()\n\n   mytester:assertlt(err, precision, 'error in torch.remainder - scalar, non contiguous')\nend\n\nfunction torchtest.bitand()\n   local m1 = torch.LongTensor(10,10):random(0,100)\n   local res1 = m1:clone()\n\n   local val = 32 -- This should be a power of 2\n   res1[{ {},3 }]:bitand(val - 1)\n\n   local res2 = m1:clone()\n   for i = 1,m1:size(1) do\n      res2[{ i,3 }] = res2[{ i,3 }] % val\n   end\n\n   local err = (res1-res2):abs():max()\n\n   mytester:assertlt(err, precision, 'error in torch.bitand - scalar, non contiguous')\n\n   local m1 = torch.LongTensor(10,10):random(0,100)\n   local res1 = m1:clone()\n\n   res1:bitand(val - 1)\n\n   local res2 = m1:clone()\n   for i = 1,m1:size(1) do\n      for j = 1,m1:size(1) do\n         res2[{ i,j }] = res2[{ i,j }] % val\n      end\n   end\n\n   local err = (res1-res2):abs():max()\n\n   mytester:assertlt(err, precision, 'error in torch.bitand - scalar, contiguous')\nend\n\nfunction torchtest.bitor()\n   local m1 = torch.LongTensor(10,10):random(0,10000)\n   local res1 = m1:clone()\n\n   local val = 32 -- This should be a power of 2\n   res1[{ {},3 }]:bitor(val-1)\n\n   local res2 = m1:clone()\n   for i = 1,m1:size(1) do\n      res2[{ i,3 }] = math.floor(res2[{ i,3 }] / val) * val + (val - 1)\n   end\n\n   local err = (res1-res2):abs():max()\n\n   mytester:assertlt(err, precision, 'error in torch.bitor - scalar, non contiguous')\n\n   local m1 = torch.LongTensor(10,10):random(0,10000)\n   local res1 = m1:clone()\n\n   res1:bitor(val - 1)\n\n   local res2 = m1:clone()\n   for i = 
1,m1:size(1) do\n      for j = 1,m1:size(1) do\n         res2[{ i,j }] = math.floor(res2[{ i,j }] / val) * val + (val - 1)\n      end\n   end\n\n   local err = (res1-res2):abs():max()\n\n   mytester:assertlt(err, precision, 'error in torch.bitor - scalar, contiguous')\nend\n\nfunction torchtest.cbitxor()\n   local t1 = torch.LongTensor(10,10):random(0,10000)\n   local t2 = torch.LongTensor(10,10):random(10001,20000)\n\n   -- Perform xor swap and check results\n   local t3 = torch.cbitxor(t1, t2)\n   local r1 = torch.cbitxor(t3, t2)\n   local r2 = torch.cbitxor(t3, t1)\n\n   local err1 = (r1 - t1):abs():max()\n   local err2 = (r2 - t2):abs():max()\n   mytester:assertlt(err1 + err2, precision, 'error in torch.cbitxor contiguous')\nend\n\nfunction torchtest.mm()\n   -- helper function\n   local function matrixmultiply(mat1,mat2)\n      local n = mat1:size(1)\n      local m = mat1:size(2)\n      local p = mat2:size(2)\n      local res = torch.zeros(n,p)\n      for i = 1, n do\n         for j = 1, p do\n            local sum = 0\n            for k = 1, m do\n               sum = sum + mat1[i][k]*mat2[k][j]\n            end\n            res[i][j] = sum\n         end\n      end\n      return res\n   end\n\n   -- contiguous case\n   local n, m, p = 10, 10, 5\n   local mat1 = torch.randn(n,m)\n   local mat2 = torch.randn(m,p)\n   local res = torch.mm(mat1,mat2)\n\n   local res2 = matrixmultiply(mat1,mat2)\n   mytester:assertTensorEq(res,res2,precision,'error in torch.mm')\n\n   -- non contiguous case 1\n   local n, m, p = 10, 10, 5\n   local mat1 = torch.randn(n,m)\n   local mat2 = torch.randn(p,m):t()\n   local res = torch.mm(mat1,mat2)\n\n   local res2 = matrixmultiply(mat1,mat2)\n   mytester:assertTensorEq(res,res2,precision,'error in torch.mm, non contiguous')\n\n   -- non contiguous case 2\n   local n, m, p = 10, 10, 5\n   local mat1 = torch.randn(m,n):t()\n   local mat2 = torch.randn(m,p)\n   local res = torch.mm(mat1,mat2)\n\n   local res2 = 
matrixmultiply(mat1,mat2)\n   mytester:assertTensorEq(res,res2,precision,'error in torch.mm, non contiguous')\n\n   -- non contiguous case 3\n   local n, m, p = 10, 10, 5\n   local mat1 = torch.randn(m,n):t()\n   local mat2 = torch.randn(p,m):t()\n   local res = torch.mm(mat1,mat2)\n\n   local res2 = matrixmultiply(mat1,mat2)\n   mytester:assertTensorEq(res,res2,precision,'error in torch.mm, non contiguous')\n\n   -- test with zero stride\n   local n, m, p = 10, 10, 5\n   local mat1 = torch.randn(n,m)\n   local mat2 = torch.randn(m,1):expand(m,p)\n   local res = torch.mm(mat1,mat2)\n\n   local res2 = matrixmultiply(mat1,mat2)\n   mytester:assertTensorEq(res,res2,precision,'error in torch.mm, non contiguous, zero stride')\n\nend\n\nfunction torchtest.bmm()\n   local num_batches = 10\n   local M, N, O = 23, 8, 12\n   local b1 = torch.randn(num_batches, M, N)\n   local b2 = torch.randn(num_batches, N, O)\n   local res = torch.bmm(b1, b2)\n\n   for i = 1, num_batches do\n     local r = torch.mm(b1[i], b2[i])\n     mytester:assertTensorEq(r, res[i], precision, 'result matrix ' .. i .. 
' wrong')\n   end\nend\n\nfunction torchtest.addbmm()\n   local num_batches = 10\n   local M, N, O = 12, 8, 5\n   local b1 = torch.randn(num_batches, M, N)\n   local b2 = torch.randn(num_batches, N, O)\n   local res = torch.bmm(b1, b2)\n   local res2 = torch.Tensor():resizeAs(res[1]):zero()\n\n   res2:addbmm(b1,b2)\n   mytester:assertTensorEq(res2, res:sum(1)[1], precision, 'addbmm result wrong')\n\n   res2:addbmm(1,b1,b2)\n   mytester:assertTensorEq(res2, res:sum(1)[1]*2, precision, 'addbmm result wrong')\n\n   res2:addbmm(1,res2,.5,b1,b2)\n   mytester:assertTensorEq(res2, res:sum(1)[1]*2.5, precision, 'addbmm result wrong')\n\n   local res3 = torch.addbmm(1,res2,0,b1,b2)\n   mytester:assertTensorEq(res3, res2, precision, 'addbmm result wrong')\n\n   local res4 = torch.addbmm(1,res2,.5,b1,b2)\n   mytester:assertTensorEq(res4, res:sum(1)[1]*3, precision, 'addbmm result wrong')\n\n   local res5 = torch.addbmm(0,res2,1,b1,b2)\n   mytester:assertTensorEq(res5, res:sum(1)[1], precision, 'addbmm result wrong')\n\n   local res6 = torch.addbmm(.1,res2,.5,b1,b2)\n   mytester:assertTensorEq(res6, res2*.1 + res:sum(1)*.5, precision, 'addbmm result wrong')\nend\n\nfunction torchtest.baddbmm()\n   local num_batches = 10\n   local M, N, O = 12, 8, 5\n   local b1 = torch.randn(num_batches, M, N)\n   local b2 = torch.randn(num_batches, N, O)\n   local res = torch.bmm(b1, b2)\n   local res2 = torch.Tensor():resizeAs(res):zero()\n\n   res2:baddbmm(b1,b2)\n   mytester:assertTensorEq(res2, res, precision, 'baddbmm result wrong')\n\n   res2:baddbmm(1,b1,b2)\n   mytester:assertTensorEq(res2, res*2, precision, 'baddbmm result wrong')\n\n   res2:baddbmm(1,res2,.5,b1,b2)\n   mytester:assertTensorEq(res2, res*2.5, precision, 'baddbmm result wrong')\n\n   local res3 = torch.baddbmm(1,res2,0,b1,b2)\n   mytester:assertTensorEq(res3, res2, precision, 'baddbmm result wrong')\n\n   local res4 = torch.baddbmm(1,res2,.5,b1,b2)\n   mytester:assertTensorEq(res4, res*3, precision, 'baddbmm result 
wrong')\n\n   local res5 = torch.baddbmm(0,res2,1,b1,b2)\n   mytester:assertTensorEq(res5, res, precision, 'baddbmm result wrong')\n\n   local res6 = torch.baddbmm(.1,res2,.5,b1,b2)\n   mytester:assertTensorEq(res6, res2*.1 + res*.5, precision, 'baddbmm result wrong')\nend\n\nfunction torchtest.clamp()\n   local m1 = torch.rand(100):mul(5):add(-2.5)  -- uniform in [-2.5, 2.5]\n   -- just in case we're extremely lucky:\n   local min_val = -1\n   local max_val = 1\n   m1[1] = min_val\n   m1[2] = max_val\n   local res1 = m1:clone()\n\n   res1:clamp(min_val, max_val)\n\n   local res2 = m1:clone()\n   for i = 1,m1:size(1) do\n      if res2[i] > max_val then\n         res2[i] = max_val\n      elseif res2[i] < min_val then\n         res2[i] = min_val\n      end\n   end\n\n   local err = (res1-res2):abs():max()\n\n   mytester:assertlt(err, precision, 'error in torch.clamp - scalar, non contiguous')\nend\n\nfunction torchtest.pow() -- [res] torch.pow([res,] x)\n   -- base - tensor, exponent - number\n   -- contiguous\n   local m1 = torch.randn(100,100)\n   local res1 = torch.pow(m1[{ 4,{} }], 3)\n   local res2 = res1:clone():zero()\n   for i = 1,res1:size(1) do\n      res2[i] = math.pow(m1[4][i], 3)\n   end\n   local err = res1:clone():zero()\n   -- find absolute error\n   for i = 1, res1:size(1) do\n      err[i] = math.abs(res1[i] - res2[i])\n   end\n   -- find maximum element of error\n   local maxerr = 0\n   for i = 1, err:size(1) do\n      if err[i] > maxerr then\n         maxerr = err[i]\n      end\n   end\n   mytester:assertlt(maxerr, precision, 'error in torch.pow - contiguous')\n\n   -- non-contiguous\n   local m1 = torch.randn(100,100)\n   local res1 = torch.pow(m1[{ {}, 4 }], 3)\n   local res2 = res1:clone():zero()\n   for i = 1,res1:size(1) do\n      res2[i] = math.pow(m1[i][4], 3)\n   end\n   local err = res1:clone():zero()\n   -- find absolute error\n   for i = 1, res1:size(1) do\n      err[i] = math.abs(res1[i] - res2[i])\n   end\n   -- find maximum element of 
error\n   local maxerr = 0\n   for i = 1, err:size(1) do\n      if err[i] > maxerr then\n         maxerr = err[i]\n      end\n   end\n   mytester:assertlt(maxerr, precision, 'error in torch.pow - non-contiguous')\n\n   -- base - number, exponent - tensor\n   -- contiguous\n   local m1 = torch.randn(100,100)\n   local res1 = torch.pow(3, m1[{ 4,{} }])\n   local res2 = res1:clone():zero()\n   for i = 1,res1:size(1) do\n      res2[i] = math.pow(3, m1[4][i])\n   end\n   local err = res1:clone():zero()\n   -- find absolute error\n   for i = 1, res1:size(1) do\n      err[i] = math.abs(res1[i] - res2[i])\n   end\n   -- find maximum element of error\n   local maxerr = 0\n   for i = 1, err:size(1) do\n      if err[i] > maxerr then\n         maxerr = err[i]\n      end\n   end\n   mytester:assertlt(maxerr, precision, 'error in torch.pow - contiguous')\n\n   -- non-contiguous\n   local m1 = torch.randn(100,100)\n   local res1 = torch.pow(3, m1[{ {}, 4 }])\n   local res2 = res1:clone():zero()\n   for i = 1,res1:size(1) do\n      res2[i] = math.pow(3, m1[i][4])\n   end\n   local err = res1:clone():zero()\n   -- find absolute error\n   for i = 1, res1:size(1) do\n      err[i] = math.abs(res1[i] - res2[i])\n   end\n   -- find maximum element of error\n   local maxerr = 0\n   for i = 1, err:size(1) do\n      if err[i] > maxerr then\n         maxerr = err[i]\n      end\n   end\n   mytester:assertlt(maxerr, precision, 'error in torch.pow - non-contiguous')\nend\n\nfunction torchtest.cdiv()\n    local types = {\n        'torch.ByteTensor',\n        'torch.CharTensor',\n        'torch.ShortTensor',\n        'torch.IntTensor',\n        'torch.FloatTensor',\n        'torch.DoubleTensor',\n        'torch.LongTensor',\n    }\n\n    for k,t in ipairs(types) do\n\n        -- [res] torch.cdiv([res,] tensor1, tensor2)\n        -- contiguous\n        local m1 = torch.Tensor(10, 10, 10):uniform(0,10):type(t)\n        local m2 = torch.Tensor(10, 10 * 10):uniform(0,10):type(t)\n        
m2[m2:eq(0)] = 2\n        local sm1 = m1[{4, {}, {}}]\n        local sm2 = m2[{4, {}}]\n        local res1 = torch.cdiv(sm1, sm2)\n        local res2 = res1:clone():zero()\n        for i = 1,sm1:size(1) do\n            for j = 1, sm1:size(2) do\n                local idx1d = (((i-1)*sm1:size(1)))+j\n                local ok = pcall(function() res2[i][j] = sm1[i][j] / sm2[idx1d] end)\n                if not ok then\n                   res2[i][j] = torch.floor(sm1[i][j] / sm2[idx1d])\n                end\n            end\n        end\n        local err = res1:clone():zero()\n        -- find absolute error\n        for i = 1, res1:size(1) do\n            for j = 1, res1:size(2) do\n                err[i][j] = math.abs(res1[i][j] - res2[i][j])\n            end\n        end\n        -- find maximum element of error\n        local maxerr = 0\n        for i = 1, err:size(1) do\n            for j = 1, err:size(2) do\n                if err[i][j] > maxerr then\n                    maxerr = err[i][j]\n                end\n            end\n        end\n        mytester:assertlt(maxerr, precision, 'error in torch.cdiv - contiguous' .. ' ' .. 
t)\n\n        -- non-contiguous\n        local m1 = torch.Tensor(10, 10, 10):uniform(0,10):type(t)\n        local m2 = torch.Tensor(10 * 10, 10 * 10):uniform(0,10):type(t)\n        m2[m2:eq(0)] = 2\n        local sm1 = m1[{{}, 4, {}}]\n        local sm2 = m2[{{}, 4}]\n        local res1 = torch.cdiv(sm1, sm2)\n        local res2 = res1:clone():zero()\n        for i = 1,sm1:size(1) do\n            for j = 1, sm1:size(2) do\n                local idx1d = (((i-1)*sm1:size(1)))+j\n                local ok = pcall(function() res2[i][j] = sm1[i][j] / sm2[idx1d] end)\n                if not ok then\n                   res2[i][j] = torch.floor(sm1[i][j] / sm2[idx1d])\n                end\n            end\n        end\n        local err = res1:clone():zero()\n        -- find absolute error\n        for i = 1, res1:size(1) do\n            for j = 1, res1:size(2) do\n                err[i][j] = math.abs(res1[i][j] - res2[i][j])\n            end\n        end\n        -- find maximum element of error\n        local maxerr = 0\n        for i = 1, err:size(1) do\n            for j = 1, err:size(2) do\n                if err[i][j] > maxerr then\n                    maxerr = err[i][j]\n                end\n            end\n        end\n        mytester:assertlt(maxerr, precision, 'error in torch.cdiv - non-contiguous' .. ' ' .. 
t)\n   end\nend\n\nfunction torchtest.cfmod()\n   -- contiguous\n   local m1 = torch.Tensor(10, 10, 10):uniform(-10, 10)\n   local m2 = torch.Tensor(10, 10 * 10):uniform(-3, 3)\n   local sm1 = m1[{4, {}, {}}]\n   local sm2 = m2[{4, {}}]\n   local res1 = torch.cfmod(sm1, sm2)\n   local res2 = res1:clone():zero()\n   for i = 1,sm1:size(1) do\n      for j = 1, sm1:size(2) do\n         local idx1d = (((i-1)*sm1:size(1)))+j\n         res2[i][j] = math.fmod(sm1[i][j], sm2[idx1d])\n      end\n   end\n   local err = res1:clone():zero()\n   -- find absolute error\n   for i = 1, res1:size(1) do\n      for j = 1, res1:size(2) do\n         err[i][j] = math.abs(res1[i][j] - res2[i][j])\n      end\n   end\n   -- find maximum element of error\n   local maxerr = 0\n   for i = 1, err:size(1) do\n      for j = 1, err:size(2) do\n         if err[i][j] > maxerr then\n            maxerr = err[i][j]\n         end\n      end\n   end\n   mytester:assertlt(maxerr, precision, 'error in torch.cfmod - contiguous')\n\n   -- non-contiguous\n   local m1 = torch.Tensor(10, 10, 10):uniform(-10, 10)\n   local m2 = torch.Tensor(10 * 10, 10 * 10):uniform(-3, 3)\n   local sm1 = m1[{{}, 4, {}}]\n   local sm2 = m2[{{}, 4}]\n   local res1 = torch.cfmod(sm1, sm2)\n   local res2 = res1:clone():zero()\n   for i = 1,sm1:size(1) do\n      for j = 1, sm1:size(2) do\n         local idx1d = (((i-1)*sm1:size(1)))+j\n         res2[i][j] = math.fmod(sm1[i][j], sm2[idx1d])\n      end\n   end\n   local err = res1:clone():zero()\n   -- find absolute error\n   for i = 1, res1:size(1) do\n      for j = 1, res1:size(2) do\n         err[i][j] = math.abs(res1[i][j] - res2[i][j])\n      end\n   end\n   -- find maximum element of error\n   local maxerr = 0\n   for i = 1, err:size(1) do\n      for j = 1, err:size(2) do\n         if err[i][j] > maxerr then\n            maxerr = err[i][j]\n         end\n      end\n   end\n   mytester:assertlt(maxerr, precision, 'error in torch.cfmod - non-contiguous')\nend\n\nfunction 
torchtest.cremainder()\n   -- contiguous\n   local m1 = torch.Tensor(10, 10, 10):uniform(-10, 10)\n   local m2 = torch.Tensor(10, 10 * 10):uniform(-3, 3)\n   local sm1 = m1[{4, {}, {}}]\n   local sm2 = m2[{4, {}}]\n   local res1 = torch.cremainder(sm1, sm2)\n   local res2 = res1:clone():zero()\n   for i = 1,sm1:size(1) do\n      for j = 1, sm1:size(2) do\n         local idx1d = (((i-1)*sm1:size(1)))+j\n         res2[i][j] = sm1[i][j] % sm2[idx1d]\n      end\n   end\n   local err = res1:clone():zero()\n   -- find absolute error\n   for i = 1, res1:size(1) do\n      for j = 1, res1:size(2) do\n         err[i][j] = math.abs(res1[i][j] - res2[i][j])\n      end\n   end\n   -- find maximum element of error\n   local maxerr = 0\n   for i = 1, err:size(1) do\n      for j = 1, err:size(2) do\n         if err[i][j] > maxerr then\n            maxerr = err[i][j]\n         end\n      end\n   end\n   mytester:assertlt(maxerr, precision, 'error in torch.cremainder - contiguous')\n\n   -- non-contiguous\n   local m1 = torch.Tensor(10, 10, 10):uniform(-10, 10)\n   local m2 = torch.Tensor(10 * 10, 10 * 10):uniform(-3, 3)\n   local sm1 = m1[{{}, 4, {}}]\n   local sm2 = m2[{{}, 4}]\n   local res1 = torch.cremainder(sm1, sm2)\n   local res2 = res1:clone():zero()\n   for i = 1,sm1:size(1) do\n      for j = 1, sm1:size(2) do\n         local idx1d = (((i-1)*sm1:size(1)))+j\n         res2[i][j] = sm1[i][j] % sm2[idx1d]\n      end\n   end\n   local err = res1:clone():zero()\n   -- find absolute error\n   for i = 1, res1:size(1) do\n      for j = 1, res1:size(2) do\n         err[i][j] = math.abs(res1[i][j] - res2[i][j])\n      end\n   end\n   -- find maximum element of error\n   local maxerr = 0\n   for i = 1, err:size(1) do\n      for j = 1, err:size(2) do\n         if err[i][j] > maxerr then\n            maxerr = err[i][j]\n         end\n      end\n   end\n   mytester:assertlt(maxerr, precision, 'error in torch.cremainder - non-contiguous')\nend\n\nfunction torchtest.cmul()\n    local 
types = {\n        'torch.ByteTensor',\n        'torch.CharTensor',\n        'torch.ShortTensor',\n        'torch.IntTensor',\n        'torch.FloatTensor',\n        'torch.DoubleTensor',\n        'torch.LongTensor',\n    }\n\n    for k,t in ipairs(types) do\n\n        -- [res] torch.cmul([res,] tensor1, tensor2)\n        -- contiguous\n        local m1 = torch.randn(10, 10, 10):type(t)\n        local m2 = torch.randn(10, 10 * 10):type(t)\n        local sm1 = m1[{4, {}, {}}]\n        local sm2 = m2[{4, {}}]\n        local res1 = torch.cmul(sm1, sm2)\n        local res2 = res1:clone():zero()\n        for i = 1,sm1:size(1) do\n            for j = 1, sm1:size(2) do\n                local idx1d = (((i-1)*sm1:size(1)))+j\n                res2[i][j] = sm1[i][j] * sm2[idx1d]\n            end\n        end\n        local err = res1:clone():zero()\n        -- find absolute error\n        for i = 1, res1:size(1) do\n            for j = 1, res1:size(2) do\n                err[i][j] = math.abs(res1[i][j] - res2[i][j])\n            end\n        end\n        -- find maximum element of error\n        local maxerr = 0\n        for i = 1, err:size(1) do\n            for j = 1, err:size(2) do\n                if err[i][j] > maxerr then\n                    maxerr = err[i][j]\n                end\n            end\n        end\n        mytester:assertlt(maxerr, precision, 'error in torch.cmul - contiguous' .. ' ' .. 
t)\n\n        -- non-contiguous\n        local m1 = torch.randn(10, 10, 10):type(t)\n        local m2 = torch.randn(10 * 10, 10 * 10):type(t)\n        local sm1 = m1[{{}, 4, {}}]\n        local sm2 = m2[{{}, 4}]\n        local res1 = torch.cmul(sm1, sm2)\n        local res2 = res1:clone():zero()\n        for i = 1,sm1:size(1) do\n            for j = 1, sm1:size(2) do\n                local idx1d = (((i-1)*sm1:size(1)))+j\n                res2[i][j] = sm1[i][j] * sm2[idx1d]\n            end\n        end\n        local err = res1:clone():zero()\n        -- find absolute error\n        for i = 1, res1:size(1) do\n            for j = 1, res1:size(2) do\n                err[i][j] = math.abs(res1[i][j] - res2[i][j])\n            end\n        end\n        -- find maximum element of error\n        local maxerr = 0\n        for i = 1, err:size(1) do\n            for j = 1, err:size(2) do\n                if err[i][j] > maxerr then\n                    maxerr = err[i][j]\n                end\n            end\n        end\n        mytester:assertlt(maxerr, precision, 'error in torch.cmul - non-contiguous' .. ' ' .. 
t)\n    end\nend\n\nfunction torchtest.cpow()  -- [res] torch.cpow([res,] tensor1, tensor2)\n   -- contiguous\n   local m1 = torch.rand(10, 10, 10)\n   local m2 = torch.rand(10, 10 * 10)\n   local sm1 = m1[{4, {}, {}}]\n   local sm2 = m2[{4, {}}]\n   local res1 = torch.cpow(sm1, sm2)\n   local res2 = res1:clone():zero()\n   for i = 1,sm1:size(1) do\n      for j = 1, sm1:size(2) do\n         local idx1d = (((i-1)*sm1:size(1)))+j\n         res2[i][j] = math.pow(sm1[i][j], sm2[idx1d])\n      end\n   end\n   local err = res1:clone():zero()\n   -- find absolute error\n   for i = 1, res1:size(1) do\n      for j = 1, res1:size(2) do\n         err[i][j] = math.abs(res1[i][j] - res2[i][j])\n      end\n   end\n   -- find maximum element of error\n   local maxerr = 0\n   for i = 1, err:size(1) do\n      for j = 1, err:size(2) do\n         if err[i][j] > maxerr then\n            maxerr = err[i][j]\n         end\n      end\n   end\n   mytester:assertlt(maxerr, precision, 'error in torch.cpow - contiguous')\n\n   -- non-contiguous\n   local m1 = torch.rand(10, 10, 10)\n   local m2 = torch.rand(10 * 10, 10 * 10)\n   local sm1 = m1[{{}, 4, {}}]\n   local sm2 = m2[{{}, 4}]\n   local res1 = torch.cpow(sm1, sm2)\n   local res2 = res1:clone():zero()\n   for i = 1,sm1:size(1) do\n      for j = 1, sm1:size(2) do\n         local idx1d = (((i-1)*sm1:size(1)))+j\n         res2[i][j] = math.pow(sm1[i][j],sm2[idx1d])\n      end\n   end\n   local err = res1:clone():zero()\n   -- find absolute error\n   for i = 1, res1:size(1) do\n      for j = 1, res1:size(2) do\n         err[i][j] = math.abs(res1[i][j] - res2[i][j])\n      end\n   end\n   -- find maximum element of error\n   local maxerr = 0\n   for i = 1, err:size(1) do\n      for j = 1, err:size(2) do\n         if err[i][j] > maxerr then\n            maxerr = err[i][j]\n         end\n      end\n   end\n   mytester:assertlt(maxerr, precision, 'error in torch.cpow - non-contiguous')\nend\n\nfunction torchtest.sum()\n   local x = 
torch.rand(msize,msize)\n   local mx = torch.sum(x,2)\n   local mxx = torch.Tensor()\n   torch.sum(mxx,x,2)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.sum value')\n\n   local y = torch.rand(5, 5, 5)\n   for i=1,3 do\n      local a = y:sum(i)\n      local b = y:narrow(i, 1, 1):clone():zero()\n      for j = 1, 5 do\n         b:add(y:narrow(i, j, 1))\n      end\n      mytester:asserteq(maxdiff(a, b), 0, 'torch.sum value')\n   end\nend\nfunction torchtest.prod()\n   local x = torch.rand(msize,msize)\n   local mx = torch.prod(x,2)\n   local mxx = torch.Tensor()\n   torch.prod(mxx,x,2)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.prod value')\n\n   local y = torch.rand(5, 5, 5)\n   for i=1,3 do\n      local a = y:prod(i)\n      local b = y:narrow(i, 1, 1):clone():fill(1)\n      for j = 1, 5 do\n         b:cmul(y:narrow(i, j, 1))\n      end\n      mytester:asserteq(maxdiff(a, b), 0, 'torch.prod value')\n   end\nend\nfunction torchtest.cumsum()\n   local x = torch.rand(msize,msize)\n   local mx = torch.cumsum(x,2)\n   local mxx = torch.Tensor()\n   torch.cumsum(mxx,x,2)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.cumsum value')\nend\nfunction torchtest.cumprod()\n   local x = torch.rand(msize,msize)\n   local mx = torch.cumprod(x,2)\n   local mxx = torch.Tensor()\n   torch.cumprod(mxx,x,2)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.cumprod value')\nend\nfunction torchtest.cross()\n   local x = torch.rand(msize,3,msize)\n   local y = torch.rand(msize,3,msize)\n   local mx = torch.cross(x,y)\n   local mxx = torch.Tensor()\n   torch.cross(mxx,x,y)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.cross value')\nend\nfunction torchtest.zeros()\n   local mx = torch.zeros(msize,msize)\n   local mxx = torch.Tensor()\n   torch.zeros(mxx,msize,msize)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.zeros value')\nend\nfunction torchtest.histc()\n   local x = torch.Tensor{ 2, 4, 2, 2, 5, 4 }\n   local y = torch.histc(x, 5, 1, 5) -- nbins, min, max\n   local z = torch.Tensor{ 
0, 3, 0, 2, 1 }\n   mytester:assertTensorEq(y,z,precision,'error in torch.histc')\nend\nfunction torchtest.bhistc()\n   local x = torch.Tensor(3, 6)\n   x[1] = torch.Tensor{ 2, 4, 2, 2, 5, 4 }\n   x[2] = torch.Tensor{ 3, 5, 1, 5, 3, 5 }\n   x[3] = torch.Tensor{ 3, 4, 2, 5, 5, 1 }\n   local y = torch.bhistc(x, 5, 1, 5) -- nbins, min, max\n   local z = torch.Tensor(3, 5)\n   z[1] = torch.Tensor{ 0, 3, 0, 2, 1 }\n   z[2] = torch.Tensor{ 1, 0, 2, 0, 3 }\n   z[3] = torch.Tensor{ 1, 1, 1, 1, 2 }\n   mytester:assertTensorEq(y,z,precision,'error in torch.bhistc in last dimension')\nend\nfunction torchtest.ones()\n   local mx = torch.ones(msize,msize)\n   local mxx = torch.Tensor()\n   torch.ones(mxx,msize,msize)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.ones value')\nend\nfunction torchtest.diag()\n   local x = torch.rand(msize,msize)\n   local mx = torch.diag(x)\n   local mxx = torch.Tensor()\n   torch.diag(mxx,x)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.diag value')\nend\nfunction torchtest.eye()\n   local mx = torch.eye(msize,msize)\n   local mxx = torch.Tensor()\n   torch.eye(mxx,msize,msize)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.eye value')\nend\nfunction torchtest.renorm()\n   local m1 = torch.randn(10,5)\n   local res1 = torch.Tensor()\n   local m2\n\n   local function renorm(matrix, value, dim, max_norm)\n      local m1 = matrix:transpose(dim, 1):contiguous()\n      -- collapse non-dim dimensions:\n      m2 = m1:reshape(m1:size(1), m1:nElement()/m1:size(1))\n      local norms = m2:norm(value,2)\n      -- clip\n      local new_norms = norms:clone()\n      new_norms[torch.gt(norms, max_norm)] = max_norm\n      new_norms:cdiv(norms:add(1e-7))\n      -- renormalize\n      m1:cmul(new_norms:expandAs(m1))\n      return m1:transpose(dim, 1)\n   end\n\n   -- note that the axis fed to torch.renorm is different (2~=1)\n   local maxnorm = m1:norm(2,1):mean()\n   m2 = renorm(m1,2,2,maxnorm)\n\n   m1:renorm(2,2,maxnorm)\n   mytester:assertTensorEq(m1, m2, 
0.00001, 'error in renorm')\n   mytester:assertTensorEq(m1:norm(2,1), m2:norm(2,1), 0.00001, 'error in renorm')\n\n   m1 = torch.randn(3,4,5)\n   m2 = m1:transpose(2,3):contiguous():reshape(15,4)\n\n   maxnorm = m2:norm(2,1):mean()\n   m2 = renorm(m2,2,2,maxnorm)\n\n   m1:renorm(2,2,maxnorm)\n   local m3 = m1:transpose(2,3):contiguous():reshape(15,4)\n   mytester:assertTensorEq(m3, m2, 0.00001, 'error in renorm')\n   mytester:assertTensorEq(m3:norm(2,1), m2:norm(2,1), 0.00001, 'error in renorm')\nend\nfunction torchtest.multinomialwithreplacement()\n   local n_row = 3\n   for n_col=4,5 do\n      local t=os.time()\n      torch.manualSeed(t)\n      local prob_dist = torch.rand(n_row,n_col)\n      prob_dist:select(2,n_col):fill(0) --index n_col shouldn't be sampled\n      local n_sample = n_col\n      local sample_indices = torch.multinomial(prob_dist, n_sample, true)\n      mytester:assert(prob_dist:dim() == 2, \"wrong number of prob_dist dimensions\")\n      mytester:assert(sample_indices:size(2) == n_sample, \"wrong number of samples\")\n      for i=1,n_row do\n         for j=1,n_sample do\n            mytester:assert(sample_indices[{i,j}] ~= n_col, \"sampled an index with zero probability\")\n         end\n      end\n   end\nend\nfunction torchtest.multinomialwithoutreplacement()\n   local n_row = 3\n   for n_col=4,5 do\n      local t=os.time()\n      torch.manualSeed(t)\n      local prob_dist = torch.rand(n_row,n_col)\n      prob_dist:select(2,n_col):fill(0) --index n_col shouldn't be sampled\n      local n_sample = 3\n      local sample_indices = torch.multinomial(prob_dist, n_sample, false)\n      mytester:assert(prob_dist:dim() == 2, \"wrong number of prob_dist dimensions\")\n      mytester:assert(sample_indices:size(2) == n_sample, \"wrong number of samples\")\n      for i=1,n_row do\n         local row_samples = {}\n         for j=1,n_sample do\n            local sample_idx = sample_indices[{i,j}]\n            mytester:assert(\n                sample_idx ~= 
n_col, \"sampled an index with zero probability\"\n            )\n            mytester:assert(\n                not row_samples[sample_idx], \"sampled an index twice\"\n            )\n            row_samples[sample_idx] = true\n         end\n      end\n   end\nend\nfunction torchtest.aliasMultinomial()\n   for i =1,5 do\n      local n_class = 5\n      local t=os.time()\n      torch.manualSeed(t)\n      local probs = torch.Tensor(n_class):uniform(0,1)\n      probs:div(probs:sum())\n      local output = torch.LongTensor(1000, 10000)\n      local n_samples = output:nElement()\n      local prob_state = torch.multinomialAliasSetup(probs)\n      mytester:assert(prob_state[1]:min() > 0, \"Index =\"..prob_state[1]:min()..\"alias indices has an index below or equal to 0\")\n      mytester:assert(prob_state[1]:max() <= n_class, prob_state[1]:max()..\" alias indices has an index exceeding num_class\")\n      local prob_state = torch.multinomialAliasSetup(probs, prob_state)\n      mytester:assert(prob_state[1]:min() > 0, \"Index =\"..prob_state[1]:min()..\"alias indices has an index below or equal to 0(cold)\")\n      mytester:assert(prob_state[1]:max() <= n_class, prob_state[1]:max()..\",\"..prob_state[1]:min()..\" alias indices has an index exceeding num_class(cold)\")\n      local output = torch.LongTensor(n_samples)\n      output = torch.multinomialAlias(output, prob_state)\n      mytester:assert(output:nElement() == n_samples, \"wrong number of samples\")\n      mytester:assert(output:min() > 0, \"sampled indices has an index below or equal to 0\")\n      mytester:assert(output:max() <= n_class, \"indices has an index exceeding num_class\")\n   end\n\nend\nfunction torchtest.multinomialvector()\n   local n_col = 4\n   local t=os.time()\n   torch.manualSeed(t)\n   local prob_dist = torch.rand(n_col)\n   local n_sample = n_col\n   local sample_indices = torch.multinomial(prob_dist, n_sample, true)\n   local s_dim = sample_indices:dim()\n   mytester:assert(s_dim == 1, 
\"wrong number of returned dimensions: \"..s_dim)\n   mytester:assert(prob_dist:dim() == 1, \"wrong number of prob_dist dimensions\")\n   mytester:assert(sample_indices:size(1) == n_sample, \"wrong number of samples\")\nend\nfunction torchtest.range()\n   local mx = torch.range(0,1)\n   local mxx = torch.Tensor()\n   torch.range(mxx,0,1)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.range value')\n\n   -- Check range for non-contiguous tensors.\n   local x = torch.zeros(2, 3)\n   local y = x:narrow(2, 2, 2)\n   y:range(0, 3)\n   mytester:assertTensorEq(x, torch.Tensor{{0, 0, 1}, {0, 2, 3}}, 1e-16,\n                           'non-contiguous range failed')\nend\nfunction torchtest.rangenegative()\n   local mx = torch.Tensor({1,0})\n   local mxx = torch.Tensor()\n   torch.range(mxx,1,0,-1)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.range value for negative step')\nend\nfunction torchtest.rangeequalbounds()\n   local mx = torch.Tensor({1})\n   local mxx = torch.Tensor()\n   torch.range(mxx,1,1,-1)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.range value for equal bounds step')\n   torch.range(mxx,1,1,1)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.range value for equal bounds step')\nend\nfunction torchtest.rangefloat()\n   local mx = torch.FloatTensor():range(0.6, 0.9, 0.1)\n   mytester:asserteq(mx:size(1), 4, 'wrong size for FloatTensor range')\n   mx = torch.FloatTensor():range(1, 10, 0.3)\n   mytester:asserteq(mx:size(1), 31, 'wrong size for FloatTensor range')\nend\nfunction torchtest.rangedouble()\n   local mx = torch.DoubleTensor():range(0.6, 0.9, 0.1)\n   mytester:asserteq(mx:size(1), 4, 'wrong size for DoubleTensor range')\n   mx = torch.DoubleTensor():range(1, 10, 0.3)\n   mytester:asserteq(mx:size(1), 31, 'wrong size for DoubleTensor range')\nend\nfunction torchtest.randperm()\n   local t=os.time()\n   torch.manualSeed(t)\n   local mx = torch.randperm(msize)\n   local mxx = torch.Tensor()\n   torch.manualSeed(t)\n   torch.randperm(mxx,msize)\n   
mytester:asserteq(maxdiff(mx,mxx),0,'torch.randperm value')\nend\nfunction torchtest.reshape()\n   local x = torch.rand(10,13,23)\n   local mx = torch.reshape(x,130,23)\n   local mxx = torch.Tensor()\n   torch.reshape(mxx,x,130,23)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.reshape value')\nend\n\nlocal function assertIsOrdered(order, x, mxx, ixx, task)\n  local areOrdered\n  if order == 'descending' then\n    areOrdered = function(a, b) return a >= b end\n  elseif order == 'ascending' then\n    areOrdered = function(a, b) return a <= b end\n  else\n    error('unknown order \"' .. order .. '\", must be \"ascending\" or \"descending\"')\n  end\n\n  local decreasing = true\n  for j = 1,msize do\n    for k = 2,msize do\n      decreasing = decreasing and areOrdered(mxx[j][k-1], mxx[j][k])\n    end\n  end\n  mytester:assert(decreasing, 'torch.sort (' .. order .. ') values unordered for ' .. task)\n  local seen = torch.ByteTensor(msize)\n  local indicesCorrect = true\n  for k = 1,msize do\n    seen:zero()\n    for j = 1,msize do\n      indicesCorrect = indicesCorrect and (x[k][ixx[k][j]] == mxx[k][j])\n      seen[ixx[k][j]] = 1\n    end\n    indicesCorrect = indicesCorrect and (torch.sum(seen) == msize)\n  end\n  mytester:assert(indicesCorrect, 'torch.sort (' .. order .. ') indices wrong for ' .. 
task)\nend\n\nfunction torchtest.sortAscending()\n   local x = torch.rand(msize,msize)\n   local mx,ix = torch.sort(x)\n\n   -- Test use of result tensor\n   local mxx = torch.Tensor()\n   local ixx = torch.LongTensor()\n   torch.sort(mxx,ixx,x)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.sort (ascending) value')\n   mytester:asserteq(maxdiff(ix,ixx),0,'torch.sort (ascending) index')\n\n   -- Test sorting of random numbers\n   assertIsOrdered('ascending', x, mxx, ixx, 'random')\n\n   mytester:assertTensorEq(\n           torch.sort(torch.Tensor{ 50, 40, 30, 20, 10 }),\n           torch.Tensor{ 10, 20, 30, 40, 50 },\n           1e-16,\n           \"torch.sort (ascending) simple sort\"\n       )\n   -- Test that we still have proper sorting with duplicate keys\n   local x = torch.floor(torch.rand(msize,msize)*10)\n   torch.sort(mxx,ixx,x)\n   assertIsOrdered('ascending', x, mxx, ixx, 'random with duplicate keys')\nend\n\nfunction torchtest.sortDescending()\n   local x = torch.rand(msize,msize)\n   local mx,ix = torch.sort(x,true)\n\n   -- Test use of result tensor\n   local mxx = torch.Tensor()\n   local ixx = torch.LongTensor()\n   torch.sort(mxx,ixx,x,true)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.sort (descending) value')\n   mytester:asserteq(maxdiff(ix,ixx),0,'torch.sort (descending) index')\n\n   -- Test sorting of random numbers\n   assertIsOrdered('descending', x, mxx, ixx, 'random')\n\n   -- Test simple sort task\n   mytester:assertTensorEq(\n           torch.sort(torch.Tensor{ 10, 20, 30, 40, 50 },true),\n           torch.Tensor{ 50, 40, 30, 20, 10 },\n           1e-16,\n           \"torch.sort (descending) simple sort\"\n       )\n\n   -- Test that we still have proper sorting with duplicate keys\n   assertIsOrdered('descending', x, mxx, ixx, 'random with duplicate keys')\nend\n\nfunction torchtest.topK()\n   local function topKViaSort(t, k, dim, dir)\n      local sorted, indices = t:sort(dim, dir)\n      return sorted:narrow(dim, 1, k), 
indices:narrow(dim, 1, k)\n   end\n\n   local function compareTensors(t, res1, ind1, res2, ind2, dim, msg)\n      -- Values should be exactly equivalent\n      mytester:assertTensorEq(res1, res2, 0, msg)\n\n      -- Indices might differ based on the implementation, since there is\n      -- no guarantee of the relative order of selection\n      if ind1:eq(ind2):min() == 0 then\n         -- To verify that the indices represent equivalent elements,\n         -- gather from the input using the topk indices and compare against\n         -- the sort indices\n         local vals = t:gather(dim, ind2)\n         mytester:assertTensorEq(res1, vals, 0, msg)\n      end\n   end\n\n   local function compare(t, k, dim, dir, msg)\n      local topKVal, topKInd = t:topk(k, dim, dir, true)\n      local sortKVal, sortKInd = topKViaSort(t, k, dim, dir)\n\n      compareTensors(t, sortKVal, sortKInd, topKVal, topKInd, dim, msg)\n   end\n\n   local t = torch.rand(math.random(1, msize),\n                        math.random(1, msize),\n                        math.random(1, msize))\n\n   for kTries = 1, 3 do\n      for dimTries = 1, 3 do\n         for _, transpose in ipairs({true, false}) do\n            for _, dir in ipairs({true, false}) do\n               local testTensor = t\n\n               local transposeMsg = nil\n               if transpose then\n                  local dim1 = math.random(1, t:nDimension())\n                  local dim2 = dim1\n\n                  while dim1 == dim2 do\n                     dim2 = math.random(1, t:nDimension())\n                  end\n\n                  testTensor = t:transpose(dim1, dim2)\n                  transposeMsg = 'transpose(' .. dim1 .. ', ' .. dim2 .. ')'\n               end\n\n               local dim = math.random(1, testTensor:nDimension())\n               local k = math.random(1, testTensor:size(dim))\n               local msg = 'topk(' .. k .. ', ' .. dim .. ', ' .. tostring(dir) .. 
', true)'\n               if transposeMsg then\n                  msg = msg .. ' ' .. transposeMsg\n               end\n\n               compare(testTensor, k, dim, dir, msg)\n            end\n         end\n      end\n   end\nend\n\nfunction torchtest.kthvalue()\n   local x = torch.rand(msize, msize, msize)\n   local x0 = x:clone()\n   do\n      local k = math.random(1, msize)\n      local mx, ix = torch.kthvalue(x, k)\n      local mxx, ixx = torch.sort(x)\n\n      mytester:assertTensorEq(mxx:select(3, k), mx:select(3, 1), 0,\n                              'torch.kthvalue value')\n      mytester:assertTensorEq(ixx:select(3, k), ix:select(3, 1), 0,\n                              'torch.kthvalue index')\n   end\n   do -- test use of result tensors\n      local k = math.random(1, msize)\n      local mx = torch.Tensor()\n      local ix = torch.LongTensor()\n      torch.kthvalue(mx, ix, x, k)\n      local mxx, ixx = torch.sort(x)\n      mytester:assertTensorEq(mxx:select(3, k), mx:select(3, 1), 0,\n                              'torch.kthvalue value')\n      mytester:assertTensorEq(ixx:select(3, k), ix:select(3, 1), 0,\n                              'torch.kthvalue index')\n   end\n   do -- test non-default dim\n      local k = math.random(1, msize)\n      local mx, ix = torch.kthvalue(x, k, 1)\n      local mxx, ixx = torch.sort(x, 1)\n      mytester:assertTensorEq(mxx:select(1, k), mx[1], 0,\n                              'torch.kthvalue value')\n      mytester:assertTensorEq(ixx:select(1, k), ix[1], 0,\n                              'torch.kthvalue index')\n   end\n   do -- non-contiguous\n      local y = x:narrow(2, 1, 1)\n      local y0 = y:clone()\n      local k = math.random(1, msize)\n      local my, ix = torch.kthvalue(y, k)\n      local my0, ix0 = torch.kthvalue(y0, k)\n      mytester:assertTensorEq(my, my0, 0, 'torch.kthvalue value')\n      mytester:assertTensorEq(ix, ix0, 0, 'torch.kthvalue index')\n   end\n   mytester:assertTensorEq(x, x0, 0, 'torch.kthvalue 
modified input')\n\n   -- simple test case (with repetitions)\n   local y = torch.Tensor{3,5,4,1,1,5}\n   mytester:assertTensorEq(torch.kthvalue(y, 3), torch.Tensor{3}, 1e-16,\n      'torch.kthvalue simple')\n   mytester:assertTensorEq(torch.kthvalue(y, 2), torch.Tensor{1}, 1e-16,\n      'torch.kthvalue simple')\nend\n\nfunction torchtest.median()\n   for _, msize in ipairs{155,156} do\n      local x = torch.rand(msize, msize)\n      local x0 = x:clone()\n\n      local mx, ix = torch.median(x)\n      local mxx, ixx = torch.sort(x)\n      local ind = math.floor((msize+1)/2)\n\n      mytester:assertTensorEq(mxx:select(2, ind), mx:select(2, 1), 0,\n                              'torch.median value')\n      mytester:assertTensorEq(ixx:select(2, ind), ix:select(2, 1), 0,\n                              'torch.median index')\n\n      -- Test use of result tensor\n      local mr = torch.Tensor()\n      local ir = torch.LongTensor()\n      torch.median(mr, ir, x)\n      mytester:assertTensorEq(mr, mx, 0, 'torch.median result tensor value')\n      mytester:assertTensorEq(ir, ix, 0, 'torch.median result tensor index')\n\n      -- Test non-default dim\n      mx, ix = torch.median(x, 1)\n      mxx, ixx = torch.sort(x, 1)\n      mytester:assertTensorEq(mxx:select(1, ind), mx[1], 0,\n                              'torch.median value')\n      mytester:assertTensorEq(ixx:select(1, ind), ix[1], 0,\n                              'torch.median index')\n\n      -- input unchanged\n      mytester:assertTensorEq(x, x0, 0, 'torch.median modified input')\n   end\nend\n\nfunction torchtest.mode()\n   local x = torch.range(1, msize * msize):reshape(msize, msize)\n   x:select(1, 1):fill(1)\n   x:select(1, 2):fill(1)\n   x:select(2, 1):fill(1)\n   x:select(2, 2):fill(1)\n   local x0 = x:clone()\n\n   -- Pre-calculated results.\n   local res = torch.Tensor(msize):fill(1)\n   -- The indices are the position of the last appearance of the mode element.\n   local resix = 
torch.LongTensor(msize):fill(2)\n   resix[1] = msize\n   resix[2] = msize\n\n   local mx, ix = torch.mode(x)\n\n   mytester:assertTensorEq(res:view(msize, 1), mx, 0, 'torch.mode value')\n   mytester:assertTensorEq(resix:view(msize, 1), ix, 0, 'torch.mode index')\n\n   -- Test use of result tensor\n   local mr = torch.Tensor()\n   local ir = torch.LongTensor()\n   torch.mode(mr, ir, x)\n   mytester:assertTensorEq(mr, mx, 0, 'torch.mode result tensor value')\n   mytester:assertTensorEq(ir, ix, 0, 'torch.mode result tensor index')\n\n   -- Test non-default dim\n   mx, ix = torch.mode(x, 1)\n   mytester:assertTensorEq(res:view(1, msize), mx, 0, 'torch.mode value')\n   mytester:assertTensorEq(resix:view(1, msize), ix, 0, 'torch.mode index')\n\n   local input = torch.Tensor({\n       {1, 2, 2, 2, 3, 2},\n       {1.5, 2, 2, 1.5, 1.5, 5},\n   })\n   local value, index = torch.mode(input)\n   local expected_value = torch.Tensor({{2}, {1.5}})\n   mytester:assertTensorEq(value, expected_value)\n\n   -- input unchanged\n   mytester:assertTensorEq(x, x0, 0, 'torch.mode modified input')\nend\n\n\nfunction torchtest.tril()\n   local x = torch.rand(msize,msize)\n   local mx = torch.tril(x)\n   local mxx = torch.Tensor()\n   torch.tril(mxx,x)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.tril value')\nend\nfunction torchtest.triu()\n   local x = torch.rand(msize,msize)\n   local mx = torch.triu(x)\n   local mxx = torch.Tensor()\n   torch.triu(mxx,x)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.triu value')\nend\nfunction torchtest.cat()\n   for dim = 1, 3 do\n      local x = torch.rand(13, msize, msize):transpose(1, dim)\n      local y = torch.rand(17, msize, msize):transpose(1, dim)\n      local mx = torch.cat(x, y, dim)\n      mytester:assertTensorEq(mx:narrow(dim, 1, 13), x, 0, 'torch.cat value')\n      mytester:assertTensorEq(mx:narrow(dim, 14, 17), y, 0, 'torch.cat value')\n\n      local mxx = torch.Tensor()\n      torch.cat(mxx, x, y, dim)\n      
mytester:assertTensorEq(mx, mxx, 0, 'torch.cat value')\n\n      local x = torch.rand(1,2,3)\n      local y = torch.Tensor()\n      local mx = torch.cat(x,y,dim)\n      mytester:asserteq(mx:size(1),1,'torch.cat size')\n      mytester:asserteq(mx:size(2),2,'torch.cat size')\n      mytester:asserteq(mx:size(3),3,'torch.cat size')\n      mytester:assertTensorEq(mx, x, 0, 'torch.cat value')\n\n      local x = torch.Tensor()\n      local y = torch.Tensor()\n      local mx = torch.cat(x,y,dim)\n      mytester:asserteq(mx:dim(),0,'torch.cat dim')\n   end\n   local x = torch.Tensor()\n   local y = torch.rand(1,2,3)\n   local mx = torch.cat(x,y)\n   mytester:asserteq(mx:size(1),1,'torch.cat size')\n   mytester:asserteq(mx:size(2),2,'torch.cat size')\n   mytester:asserteq(mx:size(3),3,'torch.cat size')\n   mytester:assertTensorEq(mx, y, 0, 'torch.cat value')\n\n   local x = torch.Tensor()\n   local y = torch.Tensor()\n   local mx = torch.cat(x,y)\n   mytester:asserteq(mx:dim(),0,'torch.cat dim')\nend\nfunction torchtest.catArray()\n   for dim = 1, 3 do\n      local x = torch.rand(13, msize, msize):transpose(1, dim)\n      local y = torch.rand(17, msize, msize):transpose(1, dim)\n      local z = torch.rand(19, msize, msize):transpose(1, dim)\n\n      local mx = torch.cat({x, y, z}, dim)\n      mytester:assertTensorEq(mx:narrow(dim, 1, 13), x, 0, 'torch.cat value')\n      mytester:assertTensorEq(mx:narrow(dim, 14, 17), y, 0, 'torch.cat value')\n      mytester:assertTensorEq(mx:narrow(dim, 31, 19), z, 0, 'torch.cat value')\n\n      mytester:assertError(function() torch.cat{} end, 'torch.cat empty table')\n\n      local mxx = torch.Tensor()\n      torch.cat(mxx, {x, y, z}, dim)\n      mytester:assertTensorEq(mx, mxx, 0, 'torch.cat value')\n      torch.cat(mxx:float(), {x:float(), y:float(), z:float()}, dim)\n      mytester:assertTensorEq(mx, mxx, 0, 'torch.cat value')\n      torch.cat(mxx:double(), {x:double(), y:double(), z:double()}, dim)\n      mytester:assertTensorEq(mx, mxx, 
0, 'torch.cat value')\n\n      local x = torch.rand(1,2,3)\n      local y = torch.Tensor()\n      local mx = torch.cat({x,y},dim)\n      mytester:asserteq(mx:size(1),1,'torch.cat size')\n      mytester:asserteq(mx:size(2),2,'torch.cat size')\n      mytester:asserteq(mx:size(3),3,'torch.cat size')\n      mytester:assertTensorEq(mx, x, 0, 'torch.cat value')\n\n      local x = torch.Tensor()\n      local y = torch.Tensor()\n      local mx = torch.cat({x,y},dim)\n      mytester:asserteq(mx:dim(),0,'torch.cat dim')\n   end\n   local x = torch.Tensor()\n   local y = torch.rand(1,2,3)\n   local mx = torch.cat({x,y})\n   mytester:asserteq(mx:size(1),1,'torch.cat size')\n   mytester:asserteq(mx:size(2),2,'torch.cat size')\n   mytester:asserteq(mx:size(3),3,'torch.cat size')\n   mytester:assertTensorEq(mx, y, 0, 'torch.cat value')\n\n   local x = torch.Tensor()\n   local y = torch.Tensor()\n   local mx = torch.cat({x,y})\n   mytester:asserteq(mx:dim(),0,'torch.cat dim')\nend\nfunction torchtest.catNoDim()\n   local a\n   local b\n   local c\n\n   a = torch.Tensor(msize):uniform()\n   b = torch.Tensor(msize):uniform()\n   c = torch.cat(a, b)\n   mytester:assertTensorEq(c:narrow(1, 1, msize), a, 0, 'torch.cat value')\n   mytester:assertTensorEq(c:narrow(1, msize + 1, msize), b, 0, 'torch.cat value')\n\n   a = torch.Tensor(1, msize):uniform()\n   b = torch.Tensor(1, msize):uniform()\n   c = torch.cat(a, b)\n   mytester:assertTensorEq(c:narrow(2, 1, msize), a, 0, 'torch.cat value')\n   mytester:assertTensorEq(c:narrow(2, msize + 1, msize), b, 0, 'torch.cat value')\n\n   a = torch.Tensor(10, msize):uniform()\n   b = torch.Tensor(10, msize):uniform()\n   c = torch.cat(a, b)\n   mytester:assertTensorEq(c:narrow(2, 1, msize), a, 0, 'torch.cat value')\n   mytester:assertTensorEq(c:narrow(2, msize + 1, msize), b, 0, 'torch.cat value')\nend\nfunction torchtest.sin_2()\n   local x = torch.rand(msize,msize,msize)\n   local mx = torch.sin(x)\n   local mxx  = torch.Tensor()\n   
torch.sin(mxx,x)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.sin value')\nend\nfunction torchtest.linspace()\n   local from = math.random()\n   local to = from+math.random()\n   local mx = torch.linspace(from,to,137)\n   local mxx = torch.Tensor()\n   torch.linspace(mxx,from,to,137)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.linspace value')\n   mytester:assertError(function() torch.linspace(0,1,1) end, 'accepted 1 point between 2 distinct endpoints')\n   mytester:assertTensorEq(torch.linspace(0,0,1),torch.zeros(1),1e-16, 'failed to generate for torch.linspace(0,0,1)')\n\n   -- Check linspace for generating with start > end.\n   mytester:assertTensorEq(torch.linspace(2,0,3),\n                           torch.Tensor{2,1,0},\n                           1e-16,\n                           'failed to generate for torch.linspace(2,0,3)')\n\n   -- Check linspace for non-contiguous tensors.\n   local x = torch.zeros(2, 3)\n   local y = x:narrow(2, 2, 2)\n   y:linspace(0, 3, 4)\n   mytester:assertTensorEq(x, torch.Tensor{{0, 0, 1}, {0, 2, 3}}, 1e-16,\n                           'non-contiguous linspace failed')\nend\nfunction torchtest.logspace()\n   local from = math.random()\n   local to = from+math.random()\n   local mx = torch.logspace(from,to,137)\n   local mxx = torch.Tensor()\n   torch.logspace(mxx,from,to,137)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.logspace value')\n   mytester:assertError(function() torch.logspace(0,1,1) end, 'accepted 1 point between 2 distinct endpoints')\n   mytester:assertTensorEq(torch.logspace(0,0,1),torch.ones(1),1e-16, 'failed to generate for torch.logspace(0,0,1)')\n\n   -- Check logspace for generating with start > end.\n   mytester:assertTensorEq(torch.logspace(1,0,2),\n                           torch.Tensor{10, 1},\n                           1e-16,\n                           'failed to generate for torch.logspace(1,0,2)')\n\n   -- Check logspace for non-contiguous tensors.\n   local x = torch.zeros(2, 3)\n   local y = 
x:narrow(2, 2, 2)\n   y:logspace(0, 3, 4)\n   mytester:assertTensorEq(x, torch.Tensor{{0, 1, 10}, {0, 100, 1000}}, 1e-16,\n                           'non-contiguous logspace failed')\nend\nfunction torchtest.rand()\n   torch.manualSeed(123456)\n   local mx = torch.rand(msize,msize)\n   local mxx = torch.Tensor()\n   torch.manualSeed(123456)\n   torch.rand(mxx,msize,msize)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.rand value')\nend\nfunction torchtest.randn()\n   torch.manualSeed(123456)\n   local mx = torch.randn(msize,msize)\n   local mxx = torch.Tensor()\n   torch.manualSeed(123456)\n   torch.randn(mxx,msize,msize)\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.randn value')\nend\nfunction torchtest.gesv()\n   if not torch.gesv then return end\n   local a=torch.Tensor({{6.80, -2.11,  5.66,  5.97,  8.23},\n                         {-6.05, -3.30,  5.36, -4.44,  1.08},\n                         {-0.45,  2.58, -2.70,  0.27,  9.04},\n                         {8.32,  2.71,  4.35, -7.17,  2.14},\n                         {-9.67, -5.14, -7.26,  6.08, -6.87}}):t()\n   local b=torch.Tensor({{4.02,  6.19, -8.22, -7.57, -3.03},\n                         {-1.56,  4.00, -8.67,  1.75,  2.86},\n                         {9.81, -4.09, -4.57, -8.61,  8.99}}):t()\n   local mx = torch.gesv(b,a)\n   mytester:assertlt(b:dist(a*mx),1e-12,'torch.gesv')\n   local ta = torch.Tensor()\n   local tb = torch.Tensor()\n   local mxx = torch.gesv(tb,ta,b,a)\n   local mxxx = torch.gesv(b,a,b,a)\n   mytester:asserteq(maxdiff(mx,tb),0,'torch.gesv value temp')\n   mytester:asserteq(maxdiff(mx,b),0,'torch.gesv value flag')\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.gesv value out1')\n   mytester:asserteq(maxdiff(mx,mxxx),0,'torch.gesv value out2')\nend\nfunction torchtest.gesv_reuse()\n   if not torch.gesv then return end\n   local a=torch.Tensor({{6.80, -2.11,  5.66,  5.97,  8.23},\n                         {-6.05, -3.30,  5.36, -4.44,  1.08},\n                         {-0.45,  2.58, -2.70, 
 0.27,  9.04},\n                         {8.32,  2.71,  4.35, -7.17,  2.14},\n                         {-9.67, -5.14, -7.26,  6.08, -6.87}}):t()\n   local b=torch.Tensor({{4.02,  6.19, -8.22, -7.57, -3.03},\n                         {-1.56,  4.00, -8.67,  1.75,  2.86},\n                         {9.81, -4.09, -4.57, -8.61,  8.99}}):t()\n   local mx = torch.gesv(b,a)\n   local ta = torch.Tensor()\n   local tb = torch.Tensor()\n   torch.gesv(tb,ta,b,a)\n   mytester:asserteq(maxdiff(mx,tb),0,'torch.gesv value temp')\n   torch.gesv(tb,ta,b,a)\n   mytester:asserteq(maxdiff(mx,tb),0,'torch.gesv value reuse')\nend\nfunction torchtest.trtrs()\n   if not torch.trtrs then return end\n   local a=torch.Tensor({{6.80, -2.11,  5.66,  5.97,  8.23},\n                         {-6.05, -3.30,  5.36, -4.44,  1.08},\n                         {-0.45,  2.58, -2.70,  0.27,  9.04},\n                         {8.32,  2.71,  4.35, -7.17,  2.14},\n                         {-9.67, -5.14, -7.26,  6.08, -6.87}}):t()\n   local b=torch.Tensor({{4.02,  6.19, -8.22, -7.57, -3.03},\n                         {-1.56,  4.00, -8.67,  1.75,  2.86},\n                         {9.81, -4.09, -4.57, -8.61,  8.99}}):t()\n\n   local U = torch.triu(a)\n   local L = torch.tril(a)\n\n   -- solve Ux = b\n   local x = torch.trtrs(b, U)\n   mytester:assertlt(b:dist(U*x),1e-12,'torch.trtrs')\n   x = torch.trtrs(b, U, 'U', 'N', 'N')\n   mytester:assertlt(b:dist(U*x),1e-12,'torch.trtrs')\n\n   -- solve Lx = b\n   x = torch.trtrs(b, L, 'L')\n   mytester:assertlt(b:dist(L*x),1e-12,'torch.trtrs')\n   x = torch.trtrs(b, L, 'L', 'N', 'N')\n   mytester:assertlt(b:dist(L*x),1e-12,'torch.trtrs')\n\n   -- solve U'x = b\n   x = torch.trtrs(b, U, 'U', 'T')\n   mytester:assertlt(b:dist(U:t()*x),1e-12,'torch.trtrs')\n   x = torch.trtrs(b, U, 'U', 'T', 'N')\n   mytester:assertlt(b:dist(U:t()*x),1e-12,'torch.trtrs')\n\n   -- solve U'x = b by manual transposition\n   y = torch.trtrs(b, U:t(), 'L', 'N')\n   
mytester:assertlt(x:dist(y),1e-12,'torch.trtrs')\n\n   -- solve L'x = b\n   x = torch.trtrs(b, L, 'L', 'T')\n   mytester:assertlt(b:dist(L:t()*x),1e-12,'torch.trtrs')\n   x = torch.trtrs(b, L, 'L', 'T', 'N')\n   mytester:assertlt(b:dist(L:t()*x),1e-12,'torch.trtrs')\n\n   -- solve L'x = b by manual transposition\n   y = torch.trtrs(b, L:t(), 'U', 'N')\n   mytester:assertlt(x:dist(y),1e-12,'torch.trtrs')\nend\nfunction torchtest.trtrs_reuse()\n   if not torch.trtrs then return end\n   local a=torch.Tensor({{6.80, -2.11,  5.66,  5.97,  8.23},\n                         {-6.05, -3.30,  5.36, -4.44,  1.08},\n                         {-0.45,  2.58, -2.70,  0.27,  9.04},\n                         {8.32,  2.71,  4.35, -7.17,  2.14},\n                         {-9.67, -5.14, -7.26,  6.08, -6.87}}):t()\n   local b=torch.Tensor({{4.02,  6.19, -8.22, -7.57, -3.03},\n                         {-1.56,  4.00, -8.67,  1.75,  2.86},\n                         {9.81, -4.09, -4.57, -8.61,  8.99}}):t()\n   local mx = torch.trtrs(b,a)\n   local ta = torch.Tensor()\n   local tb = torch.Tensor()\n   torch.trtrs(tb,ta,b,a)\n   mytester:asserteq(maxdiff(mx,tb),0,'torch.trtrs value temp')\n   tb:zero()\n   torch.trtrs(tb,ta,b,a)\n   mytester:asserteq(maxdiff(mx,tb),0,'torch.trtrs value reuse')\nend\nfunction torchtest.gels_uniquely_determined()\n   if not torch.gels then return end\n   local expectedNorm = 0\n   local a=torch.Tensor({{ 1.44, -9.96, -7.55,  8.34},\n                         {-7.84, -0.28,  3.24,  8.09},\n                         {-4.39, -3.24,  6.27,  5.28},\n                         {4.53,  3.83, -6.64,  2.06}}):t()\n   local b=torch.Tensor({{8.58,  8.26,  8.48, -5.28},\n                         {9.35, -4.43, -0.70, -0.26}}):t()\n   local a_copy = a:clone()\n   local b_copy = b:clone()\n   local mx = torch.gels(b,a)\n   mytester:asserteq(maxdiff(a,a_copy),0,'torch.gels changed a')\n   mytester:asserteq(maxdiff(b,b_copy),0,'torch.gels changed b')\n   
mytester:assertalmosteq((torch.mm(a,mx)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer')\n\n   local ta = torch.Tensor()\n   local tb = torch.Tensor()\n   local mxx = torch.gels(tb,ta,b,a)\n   mytester:asserteq(maxdiff(a,a_copy),0,'torch.gels changed a')\n   mytester:asserteq(maxdiff(b,b_copy),0,'torch.gels changed b')\n   mytester:assertalmosteq((torch.mm(a,tb)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer')\n\n   local mxxx = torch.gels(b,a,b,a)\n   mytester:assertalmosteq((torch.mm(a_copy,b)-b_copy):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer')\n   mytester:asserteq(maxdiff(mx,tb),0,'torch.gels value temp')\n   mytester:asserteq(maxdiff(mx,b),0,'torch.gels value flag')\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.gels value out1')\n   mytester:asserteq(maxdiff(mx,mxxx),0,'torch.gels value out2')\nend\nfunction torchtest.gels_reuse()\n   if not torch.gels then return end\n   local expectedNorm = 0\n   local a=torch.Tensor({{ 1.44, -9.96, -7.55,  8.34},\n                         {-7.84, -0.28,  3.24,  8.09},\n                         {-4.39, -3.24,  6.27,  5.28},\n                         {4.53,  3.83, -6.64,  2.06}}):t()\n   local b=torch.Tensor({{8.58,  8.26,  8.48, -5.28},\n                         {9.35, -4.43, -0.70, -0.26}}):t()\n   local ta = torch.Tensor()\n   local tb = torch.Tensor()\n   torch.gels(tb,ta,b,a)\n   mytester:assertalmosteq((torch.mm(a,tb)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer')\n   torch.gels(tb,ta,b,a)\n   mytester:assertalmosteq((torch.mm(a,tb)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer')\n   torch.gels(tb,ta,b,a)\n   mytester:assertalmosteq((torch.mm(a,tb)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer')\nend\nfunction torchtest.gels_overdetermined()\n   if not torch.gels then return end\n   local expectedNorm = 17.390200628863\n   local a=torch.Tensor({{ 1.44, -9.96, -7.55,  8.34,  7.08, -5.45},\n                         {-7.84, -0.28,  3.24,  8.09,  2.52, -5.70},\n   
                      {-4.39, -3.24,  6.27,  5.28,  0.74, -1.19},\n                         {4.53,  3.83, -6.64,  2.06, -2.47,  4.70}}):t()\n   local b=torch.Tensor({{8.58,  8.26,  8.48, -5.28,  5.72,  8.93},\n                         {9.35, -4.43, -0.70, -0.26, -7.36, -2.52}}):t()\n   local a_copy = a:clone()\n   local b_copy = b:clone()\n   local mx = torch.gels(b,a)\n   mytester:asserteq(maxdiff(a,a_copy),0,'torch.gels changed a')\n   mytester:asserteq(maxdiff(b,b_copy),0,'torch.gels changed b')\n   mytester:assertalmosteq((torch.mm(a, mx)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer')\n\n   local ta = torch.Tensor()\n   local tb = torch.Tensor()\n   local mxx = torch.gels(tb,ta,b,a)\n   mytester:asserteq(maxdiff(a,a_copy),0,'torch.gels changed a')\n   mytester:asserteq(maxdiff(b,b_copy),0,'torch.gels changed b')\n   mytester:assertalmosteq((torch.mm(a,tb)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer')\n\n   local mxxx = torch.gels(b,a,b,a)\n   mytester:assertalmosteq((torch.mm(a_copy,b)-b_copy):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer')\n   mytester:asserteq(maxdiff(mx,tb),0,'torch.gels value temp')\n   mytester:asserteq(maxdiff(mx,b),0,'torch.gels value flag')\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.gels value out1')\n   mytester:asserteq(maxdiff(mx,mxxx),0,'torch.gels value out2')\nend\nfunction torchtest.gels_underdetermined()\n   if not torch.gels then return end\n   local expectedNorm = 0\n   local a=torch.Tensor({{ 1.44, -9.96, -7.55},\n                         {-7.84, -0.28,  3.24},\n                         {-4.39, -3.24,  6.27},\n                         {4.53,  3.83, -6.64}}):t()\n   local b=torch.Tensor({{8.58,  8.26,  8.48},\n                         {9.35, -4.43, -0.70}}):t()\n\n   local a_copy = a:clone()\n   local b_copy = b:clone()\n   local mx = torch.gels(b,a)\n   mytester:asserteq(maxdiff(a,a_copy),0,'torch.gels changed a')\n   mytester:asserteq(maxdiff(b,b_copy),0,'torch.gels changed b')\n   
mytester:assertalmosteq((torch.mm(a,mx)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer')\n\n   local ta = torch.Tensor()\n   local tb = torch.Tensor()\n   local mxx = torch.gels(tb,ta,b,a)\n   mytester:asserteq(maxdiff(a,a_copy),0,'torch.gels changed a')\n   mytester:asserteq(maxdiff(b,b_copy),0,'torch.gels changed b')\n   mytester:assertalmosteq((torch.mm(a,tb)-b):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer')\n\n   local mxxx = torch.gels(b,a,b,a)\n   mytester:assertalmosteq((torch.mm(a_copy,b)-b_copy):norm(), expectedNorm, 1e-8, 'torch.gels wrong answer')\n   mytester:asserteq(maxdiff(mx,tb),0,'torch.gels value temp')\n   mytester:asserteq(maxdiff(mx,b),0,'torch.gels value flag')\n   mytester:asserteq(maxdiff(mx,mxx),0,'torch.gels value out1')\n   mytester:asserteq(maxdiff(mx,mxxx),0,'torch.gels value out2')\nend\nfunction torchtest.eig()\n   if not torch.eig then return end\n   local a=torch.Tensor({{ 1.96,  0.00,  0.00,  0.00,  0.00},\n                         {-6.49,  3.80,  0.00,  0.00,  0.00},\n                         {-0.47, -6.39,  4.17,  0.00,  0.00},\n                         {-7.20,  1.50, -1.51,  5.70,  0.00},\n                         {-0.65, -6.34,  2.67,  1.80, -7.10}}):t():clone()\n   local e = torch.eig(a)\n   local ee,vv = torch.eig(a,'V')\n   local te = torch.Tensor()\n   local tv = torch.Tensor()\n   local eee,vvv = torch.eig(te,tv,a,'V')\n   mytester:assertlt(maxdiff(e,ee),1e-12,'torch.eig value')\n   mytester:assertlt(maxdiff(ee,eee),1e-12,'torch.eig value')\n   mytester:assertlt(maxdiff(ee,te),1e-12,'torch.eig value')\n   mytester:assertlt(maxdiff(vv,vvv),1e-12,'torch.eig value')\n   mytester:assertlt(maxdiff(vv,tv),1e-12,'torch.eig value')\nend\nfunction torchtest.eig_reuse()\n   if not torch.eig then return end\n   local X = torch.randn(4,4)\n   X = X:t()*X\n   local e, v = torch.zeros(4,2), torch.zeros(4,4)\n   torch.eig(e, v, X,'V')\n   local Xhat = v * torch.diag(e:select(2,1)) * v:t()\n   mytester:assertTensorEq(X, 
Xhat, 1e-8, 'VeV\\' wrong')\n   mytester:assert(not v:isContiguous(), 'V is contiguous')\n\n   torch.eig(e, v, X, 'V')\n   local Xhat = torch.mm(v, torch.mm(e:select(2,1):diag(), v:t()))\n   mytester:assertTensorEq(X, Xhat, 1e-8, 'VeV\\' wrong')\n   mytester:assert(not v:isContiguous(), 'V is contiguous')\nend\nfunction torchtest.eig_noncontig()\n   if not torch.eig then return end\n   local X = torch.randn(4,4)\n   X = X:t()*X\n   local e = torch.zeros(4,2,2)[{ {}, 2, {} }]\n   local v = torch.zeros(4,2,4)[{ {}, 2, {} }]\n   mytester:assert(not v:isContiguous(), 'V is contiguous')\n   mytester:assert(not e:isContiguous(), 'E is contiguous')\n   torch.eig(e, v, X,'V')\n   local Xhat = v * torch.diag(e:select(2,1)) * v:t()\n   mytester:assertTensorEq(X, Xhat, 1e-8, 'VeV\\' wrong')\nend\nfunction torchtest.test_symeig()\n  if not torch.symeig then return end\n  local xval = torch.rand(100,3)\n  local cov = torch.mm(xval:t(), xval)\n  local rese = torch.zeros(3)\n  local resv = torch.zeros(3,3)\n\n  -- First call to symeig\n  mytester:assert(resv:isContiguous(), 'resv is not contiguous') -- PASS\n  torch.symeig(rese, resv, cov:clone(), 'V')\n  local ahat = resv*torch.diag(rese)*resv:t()\n  mytester:assertTensorEq(cov, ahat, 1e-8, 'VeV\\' wrong') -- PASS\n\n  -- Second call to symeig\n  mytester:assert(not resv:isContiguous(), 'resv is contiguous') -- FAIL\n  torch.symeig(rese, resv, cov:clone(), 'V')\n  local ahat = torch.mm(torch.mm(resv, torch.diag(rese)), resv:t())\n  mytester:assertTensorEq(cov, ahat, 1e-8, 'VeV\\' wrong') -- FAIL\nend\nfunction  torchtest.symeig_noncontig()\n  if not torch.symeig then return end\n   local X = torch.rand(5,5)\n   X = X:t()*X\n   local e = torch.zeros(4,2):select(2,2)\n   local v = torch.zeros(4,2,4)[{ {}, 2, {} }]\n   mytester:assert(not v:isContiguous(), 'V is contiguous')\n   mytester:assert(not e:isContiguous(), 'E is contiguous')\n   torch.symeig(e, v, X,'V')\n   local Xhat = v * torch.diag(e) * v:t()\n   
mytester:assertTensorEq(X, Xhat, 1e-8, 'VeV\\' wrong')\nend\nfunction torchtest.svd()\n   if not torch.svd then return end\n   local a=torch.Tensor({{8.79,  6.11, -9.15,  9.57, -3.49,  9.84},\n                         {9.93,  6.91, -7.93,  1.64,  4.02,  0.15},\n                         {9.83,  5.04,  4.86,  8.83,  9.80, -8.99},\n                         {5.45, -0.27,  4.85,  0.74, 10.00, -6.02},\n                         {3.16,  7.98,  3.01,  5.80,  4.27, -5.31}}):t():clone()\n   local u,s,v = torch.svd(a)\n   local uu = torch.Tensor()\n   local ss = torch.Tensor()\n   local vv = torch.Tensor()\n   local uuu,sss,vvv = torch.svd(uu,ss,vv,a)\n   mytester:asserteq(maxdiff(u,uu),0,'torch.svd')\n   mytester:asserteq(maxdiff(u,uuu),0,'torch.svd')\n   mytester:asserteq(maxdiff(s,ss),0,'torch.svd')\n   mytester:asserteq(maxdiff(s,sss),0,'torch.svd')\n   mytester:asserteq(maxdiff(v,vv),0,'torch.svd')\n   mytester:asserteq(maxdiff(v,vvv),0,'torch.svd')\nend\nfunction torchtest.svd_reuse()\n   if not torch.svd then return end\n   local X = torch.randn(4,4)\n   local U, S, V = torch.svd(X)\n   local Xhat = torch.mm(U, torch.mm(S:diag(), V:t()))\n   mytester:assertTensorEq(X, Xhat, 1e-8, 'USV\\' wrong')\n\n   mytester:assert(not U:isContiguous(), 'U is contiguous')\n   torch.svd(U, S, V, X)\n   local Xhat = torch.mm(U, torch.mm(S:diag(), V:t()))\n   mytester:assertTensorEq(X, Xhat, 1e-8, 'USV\\' wrong')\nend\nfunction torchtest.svd_noncontig()\n   if not torch.svd then return end\n   local X = torch.randn(5,5)\n   local U = torch.zeros(5,2,5)[{ {}, 2, {} }]\n   local S = torch.zeros(5,2)[{ {}, 2 }]\n   local V = torch.zeros(5,2,5)[{ {}, 2, {} }]\n\n   mytester:assert(not U:isContiguous(), 'U is contiguous')\n   mytester:assert(not S:isContiguous(), 'S is contiguous')\n   mytester:assert(not V:isContiguous(), 'V is contiguous')\n   torch.svd(U, S, V, X)\n   local Xhat = torch.mm(U, torch.mm(S:diag(), V:t()))\n   mytester:assertTensorEq(X, Xhat, 1e-8, 'USV\\' 
wrong')\nend\nfunction torchtest.inverse()\n   if not torch.inverse then return end\n   local M = torch.randn(5,5)\n   local MI = torch.inverse(M)\n   local E = torch.eye(5)\n   mytester:assert(not MI:isContiguous(), 'MI is contiguous')\n   mytester:assertalmosteq(maxdiff(E,torch.mm(M,MI)), 0, 1e-8, 'inverse value')\n   mytester:assertalmosteq(maxdiff(E,torch.mm(MI,M)), 0, 1e-8, 'inverse value')\n\n   local MII = torch.Tensor(5,5)\n   torch.inverse(MII, M)\n   mytester:assert(not MII:isContiguous(), 'MII is contiguous')\n   mytester:asserteq(maxdiff(MII, MI), 0, 'inverse value in-place')\n   -- second call, now that MII is transposed\n   torch.inverse(MII, M)\n   mytester:assert(not MII:isContiguous(), 'MII is contiguous')\n   mytester:asserteq(maxdiff(MII, MI), 0, 'inverse value in-place')\nend\nfunction torchtest.conv2()\n   local x = torch.rand(math.floor(torch.uniform(50,100)),math.floor(torch.uniform(50,100)))\n   local k = torch.rand(math.floor(torch.uniform(10,20)),math.floor(torch.uniform(10,20)))\n   local imvc = torch.conv2(x,k)\n   local imvc2 = torch.conv2(x,k,'V')\n   local imfc = torch.conv2(x,k,'F')\n\n   local ki = k:clone();\n   local ks = k:storage()\n   local kis = ki:storage()\n   for i=ks:size(),1,-1 do kis[ks:size()-i+1]=ks[i] end\n   local imvx = torch.xcorr2(x,ki)\n   local imvx2 = torch.xcorr2(x,ki,'V')\n   local imfx = torch.xcorr2(x,ki,'F')\n\n   mytester:asserteq(maxdiff(imvc,imvc2),0,'torch.conv2')\n   mytester:asserteq(maxdiff(imvc,imvx),0,'torch.conv2')\n   mytester:asserteq(maxdiff(imvc,imvx2),0,'torch.conv2')\n   mytester:asserteq(maxdiff(imfc,imfx),0,'torch.conv2')\n   mytester:assertlt(math.abs(x:dot(x)-torch.xcorr2(x,x)[1][1]),1e-10,'torch.conv2')\n\n   local xx = torch.Tensor(2,x:size(1),x:size(2))\n   xx[1]:copy(x)\n   xx[2]:copy(x)\n   local kk = torch.Tensor(2,k:size(1),k:size(2))\n   kk[1]:copy(k)\n   kk[2]:copy(k)\n\n   local immvc = torch.conv2(xx,kk)\n   local immvc2 = torch.conv2(xx,kk,'V')\n   local immfc = 
torch.conv2(xx,kk,'F')\n\n   mytester:asserteq(maxdiff(immvc[1],immvc[2]),0,'torch.conv2')\n   mytester:asserteq(maxdiff(immvc[1],imvc),0,'torch.conv2')\n   mytester:asserteq(maxdiff(immvc2[1],imvc2),0,'torch.conv2')\n   mytester:asserteq(maxdiff(immfc[1],immfc[2]),0,'torch.conv2')\n   mytester:asserteq(maxdiff(immfc[1],imfc),0,'torch.conv2')\nend\n\nfunction torchtest.conv3()\n   local x = torch.rand(math.floor(torch.uniform(20,40)),\n                        math.floor(torch.uniform(20,40)),\n                        math.floor(torch.uniform(20,40)))\n   local k = torch.rand(math.floor(torch.uniform(5,10)),\n                        math.floor(torch.uniform(5,10)),\n                        math.floor(torch.uniform(5,10)))\n   local imvc = torch.conv3(x,k)\n   local imvc2 = torch.conv3(x,k,'V')\n   local imfc = torch.conv3(x,k,'F')\n\n   local ki = k:clone();\n   local ks = k:storage()\n   local kis = ki:storage()\n   for i=ks:size(),1,-1 do kis[ks:size()-i+1]=ks[i] end\n   local imvx = torch.xcorr3(x,ki)\n   local imvx2 = torch.xcorr3(x,ki,'V')\n   local imfx = torch.xcorr3(x,ki,'F')\n\n   mytester:asserteq(maxdiff(imvc,imvc2),0,'torch.conv3')\n   mytester:asserteq(maxdiff(imvc,imvx),0,'torch.conv3')\n   mytester:asserteq(maxdiff(imvc,imvx2),0,'torch.conv3')\n   mytester:asserteq(maxdiff(imfc,imfx),0,'torch.conv3')\n   mytester:assertlt(math.abs(x:dot(x)-torch.xcorr3(x,x)[1][1][1]),4*1e-10,'torch.conv3')\n\n   local xx = torch.Tensor(2,x:size(1),x:size(2),x:size(3))\n   xx[1]:copy(x)\n   xx[2]:copy(x)\n   local kk = torch.Tensor(2,k:size(1),k:size(2),k:size(3))\n   kk[1]:copy(k)\n   kk[2]:copy(k)\n\n   local immvc = torch.conv3(xx,kk)\n   local immvc2 = torch.conv3(xx,kk,'V')\n   local immfc = torch.conv3(xx,kk,'F')\n\n   mytester:asserteq(maxdiff(immvc[1],immvc[2]),0,'torch.conv3')\n   mytester:asserteq(maxdiff(immvc[1],imvc),0,'torch.conv3')\n   mytester:asserteq(maxdiff(immvc2[1],imvc2),0,'torch.conv3')\n   
mytester:asserteq(maxdiff(immfc[1],immfc[2]),0,'torch.conv3')\n   mytester:asserteq(maxdiff(immfc[1],imfc),0,'torch.conv3')\nend\n\nfunction torchtest.xcorr3_xcorr2_eq()\n    local ix = math.floor(torch.uniform(20,40))\n    local iy = math.floor(torch.uniform(20,40))\n    local iz = math.floor(torch.uniform(20,40))\n    local kx = math.floor(torch.uniform(5,10))\n    local ky = math.floor(torch.uniform(5,10))\n    local kz = math.floor(torch.uniform(5,10))\n\n    local x = torch.rand(ix,iy,iz)\n    local k = torch.rand(kx,ky,kz)\n\n    local o3 = torch.xcorr3(x,k)\n    local o32 = torch.zeros(o3:size())\n\n    for i=1,o3:size(1) do\n        for j=1,k:size(1) do\n            o32[i]:add(torch.xcorr2(x[i+j-1],k[j]))\n        end\n    end\n\n    mytester:assertlt(maxdiff(o3,o32),precision,'torch.conv3_conv2_eq')\nend\n\nfunction torchtest.fxcorr3_fxcorr2_eq()\n    local ix = math.floor(torch.uniform(20,40))\n    local iy = math.floor(torch.uniform(20,40))\n    local iz = math.floor(torch.uniform(20,40))\n    local kx = math.floor(torch.uniform(5,10))\n    local ky = math.floor(torch.uniform(5,10))\n    local kz = math.floor(torch.uniform(5,10))\n\n    local x = torch.rand(ix,iy,iz)\n    local k = torch.rand(kx,ky,kz)\n\n    local o3 = torch.xcorr3(x,k,'F')\n\n    local o32 = torch.zeros(o3:size())\n\n    for i=1,x:size(1) do\n        for j=1,k:size(1) do\n            o32[i+j-1]:add(torch.xcorr2(x[i],k[k:size(1)-j + 1],'F'))\n        end\n    end\n\n    mytester:assertlt(maxdiff(o3,o32),precision,'torch.conv3_conv2_eq')\nend\n\nfunction torchtest.conv3_conv2_eq()\n    local ix = math.floor(torch.uniform(20,40))\n    local iy = math.floor(torch.uniform(20,40))\n    local iz = math.floor(torch.uniform(20,40))\n    local kx = math.floor(torch.uniform(5,10))\n    local ky = math.floor(torch.uniform(5,10))\n    local kz = math.floor(torch.uniform(5,10))\n\n    local x = torch.rand(ix,iy,iz)\n    local k = torch.rand(kx,ky,kz)\n\n    local o3 = torch.conv3(x,k)\n    local o32 
= torch.zeros(o3:size())\n\n    for i=1,o3:size(1) do\n        for j=1,k:size(1) do\n            o32[i]:add(torch.conv2(x[i+j-1],k[k:size(1)-j+1]))\n        end\n    end\n\n    mytester:assertlt(maxdiff(o3,o32),precision,'torch.conv3_conv2_eq')\nend\n\nfunction torchtest.fconv3_fconv2_eq()\n    local ix = math.floor(torch.uniform(20,40))\n    local iy = math.floor(torch.uniform(20,40))\n    local iz = math.floor(torch.uniform(20,40))\n    local kx = math.floor(torch.uniform(5,10))\n    local ky = math.floor(torch.uniform(5,10))\n    local kz = math.floor(torch.uniform(5,10))\n\n    local x = torch.rand(ix,iy,iz)\n    local k = torch.rand(kx,ky,kz)\n\n    local o3 = torch.conv3(x,k,'F')\n\n    local o32 = torch.zeros(o3:size())\n\n    for i=1,x:size(1) do\n        for j=1,k:size(1) do\n            o32[i+j-1]:add(torch.conv2(x[i],k[j],'F'))\n        end\n    end\n\n    mytester:assertlt(maxdiff(o3,o32),precision,'torch.conv3_conv2_eq')\nend\n\nfunction torchtest.logical()\n   local x = torch.rand(100,100)*2-1;\n   local xx = x:clone()\n\n   local xgt = torch.gt(x,1)\n   local xlt = torch.lt(x,1)\n\n   local xeq = torch.eq(x,1)\n   local xne = torch.ne(x,1)\n\n   local neqs = xgt+xlt\n   local all = neqs + xeq\n   mytester:asserteq(neqs:sum(), xne:sum(), 'torch.logical')\n   mytester:asserteq(x:nElement(),all:double():sum() , 'torch.logical')\nend\n\nfunction torchtest.RNGState()\n   local state = torch.getRNGState()\n   local stateCloned = state:clone()\n   local before = torch.rand(1000)\n\n   mytester:assert(state:ne(stateCloned):long():sum() == 0, 'getRNGState should have value semantics, but appears to have reference semantics')\n\n   torch.setRNGState(state)\n   local after = torch.rand(1000)\n   mytester:assertTensorEq(before, after, 1e-16, 'getRNGState/setRNGState not generating same sequence')\nend\n\nfunction torchtest.RNGStateAliasing()\n    torch.manualSeed(1)\n    local unused = torch.uniform()\n\n    -- Fork the random number stream at this point\n    
local gen = torch.Generator()\n    torch.setRNGState(gen, torch.getRNGState())\n\n    local target_value = torch.rand(1000)\n    --Dramatically alter the internal state of the main generator\n    local also_unused = torch.rand(100000)\n    local forked_value = torch.rand(gen, 1000)\n    mytester:assertTensorEq(target_value, forked_value, 1e-16, \"RNG has not forked correctly.\")\nend\n\nfunction torchtest.serializeGenerator()\n   local generator = torch.Generator()\n   torch.manualSeed(generator, 123)\n   local differentGenerator = torch.Generator()\n   torch.manualSeed(differentGenerator, 124)\n   local serializedGenerator = torch.serialize(generator)\n   local deserializedGenerator = torch.deserialize(serializedGenerator)\n   local generated = torch.random(generator)\n   local differentGenerated = torch.random(differentGenerator)\n   local deserializedGenerated = torch.random(deserializedGenerator)\n   mytester:asserteq(generated, deserializedGenerated, 'torch.Generator changed internal state after being serialized')\n   mytester:assertne(generated, differentGenerated, 'Generators with different random seed should not produce the same output')\nend\n\nfunction torchtest.testBoxMullerState()\n    torch.manualSeed(123)\n    local odd_number = 101\n    local seeded = torch.randn(odd_number)\n    local state = torch.getRNGState()\n    local midstream = torch.randn(odd_number)\n    torch.setRNGState(state)\n    local repeat_midstream = torch.randn(odd_number)\n    torch.manualSeed(123)\n    local reseeded = torch.randn(odd_number)\n    mytester:assertTensorEq(midstream, repeat_midstream, 1e-16, 'getRNGState/setRNGState not generating same sequence of normally distributed numbers')\n    mytester:assertTensorEq(seeded, reseeded, 1e-16, 'repeated calls to manualSeed not generating same sequence of normally distributed numbers')\nend\n\nfunction torchtest.testCholesky()\n   local x = torch.rand(10,10)\n   local A = torch.mm(x, x:t())\n\n   ---- Default Case\n   local C = 
torch.potrf(A)\n   local B = torch.mm(C:t(), C)\n   mytester:assertTensorEq(A, B, 1e-14, 'potrf did not allow rebuilding the original matrix')\n\n    ---- Test Upper Triangular\n    local U = torch.potrf(A, 'U')\n          B = torch.mm(U:t(), U)\n    mytester:assertTensorEq(A, B, 1e-14, 'potrf (upper) did not allow rebuilding the original matrix')\n\n    ---- Test Lower Triangular\n    local L = torch.potrf(A, 'L')\n          B = torch.mm(L, L:t())\n    mytester:assertTensorEq(A, B, 1e-14, 'potrf (lower) did not allow rebuilding the original matrix')\nend\n\nfunction torchtest.potrs()\n   if not torch.potrs then return end\n   local a=torch.Tensor({{6.80, -2.11,  5.66,  5.97,  8.23},\n                         {-6.05, -3.30,  5.36, -4.44,  1.08},\n                         {-0.45,  2.58, -2.70,  0.27,  9.04},\n                         {8.32,  2.71,  4.35, -7.17,  2.14},\n                         {-9.67, -5.14, -7.26,  6.08, -6.87}}):t()\n   local b=torch.Tensor({{4.02,  6.19, -8.22, -7.57, -3.03},\n                         {-1.56,  4.00, -8.67,  1.75,  2.86},\n                         {9.81, -4.09, -4.57, -8.61,  8.99}}):t()\n\n   ---- Make sure 'a' is symmetric PSD\n   a = torch.mm(a, a:t())\n\n   ---- Upper Triangular Test\n   local U = torch.potrf(a, 'U')\n   local x = torch.potrs(b, U, 'U')\n   mytester:assertlt(b:dist(a*x),1e-12,\"torch.potrs; uplo='U'\")\n\n   ---- Lower Triangular Test\n   local L = torch.potrf(a, 'L')\n   x = torch.potrs(b, L, 'L')\n   mytester:assertlt(b:dist(a*x),1e-12,\"torch.potrs; uplo='L'\")\nend\n\nfunction torchtest.potri()\n   if not torch.potrs then return end\n   local a=torch.Tensor({{6.80, -2.11,  5.66,  5.97,  8.23},\n                         {-6.05, -3.30,  5.36, -4.44,  1.08},\n                         {-0.45,  2.58, -2.70,  0.27,  9.04},\n                         {8.32,  2.71,  4.35, -7.17,  2.14},\n                         {-9.67, -5.14, -7.26,  6.08, -6.87}}):t()\n\n   ---- Make sure 'a' is symmetric PSD\n   a = torch.mm(a, 
a:t())\n\n   ---- Compute inverse directly\n   local inv0 = torch.inverse(a)\n\n   ---- Default case\n   local chol = torch.potrf(a)\n   local inv1 = torch.potri(chol)\n   mytester:assertlt(inv0:dist(inv1),1e-12,\"torch.potri; uplo=''\")\n\n   ---- Upper Triangular Test\n   chol = torch.potrf(a, 'U')\n   inv1 = torch.potri(chol, 'U')\n   mytester:assertlt(inv0:dist(inv1),1e-12,\"torch.potri; uplo='U'\")\n\n   ---- Lower Triangular Test\n   chol = torch.potrf(a, 'L')\n   inv1 = torch.potri(chol, 'L')\n   mytester:assertlt(inv0:dist(inv1),1e-12,\"torch.potri; uplo='L'\")\nend\n\nfunction torchtest.pstrf()\n  local function checkPsdCholesky(a, uplo, inplace)\n    local u, piv, args, a_reconstructed\n    if inplace then\n      u = torch.Tensor(a:size())\n      piv = torch.IntTensor(a:size(1))\n      args = {u, piv, a}\n    else\n      args = {a}\n    end\n\n    if uplo then table.insert(args, uplo) end\n\n    u, piv = torch.pstrf(unpack(args))\n\n    if uplo == 'L' then\n      a_reconstructed = torch.mm(u, u:t())\n    else\n      a_reconstructed = torch.mm(u:t(), u)\n    end\n\n    piv = piv:long()\n    local a_permuted = a:index(1, piv):index(2, piv)\n    mytester:assertTensorEq(a_permuted, a_reconstructed, 1e-14,\n                            'torch.pstrf did not allow rebuilding the original matrix;' ..\n                            'uplo=' .. 
tostring(uplo))\n  end\n\n  local dimensions = { {5, 1}, {5, 3}, {5, 5}, {10, 10} }\n  for _, dim in pairs(dimensions) do\n    local m = torch.Tensor(unpack(dim)):uniform()\n    local a = torch.mm(m, m:t())\n    -- add a small number to the diagonal to make the matrix numerically positive semidefinite\n    for i = 1, m:size(1) do\n      a[i][i] = a[i][i] + 1e-7\n    end\n    checkPsdCholesky(a, nil, false)\n    checkPsdCholesky(a, 'U', false)\n    checkPsdCholesky(a, 'L', false)\n    checkPsdCholesky(a, nil, true)\n    checkPsdCholesky(a, 'U', true)\n    checkPsdCholesky(a, 'L', true)\n  end\nend\n\nfunction torchtest.testNumel()\n    local b = torch.ByteTensor(3, 100, 100)\n    mytester:asserteq(b:nElement(), 3*100*100, \"nElement not right\")\n    mytester:asserteq(b:numel(), 3*100*100, \"numel not right\")\nend\n\n\n-- Generate a tensor of size `size` whose values are ascending integers from\n-- `start` (or 1, if `start` is not given)\nlocal function consecutive(size, start)\n    local sequence = torch.ones(torch.Tensor(size):prod(1)[1]):cumsum(1)\n    if start then\n        sequence:add(start - 1)\n    end\n    return sequence:resize(unpack(size))\nend\n\nfunction torchtest.index()\n    local badIndexMsg = \"Lookup with valid index should return correct result\"\n    local reference = consecutive{3, 3, 3}\n    mytester:assertTensorEq(reference[1], consecutive{3, 3}, 1e-16, badIndexMsg)\n    mytester:assertTensorEq(reference[2], consecutive({3, 3}, 10), 1e-16, badIndexMsg)\n    mytester:assertTensorEq(reference[3], consecutive({3, 3}, 19), 1e-16, badIndexMsg)\n    mytester:assertTensorEq(reference[{1}], consecutive{3, 3}, 1e-16, badIndexMsg)\n    mytester:assertTensorEq(reference[{2}], consecutive({3, 3}, 10), 1e-16, badIndexMsg)\n    mytester:assertTensorEq(reference[{3}], consecutive({3, 3}, 19), 1e-16, badIndexMsg)\n    mytester:assertTensorEq(reference[{1,2}], consecutive({3}, 4), 1e-16, badIndexMsg)\n    mytester:assertTensorEq(reference[{{1,2}}], 
consecutive({2, 3, 3}), 1e-16, badIndexMsg)\n    mytester:asserteq(reference[{3, 3, 3}], 27, badIndexMsg)\n    mytester:assertTensorEq(reference[{}], consecutive{3, 3, 3}, 1e-16, badIndexMsg)\n\n    local shouldErrorMsg = \"Lookup with too many indices should error\"\n    mytester:assertError(function() return reference[{1, 1, 1, 1}] end, shouldErrorMsg)\n    mytester:assertError(function() return reference[{1, 1, 1, {1, 1}}] end, shouldErrorMsg)\n    mytester:assertError(function() return reference[{3, 3, 3, 3, 3, 3, 3, 3}] end, shouldErrorMsg)\nend\n\nfunction torchtest.newIndex()\n    local badIndexMsg = \"Assignment to valid index should produce correct result\"\n    local reference = consecutive{3, 3, 3}\n    -- This relies on __index__() being correct - but we have separate tests for that\n    local function checkPartialAssign(index)\n        local reference = torch.zeros(3, 3, 3)\n        reference[index] = consecutive{3, 3, 3}[index]\n        mytester:assertTensorEq(reference[index], consecutive{3, 3, 3}[index], 1e-16, badIndexMsg)\n        reference[index] = 0\n        mytester:assertTensorEq(reference, torch.zeros(3, 3, 3), 1e-16, badIndexMsg)\n    end\n\n    checkPartialAssign{1}\n    checkPartialAssign{2}\n    checkPartialAssign{3}\n    checkPartialAssign{1,2}\n    checkPartialAssign{2,3}\n    checkPartialAssign{1,3}\n    checkPartialAssign{}\n\n    local shouldErrorMsg = \"Assignment with too many indices should error\"\n    mytester:assertError(function() reference[{1, 1, 1, 1}] = 1 end, shouldErrorMsg)\n    mytester:assertError(function() reference[{1, 1, 1, {1, 1}}] = 1 end, shouldErrorMsg)\n    mytester:assertError(function() reference[{3, 3, 3, 3, 3, 3, 3, 3}] = 1 end, shouldErrorMsg)\nend\n\nfunction torchtest.indexCopy()\n   local nCopy, nDest = 3, 20\n   local dest = torch.randn(nDest,4,5)\n   local src = torch.randn(nCopy,4,5)\n   local idx = torch.randperm(nDest):narrow(1, 1, nCopy):long()\n   local dest2 = dest:clone()\n   dest:indexCopy(1, 
idx, src)\n   for i=1,idx:size(1) do\n      dest2[idx[i]]:copy(src[i])\n   end\n   mytester:assertTensorEq(dest, dest2, 0.000001, \"indexCopy tensor error\")\n\n   local dest = torch.randn(nDest)\n   local src = torch.randn(nCopy)\n   local idx = torch.randperm(nDest):narrow(1, 1, nCopy):long()\n   local dest2 = dest:clone()\n   dest:indexCopy(1, idx, src)\n   for i=1,idx:size(1) do\n      dest2[idx[i]] = src[i]\n   end\n   mytester:assertTensorEq(dest, dest2, 0.000001, \"indexCopy scalar error\")\nend\n\nfunction torchtest.indexAdd()\n   local nCopy, nDest = 3, 20\n   local dest = torch.randn(nDest,4,5)\n   local src = torch.randn(nCopy,4,5)\n   local idx = torch.randperm(nDest):narrow(1, 1, nCopy):long()\n   local dest2 = dest:clone()\n   dest:indexAdd(1, idx, src)\n   for i=1,idx:size(1) do\n      dest2[idx[i]]:add(src[i])\n   end\n   mytester:assertTensorEq(dest, dest2, 0.000001, \"indexAdd tensor error\")\n\n   local dest = torch.randn(nDest)\n   local src = torch.randn(nCopy)\n   local idx = torch.randperm(nDest):narrow(1, 1, nCopy):long()\n   local dest2 = dest:clone()\n   dest:indexAdd(1, idx, src)\n   for i=1,idx:size(1) do\n      dest2[idx[i]] = dest2[idx[i]] + src[i]\n   end\n   mytester:assertTensorEq(dest, dest2, 0.000001, \"indexAdd scalar error\")\nend\n\n-- Fill idx with valid indices.\nlocal function fillIdx(idx, dim, dim_size, elems_per_row, m, n, o)\n   for i = 1, (dim == 1 and 1 or m) do\n      for j = 1, (dim == 2 and 1 or n) do\n         for k = 1, (dim == 3 and 1 or o) do\n            local ii = {i, j, k}\n            ii[dim] = {}\n            idx[ii] = torch.randperm(dim_size)[{{1, elems_per_row}}]\n         end\n      end\n   end\nend\n\nfunction torchtest.gather()\n   local m, n, o = torch.random(10, 20), torch.random(10, 20), torch.random(10, 20)\n   local elems_per_row = torch.random(10)\n   local dim = torch.random(3)\n\n   local src = torch.randn(m, n, o)\n   local idx_size = {m, n, o}\n   idx_size[dim] = elems_per_row\n   local idx = 
torch.LongTensor():resize(unpack(idx_size))\n   fillIdx(idx, dim, src:size(dim), elems_per_row, m, n, o)\n\n   local actual = torch.gather(src, dim, idx)\n   local expected = torch.Tensor():resize(unpack(idx_size))\n   for i = 1, idx_size[1] do\n      for j = 1, idx_size[2] do\n         for k = 1, idx_size[3] do\n            local ii = {i, j, k}\n            ii[dim] = idx[i][j][k]\n            expected[i][j][k] = src[ii]\n         end\n      end\n   end\n   mytester:assertTensorEq(actual, expected, 0, \"Wrong values for gather\")\n\n   idx[1][1][1] = 23\n   mytester:assertError(function() torch.gather(src, dim, idx) end,\n                        \"Invalid index not detected\")\nend\n\nfunction torchtest.gatherMax()\n   local src = torch.randn(3, 4, 5)\n   local expected, idx = src:max(3)\n   local actual = torch.gather(src, 3, idx)\n   mytester:assertTensorEq(actual, expected, 0, \"Wrong values for gather\")\nend\n\nfunction torchtest.scatter()\n   local m, n, o = torch.random(10, 20), torch.random(10, 20), torch.random(10, 20)\n   local elems_per_row = torch.random(10)\n   local dim = torch.random(3)\n\n   local idx_size = {m, n, o}\n   idx_size[dim] = elems_per_row\n   local idx = torch.LongTensor():resize(unpack(idx_size))\n   fillIdx(idx, dim, ({m, n, o})[dim], elems_per_row, m, n, o)\n   local src = torch.Tensor():resize(unpack(idx_size)):normal()\n\n   local actual = torch.zeros(m, n, o):scatter(dim, idx, src)\n   local expected = torch.zeros(m, n, o)\n   for i = 1, idx_size[1] do\n      for j = 1, idx_size[2] do\n         for k = 1, idx_size[3] do\n            local ii = {i, j, k}\n            ii[dim] = idx[i][j][k]\n           expected[ii] = src[i][j][k]\n         end\n      end\n   end\n   mytester:assertTensorEq(actual, expected, 0, \"Wrong values for scatter\")\n\n   idx[1][1][1] = 34\n   mytester:assertError(function() torch.zeros(m, n, o):scatter(dim, idx, src) end,\n                        \"Invalid index not detected\")\nend\n\nfunction 
torchtest.scatterFill()\n   local m, n, o = torch.random(10, 20), torch.random(10, 20), torch.random(10, 20)\n   local elems_per_row = torch.random(10)\n   local dim = torch.random(3)\n\n   local val = torch.uniform()\n   local idx_size = {m, n, o}\n   idx_size[dim] = elems_per_row\n   local idx = torch.LongTensor():resize(unpack(idx_size))\n   fillIdx(idx, dim, ({m, n, o})[dim], elems_per_row, m, n, o)\n\n   local actual = torch.zeros(m, n, o):scatter(dim, idx, val)\n   local expected = torch.zeros(m, n, o)\n   for i = 1, idx_size[1] do\n      for j = 1, idx_size[2] do\n         for k = 1, idx_size[3] do\n            local ii = {i, j, k}\n            ii[dim] = idx[i][j][k]\n            expected[ii] = val\n         end\n      end\n   end\n   mytester:assertTensorEq(actual, expected, 0, \"Wrong values for scatter\")\n\n   idx[1][1][1] = 28\n   mytester:assertError(function() torch.zeros(m, n, o):scatter(dim, idx, val) end,\n                        \"Invalid index not detected\")\nend\n\nfunction torchtest.maskedCopy()\n   local nCopy, nDest = 3, 10\n   local dest = torch.randn(nDest)\n   local src = torch.randn(nCopy)\n   local mask = torch.ByteTensor{0,0,0,0,1,0,1,0,1,0}\n   local dest2 = dest:clone()\n   dest:maskedCopy(mask, src)\n   local j = 1\n   for i=1,nDest do\n      if mask[i] == 1 then\n         dest2[i] = src[j]\n         j = j + 1\n      end\n   end\n   mytester:assertTensorEq(dest, dest2, 0.000001, \"maskedCopy error\")\n\n   -- make source bigger than number of 1s in mask\n   src = torch.randn(nDest)\n   local ok = pcall(dest.maskedCopy, dest, mask, src)\n   mytester:assert(ok, \"maskedCopy incorrect complaint when\"\n\t\t      .. \" src is bigger than mask's one count\")\n\n   src = torch.randn(nCopy - 1) -- make src smaller. this should fail\n   local ok = pcall(dest.maskedCopy, dest, mask, src)\n   mytester:assert(not ok, \"maskedCopy not erroring when\"\n\t\t      .. 
\" src is smaller than mask's one count\")\nend\n\nfunction torchtest.maskedSelect()\n   local nSrc = 10\n   local src = torch.randn(nSrc)\n   local mask = torch.rand(nSrc):mul(2):floor():byte()\n   local dst = torch.Tensor()\n   dst:maskedSelect(src, mask)\n   local dst2 = {}\n   for i=1,nSrc do\n      if mask[i] == 1 then\n         table.insert(dst2, src[i])\n      end\n   end\n   mytester:assertTensorEq(dst, torch.DoubleTensor(dst2), 0.000001, \"maskedSelect error\")\nend\n\nfunction torchtest.maskedFill()\n   local nDst = 10\n   local dst = torch.randn(nDst)\n   local mask = torch.rand(nDst):mul(2):floor():byte()\n   local val = math.random()\n   local dst2 = dst:clone()\n   dst:maskedFill(mask, val)\n   for i=1,nDst do\n      if mask[i] == 1 then\n         dst2[i] = val\n      end\n   end\n   mytester:assertTensorEq(dst, dst2, 0.000001, \"maskedFill error\")\nend\n\nfunction torchtest.abs()\n   local size = 1000\n   local range = 1000\n   local original = torch.rand(size):mul(range)\n   -- Tensor filled with {-1,1}\n   local switch = torch.rand(size):mul(2):floor():mul(2):add(-1)\n\n   local types = {'torch.DoubleTensor', 'torch.FloatTensor', 'torch.LongTensor', 'torch.IntTensor'}\n   for k,t in ipairs(types) do\n      local data = original:type(t)\n      local switch = switch:type(t)\n      local input = torch.cmul(data, switch)\n      mytester:assertTensorEq(input:abs(), data, 1e-16, 'Error in abs() for '..t)\n   end\n\n   -- Checking that the right abs function is called for LongTensor\n   local bignumber\n   if torch.LongTensor():elementSize() > 4 then\n      bignumber = 2^31 + 1\n   else\n      bignumber = 2^15 + 1\n   end\n   local input = torch.LongTensor{-bignumber}\n   mytester:assertgt(input:abs()[1], 0, 'torch.abs(3)')\nend\n\nfunction torchtest.classInModule()\n    -- Need a global for this module\n    _mymodule123 = {}\n    local x = torch.class('_mymodule123.myclass')\n    mytester:assert(x ~= nil, 'Could not create class in module')\n    -- 
Remove the global\n    _G['_mymodule123'] = nil\n    debug.getregistry()['_mymodule123.myclass']=nil\nend\n\nfunction torchtest.classNoModule()\n    local x = torch.class('_myclass123')\n    mytester:assert(x ~= nil, 'Could not create class in module')\n    debug.getregistry()['_myclass123'] = nil\nend\n\nfunction torchtest.type()\n   local objects = {torch.DoubleTensor(), {}, nil, 2, \"asdf\"}\n   local types = {'torch.DoubleTensor', 'table', 'nil', 'number', 'string'}\n   for i,obj in ipairs(objects) do\n      mytester:assert(torch.type(obj) == types[i], \"wrong type \"..types[i])\n   end\nend\n\nfunction torchtest.isTypeOfInheritance()\n   do\n      local A = torch.class('A')\n      local B, parB = torch.class('B', 'A')\n      local C, parC = torch.class('C', 'A')\n   end\n   local a, b, c = A(), B(), C()\n\n   mytester:assert(torch.isTypeOf(a, 'A'), 'isTypeOf error, string spec')\n   mytester:assert(torch.isTypeOf(a, A), 'isTypeOf error, constructor')\n   mytester:assert(torch.isTypeOf(b, 'B'), 'isTypeOf error child class')\n   mytester:assert(torch.isTypeOf(b, B), 'isTypeOf error child class ctor')\n   mytester:assert(torch.isTypeOf(b, 'A'), 'isTypeOf error: inheritance')\n   mytester:assert(torch.isTypeOf(b, A), 'isTypeOf error: inheritance')\n   mytester:assert(not torch.isTypeOf(c, 'B'), 'isTypeOf error: common parent')\n   mytester:assert(not torch.isTypeOf(c, B), 'isTypeOf error: common parent')\n   debug.getregistry()['A'] = nil\n   debug.getregistry()['B'] = nil\n   debug.getregistry()['C'] = nil\nend\n\nfunction torchtest.isTypeOfPartial()\n    do\n      local TorchDummy = torch.class('TorchDummy')\n      local OtherTorchDummy = torch.class('OtherTorchDummy')\n      local TorchMember = torch.class('TorchMember')\n      local OtherTorchMember = torch.class('OtherTorchMember')\n      local FirstTorchMember = torch.class('FirstTorchMember',\n                                           'TorchMember')\n      local SecondTorchMember = 
torch.class('SecondTorchMember',\n                                            'TorchMember')\n      local ThirdTorchMember = torch.class('ThirdTorchMember',\n                                           'OtherTorchMember')\n   end\n   local td, otd = TorchDummy(), OtherTorchDummy()\n   local tm, ftm, stm, ttm = TorchMember(), FirstTorchMember(),\n      SecondTorchMember(), ThirdTorchMember()\n\n   mytester:assert(not torch.isTypeOf(td, 'OtherTorchDummy'),\n                   'isTypeOf error: incorrect partial match')\n   mytester:assert(not torch.isTypeOf(otd, 'TorchDummy'),\n                   'isTypeOf error: incorrect partial match')\n   mytester:assert(torch.isTypeOf(tm, 'TorchMember'),\n                   'isTypeOf error, string spec')\n   mytester:assert(torch.isTypeOf(tm, TorchMember),\n                   'isTypeOf error, constructor')\n   mytester:assert(torch.isTypeOf(ftm, 'FirstTorchMember'),\n                   'isTypeOf error child class')\n   mytester:assert(torch.isTypeOf(ftm, FirstTorchMember),\n                   'isTypeOf error child class ctor')\n   mytester:assert(torch.isTypeOf(ftm, 'TorchMember'),\n                   'isTypeOf error: inheritance')\n   mytester:assert(torch.isTypeOf(ftm, TorchMember),\n                   'isTypeOf error: inheritance')\n   mytester:assert(not torch.isTypeOf(stm, 'FirstTorchMember'),\n                   'isTypeOf error: common parent')\n   mytester:assert(not torch.isTypeOf(stm, FirstTorchMember),\n                   'isTypeOf error: common parent')\n   mytester:assert(not torch.isTypeOf(ttm, TorchMember),\n                   'isTypeOf error: inheritance')\n   mytester:assert(not torch.isTypeOf(ttm, 'TorchMember'),\n                   'isTypeOf error: inheritance')\n   debug.getregistry()['TorchDummy'] = nil\n   debug.getregistry()['OtherTorchDummy'] = nil\n   debug.getregistry()['TorchMember'] = nil\n   debug.getregistry()['OtherTorchMember'] = nil\n   debug.getregistry()['FirstTorchMember'] = nil\n   
debug.getregistry()['SecondTorchMember'] = nil\n   debug.getregistry()['ThirdTorchMember'] = nil\nend\n\nfunction torchtest.isTypeOfPattern()\n   local t = torch.LongTensor()\n   mytester:assert(torch.isTypeOf(t, torch.LongTensor),\n                   'isTypeOf error: incorrect match')\n   mytester:assert(not torch.isTypeOf(t, torch.IntTensor),\n                   'isTypeOf error: incorrect match')\n   mytester:assert(torch.isTypeOf(t, 'torch.LongTensor'),\n                   'isTypeOf error: incorrect match')\n   mytester:assert(not torch.isTypeOf(t, 'torch.Long'),\n                   'isTypeOf error: incorrect match')\n   mytester:assert(torch.isTypeOf(t, 'torch.*Tensor'),\n                   'isTypeOf error: incorrect match')\n   mytester:assert(torch.isTypeOf(t, '.*Long'),\n                   'isTypeOf error: incorrect match')\n   mytester:assert(not torch.isTypeOf(t, 'torch.IntTensor'),\n                   'isTypeOf error: incorrect match')\nend\n\nfunction torchtest.isTensor()\n   for k,v in ipairs({\"real\", \"half\"}) do\n      torchtest_isTensor(torch.getmetatable(torch.Tensor():type())[v])\n   end\nend\n\nfunction torchtest_isTensor(func)\n   local t = func(torch.randn(3,4))\n   mytester:assert(torch.isTensor(t), 'error in isTensor')\n   mytester:assert(torch.isTensor(t[1]), 'error in isTensor for subTensor')\n   mytester:assert(not torch.isTensor(t[1][2]), 'false positive in isTensor')\n   mytester:assert(torch.Tensor.isTensor(t), 'alias not working')\nend\n\nfunction torchtest.isStorage()\n   for k,v in ipairs({\"real\", \"half\"}) do\n      torchtest_isStorage(torch.getmetatable(torch.Tensor():type())[v])\n   end\nend\n\nfunction torchtest_isStorage(func)\n  local t = torch.randn(3,4)\n  mytester:assert(torch.isStorage(t:storage()), 'error in isStorage')\n  mytester:assert(not torch.isStorage(t), 'false positive in isStorage')\nend\n\nfunction torchtest.view()\n   for k,v in ipairs({\"real\", \"half\"}) do\n      
torchtest_view(torch.getmetatable(torch.Tensor():type())[v])\n   end\nend\n\nfunction torchtest_view(func)\n   local tensor = func(torch.rand(15))\n   local template = func(torch.rand(3,5))\n   local target = template:size():totable()\n   mytester:assertTableEq(tensor:viewAs(template):size():totable(), target, 'Error in viewAs')\n   mytester:assertTableEq(tensor:view(3,5):size():totable(), target, 'Error in view')\n   mytester:assertTableEq(tensor:view(torch.LongStorage{3,5}):size():totable(), target, 'Error in view using LongStorage')\n   mytester:assertTableEq(tensor:view(-1,5):size():totable(), target, 'Error in view using dimension -1')\n   mytester:assertTableEq(tensor:view(3,-1):size():totable(), target, 'Error in view using dimension -1')\n   local tensor_view = tensor:view(5,3)\n   tensor_view:fill(torch.rand(1)[1])\n   mytester:asserteq((tensor_view-tensor):abs():max(), 0, 'Error in view')\n\n   local target_tensor = func(torch.Tensor())\n   mytester:assertTableEq(target_tensor:viewAs(tensor, template):size():totable(), target, 'Error in viewAs')\n   mytester:assertTableEq(target_tensor:view(tensor, 3,5):size():totable(), target, 'Error in view')\n   mytester:assertTableEq(target_tensor:view(tensor, torch.LongStorage{3,5}):size():totable(), target, 'Error in view using LongStorage')\n   mytester:assertTableEq(target_tensor:view(tensor, -1,5):size():totable(), target, 'Error in view using dimension -1')\n   mytester:assertTableEq(target_tensor:view(tensor, 3,-1):size():totable(), target, 'Error in view using dimension -1')\n   target_tensor:fill(torch.rand(1)[1])\n   mytester:asserteq((target_tensor-tensor):abs():max(), 0, 'Error in viewAs')\nend\n\nfunction torchtest.expand()\n   for k,v in ipairs({\"real\", \"half\"}) do\n      torchtest_expand(torch.getmetatable(torch.Tensor():type())[v])\n   end\nend\n\nfunction torchtest_expand(func)\n   local result = func(torch.Tensor())\n   local tensor = func(torch.rand(8,1))\n   local template = 
func(torch.rand(8,5))\n   local target = template:size():totable()\n   mytester:assertTableEq(tensor:expandAs(template):size():totable(), target, 'Error in expandAs')\n   mytester:assertTableEq(tensor:expand(8,5):size():totable(), target, 'Error in expand')\n   mytester:assertTableEq(tensor:expand(torch.LongStorage{8,5}):size():totable(), target, 'Error in expand using LongStorage')\n   result:expandAs(tensor,template)\n   mytester:assertTableEq(result:size():totable(), target, 'Error in expandAs using result')\n   result:expand(tensor,8,5)\n   mytester:assertTableEq(result:size():totable(), target, 'Error in expand using result')\n   result:expand(tensor,torch.LongStorage{8,5})\n   mytester:assertTableEq(result:size():totable(), target, 'Error in expand using result and LongStorage')\n   mytester:asserteq((result:mean(2):view(8,1)-tensor):abs():max(), 0, 'Error in expand (not equal)')\nend\n\nfunction torchtest.repeatTensor()\n   for k,v in ipairs({\"real\", \"half\"}) do\n      torchtest_repeatTensor(torch.getmetatable(torch.Tensor():type())[v])\n   end\nend\n\nfunction torchtest_repeatTensor(func, mean)\n   local result = func(torch.Tensor())\n   local tensor = func(torch.rand(8,4))\n   local size = {3,1,1}\n   local sizeStorage = torch.LongStorage(size)\n   local target = {3,8,4}\n   mytester:assertTableEq(tensor:repeatTensor(unpack(size)):size():totable(), target, 'Error in repeatTensor')\n   mytester:assertTableEq(tensor:repeatTensor(sizeStorage):size():totable(), target, 'Error in repeatTensor using LongStorage')\n   result:repeatTensor(tensor,unpack(size))\n   mytester:assertTableEq(result:size():totable(), target, 'Error in repeatTensor using result')\n   result:repeatTensor(tensor,sizeStorage)\n   mytester:assertTableEq(result:size():totable(), target, 'Error in repeatTensor using result and LongStorage')\n   mytester:asserteq((result:mean(1):view(8,4)-tensor):abs():max(), 0, 'Error in repeatTensor (not equal)')\nend\n\nfunction torchtest.isSameSizeAs()\n 
  for k,v in ipairs({\"real\", \"half\"}) do\n      torchtest_isSameSizeAs(torch.getmetatable(torch.Tensor():type())[v])\n   end\nend\n\nfunction torchtest_isSameSizeAs(func)\n   local t1 = func(torch.Tensor(3, 4, 9, 10))\n   local t2 = func(torch.Tensor(3, 4))\n   local t3 = func(torch.Tensor(1, 9, 3, 3))\n   local t4 = func(torch.Tensor(3, 4, 9, 10))\n\n   mytester:assert(t1:isSameSizeAs(t2) == false, \"wrong answer \")\n   mytester:assert(t1:isSameSizeAs(t3) == false, \"wrong answer \")\n   mytester:assert(t1:isSameSizeAs(t4) == true, \"wrong answer \")\nend\n\nfunction torchtest.isSetTo()\n   for k,v in ipairs({\"real\", \"half\"}) do\n      torchtest_isSetTo(torch.getmetatable(torch.Tensor():type())[v])\n   end\nend\n\nfunction torchtest_isSetTo(func)\n   local t1 = func(torch.Tensor(3, 4, 9, 10))\n   local t2 = func(torch.Tensor(3, 4, 9, 10))\n   local t3 = func(torch.Tensor()):set(t1)\n   local t4 = t3:reshape(12, 90)\n   mytester:assert(t1:isSetTo(t2) == false, \"tensors do not share storage\")\n   mytester:assert(t1:isSetTo(t3) == true, \"tensor is set to other\")\n   mytester:assert(t3:isSetTo(t1) == true, \"isSetTo should be symmetric\")\n   mytester:assert(t1:isSetTo(t4) == false, \"tensors have different view\")\n   mytester:assert(not func(torch.Tensor()):isSetTo(func(torch.Tensor())),\n                   \"Tensors with no storages should not appear to be set \" ..\n                   \"to each other\")\nend\n\nfunction torchtest.equal()\n  -- Contiguous, 1D\n  local t1 = torch.Tensor{3, 4, 9, 10}\n  local t2 = t1:clone()\n  local t3 = torch.Tensor{1, 9, 3, 10}\n  local t4 = torch.Tensor{3, 4, 9}\n  local t5 = torch.Tensor()\n  mytester:assert(t1:equal(t2) == true, \"wrong answer \")\n  mytester:assert(t1:equal(t3) == false, \"wrong answer \")\n  mytester:assert(t1:equal(t4) == false, \"wrong answer \")\n  mytester:assert(t1:equal(t5) == false, \"wrong answer \")\n  mytester:assert(torch.equal(t1, t2) == true, \"wrong answer \")\n  
mytester:assert(torch.equal(t1, t3) == false, \"wrong answer \")\n  mytester:assert(torch.equal(t1, t4) == false, \"wrong answer \")\n  mytester:assert(torch.equal(t1, t5) == false, \"wrong answer \")\n\n  -- Non contiguous, 2D\n  local s = torch.Tensor({{1, 2, 3, 4}, {5, 6, 7, 8}})\n  local s1 = s[{{}, {2, 3}}]\n  local s2 = s1:clone()\n  local s3 = torch.Tensor({{2, 3}, {6, 7}})\n  local s4 = torch.Tensor({{0, 0}, {0, 0}})\n\n  mytester:assert(not s1:isContiguous(), \"wrong answer \")\n  mytester:assert(s1:equal(s2) == true, \"wrong answer \")\n  mytester:assert(s1:equal(s3) == true, \"wrong answer \")\n  mytester:assert(s1:equal(s4) == false, \"wrong answer \")\n  mytester:assert(torch.equal(s1, s2) == true, \"wrong answer \")\n  mytester:assert(torch.equal(s1, s3) == true, \"wrong answer \")\n  mytester:assert(torch.equal(s1, s4) == false, \"wrong answer \")\nend\n\nfunction torchtest.isSize()\n   for k,v in ipairs({\"real\", \"half\"}) do\n      torchtest_isSize(torch.getmetatable(torch.Tensor():type())[v])\n   end\nend\n\nfunction torchtest_isSize(func)\n  local t1 = func(torch.Tensor(3, 4, 5))\n  local s1 = torch.LongStorage({3, 4, 5})\n  local s2 = torch.LongStorage({5, 4, 3})\n\n   mytester:assert(t1:isSize(s1) == true, \"wrong answer \")\n   mytester:assert(t1:isSize(s2) == false, \"wrong answer \")\n   mytester:assert(t1:isSize(t1:size()) == true, \"wrong answer \")\nend\n\nfunction torchtest.elementSize()\n  local byte   =   torch.ByteStorage():elementSize()\n  local char   =   torch.CharStorage():elementSize()\n  local short  =  torch.ShortStorage():elementSize()\n  local int    =    torch.IntStorage():elementSize()\n  local long   =   torch.LongStorage():elementSize()\n  local float  =  torch.FloatStorage():elementSize()\n  local double = torch.DoubleStorage():elementSize()\n  local half = torch.HalfStorage():elementSize()\n\n  mytester:asserteq(byte,   torch.ByteTensor():elementSize())\n  mytester:asserteq(char,   torch.CharTensor():elementSize())\n  
mytester:asserteq(short,  torch.ShortTensor():elementSize())\n  mytester:asserteq(int,    torch.IntTensor():elementSize())\n  mytester:asserteq(long,   torch.LongTensor():elementSize())\n  mytester:asserteq(float,  torch.FloatTensor():elementSize())\n  mytester:asserteq(double, torch.DoubleTensor():elementSize())\n  mytester:asserteq(half, torch.HalfTensor():elementSize())\n\n  mytester:assertne(byte, 0)\n  mytester:assertne(char, 0)\n  mytester:assertne(short, 0)\n  mytester:assertne(int, 0)\n  mytester:assertne(long, 0)\n  mytester:assertne(float, 0)\n  mytester:assertne(double, 0)\n  mytester:assertne(half, 0)\n\n  -- These tests are portable, not necessarily strict for your system.\n  mytester:asserteq(byte, 1)\n  mytester:asserteq(char, 1)\n  mytester:assert(short >= 2)\n  mytester:assert(int >= 2)\n  mytester:assert(int >= short)\n  mytester:assert(long >= 4)\n  mytester:assert(long >= int)\n  mytester:assert(double >= float)\n  mytester:assert(half <= float)\nend\n\nfunction torchtest.split()\n   for k,v in ipairs({\"real\", \"half\"}) do\n      torchtest_split(torch.getmetatable(torch.Tensor():type())[v])\n   end\nend\n\nfunction torchtest_split(func)\n   local result = {}\n   local tensor = func(torch.rand(7,4))\n   local splitSize = 3\n   local targetSize = {{3,4},{3,4},{1,4}}\n   local dim = 1\n   local splits = tensor:split(splitSize, dim)\n   local start = 1\n   for i, split in ipairs(splits) do\n      mytester:assertTableEq(split:size():totable(), targetSize[i], 'Size error in split '..i)\n      mytester:assertTensorEq(tensor:narrow(dim, start, targetSize[i][dim]), split, 0.00001, 'Content error in split '..i)\n      start = start + targetSize[i][dim]\n   end\n   torch.split(result, tensor, splitSize, dim)\n   local start = 1\n   for i, split in ipairs(result) do\n      mytester:assertTableEq(split:size():totable(), targetSize[i], 'Result size error in split '..i)\n      mytester:assertTensorEq(tensor:narrow(dim, start, targetSize[i][dim]), split, 
0.000001, 'Result content error in split '..i)\n      start = start + targetSize[i][dim]\n   end\n   mytester:asserteq(#splits, #result, 'Non-consistent output size from split')\n   for i, split in ipairs(splits) do\n      mytester:assertTensorEq(split,result[i], 0, 'Non-consistent outputs from split')\n   end\nend\n\nfunction torchtest.chunk()\n   for k,v in ipairs({\"real\", \"half\"}) do\n      torchtest_chunk(torch.getmetatable(torch.Tensor():type())[v])\n   end\nend\n\nfunction torchtest_chunk(func)\n   local result = {}\n   local tensor = func(torch.rand(4,7))\n   local nChunk = 3\n   local targetSize = {{4,3},{4,3},{4,1}}\n   local dim = 2\n   local splits = tensor:chunk(nChunk, dim)\n   local start = 1\n   for i, split in ipairs(splits) do\n      mytester:assertTableEq(split:size():totable(), targetSize[i], 'Size error in chunk '..i)\n      mytester:assertTensorEq(tensor:narrow(dim, start, targetSize[i][dim]), split, 0.00001, 'Content error in chunk '..i)\n      start = start + targetSize[i][dim]\n   end\n   -- exercise the result-table form of chunk itself (not split)\n   torch.chunk(result, tensor, nChunk, dim)\n   local start = 1\n   for i, split in ipairs(result) do\n      mytester:assertTableEq(split:size():totable(), targetSize[i], 'Result size error in chunk '..i)\n      mytester:assertTensorEq(tensor:narrow(dim, start, targetSize[i][dim]), split, 0.000001, 'Result content error in chunk '..i)\n      start = start + targetSize[i][dim]\n   end\nend\n\nfunction torchtest.table()\n   local convStorage = {\n     ['real'] = 'FloatStorage',\n     ['half'] = 'HalfStorage'\n   }\n   -- convStorage has string keys only, so it must be walked with pairs;\n   -- ipairs would visit nothing and the test would silently be skipped\n   for k,v in pairs(convStorage) do\n      torchtest_totable(torch.getmetatable(torch.Tensor():type())[k], v)\n   end\nend\n\nfunction torchtest_totable(func, storageType)\n  local table0D = {}\n  local tensor0D = func(torch.Tensor(table0D))\n  mytester:assertTableEq(torch.totable(tensor0D), table0D, 'tensor0D:totable incorrect')\n\n  local table1D = {1, 2, 3}\n  local tensor1D = func(torch.Tensor(table1D))\n  local storage = 
torch[storageType](table1D)\n  mytester:assertTableEq(tensor1D:totable(), table1D, 'tensor1D:totable incorrect')\n  mytester:assertTableEq(storage:totable(), table1D, 'storage:totable incorrect')\n  mytester:assertTableEq(torch.totable(tensor1D), table1D, 'torch.totable incorrect for Tensors')\n  mytester:assertTableEq(torch.totable(storage), table1D, 'torch.totable incorrect for Storages')\n\n  local table2D = {{1, 2}, {3, 4}}\n  local tensor2D = func(torch.Tensor(table2D))\n  mytester:assertTableEq(tensor2D:totable(), table2D, 'tensor2D:totable incorrect')\n\n  local tensor3D = func(torch.Tensor({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}}))\n  local tensorNonContig = tensor3D:select(2, 2)\n  mytester:assert(not tensorNonContig:isContiguous(), 'invalid test')\n  mytester:assertTableEq(tensorNonContig:totable(), {{3, 4}, {7, 8}},\n                         'totable() incorrect for non-contiguous tensors')\nend\n\nfunction torchtest.permute()\n   for k,v in ipairs({\"real\", \"half\"}) do\n      torchtest_permute(torch.getmetatable(torch.Tensor():type())[v])\n   end\nend\n\nfunction torchtest_permute(func)\n  local orig = {1,2,3,4,5,6,7}\n  local perm = torch.randperm(7):totable()\n  local x = torch.Tensor(unpack(orig)):fill(0)\n  local new = x:permute(unpack(perm)):size():totable()\n  mytester:assertTableEq(perm, new, 'Tensor:permute incorrect')\n  mytester:assertTableEq(x:size():totable(), orig, 'Tensor:permute changes tensor')\nend\n\nfunction torchtest.serialize()\n   local tableObj = {6, a = 42}\n   local tensObj = torch.randn(3,4,5)\n\n   -- Test serializing a table\n   local serString = torch.serialize(tableObj)\n   local serStorage = torch.serializeToStorage(tableObj)\n   mytester:assertTableEq(tableObj, torch.deserialize(serString))\n   mytester:assertTableEq(tableObj, torch.deserializeFromStorage(serStorage))\n\n   -- Test serializing a Tensor\n   serString = torch.serialize(tensObj)\n   serStorage = torch.serializeToStorage(tensObj)\n   
mytester:assertTensorEq(tensObj, torch.deserialize(serString), 1e-10)\n   mytester:assertTensorEq(tensObj, torch.deserializeFromStorage(serStorage), 1e-10)\nend\n\nfunction torchtest.storageview()\n   local s1 = torch.LongStorage({3, 4, 5})\n   local s2 = torch.LongStorage(s1, 2)\n\n   mytester:assert(s2:size() == 2, \"should be size 2\")\n   mytester:assert(s2[1] == s1[2], \"should have 4 at position 1\")\n   mytester:assert(s2[2] == s1[3], \"should have 5 at position 2\")\n\n   s2[1] = 13\n   mytester:assert(13 == s1[2], \"should have 13 at position 1\")\nend\n\nfunction torchtest.nonzero()\n  local nSrc = 12\n\n  local types = {\n      'torch.ByteTensor',\n      'torch.CharTensor',\n      'torch.ShortTensor',\n      'torch.IntTensor',\n      'torch.FloatTensor',\n      'torch.DoubleTensor',\n      'torch.LongTensor',\n  }\n\n  local shapes = {\n      torch.LongStorage{12},\n      torch.LongStorage{12, 1},\n      torch.LongStorage{1, 12},\n      torch.LongStorage{6, 2},\n      torch.LongStorage{3, 2, 2},\n  }\n\n  for _, type in ipairs(types) do\n    local tensor = torch.rand(nSrc):mul(2):floor():type(type)\n      for _, shape in ipairs(shapes) do\n        tensor = tensor:reshape(shape)\n        local dst1 = torch.nonzero(tensor)\n        local dst2 = tensor:nonzero()\n        -- Does not work. Torch uses the first argument to determine what\n        -- type the Tensor is expected to be. 
In our case the second argument\n        -- determines the type of Tensor.\n        --local dst3 = torch.LongTensor()\n        --torch.nonzero(dst3, tensor)\n        -- However, there are workarounds to this issue when it is desired to\n        -- use an existing tensor for the result:\n        local dst4 = torch.LongTensor()\n        tensor.nonzero(dst4, tensor)\n        if shape:size() == 1 then\n          local dst = {}\n          for i = 1 , nSrc do\n            if tensor[i] ~= 0 then\n              table.insert(dst, i)\n            end\n          end\n          mytester:assertTensorEq(dst1:select(2, 1), torch.LongTensor(dst), 0.0,\n                                  \"nonzero error\")\n          mytester:assertTensorEq(dst2:select(2, 1), torch.LongTensor(dst), 0.0,\n                                  \"nonzero error\")\n          --mytester:assertTensorEq(dst3:select(2, 1), torch.LongTensor(dst),\n          --                        0.0,  \"nonzero error\")\n          mytester:assertTensorEq(dst4:select(2, 1), torch.LongTensor(dst), 0.0,\n                                  \"nonzero error\")\n        elseif shape:size() == 2 then\n          -- This test will allow through some false positives. It only checks\n          -- that the elements flagged positive are indeed non-zero.\n          for i=1,dst1:size()[1] do\n            mytester:assert(tensor[dst1[i][1]][dst1[i][2]] ~= 0)\n          end\n        elseif shape:size() == 3 then\n          -- This test will allow through some false positives. 
It only checks\n          -- that the elements flagged positive are indeed non-zero.\n          for i=1,dst1:size()[1] do\n            mytester:assert(tensor[dst1[i][1]][dst1[i][2]][dst1[i][3]] ~= 0)\n          end\n        end\n      end\n   end\n\nend\n\nfunction torchtest.testheaptracking()\n  local oldheaptracking = torch._heaptracking\n  if oldheaptracking == nil then\n    oldheaptracking = false\n  end\n  torch.setheaptracking(true)\n  mytester:assert(torch._heaptracking == true, 'Heap tracking expected true')\n\n  torch.setheaptracking(false)\n  mytester:assert(torch._heaptracking == false, 'Heap tracking expected false')\n\n  -- put heap tracking to its original state\n  torch.setheaptracking(oldheaptracking)\nend\n\nfunction torchtest.bernoulli()\n  local size = torch.LongStorage{10, 10}\n  local t = torch.ByteTensor(size)\n\n  local function isBinary(t)\n    return torch.ne(t, 0):cmul(torch.ne(t, 1)):sum() == 0\n  end\n\n  local p = 0.5\n  t:bernoulli(p)\n  mytester:assert(isBinary(t), 'Sample from torch.bernoulli is not binary')\n\n  local p = torch.rand(size)\n  t:bernoulli(p)\n  mytester:assert(isBinary(t), 'Sample from torch.bernoulli is not binary')\nend\n\nfunction torchtest.logNormal()\n    local t = torch.FloatTensor(10, 10)\n    local mean, std = torch.uniform(), 0.1 * torch.uniform()\n    local tolerance = 0.02\n\n    t:logNormal(mean, std)\n    local logt = t:log()\n    mytester:assertalmosteq(logt:mean(), mean, tolerance, 'mean is wrong')\n    mytester:assertalmosteq(logt:std(), std, tolerance, 'tolerance is wrong')\nend\n\nfunction torch.test(tests)\n   torch.setheaptracking(true)\n   math.randomseed(os.time())\n   if torch.getdefaulttensortype() == 'torch.FloatTensor' then\n      precision = 1e-4\n   elseif  torch.getdefaulttensortype() == 'torch.DoubleTensor' then\n      precision = 1e-8\n   end\n   mytester = torch.Tester()\n   mytester:add(torchtest)\n   mytester:run(tests)\n   return mytester\nend\n"
  },
  {
    "path": "test/test_Multinomial.lua",
    "content": "-- Multinomial sampling check for rare events (see https://github.com/torch/torch7/issues/418)\n-- doubling as a performance probe (see https://github.com/torch/torch7/issues/453)\n\nsys.tic()\ndo\n   local N = 1001000\n   local p = torch.FloatTensor(1001000):fill(1)\n   -- make the slice 50001..100000 a thousand times less likely, then normalize\n   p:narrow(1, 50001, 50000):fill(1e-3)\n   p:div(p:sum())\n\n   local n = 0\n   local c = torch.LongTensor(p:nElement()):zero()\n   -- shift the raw pointer by one so it can be indexed with 1-based samples\n   local c_ptr = c:data() - 1\n   local tmp = torch.LongTensor()\n   for _ = 1, 100 do\n      p.multinomial(tmp, p, N, true)\n      n = n + N\n      tmp:apply(function(idx) c_ptr[idx] = c_ptr[idx] + 1 end)\n   end\n\n   -- compare observed and expected hit counts over the rare slice\n   local actual = c:narrow(1, 50001, 50000):sum()\n   local expected = n*p:narrow(1, 50001, 50000):sum()\n   print('Actual, Expected: ', actual, expected)\nend\nprint('Time spent: ', sys.toc())\n"
  },
  {
    "path": "test/test_Tester.lua",
    "content": "require 'torch'\n\nlocal tester = torch.Tester()\n\nlocal MESSAGE = \"a really useful informative error message\"\n\nlocal subtester = torch.Tester()\n-- The message only interests us in case of failure\nsubtester._success = function(self) return true, MESSAGE end\nsubtester._failure = function(self, message) return false, message end\n\nlocal tests = torch.TestSuite()\n\nlocal test_name_passed_to_setUp\nlocal calls_to_setUp = 0\nlocal calls_to_tearDown = 0\n\nlocal originalIoWrite = io.write\nlocal function disableIoWrite()\n   io.write = function() end\nend\nlocal function enableIoWrite()\n   io.write = originalIoWrite\nend\n\nlocal function meta_assert_success(success, message)\n   tester:assert(success == true, \"assert wasn't successful\")\n   tester:assert(string.find(message, MESSAGE) ~= nil, \"message doesn't match\")\nend\nlocal function meta_assert_failure(success, message)\n   tester:assert(success == false, \"assert didn't fail\")\n   tester:assert(string.find(message, MESSAGE) ~= nil, \"message doesn't match\")\nend\n\nfunction tests.really_test_assert()\n   assert((subtester:assert(true, MESSAGE)),\n          \"subtester:assert doesn't actually work!\")\n   assert(not (subtester:assert(false, MESSAGE)),\n          \"subtester:assert doesn't actually work!\")\nend\n\nfunction tests.setEarlyAbort()\n   disableIoWrite()\n\n   for _, earlyAbort in ipairs{false, true} do\n      local myTester = torch.Tester()\n\n      local invokedCount = 0\n      local myTests = {}\n      function myTests.t1()\n         invokedCount = invokedCount + 1\n         myTester:assert(false)\n      end\n      myTests.t2 = myTests.t1\n\n      myTester:setEarlyAbort(earlyAbort)\n      myTester:add(myTests)\n      pcall(myTester.run, myTester)\n\n      tester:assert(invokedCount == (earlyAbort and 1 or 2),\n                    \"wrong number of tests invoked for use with earlyAbort\")\n   end\n\n   enableIoWrite()\nend\n\nfunction tests.setRethrowErrors()\n   
disableIoWrite()\n\n   local myTester = torch.Tester()\n   myTester:setRethrowErrors(true)\n   myTester:add(function() error(\"a throw\") end)\n\n   tester:assertErrorPattern(function() myTester:run() end,\n                             \"a throw\",\n                             \"error should be rethrown\")\n\n   enableIoWrite()\nend\n\nfunction tests.disable()\n   disableIoWrite()\n\n   for disableCount = 1, 2 do\n      local myTester = torch.Tester()\n      local tests = {}\n      local test1Invoked = false\n      local test2Invoked = false\n      function tests.test1()\n         test1Invoked = true\n      end\n      function tests.test2()\n         test2Invoked = true\n      end\n      myTester:add(tests)\n\n      if disableCount == 1 then\n         myTester:disable('test1'):run()\n         tester:assert((not test1Invoked) and test2Invoked,\n                       \"disabled test shouldn't have been invoked\")\n      else\n         myTester:disable({'test1', 'test2'}):run()\n         tester:assert((not test1Invoked) and (not test2Invoked),\n                       \"disabled tests shouldn't have been invoked\")\n      end\n   end\n\n   enableIoWrite()\nend\n\nfunction tests.assert()\n   meta_assert_success(subtester:assert(true, MESSAGE))\n   meta_assert_failure(subtester:assert(false, MESSAGE))\nend\n\nlocal function testEqNe(eqExpected, ...)\n   if eqExpected then\n      meta_assert_success(subtester:eq(...))\n      meta_assert_failure(subtester:ne(...))\n   else\n      meta_assert_failure(subtester:eq(...))\n      meta_assert_success(subtester:ne(...))\n   end\nend\n\n--[[ Test :assertGeneralEq and :assertGeneralNe (also known as :eq and :ne).\n\nNote that in-depth testing of testing of many specific types of data (such as\nTensor) is covered below, when we test specific functions (such as\n:assertTensorEq). 
This just does a general check, as well as testing of testing\nof mixed datatypes.\n]]\nfunction tests.assertGeneral()\n   local one = torch.Tensor{1}\n\n   testEqNe(true, one, one, MESSAGE)\n   testEqNe(false, one, 1, MESSAGE)\n   testEqNe(true, \"hi\", \"hi\", MESSAGE)\n   testEqNe(true, {one, 1}, {one, 1}, MESSAGE)\n   testEqNe(true, {{{one}}}, {{{one}}}, MESSAGE)\n   testEqNe(false, {{{one}}}, {{one}}, MESSAGE)\n   testEqNe(true, torch.Storage{1}, torch.Storage{1}, MESSAGE)\n   testEqNe(false, torch.FloatStorage{1}, torch.LongStorage{1}, MESSAGE)\n   testEqNe(false, torch.Storage{1}, torch.Storage{1, 2}, MESSAGE)\n   testEqNe(false, \"one\", 1, MESSAGE)\n   testEqNe(false, {one}, {one + torch.Tensor{1e-10}}, MESSAGE)\n   testEqNe(true, {one}, {one + torch.Tensor{1e-10}}, 1e-9, MESSAGE)\nend\n\nfunction tests.assertlt()\n   meta_assert_success(subtester:assertlt(1, 2, MESSAGE))\n   meta_assert_failure(subtester:assertlt(2, 1, MESSAGE))\n   meta_assert_failure(subtester:assertlt(1, 1, MESSAGE))\nend\n\nfunction tests.assertgt()\n   meta_assert_success(subtester:assertgt(2, 1, MESSAGE))\n   meta_assert_failure(subtester:assertgt(1, 2, MESSAGE))\n   meta_assert_failure(subtester:assertgt(1, 1, MESSAGE))\nend\n\nfunction tests.assertle()\n   meta_assert_success(subtester:assertle(1, 2, MESSAGE))\n   meta_assert_failure(subtester:assertle(2, 1, MESSAGE))\n   meta_assert_success(subtester:assertle(1, 1, MESSAGE))\nend\n\nfunction tests.assertge()\n   meta_assert_success(subtester:assertge(2, 1, MESSAGE))\n   meta_assert_failure(subtester:assertge(1, 2, MESSAGE))\n   meta_assert_success(subtester:assertge(1, 1, MESSAGE))\nend\n\nfunction tests.asserteq()\n   meta_assert_success(subtester:asserteq(1, 1, MESSAGE))\n   meta_assert_failure(subtester:asserteq(1, 2, MESSAGE))\nend\n\nfunction tests.assertalmosteq()\n   meta_assert_success(subtester:assertalmosteq(1, 1, MESSAGE))\n   meta_assert_success(subtester:assertalmosteq(1, 1 + 1e-17, MESSAGE))\n   
meta_assert_success(subtester:assertalmosteq(1, 2, 2, MESSAGE))\n   meta_assert_failure(subtester:assertalmosteq(1, 2, MESSAGE))\n   meta_assert_failure(subtester:assertalmosteq(1, 3, 1, MESSAGE))\nend\n\nfunction tests.assertne()\n   meta_assert_success(subtester:assertne(1, 2, MESSAGE))\n   meta_assert_failure(subtester:assertne(1, 1, MESSAGE))\nend\n\n-- The `alsoTestEq` flag is provided to test :eq in addition to :assertTensorEq.\n-- The behaviour of the two isn't always the same due to handling of tensors of\n-- different dimensions but the same number of elements.\nlocal function testTensorEqNe(eqExpected, alsoTestEq, ...)\n   if eqExpected then\n      meta_assert_success(subtester:assertTensorEq(...))\n      meta_assert_failure(subtester:assertTensorNe(...))\n      if alsoTestEq then\n         meta_assert_success(subtester:eq(...))\n         meta_assert_failure(subtester:ne(...))\n      end\n   else\n      meta_assert_failure(subtester:assertTensorEq(...))\n      meta_assert_success(subtester:assertTensorNe(...))\n      if alsoTestEq then\n         meta_assert_failure(subtester:eq(...))\n         meta_assert_success(subtester:ne(...))\n      end\n   end\nend\n\nfunction tests.assertTensor_types()\n   local allTypes = {\n         torch.ByteTensor,\n         torch.CharTensor,\n         torch.ShortTensor,\n         torch.IntTensor,\n         torch.LongTensor,\n         torch.FloatTensor,\n         torch.DoubleTensor,\n   }\n   for _, tensor1 in ipairs(allTypes) do\n      for _, tensor2 in ipairs(allTypes) do\n         local t1 = tensor1():ones(10)\n         local t2 = tensor2():ones(10)\n         testTensorEqNe(tensor1 == tensor2, true, t1, t2, 1e-6, MESSAGE)\n      end\n   end\n\n   testTensorEqNe(false, true, torch.FloatTensor(), torch.LongTensor(), MESSAGE)\nend\n\nfunction tests.assertTensor_sizes()\n   local t = torch.Tensor() -- no dimensions\n   local t2 = torch.ones(2)\n   local t3 = torch.ones(3)\n   local t12 = torch.ones(1, 2)\n   
assert(subtester._assertTensorEqIgnoresDims == true) -- default state\n   testTensorEqNe(false, false, t, t2, 1e-6, MESSAGE)\n   testTensorEqNe(false, false, t, t3, 1e-6, MESSAGE)\n   testTensorEqNe(false, false, t, t12, 1e-6, MESSAGE)\n   testTensorEqNe(false, false, t2, t3, 1e-6, MESSAGE)\n   testTensorEqNe(true, false, t2, t12, 1e-6, MESSAGE)\n   testTensorEqNe(false, false, t3, t12, 1e-6, MESSAGE)\n   subtester._assertTensorEqIgnoresDims = false\n   testTensorEqNe(false, true, t, t2, 1e-6, MESSAGE)\n   testTensorEqNe(false, true, t, t3, 1e-6, MESSAGE)\n   testTensorEqNe(false, true, t, t12, 1e-6, MESSAGE)\n   testTensorEqNe(false, true, t2, t3, 1e-6, MESSAGE)\n   testTensorEqNe(false, true, t2, t12, 1e-6, MESSAGE)\n   testTensorEqNe(false, true, t3, t12, 1e-6, MESSAGE)\n   subtester._assertTensorEqIgnoresDims = true -- reset back\nend\n\nfunction tests.assertTensor_epsilon()\n   local t1 = torch.rand(100, 100)\n   local t2 = torch.rand(100, 100) * 1e-5\n   local t3 = t1 + t2\n   testTensorEqNe(true, true, t1, t3, 1e-4, MESSAGE)\n   testTensorEqNe(false, true, t1, t3, 1e-6, MESSAGE)\nend\n\nfunction tests.assertTensor_arg()\n   local one = torch.Tensor{1}\n\n   tester:assertErrorPattern(\n         function() subtester:assertTensorEq(one, 2) end,\n         \"Second argument should be a Tensor\")\n\n   -- Test that assertTensorEq support message and tolerance in either ordering\n   tester:assertNoError(\n         function() subtester:assertTensorEq(one, one, 0.1, MESSAGE) end)\n   tester:assertNoError(\n         function() subtester:assertTensorEq(one, one, MESSAGE, 0.1) end)\nend\n\nfunction tests.assertTensor()\n   local t1 = torch.randn(100, 100)\n   local t2 = t1:clone()\n   local t3 = torch.randn(100, 100)\n   testTensorEqNe(true, true, t1, t2, 1e-6, MESSAGE)\n   testTensorEqNe(false, true, t1, t3, 1e-6, MESSAGE)\n   testTensorEqNe(true, true, torch.Tensor(), torch.Tensor(), MESSAGE)\nend\n\n-- Check that calling assertTensorEq with two tensors with the same 
content but\n-- different dimensions gives a warning.\nfunction tests.assertTensorDimWarning()\n   local myTester = torch.Tester()\n   myTester:add(\n       function()\n          myTester:assertTensorEq(torch.Tensor{{1}}, torch.Tensor{1})\n       end)\n\n   local warningGiven = false\n   io.write = function(s)\n      if string.match(s, 'but different dimensions') then\n         warningGiven = true\n      end\n   end\n\n   myTester:run()\n   enableIoWrite()\n\n   tester:assert(warningGiven,\n                 \"Calling :assertTensorEq({{1}}, {1}) should give a warning\")\nend\n\nlocal function testTableEqNe(eqExpected, ...)\n   if eqExpected then\n      meta_assert_success(subtester:assertTableEq(...))\n      meta_assert_failure(subtester:assertTableNe(...))\n      meta_assert_success(subtester:eq(...))\n      meta_assert_failure(subtester:ne(...))\n   else\n      meta_assert_failure(subtester:assertTableEq(...))\n      meta_assert_success(subtester:assertTableNe(...))\n      meta_assert_failure(subtester:eq(...))\n      meta_assert_success(subtester:ne(...))\n   end\nend\n\nfunction tests.assertTable()\n   testTableEqNe(true, {1, 2, 3}, {1, 2, 3}, MESSAGE)\n   testTableEqNe(false, {1, 2, 3}, {3, 2, 1}, MESSAGE)\n   testTableEqNe(true, {1, 2, {4, 5}}, {1, 2, {4, 5}}, MESSAGE)\n   testTableEqNe(false, {1, 2, 3}, {1,2}, MESSAGE)\n   testTableEqNe(false, {1, 2, 3}, {1, 2, 3, 4}, MESSAGE)\n   testTableEqNe(true, {{1}}, {{1}}, MESSAGE)\n   testTableEqNe(false, {{1}}, {{{1}}}, MESSAGE)\n   testTableEqNe(true, {false}, {false}, MESSAGE)\n   testTableEqNe(false, {true}, {false}, MESSAGE)\n   testTableEqNe(false, {false}, {true}, MESSAGE)\n\n   local tensor = torch.rand(100, 100)\n   local t1 = {1, \"a\", key = \"value\", tensor = tensor, subtable = {\"nested\"}}\n   local t2 = {1, \"a\", key = \"value\", tensor = tensor, subtable = {\"nested\"}}\n   testTableEqNe(true, t1, t2, MESSAGE)\n   for k, v in pairs(t1) do\n      local x = \"something else\"\n      t2[k] = nil\n      
t2[x] = v\n      testTableEqNe(false, t1, t2, MESSAGE)\n      t2[x] = nil\n      t2[k] = x\n      testTableEqNe(false, t1, t2, MESSAGE)\n      t2[k] = v\n      testTableEqNe(true, t1, t2, MESSAGE)\n   end\nend\n\nlocal function good_fn() end\nlocal function bad_fn() error(\"muahaha!\") end\n\nfunction tests.assertError()\n   meta_assert_success(subtester:assertError(bad_fn, MESSAGE))\n   meta_assert_failure(subtester:assertError(good_fn, MESSAGE))\nend\n\nfunction tests.assertNoError()\n   meta_assert_success(subtester:assertNoError(good_fn, MESSAGE))\n   meta_assert_failure(subtester:assertNoError(bad_fn, MESSAGE))\nend\n\nfunction tests.assertErrorPattern()\n   meta_assert_success(subtester:assertErrorPattern(bad_fn, \"haha\", MESSAGE))\n   meta_assert_failure(subtester:assertErrorPattern(bad_fn, \"hehe\", MESSAGE))\nend\n\nfunction tests.testSuite_duplicateTests()\n   local function createDuplicateTests()\n      local tests = torch.TestSuite()\n      function tests.testThis() end\n      function tests.testThis() end\n   end\n   tester:assertErrorPattern(createDuplicateTests,\n                             \"Test testThis is already defined.\")\nend\n\n--[[ Returns a Tester with `numSuccess` success cases, `numFailure` failure\n  cases, and with an error if `hasError` is true.\n  Success and fail tests are evaluated with tester:eq\n]]\nlocal function genDummyTest(numSuccess, numFailure, hasError)\n   hasError = hasError or false\n\n   local dummyTester = torch.Tester()\n   local dummyTests = torch.TestSuite()\n\n   if numSuccess > 0 then\n      function dummyTests.testDummySuccess()\n         for i = 1, numSuccess do\n           dummyTester:eq({1}, {1}, '', 0)\n         end\n      end\n   end\n\n   if numFailure > 0 then\n      function dummyTests.testDummyFailure()\n         for i = 1, numFailure do\n            dummyTester:eq({1}, {2}, '', 0)\n         end\n      end\n   end\n\n   if hasError then\n      function dummyTests.testDummyError()\n         
error('dummy error')\n      end\n   end\n\n   return dummyTester:add(dummyTests)\nend\n\nfunction tests.runStatusAndAssertCounts()\n   local emptyTest      = genDummyTest(0, 0, false)\n   local sucTest        = genDummyTest(1, 0, false)\n   local multSucTest    = genDummyTest(4, 0, false)\n   local failTest       = genDummyTest(0, 1, false)\n   local errTest        = genDummyTest(0, 0, true)\n   local errFailTest    = genDummyTest(0, 1, true)\n   local errSucTest     = genDummyTest(1, 0, true)\n   local failSucTest    = genDummyTest(1, 1, false)\n   local failSucErrTest = genDummyTest(1, 1, true)\n\n   disableIoWrite()\n\n   local success, msg = pcall(emptyTest.run, emptyTest)\n   tester:asserteq(success, true, \"pcall should succeed for empty tests\")\n\n   local success, msg = pcall(sucTest.run, sucTest)\n   tester:asserteq(success, true, \"pcall should succeed for 1 successful test\")\n\n   local success, msg = pcall(multSucTest.run, multSucTest)\n   tester:asserteq(success, true,\n                   \"pcall should succeed for 2+ successful tests\")\n\n   local success, msg = pcall(failTest.run, failTest)\n   tester:asserteq(success, false, \"pcall should fail for tests with failure\")\n\n   local success, msg = pcall(errTest.run, errTest)\n   tester:asserteq(success, false, \"pcall should fail for tests with error\")\n\n   local success, msg = pcall(errFailTest.run, errFailTest)\n   tester:asserteq(success, false, \"pcall should fail for error+fail tests\")\n\n   local success, msg = pcall(errSucTest.run, errSucTest)\n   tester:asserteq(success, false, \"pcall should fail for error+success tests\")\n\n   local success, msg = pcall(failSucTest.run, failSucTest)\n   tester:asserteq(success, false, \"pcall should fail for fail+success tests\")\n\n   local success, msg = pcall(failSucErrTest.run, failSucErrTest)\n   tester:asserteq(success, false,\n                   \"pcall should fail for fail+success+err test\")\n\n   enableIoWrite()\n\n   
tester:asserteq(emptyTest.countasserts, 0,\n                   \"emptyTest should have 0 asserts\")\n   tester:asserteq(sucTest.countasserts, 1, \"sucTest should have 1 assert\")\n   tester:asserteq(multSucTest.countasserts, 4,\n                   \"multSucTest should have 4 asserts\")\n   tester:asserteq(failTest.countasserts, 1, \"failTest should have 1 assert\")\n   tester:asserteq(errTest.countasserts, 0, \"errTest should have 0 asserts\")\n   tester:asserteq(errFailTest.countasserts, 1,\n                   \"errFailTest should have 1 assert\")\n   tester:asserteq(errSucTest.countasserts, 1,\n                   \"errSucTest should have 1 assert\")\n   tester:asserteq(failSucTest.countasserts, 2,\n                   \"failSucTest should have 2 asserts\")\n   -- the erroring test contributes no assert, so only the success and\n   -- failure tests are counted\n   tester:asserteq(failSucErrTest.countasserts, 2,\n                   \"failSucErrTest should have 2 asserts\")\nend\n\nfunction tests.checkNestedTestsForbidden()\n   disableIoWrite()\n\n   local myTester = torch.Tester()\n   local myTests = {{function() end}}\n   tester:assertErrorPattern(function() myTester:add(myTests) end,\n                             \"Nested sets\",\n                             \"tester should forbid adding nested test sets\")\n\n   enableIoWrite()\nend\n\nfunction tests.checkWarningOnAssertObject()\n   -- This test checks that calling assert with an object generates a warning\n   local myTester = torch.Tester()\n   local myTests = {}\n   function myTests.assertAbuse()\n      myTester:assert({})\n   end\n   myTester:add(myTests)\n\n   local warningGiven = false\n   io.write = function(s)\n      if string.match(s, 'should only be used for boolean') then\n         warningGiven = true\n      end\n   end\n\n   myTester:run()\n   enableIoWrite()\n\n   tester:assert(warningGiven, \"Should warn on calling :assert(object)\")\nend\n\nfunction tests.checkWarningOnAssertNeObject()\n   -- This test checks that calling assertne with two objects generates warning\n   local myTester = torch.Tester()\n   local myTests = {}\n   function myTests.assertAbuse()\n      myTester:assertne({}, {})\n   end\n   
myTester:add(myTests)\n\n   local warningGiven = false\n   io.write = function(s)\n      if string.match(s, 'assertne should only be used to compare basic') then\n         warningGiven = true\n      end\n   end\n\n   myTester:run()\n   enableIoWrite()\n\n   tester:assert(warningGiven, \"Should warn on calling :assertne(obj, obj)\")\nend\n\nfunction tests.checkWarningOnExtraAssertArguments()\n   -- This test checks that calling assert with extra args gives a lua error\n   local myTester = torch.Tester()\n   local myTests = {}\n   function myTests.assertAbuse()\n      myTester:assert(true, \"some message\", \"extra argument\")\n   end\n   myTester:add(myTests)\n\n   local errorGiven = false\n   io.write = function(s)\n      if string.match(s, 'Unexpected arguments') then\n         errorGiven = true\n      end\n   end\n   tester:assertError(function() myTester:run() end)\n   enableIoWrite()\n\n   tester:assert(errorGiven, \":assert should fail on extra arguments\")\nend\n\nfunction tests.checkWarningOnUsingTable()\n   -- Checks that if we don't use a TestSuite then gives a warning\n   local myTester = torch.Tester()\n   local myTests = {}\n   myTester:add(myTests)\n\n   local errorGiven = false\n   io.write = function(s)\n      if string.match(s, 'use TestSuite rather than plain lua table') then\n         errorGiven = true\n      end\n   end\n   myTester:run()\n\n   enableIoWrite()\n   tester:assert(errorGiven, \"Using a plain lua table for testsuite should warn\")\nend\n\nfunction tests.checkMaxAllowedSetUpAndTearDown()\n   -- Checks can have at most 1 set-up and at most 1 tear-down function\n   local function f() end\n   local myTester = torch.Tester()\n\n   for _, name in ipairs({'_setUp', '_tearDown'}) do\n      tester:assertNoError(function() myTester:add(f, name) end,\n                           \"Adding 1 set-up / tear-down should be fine\")\n      tester:assertErrorPattern(function() myTester:add(f, name) end,\n                                \"Only one\",\n   
                             \"Adding second set-up / tear-down should fail\")\n   end\nend\n\nfunction tests.test_setUp()\n   tester:asserteq(test_name_passed_to_setUp, 'test_setUp')\n   for key, value in pairs(tester.tests) do\n      tester:assertne(key, '_setUp')\n   end\nend\n\nfunction tests.test_tearDown()\n   for key, value in pairs(tester.tests) do\n      tester:assertne(key, '_tearDown')\n   end\nend\n\nfunction tests._setUp(name)\n   test_name_passed_to_setUp = name\n   calls_to_setUp = calls_to_setUp + 1\nend\n\nfunction tests._tearDown(name)\n   calls_to_tearDown = calls_to_tearDown + 1\nend\n\ntester:add(tests):run()\n\n-- Additional tests to check that _setUp and _tearDown were called.\nlocal test_count = 0\nfor _ in pairs(tester.tests) do\n   test_count = test_count + 1\nend\nlocal postTests = torch.TestSuite()\nlocal postTester = torch.Tester()\n\nfunction postTests.test_setUp(tester)\n   postTester:asserteq(calls_to_setUp, test_count,\n                       \"Expected \" .. test_count .. \" calls to _setUp\")\nend\n\nfunction postTests.test_tearDown()\n   postTester:asserteq(calls_to_tearDown, test_count,\n                      \"Expected \" .. test_count .. \" calls to _tearDown\")\nend\n\npostTester:add(postTests):run()\n"
  },
  {
    "path": "test/test_aliasMultinomial.lua",
    "content": "-- Sanity and statistical test for the alias-method multinomial sampler\n-- (torch.multinomialAliasSetup / torch.multinomialAlias).\nrequire 'torch'\n\nlocal tester = torch.Tester()\n\n\n-- Builds the alias state for a random distribution over n_class classes,\n-- draws samples with it, and checks that every index is in range and that the\n-- empirical class frequencies match the requested probabilities.\nlocal function aliasMultinomial()\n   local n_class = 10000\n   local probs = torch.Tensor(n_class):uniform(0,1)\n   probs:div(probs:sum())\n   local a = torch.Timer()\n   local state = torch.multinomialAliasSetup(probs)\n   print(\"AliasMultinomial setup in \"..a:time().real..\" seconds(hot)\")\n   a:reset()\n   -- Second setup call: the state returned by the first call is passed back in.\n   state = torch.multinomialAliasSetup(probs, state)\n   print(\"AliasMultinomial setup in \"..a:time().real..\" seconds(cold)\")\n\n   -- The check accepts 0, so the failure message says \"below 0\".\n   tester:assert(state[1]:min() >= 0, \"Index = \"..state[1]:min()..\": alias indices has an index below 0\")\n   tester:assert(state[1]:max() <= n_class, state[1]:max()..\" alias indices has an index exceeding num_class\")\n   local output = torch.LongTensor(1000000)\n   local n_samples = output:nElement()\n   -- Restart the timer immediately before each draw so that only the sampling\n   -- itself is measured.\n   a:reset()\n   torch.multinomialAlias(output, state)\n   print(\"AliasMultinomial draw \"..n_samples..\" elements from \"..n_class..\" classes \"..\"in \"..a:time().real..\" seconds\")\n   local counts = torch.Tensor(n_class):zero()\n   a:reset()\n   -- Reference draw with the regular sampler; it is run for timing only, so the\n   -- result is discarded instead of being stored in a global.\n   torch.multinomial(probs, n_samples, true)\n   print(\"Multinomial draw \"..n_samples..\" elements from \"..n_class..\" classes \"..\" in \"..a:time().real..\" seconds\")\n   tester:assert(output:min() > 0, \"sampled indices has an index below or equal to 0\")\n   tester:assert(output:max() <= n_class, \"indices has an index exceeding num_class\")\n   -- Histogram of the sampled class indices.\n   output:apply(function(x)\n         counts[x] = counts[x] + 1\n   end)\n\n   -- Normalise the histogram into empirical probabilities.\n   counts:div(counts:sum())\n\n   -- Re-check the alias state after sampling.\n   tester:assert(state[1]:min() >= 0, \"Index = \"..state[1]:min()..\": alias indices has an index below 0\")\n   tester:assert(state[1]:max() <= n_class, state[1]:max()..\" alias indices has an index exceeding num_class\")\n   tester:eq(probs, counts, 0.001, \"probs and counts should be approximately equal\")\nend\n\ntester:add(aliasMultinomial)\ntester:run()\n"
  },
  {
    "path": "test/test_half.lua",
    "content": "-- Basic tests for half-precision tensors: layout, copies, views and clones.\nrequire 'torch'\n\n-- Assigned at the bottom of the file; the test functions use it as an upvalue.\nlocal mytester\nlocal torchtest = torch.TestSuite()\n\n-- Checks dimensions, strides and contiguity of a 5x6 half tensor, and that\n-- copying into freshly resized tensors preserves the values.\nfunction torchtest.easy()\n   local x=torch.randn(5, 6):half()\n   mytester:assert(x:isContiguous(), 'x should be contiguous')\n   mytester:assert(x:dim() == 2, 'x should have dim of 2')\n   mytester:assert(x:nDimension() == 2, 'x should have nDimension of 2')\n   mytester:assert(x:nElement() == 5 * 6, 'x should have 30 elements')\n   local stride = x:stride()\n   local expectedStride = torch.LongStorage{6,1}\n   for i=1,stride:size() do\n      mytester:assert(stride[i] == expectedStride[i], \"stride is wrong\")\n   end\n\n   x=x:t()\n   mytester:assert(not x:isContiguous(), 'x transpose should not be contiguous')\n   x=x:transpose(1,2)\n   mytester:assert(x:isContiguous(), 'x should be contiguous after 2 transposes')\n\n   local y=torch.HalfTensor()\n   y:resizeAs(x:t()):copy(x:t())\n   mytester:assert(x:isContiguous(), 'after resize and copy, x should be contiguous')\n   mytester:assertTensorEq(y, x:t(), 0.001, 'copy broken after resizeAs')\n   local z=torch.HalfTensor()\n   z:resize(6, 5):copy(x:t())\n   -- Compare z (not y) so that the explicit-resize path is actually verified.\n   mytester:assertTensorEq(z, x:t(), 0.001, 'copy broken after resize')\nend\n\n-- narrow() along the first dimension and sub() over the same rows must agree.\nfunction torchtest.narrowSub()\n   local x = torch.randn(5, 6):half()\n   local narrow = x:narrow(1, 2, 3)\n   local sub = x:sub(2, 4)\n   mytester:assertTensorEq(narrow, sub, 0.001, 'narrow not equal to sub')\nend\n\n-- A clone must hold the same values and must not change when the source is\n-- modified afterwards.\nfunction torchtest.selectClone()\n   local x = torch.zeros(5, 6)\n   x:select(1,2):fill(2)\n   x=x:half()\n   local y=x:clone()\n   mytester:assertTensorEq(x, y, 0.001, 'not equal after select and clone')\n   x:select(1,1):fill(3)\n   mytester:assert(y[1][1] == 0, 'clone broken')\nend\n\ntorch.setheaptracking(true)\nmath.randomseed(os.time())\nmytester = torch.Tester()\nmytester:add(torchtest)\n-- Run every registered test; no filter is passed.\nmytester:run()\n"
  },
  {
    "path": "test/test_qr.lua",
    "content": "-- This file contains tests for the QR decomposition functions in torch:\n-- torch.qr(), torch.geqrf() and torch.orgqr().\nlocal torch = require 'torch'\nlocal tester = torch.Tester()\nlocal tests = torch.TestSuite()\n\n-- torch.qr() with result tensors given.\nlocal function qrInPlace(tensorFunc)\n  return function(x)\n    local q, r = tensorFunc(), tensorFunc()\n    torch.qr(q, r, x:clone())\n    return q, r\n  end\nend\n\n-- torch.qr() without result tensors given.\nlocal function qrReturned(tensorFunc)\n  return function(x)\n    return torch.qr(x:clone())\n  end\nend\n\n-- torch.geqrf() with result tensors given.\nlocal function geqrfInPlace(tensorFunc)\n  return function(x)\n    local result = tensorFunc()\n    local tau = tensorFunc()\n    local result_, tau_ = torch.geqrf(result, tau, x)\n    assert(torch.pointer(result) == torch.pointer(result_),\n           'expected result, result_ same tensor')\n    assert(torch.pointer(tau) == torch.pointer(tau_),\n           'expected tau, tau_ same tensor')\n    return result_, tau_\n  end\nend\n\n-- torch.orgqr() with result tensors given.\nlocal function orgqrInPlace(tensorFunc)\n  return function(result, tau)\n    local q = tensorFunc()\n    local q_ = torch.orgqr(q, result, tau)\n    assert(torch.pointer(q) == torch.pointer(q_), 'expected q, q_ same tensor')\n    return q\n  end\nend\n\n-- Test a custom QR routine that calls the LAPACK functions manually.\nlocal function qrManual(geqrfFunc, orgqrFunc)\n  return function(x)\n    local m = x:size(1)\n    local n = x:size(2)\n    local k = math.min(m, n)\n    local result, tau = geqrfFunc(x)\n    assert(result:size(1) == m)\n    assert(result:size(2) == n)\n    assert(tau:size(1) == k)\n    local r = torch.triu(result:narrow(1, 1, k))\n    local q = orgqrFunc(result, tau)\n    return q:narrow(2, 1, k), r\n  end\nend\n\n-- Check that Q multiplied with a matrix with ormqr gives the correct result\nlocal function checkQM(testOpts, mat1, mat2)\n  local q, r = torch.qr(mat1)\n  local m, tau = torch.geqrf(mat1)\n  local requiredPrecision = 1e-5\n  tester:assertTensorEq(torch.mm(q, mat2), torch.ormqr(m, tau, mat2),\n                        requiredPrecision)\n  tester:assertTensorEq(torch.mm(mat2, q), torch.ormqr(m, tau, mat2, 'R'),\n                        requiredPrecision)\n  tester:assertTensorEq(torch.mm(q:t(), mat2),\n                        torch.ormqr(m, tau, mat2, 'L', 'T'), requiredPrecision)\n  tester:assertTensorEq(torch.mm(mat2, q:t()),\n                        torch.ormqr(m, tau, mat2, 'R', 'T'), requiredPrecision)\nend\n\n-- Check that the given `q`, `r` matrices are a valid QR decomposition of `a`.\nlocal function checkQR(testOpts, a, q, r)\n  local qrFunc = testOpts.qr\n  if not q then\n    q, r = qrFunc(a)\n  end\n  local k = math.min(a:size(1), a:size(2))\n  tester:asserteq(q:size(1), a:size(1), \"Bad size for q first dimension.\")\n  tester:asserteq(q:size(2), k, \"Bad size for q second dimension.\")\n  tester:asserteq(r:size(1), k, \"Bad size for r first dimension.\")\n  tester:asserteq(r:size(2), a:size(2), \"Bad size for r second dimension.\")\n  tester:assertTensorEq(q:t() * q,\n                        torch.eye(q:size(2)):typeAs(testOpts.tensorFunc()),\n                        testOpts.precision,\n                        \"Q was not orthogonal\")\n  tester:assertTensorEq(r, r:triu(), testOpts.precision,\n                        \"R was not upper triangular\")\n  tester:assertTensorEq(q * r, a, testOpts.precision, \"QR = A\")\nend\n\n-- Do a QR decomposition of `a` and check that the result is valid and matches\n-- the given expected `q` and `r`.\nlocal function checkQRWithExpected(testOpts, a, expected_q, expected_r)\n  local qrFunc = testOpts.qr\n  -- Since the QR decomposition is unique only up to the signs of the rows of\n  -- R, we must ensure these are positive before doing the comparison.\n  local function canonicalize(q, r)\n      local d = r:diag():sign():diag()\n      return q * d, d * r\n  end\n  local q, r = qrFunc(a)\n  local q_canon, r_canon = canonicalize(q, r)\n  local expected_q_canon, expected_r_canon\n      = canonicalize(expected_q, expected_r)\n  tester:assertTensorEq(q_canon, expected_q_canon, testOpts.precision,\n                        \"Q did not match expected\")\n  tester:assertTensorEq(r_canon, expected_r_canon, testOpts.precision,\n                        \"R did not match expected\")\n  checkQR(testOpts, a, q, r)\nend\n\n-- Generate a separate test based on `func` for each of the possible\n-- combinations of tensor type (double or float) and QR function (torch.qr\n-- in-place, torch.qr, and manually calling the geqrf and orgqr from Lua\n-- (both in-place and not).\n--\n-- The tests are added to the given `tests` table, with names generated by\n-- appending a unique string for the specific combination to `name`.\n--\n-- `func` is called as func(testOpts) and its return value is ignored.\n--\n-- If opts.doubleTensorOnly is true, then the FloatTensor versions of the test\n-- will be skipped.\nlocal function addTestVariations(tests, name, func, opts)\n  opts = opts or {}\n  local tensorTypes = {\n      [torch.DoubleTensor] = 1e-12,\n      [torch.FloatTensor] = 1e-5,\n  }\n  for tensorFunc, requiredPrecision in pairs(tensorTypes) do\n    local qrFuncs = {\n        ['inPlace'] = qrInPlace(tensorFunc),\n        ['returned'] = qrReturned(tensorFunc),\n        ['manualInPlace'] = qrManual(geqrfInPlace(tensorFunc),\n                                     orgqrInPlace(tensorFunc)),\n        ['manualReturned'] = qrManual(torch.geqrf, torch.orgqr)\n    }\n    for qrName, qrFunc in pairs(qrFuncs) do\n      local testOpts = {\n          tensorFunc=tensorFunc,\n          precision=requiredPrecision,\n          qr=qrFunc,\n      }\n      local tensorType = tensorFunc():type()\n      local fullName = name .. \"_\" .. qrName .. \"_\" .. tensorType\n      assert(not tests[fullName])\n      if tensorType == 'torch.DoubleTensor' or not opts.doubleTensorOnly then\n        tests[fullName] = function()\n          local state = torch.getRNGState()\n          torch.manualSeed(1)\n          func(testOpts)\n          torch.setRNGState(state)\n        end\n      end\n    end\n  end\nend\n\n-- Decomposing a specific square matrix.\n-- NOTE(review): this matrix is singular, so r[3][3] is pure round-off and its\n-- sign (which canonicalize() relies on) may vary between LAPACK builds; confirm\n-- that the comparison is stable on the supported platforms.\naddTestVariations(tests, 'qrSquare', function(testOpts)\n  -- Run the checks directly: addTestVariations calls func(testOpts) and ignores\n  -- its result, so returning a nested function here would execute nothing.\n  local tensorFunc = testOpts.tensorFunc\n  local a = tensorFunc{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}\n  local expected_q = tensorFunc{\n      {-1.230914909793328e-01,  9.045340337332914e-01,\n       4.082482904638621e-01},\n      {-4.923659639173310e-01,  3.015113445777629e-01,\n       -8.164965809277264e-01},\n      {-8.616404368553292e-01, -3.015113445777631e-01,\n       4.082482904638634e-01},\n  }\n  local expected_r = tensorFunc{\n      {-8.124038404635959e+00, -9.601136296387955e+00,\n       -1.107823418813995e+01},\n      { 0.000000000000000e+00,  9.045340337332926e-01,\n       1.809068067466585e+00},\n      { 0.000000000000000e+00,  0.000000000000000e+00,\n       -8.881784197001252e-16},\n  }\n  checkQRWithExpected(testOpts, a,  expected_q, expected_r)\nend, {doubleTensorOnly=true})\n\n-- Decomposing a specific (wide) rectangular matrix.\naddTestVariations(tests, 'qrRectFat', function(testOpts)\n  -- The matrix is chosen to be full-rank.\n  local a = testOpts.tensorFunc{\n      {1,  2,  3,  4},\n      {5,  6,  7,  8},\n      {9, 10, 11, 13}\n  }\n  local expected_q = testOpts.tensorFunc{\n      {-0.0966736489045663,  0.907737593658436 ,  0.4082482904638653},\n      {-0.4833682445228317,  0.3157348151855452, -0.8164965809277254},\n      {-0.870062840141097 , -0.2762679632873518,  0.4082482904638621}\n  }\n  local expected_r = testOpts.tensorFunc{\n      { -1.0344080432788603e+01,  -1.1794185166357092e+01,\n        -1.3244289899925587e+01,  -1.5564457473635180e+01},\n      {  0.0000000000000000e+00,   9.4720444555662542e-01,\n         1.8944088911132546e+00,   2.5653453733825331e+00},\n      {  0.0000000000000000e+00,   0.0000000000000000e+00,\n         1.5543122344752192e-15,   4.0824829046386757e-01}\n  }\n  checkQRWithExpected(testOpts, a, expected_q, expected_r)\nend, {doubleTensorOnly=true})\n\n-- Decomposing a specific (thin) rectangular matrix.\naddTestVariations(tests, 'qrRectThin', function(testOpts)\n  -- The matrix is chosen to be full-rank.\n  local a = testOpts.tensorFunc{\n      { 1,  2,  3},\n      { 4,  5,  6},\n      { 7,  8,  9},\n      {10, 11, 13},\n  }\n  local expected_q = testOpts.tensorFunc{\n      {-0.0776150525706334, -0.833052161400748 ,  0.3651483716701106},\n      {-0.3104602102825332, -0.4512365874254053, -0.1825741858350556},\n      {-0.5433053679944331, -0.0694210134500621, -0.7302967433402217},\n      {-0.7761505257063329,  0.3123945605252804,  0.5477225575051663}\n  }\n  local expected_r = testOpts.tensorFunc{\n      {-12.8840987267251261, -14.5916298832790581, -17.0753115655393231},\n      {  0,                  -1.0413152017509357,  -1.770235842976589 },\n      {  0,                   0,                    0.5477225575051664}\n  }\n  checkQRWithExpected(testOpts, a, expected_q, expected_r)\nend, {doubleTensorOnly=true})\n\n-- Decomposing a sequence of medium-sized random matrices.\naddTestVariations(tests, 'randomMediumQR', function(testOpts)\n  -- Sizes are powers of two from 1 to 1024 in each dimension. The `^` operator\n  -- is used instead of math.pow, and the matrix gets its own name so that it\n  -- does not shadow a loop variable.\n  for rowExp = 0, 10 do\n    for colExp = 0, 10 do\n      local m = 2 ^ rowExp\n      local n = 2 ^ colExp\n      local a = torch.rand(m, n)\n      checkQR(testOpts, a:typeAs(testOpts.tensorFunc()))\n    end\n  end\nend)\n\n-- Decomposing a sequence of small random matrices.\naddTestVariations(tests, 'randomSmallQR', function(testOpts)\n  for m = 1, 40 do\n    for n = 1, 40 do\n      checkQR(testOpts, torch.rand(m, n):typeAs(testOpts.tensorFunc()))\n    end\n  end\nend)\n\n-- Decomposing a sequence of small matrices that are not contiguous in memory.\naddTestVariations(tests, 'randomNonContiguous', function(testOpts)\n  for m = 2, 40 do\n    for n = 2, 40 do\n      local x = torch.rand(m, n):t()\n      tester:assert(not x:isContiguous(), \"x should not be contiguous\")\n      checkQR(testOpts, x:typeAs(testOpts.tensorFunc()))\n    end\n  end\nend)\n\nfunction tests.testQM()\n  checkQM({}, torch.randn(10, 10), torch.randn(10, 10))\n  -- checkQM({}, torch.randn(20, 10), torch.randn(20, 20))\nend\n\ntester:add(tests)\ntester:run()\n"
  },
  {
    "path": "test/test_sharedmem.lua",
    "content": "require 'torch'\nlocal ffi = require 'ffi'\n\nlocal tester = torch.Tester()\nlocal tests = torch.TestSuite()\n\n-- Creates a storage backed by a shared-memory file.\n--   name        shared-memory name (default: derived from os.tmpname())\n--   size        number of elements (default: torch.random(10000, 20000))\n--   storageType name of the torch storage constructor (default: 'FloatStorage')\n-- Returns the storage and the shared-memory name that was used.\nlocal function createSharedMemStorage(name, size, storageType)\n  local storageType = storageType or 'FloatStorage'\n  local shmName = name\n  if not shmName then\n    local tmpPath = os.tmpname()\n    -- On POSIX systems os.tmpname() also creates the file. Only its unique\n    -- name is needed here, so delete it instead of leaving it behind.\n    os.remove(tmpPath)\n    shmName = tmpPath:gsub('/','_')\n  end\n  local isShared = true\n  local isSharedMem = true\n  local nElements = size or torch.random(10000, 20000)\n  local storage = torch[storageType](shmName, isShared, nElements, isSharedMem)\n  return storage, shmName\nend\n\n-- Path used to look up the shared-memory object as a file: under /dev/shm/\n-- everywhere except Windows, where the bare name is used.\nlocal function shmFilePath(shmName)\n  return (ffi.os ~= 'Windows' and '/dev/shm/' or '') .. shmName\nend\n\n-- Deletes the backing file explicitly; this is only done on Windows.\nlocal function removeShmFile(shmFileName)\n  if ffi.os == 'Windows' then\n    os.remove(shmFileName)\n  end\nend\n\nfunction tests.createSharedMemFile()\n  local storage, shmName = createSharedMemStorage()\n  local shmFileName = shmFilePath(shmName)\n\n  -- check that file is at /dev/shm\n  tester:assert(paths.filep(shmFileName),\n                'Shared memory file should exist')\n\n  -- collect storage and make sure that file is gone\n  storage = nil\n  collectgarbage()\n  collectgarbage()\n  removeShmFile(shmFileName)\n  tester:assert(not paths.filep(shmFileName),\n                'Shared memory file should not exist')\nend\n\nfunction tests.checkContents()\n  local storage, shmName = createSharedMemStorage()\n  local shmFileName = shmFilePath(shmName)\n  local tensor = torch.FloatTensor(storage, 1, torch.LongStorage{storage:size()})\n  tensor:copy(torch.rand(storage:size()))\n\n  -- Read the backing file directly and compare it element by element.\n  local sharedFile = torch.DiskFile(shmFileName, 'r'):binary()\n  for i = 1, storage:size() do\n    tester:assert(sharedFile:readFloat() == storage[i], 'value is not correct')\n  end\n  sharedFile:close()\n  removeShmFile(shmFileName)\nend\n\nfunction tests.testSharing()\n  -- since we are going to cast numbers into double (lua default)\n  -- we specifically generate double storage\n  local storage, shmName = createSharedMemStorage(nil, nil, 'DoubleStorage')\n  local shmFileName = shmFilePath(shmName)\n  local tensor = torch.DoubleTensor(storage, 1, torch.LongStorage{storage:size()})\n  tensor:copy(torch.rand(storage:size()))\n  local tensorCopy = tensor.new():resizeAs(tensor):copy(tensor)\n\n  -- access the same shared memory file as regular mapping from same process\n  local storage2 = torch.DoubleStorage(shmFileName, true, storage:size())\n  local tensor2 = torch.DoubleTensor(storage2, 1,\n                                     torch.LongStorage{storage2:size()})\n  local tensor2Copy = tensor2.new():resizeAs(tensor2):copy(tensor2)\n\n  tester:assertTensorEq(tensorCopy, tensor2Copy, 0, 'contents don\\'t match')\n\n  -- fill tensor 1 with a random value and read from 2\n  local rval = torch.uniform()\n  tensor:fill(rval)\n  for i = 1, tensor2:size(1) do\n    tester:asserteq(tensor2[i], rval, 'content is wrong')\n  end\n\n  -- fill tensor 2 with a random value and read from 1\n  rval = torch.uniform()\n  tensor2:fill(rval)\n  for i = 1, tensor:size(1) do\n    tester:asserteq(tensor[i], rval, 'content is wrong')\n  end\n  removeShmFile(shmFileName)\nend\n\ntester:add(tests)\ntester:run()\n"
  },
  {
    "path": "test/test_timer.lua",
    "content": "require 'torch'\nlocal ffi = require 'ffi'\n\nlocal tester = torch.Tester()\nlocal tests = torch.TestSuite()\n\n-- Checks torch.Timer against one-second waits through a\n-- reset / stop / resume / reset sequence.\nfunction tests.timerTime()\n  local timer = torch.Timer()\n\n  -- Blocks for roughly `seconds` seconds by running an external command.\n  local function wait(seconds)\n    if ffi.os == 'Windows' then\n        os.execute(string.format('ping 127.0.0.1 -n %d > nul', seconds + 1))\n    else\n        -- 'nul' is a null device only on Windows; elsewhere it would create a\n        -- file named 'nul' in the working directory, so use /dev/null.\n        os.execute(string.format('sleep %d > /dev/null', seconds))\n    end\n  end\n\n  -- Asserts that the timer's real time lies strictly between the two bounds.\n  local function assertElapsed(min_sec, max_sec)\n    local passed_time = timer:time().real\n    tester:assert(passed_time < max_sec,\n                 (\"Too long time passed: %.1f sec >= %.1f sec\"):format(passed_time, max_sec))\n    tester:assert(passed_time > min_sec,\n                 (\"Too short time passed:  %.1f sec <= %.1f sec\"):format(passed_time, min_sec))\n  end\n\n  timer:reset()\n  wait(1)\n  assertElapsed(0.9, 1.1)\n\n  -- The same bounds are expected after waiting with the timer stopped.\n  timer:stop()\n  wait(1)\n  assertElapsed(0.9, 1.1)\n\n  -- After resuming, the next second is added to the first one.\n  timer:resume()\n  wait(1)\n  assertElapsed(1.8, 2.2)\n\n  timer:reset()\n  wait(1)\n  assertElapsed(0.9, 1.1)\nend\n\ntester:add(tests)\ntester:run()\n"
  },
  {
    "path": "test/test_writeObject.lua",
    "content": "require 'torch'\n\nlocal myTester = torch.Tester()\n\nlocal tests = torch.TestSuite()\n\nfunction torch.HalfTensor:norm()\n   return self:real():norm()\nend\n\n-- Writes obj to an in-memory binary file and reads it back, recording a test\n-- failure if either step raises an error.\n-- Returns the deserialized copy (or the error message if reading failed).\nlocal function serializeAndDeserialize(obj)\n   local file = torch.MemoryFile()\n   file:binary()\n   local writeOk, writeMsg = pcall (file.writeObject, file, obj)\n   if not writeOk then print(writeMsg) end\n   myTester:assert(writeOk, 'error in writing an object'  )\n   file:seek(1)\n   local ok, copy = pcall(file.readObject, file)\n   if not ok then print(copy) end\n   myTester:assert(ok, 'error in reading an object ')\n   return copy\nend\n\nfunction tests.test_can_write_a_nil_closure()\n  local a\n  local function closure()\n    if not a then return 1 end\n    return 0\n  end\n\n  local copyClosure = serializeAndDeserialize(closure)\n  myTester:assert(copyClosure() == closure(), 'the closures should give same output')\nend\n\nfunction tests.test_nil_upvalues_in_closure()\n  local a = 1\n  local b\n  local c = 2\n  local function closure()\n    if not b then return c end\n    return a\n  end\n\n  local copyClosure = serializeAndDeserialize(closure)\n  myTester:assert(copyClosure() == closure(), 'the closures should give same output')\nend\n\nfunction tests.test_global_function_in_closure()\n  local x = \"5\"\n  local function closure(str)\n    return tonumber(str .. x)\n  end\n\n  local copyClosure = serializeAndDeserialize(closure)\n  myTester:assert(copyClosure(\"3\") == closure(\"3\"), 'the closures should give same output')\nend\n\nfunction tests.test_a_recursive_closure()\n  local foo\n\n  foo = function (level)\n    if level == 1 then return 1 end\n    return 1+foo(level-1)\n  end\n\n  local copyFoo = serializeAndDeserialize(foo)\n  myTester:assert(copyFoo(42) == foo(42), 'the closures should give same output')\nend\n\n-- Round-trips a random 5x10 tensor converted with func and compares the norms.\nlocal function checkTensorRoundTrip(func)\n   local x = func(torch.rand(5, 10))\n   local xcopy = serializeAndDeserialize(x)\n   myTester:assert(x:norm() == xcopy:norm(), 'tensors should be the same')\nend\n\nfunction tests.test_a_tensor()\n   for k,v in ipairs({\"real\", \"half\"}) do\n      checkTensorRoundTrip(torch.getmetatable(torch.Tensor():type())[v])\n   end\nend\n\n-- Regression test for bug reported in issue 456.\nfunction tests.test_empty_table()\n   local file = torch.MemoryFile()\n   file:writeObject({})\nend\n\nfunction tests.test_error_msg()\n   local torch = torch\n   local inner = {\n       baz = function(a) torch.somefunc() end\n   }\n   local outer = {\n       theinner = inner\n   }\n   local function evil_func()\n      outer.prop = 1\n      image.compress(1)\n   end\n   local ok, msg = pcall(torch.save, 'saved.t7', evil_func)\n   -- Best-effort clean-up of whatever the failed save left on disk.\n   os.remove('saved.t7')\n   myTester:assert(not ok)\n   myTester:assert(msg:find('at <%?>%.outer%.theinner%.baz%.torch') ~= nil)\nend\n\nfunction tests.test_warning_msg()\n  local foo = {}\n  torch.class('Bar', foo)\n\n  local obj = foo.Bar()\n  local tensor = torch.Tensor()\n  obj.data = tensor:cdata() -- pick something NOT writable\n\n  local file = torch.MemoryFile('rw'):binary()\n  local ok, _ = pcall(torch.File.writeObject, file, obj)\n  -- only a warning is printed on STDOUT:\n  --   $ Warning: cannot write object field <data> of <Bar> <?>\n  myTester:assert(ok)\n  file:close()\nend\n\nfunction tests.test_referenced()\n   local file = torch.MemoryFile('rw'):binary()\n   file:referenced(false)\n\n   local foo = 'bar'\n   file:writeObject(foo)\n   file:close()\nend\n\nfunction tests.test_shared_upvalues()\n  if debug.upvalueid then\n     local i=1\n     local j=2\n\n     local func = {}\n\n     func.increment = function()\n        i=i+1\n        j=j+2\n     end\n     func.get_i = function()\n        return i\n     end\n     func.get_j = function()\n        return j\n     end\n\n     local copyFunc = serializeAndDeserialize(func)\n     myTester:assert(copyFunc.get_i()==1)\n     myTester:assert(copyFunc.get_j()==2)\n     copyFunc.increment()\n     myTester:assert(copyFunc.get_i()==2)\n     myTester:assert(copyFunc.get_j()==4)\n  else\n     print('Not running shared upvalues test, as we are in Lua-5.1')\n  end\nend\n\n\n-- checks that the serialization hook passed to writeObject can substitute\n-- objects of one class with objects of another class while writing\nfunction tests.test_SerializationHook()\n   -- Simple uuid implementation from [https://gist.github.com/jrus/3197011]\n   -- The only goal is to avoid collisions within the scope of tests,\n   -- so more than enough.\n   local random = math.random\n   local function uuid()\n       local template ='xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'\n       return string.gsub(template, '[xy]', function (c)\n           local v = (c == 'x') and random(0, 0xf) or random(8, 0xb)\n           return string.format('%x', v)\n       end)\n   end\n   local unique1 = uuid()\n   local unique2 = uuid()\n   local class = {}\n   -- Create 2 classes\n   local spec = torch.class('class.'.. unique1, class)\n   function spec:test()\n      return false\n   end\n   local gen = torch.class('class.' .. unique2, class)\n   function gen:test()\n      return true\n   end\n   -- Replaces every instance of the first class by a new instance of the second.\n   local hook = function(object)\n      local class = class\n      local newObject = object\n      if torch.typename(object) == 'class.'..unique1 then\n         newObject = class[unique2]()\n      end\n      return newObject\n   end\n\n   -- Write to 2 files, first without hooking,\n   -- second with hooking\n   local file = torch.MemoryFile('rw')\n   file:binary()\n   local file2 = torch.MemoryFile('rw')\n   file2:binary()\n   local s = class[unique1]()\n   local object = {s1 = s, v = 'test', g = class[unique2](), s2 = s}\n   file:writeObject(object)\n   file2:writeObject(object, nil, hook)\n\n   -- unregister class[unique1] and try to reload the first serialized object\n   if debug and debug.getregistry then\n      local ok, res = pcall(function()\n         -- Index the registry by the class name. Indexing it with an undefined\n         -- (nil) global raises 'table index is nil', which made this pcall fail\n         -- for the wrong reason.\n         -- NOTE(review): assumes torch keeps the class metatable in the Lua\n         -- registry under its type name -- confirm against luaT.\n         debug.getregistry()['class.' .. unique1] = nil\n         file:seek(1)\n         return file:readObject()\n      end)\n      myTester:assert(not ok)\n   else\n      print('Not running serialization hook failure test because debug is missing.')\n   end\n\n   -- Try to reload the second serialized object\n   local ok, clone = pcall(function() file2:seek(1) return file2:readObject()  end)\n   myTester:assert(ok, 'error in reading the hooked object: ' .. tostring(clone))\n\n   -- Test that everything happened smoothly\n   myTester:assert(clone.v == 'test')\n   myTester:assert(torch.typename(clone.s1) == 'class.' .. unique2)\n   myTester:assert(clone.s1:test() and clone.s2:test())\n   myTester:assert(string.format('%x',torch.pointer(clone.s1)) == string.format('%x',torch.pointer(clone.s2)))\nend\n\nfunction tests.test_serializeToStorage()\n   torch.save(\"foo.t7\", \"foo\")\n   local f = io.open(\"foo.t7\", \"rb\")\n   local size = f:seek(\"end\")\n   f:close()\n   -- The file is only needed for its size.\n   os.remove(\"foo.t7\")\n   myTester:eq(\n      torch.serializeToStorage(\"foo\"):size(), size,\n      \"memory and disk serializations should have the same size\"\n   )\nend\n\nmyTester:add(tests)\nmyTester:run()\nif myTester.errors[1] then os.exit(1) end\n"
  },
  {
    "path": "test/timeSort.lua",
    "content": "-- gnuplot.figure(2)\n-- Test torch sort, show it suffers from the problems of quicksort\n-- i.e. complexity O(N^2) in worst-case of sorted list\nrequire 'torch'\nrequire 'gnuplot'\nlocal ffi = require 'ffi'\n\nlocal cmd = torch.CmdLine()\ncmd:option('-N', 10^7, 'Maximum array size')\ncmd:option('-p',  50, 'Number of points in logspace')\ncmd:option('-r', 20, 'Number of repetitions')\n\nlocal options = cmd:parse(arg or {})\n\n-- Times ascending and descending torch.sort on random, sorted and constant\n-- inputs over a log-spaced range of sizes, plots the mean timings and their\n-- ratio with gnuplot, and saves the raw measurements to benchmark.t7.\nlocal function main()\n    local log10 = math.log10 or function(x) return math.log(x, 10) end\n    local pow10 = torch.linspace(1,log10(options.N), options.p)\n    local num_sizes = options.p\n    local num_reps = options.r\n\n    -- One row per array size, one column per repetition.\n    local old_rnd = torch.zeros(num_sizes, num_reps)\n    local old_srt = torch.zeros(num_sizes, num_reps)\n    local old_cst = torch.zeros(num_sizes, num_reps)\n    local new_rnd = torch.zeros(num_sizes, num_reps)\n    local new_srt = torch.zeros(num_sizes, num_reps)\n    local new_cst = torch.zeros(num_sizes, num_reps)\n\n    -- Ascending sort uses new sort\n    local function time_sort(x)\n        collectgarbage()\n        local start = os.clock()\n        torch.sort(x,false)\n        return (os.clock()-start)\n    end\n\n    -- Descending sort uses old sort\n    local function time_old_sort(x)\n        collectgarbage()\n        local start = os.clock()\n        torch.sort(x,true)\n        return (os.clock()-start)\n    end\n\n    -- Each bench takes (size index, repetition index, array length) and stores\n    -- one timing per sort variant.\n    local benches = {\n        function(i,j,n)\n            -- on random\n            local input = torch.rand(n)\n            new_rnd[i][j] = time_sort(input:clone())\n            old_rnd[i][j] = time_old_sort(input:clone())\n        end,\n\n        function(i,j,n)\n            -- on sorted\n            new_srt[i][j] = time_sort(torch.linspace(0,1,n))\n            old_srt[i][j] = time_old_sort(torch.linspace(0,1,n):add(-1):mul(-1)) -- old_time is called on descending sort, hence the reversed input\n        end,\n\n        function(i,j,n)\n            -- on constant\n            new_cst[i][j] = time_sort(torch.zeros(n))\n            old_cst[i][j] = time_old_sort(torch.zeros(n))\n        end\n    }\n\n    local num_benches = #benches\n    local num_exps = num_sizes * num_benches * num_reps\n\n    -- Full randomization\n    local perm = torch.randperm(num_exps):long()\n    local perm_benches = torch.Tensor(num_exps)\n    local perm_reps = torch.Tensor(num_exps)\n    local perm_sizes = torch.Tensor(num_exps)\n\n    local l = 1\n    for i=1, num_sizes do\n        for j=1, num_reps do\n            for k=1, num_benches do\n                perm_benches[ perm[l] ] = k\n                perm_reps[ perm[l] ] = j\n                perm_sizes[ perm[l] ] = i\n                l = l+1\n            end\n        end\n    end\n\n    -- Run the experiments in shuffled order, printing a dot per percent done.\n    local pc = 0\n    for j = 1, num_exps do\n        local n = 10^pow10[perm_sizes[j]]\n    --    print(string.format('rep %d / %d, bench %d, size %d, rep %d\\n', j, num_exps, perm_benches[j], n, perm_reps[j]))\n        if math.floor(100*j/num_exps) > pc then\n            pc = math.floor(100*j/num_exps)\n            io.write('.')\n            if pc % 10 == 0 then\n                io.write(' ' .. pc .. '%\\n')\n             end\n            io.flush()\n        end\n        benches[perm_benches[j]](perm_sizes[j], perm_reps[j], n)\n    end\n\n    -- Ratio of mean old (descending) time to mean new (ascending) time per size.\n    local ratio_rnd = torch.cdiv(old_rnd:mean(2), new_rnd:mean(2))\n    local ratio_srt = torch.cdiv(old_srt:mean(2), new_srt:mean(2))\n    local ratio_cst = torch.cdiv(old_cst:mean(2), new_cst:mean(2))\n\n    local N = pow10:clone():apply(function(x) return 10^x end)\n\n    if ffi.os == 'Windows' then\n      gnuplot.setterm('windows')\n    else\n      gnuplot.setterm('x11')\n    end\n    gnuplot.figure(1)\n    gnuplot.raw('set log x; set mxtics 10')\n    gnuplot.raw('set grid mxtics mytics xtics ytics')\n    gnuplot.raw('set xrange [' .. N:min() .. ':' .. N:max() .. ']' )\n    gnuplot.plot({'Random - new', N, new_rnd:mean(2)},\n                 {'Sorted - new', N, new_srt:mean(2)},\n                 {'Constant - new', N, new_cst:mean(2)},\n                 {'Random - old', N, old_rnd:mean(2)},\n                 {'Sorted - old', N, old_srt:mean(2)},\n                 {'Constant - old', N, old_cst:mean(2)})\n    gnuplot.xlabel('N')\n    gnuplot.ylabel('Time (s)')\n    gnuplot.figprint('benchmarkTime.png')\n\n    gnuplot.figure(2)\n    gnuplot.raw('set log x; set mxtics 10')\n    gnuplot.raw('set grid mxtics mytics xtics ytics')\n    gnuplot.raw('set xrange [' .. N:min() .. ':' .. N:max() .. ']' )\n    gnuplot.plot({'Random', N, ratio_rnd:mean(2)},\n                 {'Sorted', N, ratio_srt:mean(2)},\n                 {'Constant', N, ratio_cst:mean(2)})\n    gnuplot.xlabel('N')\n    -- The speed-up is a ratio of two times, so it carries no unit.\n    gnuplot.ylabel('Speed-up Factor')\n    gnuplot.figprint('benchmarkRatio.png')\n\n    torch.save('benchmark.t7', {\n               new_rnd=new_rnd,\n               new_srt=new_srt,\n               new_cst=new_cst,\n               old_rnd=old_rnd,\n               old_srt=old_srt,\n               old_cst=old_cst,\n               ratio_rnd=ratio_rnd,\n               ratio_srt=ratio_srt,\n               ratio_cst=ratio_cst,\n               pow10 = pow10,\n               num_reps = num_reps\n           })\nend\n\nmain()\n"
  },
  {
    "path": "torchcwrap.lua",
    "content": "local wrap = require 'cwrap'\nlocal types = wrap.types\n\ntypes.Tensor = {\n\n   helpname = function(arg)\n                 if arg.dim then\n                    return string.format(\"Tensor~%dD\", arg.dim)\n                 else\n                    return \"Tensor\"\n                 end\n            end,\n\n   declare = function(arg)\n                local txt = {}\n                table.insert(txt, string.format(\"THTensor *arg%d = NULL;\", arg.i))\n                if arg.returned then\n                   table.insert(txt, string.format(\"int arg%d_idx = 0;\", arg.i));\n                end\n                return table.concat(txt, '\\n')\n           end,\n\n   check = function(arg, idx)\n              if arg.dim then\n                 return string.format(\"(arg%d = luaT_toudata(L, %d, torch_Tensor)) && (arg%d->nDimension == %d)\", arg.i, idx, arg.i, arg.dim)\n              else\n                 return string.format(\"(arg%d = luaT_toudata(L, %d, torch_Tensor))\", arg.i, idx)\n              end\n         end,\n\n   read = function(arg, idx)\n             if arg.returned then\n                return string.format(\"arg%d_idx = %d;\", arg.i, idx)\n             end\n          end,\n\n   init = function(arg)\n             if type(arg.default) == 'boolean' then\n                return string.format('arg%d = THTensor_(new)();', arg.i)\n             elseif type(arg.default) == 'number' then\n                return string.format('arg%d = %s;', arg.i, arg.args[arg.default]:carg())\n             else\n                error('unknown default tensor type value')\n             end\n          end,\n\n   carg = function(arg)\n             return string.format('arg%d', arg.i)\n          end,\n\n   creturn = function(arg)\n                return string.format('arg%d', arg.i)\n             end,\n\n   precall = function(arg)\n                local txt = {}\n                if arg.default and arg.returned then\n                   table.insert(txt, 
string.format('if(arg%d_idx)', arg.i)) -- means it was passed as arg\n                   table.insert(txt, string.format('lua_pushvalue(L, arg%d_idx);', arg.i))\n                   table.insert(txt, string.format('else'))\n                   if type(arg.default) == 'boolean' then -- boolean: we did a new()\n                      table.insert(txt, string.format('luaT_pushudata(L, arg%d, torch_Tensor);', arg.i))\n                   else  -- otherwise: point on default tensor --> retain\n                      table.insert(txt, string.format('{'))\n                      table.insert(txt, string.format('THTensor_(retain)(arg%d);', arg.i)) -- so we need a retain\n                      table.insert(txt, string.format('luaT_pushudata(L, arg%d, torch_Tensor);', arg.i))\n                      table.insert(txt, string.format('}'))\n                   end\n                elseif arg.default then\n                   -- we would have to deallocate the beast later if we did a new\n                   -- unlikely anyways, so i do not support it for now\n                   if type(arg.default) == 'boolean' then\n                      error('a tensor cannot be optional if not returned')\n                   end\n                elseif arg.returned then\n                   table.insert(txt, string.format('lua_pushvalue(L, arg%d_idx);', arg.i))\n                end\n                return table.concat(txt, '\\n')\n             end,\n\n   postcall = function(arg)\n                 local txt = {}\n                 if arg.creturned then\n                    -- this next line is actually debatable\n                    table.insert(txt, string.format('THTensor_(retain)(arg%d);', arg.i))\n                    table.insert(txt, string.format('luaT_pushudata(L, arg%d, torch_Tensor);', arg.i))\n                 end\n                 return table.concat(txt, '\\n')\n              end\n}\n\ntypes.Generator = {\n\n   helpname = function(arg)\n                 return \"Generator\"\n              
end,\n\n   declare = function(arg)\n                return string.format(\"THGenerator *arg%d = NULL;\", arg.i)\n             end,\n\n   check = function(arg, idx)\n              return string.format(\"(arg%d = luaT_toudata(L, %d, torch_Generator))\", arg.i, idx)\n           end,\n\n   read = function(arg, idx)\n          end,\n\n   init = function(arg)\n             local text = {}\n             -- If no generator is supplied, pull the default out of the torch namespace.\n             table.insert(text, 'lua_getglobal(L,\"torch\");')\n             table.insert(text, string.format('arg%d = luaT_getfieldcheckudata(L, -1, \"_gen\", torch_Generator);', arg.i))\n             table.insert(text, 'lua_pop(L, 2);')\n             return table.concat(text, '\\n')\n          end,\n\n   carg = function(arg)\n             return string.format('arg%d', arg.i)\n          end,\n\n   creturn = function(arg)\n                return string.format('arg%d', arg.i)\n             end,\n\n   precall = function(arg)\n             end,\n\n   postcall = function(arg)\n              end\n}\n\ntypes.IndexTensor = {\n\n   helpname = function(arg)\n               return \"LongTensor\"\n            end,\n\n   declare = function(arg)\n                local txt = {}\n                table.insert(txt, string.format(\"THLongTensor *arg%d = NULL;\", arg.i))\n                if arg.returned then\n                   table.insert(txt, string.format(\"int arg%d_idx = 0;\", arg.i));\n                end\n                return table.concat(txt, '\\n')\n           end,\n\n   check = function(arg, idx)\n              return string.format('(arg%d = luaT_toudata(L, %d, \"torch.LongTensor\"))', arg.i, idx)\n           end,\n\n   read = function(arg, idx)\n             local txt = {}\n             if not arg.noreadadd then\n                table.insert(txt, string.format(\"THLongTensor_add(arg%d, arg%d, -1);\", arg.i, arg.i));\n             end\n             if arg.returned then\n                
table.insert(txt, string.format(\"arg%d_idx = %d;\", arg.i, idx))\n             end\n             return table.concat(txt, '\\n')\n          end,\n\n   init = function(arg)\n             return string.format('arg%d = THLongTensor_new();', arg.i)\n          end,\n\n   carg = function(arg)\n             return string.format('arg%d', arg.i)\n          end,\n\n   creturn = function(arg)\n                return string.format('arg%d', arg.i)\n             end,\n\n   precall = function(arg)\n                local txt = {}\n                if arg.default and arg.returned then\n                   table.insert(txt, string.format('if(arg%d_idx)', arg.i)) -- means it was passed as arg\n                   table.insert(txt, string.format('lua_pushvalue(L, arg%d_idx);', arg.i))\n                   table.insert(txt, string.format('else')) -- means we did a new()\n                   table.insert(txt, string.format('luaT_pushudata(L, arg%d, \"torch.LongTensor\");', arg.i))\n                elseif arg.default then\n                   error('a tensor cannot be optional if not returned')\n                elseif arg.returned then\n                   table.insert(txt, string.format('lua_pushvalue(L, arg%d_idx);', arg.i))\n                end\n                return table.concat(txt, '\\n')\n             end,\n\n   postcall = function(arg)\n                 local txt = {}\n                 if arg.creturned or arg.returned then\n                    table.insert(txt, string.format(\"THLongTensor_add(arg%d, arg%d, 1);\", arg.i, arg.i));\n                 end\n                 if arg.creturned then\n                    -- this next line is actually debatable\n                    table.insert(txt, string.format('THLongTensor_retain(arg%d);', arg.i))\n                    table.insert(txt, string.format('luaT_pushudata(L, arg%d, \"torch.LongTensor\");', arg.i))\n                 end\n                 return table.concat(txt, '\\n')\n              end\n}\n\nfor _,typename in 
ipairs({\"ByteTensor\", \"CharTensor\", \"ShortTensor\", \"IntTensor\", \"LongTensor\",\n                          \"FloatTensor\", \"HalfTensor\", \"DoubleTensor\"}) do\n\n   types[typename] = {\n\n      helpname = function(arg)\n                    if arg.dim then\n                       return string.format('%s~%dD', typename, arg.dim)\n                    else\n                       return typename\n                    end\n                 end,\n\n      declare = function(arg)\n                   local txt = {}\n                   table.insert(txt, string.format(\"TH%s *arg%d = NULL;\", typename, arg.i))\n                   if arg.returned then\n                      table.insert(txt, string.format(\"int arg%d_idx = 0;\", arg.i));\n                   end\n                   return table.concat(txt, '\\n')\n                end,\n\n      check = function(arg, idx)\n                 if arg.dim then\n                    return string.format('(arg%d = luaT_toudata(L, %d, \"torch.%s\")) && (arg%d->nDimension == %d)', arg.i, idx, typename, arg.i, arg.dim)\n                 else\n                    return string.format('(arg%d = luaT_toudata(L, %d, \"torch.%s\"))', arg.i, idx, typename)\n                 end\n              end,\n\n      read = function(arg, idx)\n                if arg.returned then\n                   return string.format(\"arg%d_idx = %d;\", arg.i, idx)\n                end\n             end,\n\n      init = function(arg)\n                if type(arg.default) == 'boolean' then\n                   return string.format('arg%d = TH%s_new();', arg.i, typename)\n                elseif type(arg.default) == 'number' then\n                   return string.format('arg%d = %s;', arg.i, arg.args[arg.default]:carg())\n                else\n                   error('unknown default tensor type value')\n                end\n             end,\n\n      carg = function(arg)\n                return string.format('arg%d', arg.i)\n             end,\n\n      creturn = 
function(arg)\n                   return string.format('arg%d', arg.i)\n             end,\n\n      precall = function(arg)\n                   local txt = {}\n                   if arg.default and arg.returned then\n                      table.insert(txt, string.format('if(arg%d_idx)', arg.i)) -- means it was passed as arg\n                      table.insert(txt, string.format('lua_pushvalue(L, arg%d_idx);', arg.i))\n                      table.insert(txt, string.format('else'))\n                      if type(arg.default) == 'boolean' then -- boolean: we did a new()\n                         table.insert(txt, string.format('luaT_pushudata(L, arg%d, \"torch.%s\");', arg.i, typename))\n                      else  -- otherwise: point on default tensor --> retain\n                         table.insert(txt, string.format('{'))\n                         table.insert(txt, string.format('TH%s_retain(arg%d);', typename, arg.i)) -- so we need a retain\n                         table.insert(txt, string.format('luaT_pushudata(L, arg%d, \"torch.%s\");', arg.i, typename))\n                         table.insert(txt, string.format('}'))\n                      end\n                   elseif arg.default then\n                      -- we would have to deallocate the beast later if we did a new\n                      -- unlikely anyways, so i do not support it for now\n                      if type(arg.default) == 'boolean' then\n                         error('a tensor cannot be optional if not returned')\n                      end\n                   elseif arg.returned then\n                      table.insert(txt, string.format('lua_pushvalue(L, arg%d_idx);', arg.i))\n                   end\n                   return table.concat(txt, '\\n')\n                end,\n\n      postcall = function(arg)\n                    local txt = {}\n                    if arg.creturned then\n                       -- this next line is actually debatable\n                       table.insert(txt, 
string.format('TH%s_retain(arg%d);', typename, arg.i))\n                       table.insert(txt, string.format('luaT_pushudata(L, arg%d, \"torch.%s\");', arg.i, typename))\n                    end\n                    return table.concat(txt, '\\n')\n                 end\n   }\n\n   types[typename .. 'Array'] = {\n\n      helpname = function(arg)\n                    return string.format('{%s+}', typename)\n               end,\n\n      declare = function(arg)\n                   local txt = {}\n                   table.insert(txt, string.format('TH%s **arg%d_data = NULL;', typename, arg.i))\n                   table.insert(txt, string.format('long arg%d_size = 0;', arg.i))\n                   table.insert(txt, string.format('int arg%d_i = 0;', arg.i))\n                   return table.concat(txt, '\\n')\n              end,\n\n      check = function(arg, idx)\n                 return string.format('torch_isnonemptytable(L, %d)', idx)\n            end,\n\n      read = function(arg, idx)\n                local txt = {}\n                -- Iterate over the array to find its length, leave elements on stack.\n                table.insert(txt, string.format('do'))\n                table.insert(txt, string.format('{'))\n                table.insert(txt, string.format('  arg%d_size++;', arg.i))\n                table.insert(txt, string.format('  lua_checkstack(L, 1);'))\n                table.insert(txt, string.format('  lua_rawgeti(L, %d, arg%d_size);', idx, arg.i))\n                table.insert(txt, string.format('}'))\n                table.insert(txt, string.format('while (!lua_isnil(L, -1));'))\n                table.insert(txt, string.format('arg%d_size--;', arg.i))\n                -- Pop nil element from stack.\n                table.insert(txt, string.format('lua_pop(L, 1);'))\n                -- Allocate tensor pointers and read values from stack backwards.\n                table.insert(txt, string.format('arg%d_data = (TH%s**)THAlloc(arg%d_size * sizeof(TH%s*));', 
arg.i, typename, arg.i, typename))\n                table.insert(txt, string.format('for (arg%d_i = arg%d_size - 1; arg%d_i >= 0; arg%d_i--)', arg.i, arg.i, arg.i, arg.i))\n                table.insert(txt, string.format('{'))\n                table.insert(txt, string.format('  if (!(arg%d_data[arg%d_i] = luaT_toudata(L, -1, \"torch.%s\")))', arg.i, arg.i, typename))\n                table.insert(txt, string.format('    luaL_error(L, \"expected %s in tensor array\");', typename))\n                table.insert(txt, string.format('  lua_pop(L, 1);'))\n                table.insert(txt, string.format('}'))\n                table.insert(txt, string.format(''))\n                return table.concat(txt, '\\n')\n             end,\n\n      init = function(arg)\n             end,\n\n      carg = function(arg)\n                return string.format('arg%d_data,arg%d_size', arg.i, arg.i)\n             end,\n\n      creturn = function(arg)\n                   error('TensorArray cannot be returned.')\n                end,\n\n      precall = function(arg)\n                end,\n\n      postcall = function(arg)\n                    return string.format('THFree(arg%d_data);', arg.i)\n                 end\n   }\nend\n\ntypes.LongArg = {\n\n   vararg = true,\n\n   helpname = function(arg)\n               return \"(LongStorage | dim1 [dim2...])\"\n            end,\n\n   declare = function(arg)\n              return string.format(\"THLongStorage *arg%d = NULL;\", arg.i)\n           end,\n\n   init = function(arg)\n             if arg.default then\n                error('LongArg cannot have a default value')\n             end\n          end,\n\n   check = function(arg, idx)\n            return string.format(\"torch_islongargs(L, %d)\", idx)\n         end,\n\n   read = function(arg, idx)\n             return string.format(\"arg%d = torch_checklongargs(L, %d);\", arg.i, idx)\n          end,\n\n   carg = function(arg, idx)\n             return string.format('arg%d', arg.i)\n          
end,\n\n   creturn = function(arg, idx)\n                return string.format('arg%d', arg.i)\n             end,\n\n   precall = function(arg)\n                local txt = {}\n                if arg.returned then\n                   table.insert(txt, string.format('luaT_pushudata(L, arg%d, \"torch.LongStorage\");', arg.i))\n                end\n                return table.concat(txt, '\\n')\n             end,\n\n   postcall = function(arg)\n                 local txt = {}\n                 if arg.creturned then\n                    -- this next line is actually debatable\n                    table.insert(txt, string.format('THLongStorage_retain(arg%d);', arg.i))\n                    table.insert(txt, string.format('luaT_pushudata(L, arg%d, \"torch.LongStorage\");', arg.i))\n                 end\n                 if not arg.returned and not arg.creturned then\n                    table.insert(txt, string.format('THLongStorage_free(arg%d);', arg.i))\n                 end\n                 return table.concat(txt, '\\n')\n              end\n}\n\ntypes.charoption = {\n\n   helpname = function(arg)\n                 if arg.values then\n                    return \"(\" .. table.concat(arg.values, '|') .. 
\")\"\n                 end\n              end,\n\n   declare = function(arg)\n                local txt = {}\n                table.insert(txt, string.format(\"const char *arg%d = NULL;\", arg.i))\n                if arg.default then\n                   table.insert(txt, string.format(\"char arg%d_default = '%s';\", arg.i, arg.default))\n                end\n                return table.concat(txt, '\\n')\n           end,\n\n   init = function(arg)\n             return string.format(\"arg%d = &arg%d_default;\", arg.i, arg.i)\n          end,\n\n   check = function(arg, idx)\n              local txt = {}\n              local txtv = {}\n              table.insert(txt, string.format('(arg%d = lua_tostring(L, %d)) && (', arg.i, idx))\n              for _,value in ipairs(arg.values) do\n                 table.insert(txtv, string.format(\"*arg%d == '%s'\", arg.i, value))\n              end\n              table.insert(txt, table.concat(txtv, ' || '))\n              table.insert(txt, ')')\n              return table.concat(txt, '')\n         end,\n\n   read = function(arg, idx)\n          end,\n\n   carg = function(arg, idx)\n             return string.format('arg%d', arg.i)\n          end,\n\n   creturn = function(arg, idx)\n             end,\n\n   precall = function(arg)\n             end,\n\n   postcall = function(arg)\n              end\n}\n\nfor _,typename in ipairs({\"ptrdiff_t\", \"size_t\"}) do\n  types[typename] =  {\n\n  helpname = function(arg)\n                return typename\n             end,\n\n  declare = function(arg)\n               -- if it is a number we initialize here\n               local default = tonumber(tostring(arg.default)) or 0\n               return string.format(\"%s arg%d = %g;\", typename, arg.i, default)\n            end,\n\n  check = function(arg, idx)\n             return string.format(\"lua_isnumber(L, %d)\", idx)\n          end,\n\n  read = function(arg, idx)\n            return string.format(\"arg%d = (%s)lua_tonumber(L, %d);\", 
arg.i, typename, idx)\n         end,\n\n  init = function(arg)\n            -- otherwise do it here\n            if arg.default then\n               local default = tostring(arg.default)\n               if not tonumber(default) then\n                  return string.format(\"arg%d = %s;\", arg.i, default)\n               end\n            end\n         end,\n\n  carg = function(arg)\n            return string.format('arg%d', arg.i)\n         end,\n\n  creturn = function(arg)\n               return string.format('arg%d', arg.i)\n            end,\n\n  precall = function(arg)\n               if arg.returned then\n                  return string.format('lua_pushnumber(L, (lua_Number)arg%d);', arg.i)\n               end\n            end,\n\n  postcall = function(arg)\n                if arg.creturned then\n                   return string.format('lua_pushnumber(L, (lua_Number)arg%d);', arg.i)\n                end\n             end\n  }\nend\n"
  },
  {
    "path": "utils.c",
    "content": "#include \"general.h\"\n#include \"utils.h\"\n\n#ifdef WIN32\n# include <time.h>\n#else\n# include <sys/time.h>\n#endif\n\nTHLongStorage* torch_checklongargs(lua_State *L, int index)\n{\n  THLongStorage *storage;\n  int i;\n  int narg = lua_gettop(L)-index+1;\n\n  if(narg == 1 && luaT_toudata(L, index, \"torch.LongStorage\"))\n  {\n    THLongStorage *storagesrc = luaT_toudata(L, index, \"torch.LongStorage\");\n    storage = THLongStorage_newWithSize(storagesrc->size);\n    THLongStorage_copy(storage, storagesrc);\n  }\n  else\n  {\n    storage = THLongStorage_newWithSize(narg);\n    for(i = index; i < index+narg; i++)\n    {\n      if(!lua_isnumber(L, i))\n      {\n        THLongStorage_free(storage);\n        luaL_argerror(L, i, \"number expected\");\n      }\n      THLongStorage_set(storage, i-index, lua_tonumber(L, i));\n    }\n  }\n  return storage;\n}\n\nint torch_islongargs(lua_State *L, int index)\n{\n  int narg = lua_gettop(L)-index+1;\n\n  if(narg == 1 && luaT_toudata(L, index, \"torch.LongStorage\"))\n  {\n    return 1;\n  }\n  else\n  {\n    int i;\n\n    for(i = index; i < index+narg; i++)\n    {\n      if(!lua_isnumber(L, i))\n        return 0;\n    }\n    return 1;\n  }\n  return 0;\n}\n\n#ifdef _WIN32\n#include <windows.h>\n#include <io.h>\nstatic __declspec( thread ) LARGE_INTEGER ticksPerSecond = { 0 };\n#endif\n\nstatic int torch_isatty(lua_State *L)\n{\n  FILE **fp = (FILE **) luaL_checkudata(L, -1, LUA_FILEHANDLE);\n#ifdef _WIN32\n  lua_pushboolean(L, _isatty(_fileno(*fp)));\n#else\n  lua_pushboolean(L, isatty(fileno(*fp)));\n#endif\n  return 1;\n}\n\nstatic double real_time()\n{\n#ifdef _WIN32\n  if (ticksPerSecond.QuadPart == 0)\n  {\n    QueryPerformanceFrequency(&ticksPerSecond);\n  }\n  LARGE_INTEGER current;\n  QueryPerformanceCounter(&current);\n  return (double)(current.QuadPart) / ticksPerSecond.QuadPart;\n#else\n  struct timeval current;\n  gettimeofday(&current, NULL);\n  return (current.tv_sec + 
current.tv_usec/1000000.0);\n#endif\n}\n\nstatic int torch_lua_tic(lua_State* L)\n{\n  double ttime = real_time();\n  lua_pushnumber(L,ttime);\n  return 1;\n}\n\nstatic int torch_lua_toc(lua_State* L)\n{\n  double toctime = real_time();\n  lua_Number tictime = luaL_checknumber(L,1);\n  lua_pushnumber(L,toctime-tictime);\n  return 1;\n}\n\nstatic int torch_lua_getdefaulttensortype(lua_State *L)\n{\n  const char* tname = torch_getdefaulttensortype(L);\n  if(tname)\n  {\n    lua_pushstring(L, tname);\n    return 1;\n  }\n  return 0;\n}\n\nconst char* torch_getdefaulttensortype(lua_State *L)\n{\n  lua_getglobal(L, \"torch\");\n  if(lua_istable(L, -1))\n  {\n    lua_getfield(L, -1, \"Tensor\");\n    if(lua_istable(L, -1))\n    {\n      if(lua_getmetatable(L, -1))\n      {\n        lua_pushstring(L, \"__index\");\n        lua_rawget(L, -2);\n        if(lua_istable(L, -1))\n        {\n          lua_rawget(L, LUA_REGISTRYINDEX);\n          if(lua_isstring(L, -1))\n          {\n            const char *tname = lua_tostring(L, -1);\n            lua_pop(L, 4);\n            return tname;\n          }\n        }\n        else\n        {\n          lua_pop(L, 4);\n          return NULL;\n        }\n      }\n      else\n      {\n        lua_pop(L, 2);\n        return NULL;\n      }\n    }\n    else\n    {\n      lua_pop(L, 2);\n      return NULL;\n    }\n  }\n  else\n  {\n    lua_pop(L, 1);\n    return NULL;\n  }\n  return NULL;\n}\n\nstatic int torch_getnumthreads(lua_State *L)\n{\n  lua_pushinteger(L, THGetNumThreads());\n  return 1;\n}\n\nstatic int torch_setnumthreads(lua_State *L)\n{\n  THSetNumThreads(luaL_checkint(L, 1));\n  return 0;\n}\n\nstatic int torch_getnumcores(lua_State *L)\n{\n  lua_pushinteger(L, THGetNumCores());\n  return 1;\n}\n\nstatic void luaTorchGCFunction(void *data)\n{\n  lua_State *L = data;\n  lua_gc(L, LUA_GCCOLLECT, 0);\n}\n\nstatic int torch_setheaptracking(lua_State *L)\n{\n  int enabled = luaT_checkboolean(L,1);\n  lua_getglobal(L, \"torch\");\n  
lua_pushboolean(L, enabled);\n  lua_setfield(L, -2, \"_heaptracking\");\n  if(enabled) {\n    THSetGCHandler(luaTorchGCFunction, L);\n  } else {\n    THSetGCHandler(NULL, NULL);\n  }\n  return 0;\n}\n\nstatic void luaTorchErrorHandlerFunction(const char *msg, void *data)\n{\n  lua_State *L = data;\n  luaL_error(L, msg);\n}\n\nstatic void luaTorchArgErrorHandlerFunction(int argNumber, const char *msg, void *data)\n{\n  lua_State *L = data;\n  luaL_argcheck(L, 0, argNumber, msg);\n}\n\nstatic int torch_updateerrorhandlers(lua_State *L)\n{\n  THSetErrorHandler(luaTorchErrorHandlerFunction, L);\n  THSetArgErrorHandler(luaTorchArgErrorHandlerFunction, L);\n  return 0;\n}\n\nstatic const struct luaL_Reg torch_utils__ [] = {\n  {\"getdefaulttensortype\", torch_lua_getdefaulttensortype},\n  {\"isatty\", torch_isatty},\n  {\"tic\", torch_lua_tic},\n  {\"toc\", torch_lua_toc},\n  {\"setnumthreads\", torch_setnumthreads},\n  {\"getnumthreads\", torch_getnumthreads},\n  {\"getnumcores\", torch_getnumcores},\n  {\"factory\", luaT_lua_factory},\n  {\"getconstructortable\", luaT_lua_getconstructortable},\n  {\"typename\", luaT_lua_typename},\n  {\"isequal\", luaT_lua_isequal},\n  {\"getenv\", luaT_lua_getenv},\n  {\"setenv\", luaT_lua_setenv},\n  {\"newmetatable\", luaT_lua_newmetatable},\n  {\"setmetatable\", luaT_lua_setmetatable},\n  {\"getmetatable\", luaT_lua_getmetatable},\n  {\"metatype\", luaT_lua_metatype},\n  {\"pushudata\", luaT_lua_pushudata},\n  {\"version\", luaT_lua_version},\n  {\"pointer\", luaT_lua_pointer},\n  {\"setheaptracking\", torch_setheaptracking},\n  {\"updateerrorhandlers\", torch_updateerrorhandlers},\n  {NULL, NULL}\n};\n\nvoid torch_utils_init(lua_State *L)\n{\n  torch_updateerrorhandlers(L);\n  luaT_setfuncs(L, torch_utils__, 0);\n}\n"
  },
  {
    "path": "utils.h",
    "content": "#ifndef TORCH_UTILS_INC\n#define TORCH_UTILS_INC\n\n#include \"luaT.h\"\n#include \"TH.h\"\n\n#include <lua.h>\n#include <lualib.h>\n\n#ifdef _WIN32\n#else\n#include <unistd.h>\n#endif\n\n#ifdef __cplusplus\n# define TORCH_EXTERNC extern \"C\"\n#else\n# define TORCH_EXTERNC extern\n#endif\n\n#ifdef _WIN32\n# ifdef torch_EXPORTS\n#  define TORCH_API TORCH_EXTERNC __declspec(dllexport)\n# else\n#  define TORCH_API TORCH_EXTERNC __declspec(dllimport)\n# endif\n#else\n# define TORCH_API TORCH_EXTERNC\n#endif\n\n\nTORCH_API THLongStorage* torch_checklongargs(lua_State *L, int index);\nTORCH_API int torch_islongargs(lua_State *L, int index);\nTORCH_API const char* torch_getdefaulttensortype(lua_State *L);\n\n#endif\n"
  }
]