Full Code of chadmv/cvwrap for AI

master 5a0ee778ab11 cached
23 files
149.4 KB
40.2k tokens
78 symbols
1 requests
Download .txt
Repository: chadmv/cvwrap
Branch: master
Commit: 5a0ee778ab11
Files: 23
Total size: 149.4 KB

Directory structure:
gitextract_0vtif6cx/

├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── LICENSE
├── README.md
├── build.bat
├── module.txt
├── scripts/
│   ├── AEcvWrapTemplate.mel
│   └── cvwrap/
│       ├── __init__.py
│       ├── bindui.py
│       └── menu.py
└── src/
    ├── CMakeLists.txt
    ├── bindingio.cpp
    ├── bindingio.h
    ├── common.cpp
    ├── common.h
    ├── cvWrapCmd.cpp
    ├── cvWrapCmd.h
    ├── cvWrapDeformer.cpp
    ├── cvWrapDeformer.h
    ├── cvwrap.cl
    ├── cvwrap_pre2018.cl
    └── pluginMain.cpp

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================

#ignore thumbnails created by windows
Thumbs.db
#Ignore files built by Visual Studio
*.obj
*.exe
*.pdb
*.user
*.aps
*.pch
*.vspscc
*_i.c
*_p.c
*.ncb
*.suo
*.tlb
*.tlh
*.bak
*.cache
*.ilk
*.log
[Bb]in
[Dd]ebug*/
*.lib
*.sbr
obj/
[Rr]elease*/
_ReSharper*/
[Bb]uild*/
[Tt]est[Rr]esult*
.idea


================================================
FILE: .gitmodules
================================================
[submodule "cgcmake"]
	path = cgcmake
	url = git@github.com:chadmv/cgcmake.git


================================================
FILE: CMakeLists.txt
================================================
# Top-level build script for the cvwrap Maya plug-in.
#
# 3.5 is the oldest minimum that current CMake releases still accept.  The src/
# directory uses target-scoped commands and an imported Maya::Maya target, which the
# previously declared 2.6 minimum never supported.
cmake_minimum_required(VERSION 3.5)
project(cvwrap)

# The module is staged directly in the build directory.  The previous value appended
# ${PROJECT}, which is not a CMake variable and always expanded to an empty string, so
# the effective location is unchanged.
set(PROJECT_PATH "${CMAKE_CURRENT_BINARY_DIR}")
set(CMAKE_INSTALL_PREFIX "${PROJECT_PATH}")
# Find modules (e.g. the one providing find_package(Maya)) come from the cgcmake submodule.
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cgcmake/modules")

add_subdirectory(src)

# Expand ${PROJECT_NAME} and ${PROJECT_PATH} inside module.txt to produce <name>.txt
# next to the staged files.  @ONLY is deliberately not used: the template relies on
# ${...} references.
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/module.txt" "${PROJECT_PATH}/${PROJECT_NAME}.txt")

install(DIRECTORY scripts DESTINATION .)


================================================
FILE: LICENSE
================================================
The MIT License (MIT)

Copyright (c) 2015 Chad Vernon

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.



================================================
FILE: README.md
================================================
# cvwrap
A Maya wrap deformer that is faster than Maya's wrap deformer, can be rebound, has a GPU implementation, and supports inverted front-of-chain blend shapes.

You can purchase a video series documenting the development of this plug-in from scratch at [CGCircuit](http://www.cgcircuit.com/course/creating-a-gpu-driven-wrap-deformer?affid=df9a2a33e2f653182abfd4cfc9b7159752671dde4280a13fc724d0c42b62d143c055f01d504ecc4d176537d5ce1994d0010d204a200f178091bf59c85380dbdc).

```python
sphere = cmds.polySphere(sx=10, sy=10)[0]
cube = cmds.polyCube(w=2.3, h=2.3, d=2.3, sx=5, sy=5, sz=5)[0]

# Create a new wrap
wrap_node = cmds.cvWrap(sphere, cube, name='wrapnode', radius=0.1)

# Rebind a vertex
cmds.select(['{0}.vtx[75]'.format(sphere)])
cmds.select(['{0}.{1}'.format(cube, faces) for faces in ['f[110:111]', 'f[115:116]']], add=True)
cmds.cvWrap(rb=wrap_node)

file_path = r'E:\My Documents\maya\projects\default\data\binding.wrap'
# Export the binding
cmds.cvWrap(wrap_node, ex=file_path)

# Recreate the wrap node with the adjusted binding
cmds.delete(wrap_node)
wrap_node = cmds.cvWrap(sphere, cube, name=wrap_node, b=file_path)

# Import the binding again
cmds.cvWrap(wrap_node, im=file_path)
```


================================================
FILE: build.bat
================================================
@echo off
rem Configure, build and install the plug-in once for every listed Maya version.
FOR %%G IN (2019, 2020, 2022, 2023) DO (call :subroutine "%%G")
GOTO :eof

rem :subroutine <maya version>
rem Rebuilds build.<version> from scratch and runs the Release install target in it.
:subroutine
set "builddir=build.%~1"
rem rmdir removes the whole tree.  "del /S /Q" only deleted the files, so the
rem directory survived and the mkdir below reported an error on every rebuild.
if exist "%builddir%" rmdir /S /Q "%builddir%"
mkdir "%builddir%"
rem Bail out instead of building in (and then leaving) the wrong directory.
pushd "%builddir%" || goto :eof
rem Versions before 2020 are configured with the v140 toolset, later ones with v141.
if %~1 LSS 2020 (
    cmake -A x64 -T v140 -DMAYA_VERSION=%~1 ../
) ELSE (
    cmake -A x64 -T v141 -DMAYA_VERSION=%~1 ../
)
cmake --build . --target install --config Release
popd
goto :eof


================================================
FILE: module.txt
================================================
+ ${PROJECT_NAME} 1.0.0 ${PROJECT_PATH}


================================================
FILE: scripts/AEcvWrapTemplate.mel
================================================
// Attribute Editor template for cvWrap nodes.
// $nodeName is forwarded unchanged to AEweightGeometryFilterTemplate.
global proc AEcvWrapTemplate(string $nodeName) {
    editorTemplate -beginScrollLayout;
        // Main section; "-collapse false" leaves it expanded.
        editorTemplate -beginLayout "cvWrap Attributes" -collapse false;
            editorTemplate -beginNoOptimize;
            editorTemplate -addControl "scale";
            editorTemplate -endNoOptimize;				

            // These attributes are suppressed rather than given controls.
            editorTemplate -suppress "driver";
            editorTemplate -suppress "bindMesh";
            editorTemplate -suppress "bindData";
            editorTemplate -suppress "numTasks";
        editorTemplate -endLayout;

    // Append the template shared by weight geometry filter nodes.
    AEweightGeometryFilterTemplate $nodeName;

    // Any attribute not handled above is added as an extra control.
    editorTemplate -addExtraControls;
    editorTemplate -endScrollLayout;
}


================================================
FILE: scripts/cvwrap/__init__.py
================================================


================================================
FILE: scripts/cvwrap/bindui.py
================================================
import maya.cmds as cmds
if cmds.about(api=True) >= 201700:
    from PySide2 import QtWidgets as QtGui
else:
    from PySide import QtGui
from functools import partial
from maya.app.general.mayaMixin import MayaQWidgetBaseMixin

# Cached dialog instance so repeated calls to show() reuse the same window.
_win = None
def show():
    """Show the cvWrap rebind dialog, creating it the first time it is requested."""
    global _win
    # Identity comparison: "== None" would go through the widget's __eq__.
    if _win is None:
        _win = BindingDialog()
    _win.show()


class BindingDialog(MayaQWidgetBaseMixin, QtGui.QDialog):
    """Dialog that rebinds chosen components of a wrapped mesh to chosen driver faces.

    The user fills in the components to rebind, the faces to rebind them to, picks one of
    the cvWrap nodes found in the history of the first component's node and a sample
    radius; pressing "Rebind" runs ``cmds.cvWrap(rb=..., radius=...)`` on that selection.
    """

    def __init__(self, parent=None):
        super(BindingDialog, self).__init__(parent)
        self.resize(600, 200)
        self.setWindowTitle('cvWrap Rebind')
        vbox = QtGui.QVBoxLayout(self)

        hbox = self._add_labeled_row(vbox, 'Components to rebind:')
        self.components_to_rebind = QtGui.QLineEdit()
        self.components_to_rebind.textChanged.connect(self.populate_cvwrap_dropdown)
        hbox.addWidget(self.components_to_rebind)
        button = QtGui.QPushButton('Set Components')
        button.released.connect(partial(self.set_selected_text, widget=self.components_to_rebind))
        hbox.addWidget(button)

        hbox = self._add_labeled_row(vbox, 'Faces to rebind to:')
        self.target_faces = QtGui.QLineEdit()
        hbox.addWidget(self.target_faces)
        button = QtGui.QPushButton('Set Faces')
        button.released.connect(partial(self.set_selected_text, widget=self.target_faces))
        hbox.addWidget(button)

        hbox = self._add_labeled_row(vbox, 'cvWrap node:')
        self.cvwrap_combo = QtGui.QComboBox()
        hbox.addWidget(self.cvwrap_combo)

        hbox = self._add_labeled_row(vbox, 'Sample radius:')
        self.sample_radius = QtGui.QDoubleSpinBox()
        self.sample_radius.setValue(0.1)
        self.sample_radius.setRange(0, 100)
        self.sample_radius.setDecimals(2)
        self.sample_radius.setSingleStep(.1)
        hbox.addWidget(self.sample_radius)

        vbox.addStretch()

        hbox = QtGui.QHBoxLayout()
        vbox.addLayout(hbox)
        button = QtGui.QPushButton('Rebind')
        button.released.connect(self.rebind)
        hbox.addWidget(button)

    def _add_labeled_row(self, vbox, text, label_width=130):
        """Append a horizontal row to vbox that starts with a fixed-width label.

        @param vbox The vertical layout receiving the row.
        @param text The label text.
        @param label_width Width in pixels applied as both minimum and maximum.
        @return The row's QHBoxLayout so the caller can add its own widgets.
        """
        hbox = QtGui.QHBoxLayout()
        vbox.addLayout(hbox)
        label = QtGui.QLabel(text)
        label.setSizePolicy(QtGui.QSizePolicy.Preferred, QtGui.QSizePolicy.Fixed)
        label.setMinimumWidth(label_width)
        label.setMaximumWidth(label_width)
        hbox.addWidget(label)
        return hbox

    def set_selected_text(self, widget):
        """Write the current Maya selection, space separated, into the given line edit."""
        sel = cmds.ls(sl=True)
        text = ' '.join(sel)
        widget.setText(text)

    def populate_cvwrap_dropdown(self, text):
        """Refill the cvWrap combo box from the history of the first node named in text.

        Connected to the components field's textChanged signal, so it also runs while the
        user is still typing; text that does not name an existing node simply leaves the
        combo box empty instead of raising from listHistory.
        """
        # Always start from an empty list so stale nodes never remain selectable.
        self.cvwrap_combo.clear()
        tokens = text.split()
        if not tokens:
            return
        # Strip any component suffix, e.g. "pSphere1.vtx[75]" -> "pSphere1".
        node = tokens[0].split('.')[0]
        if not node or not cmds.objExists(node):
            return
        history = cmds.listHistory(node, pdo=True) or []
        wrap_nodes = [x for x in history if cmds.nodeType(x) == 'cvWrap']
        self.cvwrap_combo.addItems(wrap_nodes)

    def rebind(self):
        """Rebind the listed components to the listed faces on the chosen cvWrap node."""
        components = self.components_to_rebind.text().split()
        faces = self.target_faces.text().split()
        wrap_node = self.cvwrap_combo.currentText()
        radius = self.sample_radius.value()
        if not components or not faces or not wrap_node:
            # Nothing sensible can be selected or rebound with a missing field.
            cmds.warning('Specify the components to rebind, the target faces and a cvWrap node.')
            return
        # Make sure the faces are actual faces.  If they are not, convert to faces.
        cmds.select(faces)
        cmds.ConvertSelectionToFaces()
        faces = cmds.ls(sl=True)

        # The rebind command works on the selection: vertices first, then the faces.
        cmds.select(components)
        cmds.ConvertSelectionToVertices()
        cmds.select(faces, add=True)
        cmds.cvWrap(rb=wrap_node, radius=radius)
        print('Rebounded vertices')



================================================
FILE: scripts/cvwrap/menu.py
================================================
import maya.cmds as cmds
import maya.mel as mel
import maya.OpenMayaUI as OpenMayaUI
import os
if cmds.about(api=True) >= 201700:
    from PySide2 import QtGui
else:
    from PySide import QtGui
import cvwrap.bindui

NAME_WIDGET = 'cvwrap_name'
RADIUS_WIDGET = 'cvwrap_radius'
NEW_BIND_MESH_WIDGET = 'cvwrap_newbindmesh'
BIND_FILE_WIDGET = 'cvwrap_bindfile'
MENU_ITEMS = []


def create_menuitems():
    """Insert the cvWrap entries next to Maya's Wrap and Cluster items in the deform menus.

    Does nothing when the items already exist, and only warns on Maya versions older
    than 2016.  Every created item is recorded in MENU_ITEMS so destroy_menuitems() can
    remove it again.
    """
    global MENU_ITEMS
    if MENU_ITEMS:
        # Already created
        return
    if cmds.about(api=True) < 201600:
        cmds.warning('cvWrap menus only available in Maya 2016 and higher.')
        return
    for menu in ['mainDeformMenu', 'mainRigDeformationsMenu']:
        # Make sure the menu widgets exist first.
        mel.eval('ChaDeformationsMenu MayaWindow|{0};'.format(menu))
        # Label of the divider heading the items currently being visited.  It stays None
        # until the first divider is seen, so items ahead of it can no longer trigger a
        # NameError in the comparisons below.
        section = None
        # The query returns None instead of an empty list for a menu without items.
        for item in cmds.menu(menu, q=True, ia=True) or []:
            if cmds.menuItem(item, q=True, divider=True):
                section = cmds.menuItem(item, q=True, label=True)
            menu_label = cmds.menuItem(item, q=True, label=True)
            if menu_label == 'Wrap':
                if section == 'Create':
                    cvwrap_item = cmds.menuItem(label="cvWrap", command=create_cvwrap,
                                                sourceType='python', insertAfter=item, parent=menu)
                    cvwrap_options = cmds.menuItem(command=display_cvwrap_options,
                                                   insertAfter=cvwrap_item, parent=menu,
                                                   optionBox=True)
                    MENU_ITEMS.append(cvwrap_item)
                    MENU_ITEMS.append(cvwrap_options)
                elif section == 'Edit':
                    submenu = cmds.menuItem(label="cvWrap", subMenu=True, insertAfter=item,
                                            parent=menu)
                    MENU_ITEMS.append(submenu)
                    # Separate names keep the loop variable "item" untouched.
                    for label, callback in (("Edit Binding", edit_binding),
                                            ("Import Binding", import_binding),
                                            ("Export Binding", export_binding)):
                        child = cmds.menuItem(label=label, command=callback,
                                              sourceType='python', parent=submenu)
                        MENU_ITEMS.append(child)
            elif menu_label == 'Cluster' and section == 'Paint Weights':
                paint_item = cmds.menuItem(label="cvWrap", command=paint_cvwrap_weights,
                                           sourceType='python', insertAfter=item, parent=menu)
                MENU_ITEMS.append(paint_item)


def create_cvwrap(*args, **kwargs):
    """Create a cvWrap deformer on the current selection and print the command result.

    The plug-in is loaded quietly first.  The command arguments come from
    get_create_command_kwargs(); the arguments passed in by the UI callback are ignored.
    @raise RuntimeError if fewer than two objects are selected.
    """
    cmds.loadPlugin('cvwrap', qt=True)
    selection = cmds.ls(sl=True)
    if len(selection) < 2:
        raise RuntimeError("Select at least one surface and one influence object.")
    command_kwargs = get_create_command_kwargs()
    print(cmds.cvWrap(**command_kwargs))


def get_create_command_kwargs():
    """Collect the keyword arguments for the cvWrap create command.

    For each option the option box widget is read when it exists, and its value is
    written back to the matching option var; otherwise the stored option var is used.
    The binding file is only taken from its widget and only when the file exists.
    @return A dictionary of the kwargs to the cvWrap command.
    """
    command_kwargs = {}

    # Node name.
    if cmds.textFieldGrp(NAME_WIDGET, exists=True):
        name = cmds.textFieldGrp(NAME_WIDGET, q=True, text=True)
        cmds.optionVar(sv=(NAME_WIDGET, name))
    else:
        name = cmds.optionVar(q=NAME_WIDGET) or 'cvWrap#'
    command_kwargs['name'] = name

    # Sample radius.
    if cmds.floatSliderGrp(RADIUS_WIDGET, exists=True):
        radius = cmds.floatSliderGrp(RADIUS_WIDGET, q=True, value=True)
        cmds.optionVar(fv=(RADIUS_WIDGET, radius))
    else:
        radius = cmds.optionVar(q=RADIUS_WIDGET)
    command_kwargs['radius'] = radius

    # New bind mesh flag; the key is only added when the option is enabled.
    if cmds.checkBoxGrp(NEW_BIND_MESH_WIDGET, exists=True):
        new_bind_mesh = bool(cmds.checkBoxGrp(NEW_BIND_MESH_WIDGET, q=True, v1=True))
        cmds.optionVar(iv=(NEW_BIND_MESH_WIDGET, 1 if new_bind_mesh else 0))
    else:
        new_bind_mesh = bool(cmds.optionVar(q=NEW_BIND_MESH_WIDGET))
    if new_bind_mesh:
        command_kwargs['newBindMesh'] = True

    # Optional binding file, with environment variables expanded.
    if cmds.textFieldButtonGrp(BIND_FILE_WIDGET, exists=True):
        raw_path = cmds.textFieldButtonGrp(BIND_FILE_WIDGET, q=True, text=True)
        bind_file = os.path.expandvars(raw_path.strip())
        if bind_file and os.path.exists(bind_file):
            command_kwargs['binding'] = bind_file
        elif bind_file:
            cmds.warning('{0} does not exist.'.format(bind_file))

    return command_kwargs


def display_cvwrap_options(*args, **kwargs):
    """Build and show the cvWrap option box.

    Recreates the name, radius, binding file and new-bind-mesh controls inside Maya's
    shared option box and rewires the Apply/Close/Reset/Save buttons to this module.
    The arguments passed in by the menu callback are ignored.
    """
    cmds.loadPlugin('cvwrap', qt=True)
    layout = mel.eval('getOptionBox')
    cmds.setParent(layout)
    cmds.columnLayout(adj=True)

    for widget in [NAME_WIDGET, RADIUS_WIDGET, BIND_FILE_WIDGET, NEW_BIND_MESH_WIDGET]:
        # Delete the widgets so we don't create multiple controls with the same name
        try:
            cmds.deleteUI(widget, control=True)
        except RuntimeError:
            # The control does not exist yet.  Only the command failure is swallowed;
            # the former bare "except:" also hid KeyboardInterrupt and programming errors.
            pass

    cmds.textFieldGrp(NAME_WIDGET, label='Node name', text='cvWrap#')
    radius = cmds.optionVar(q=RADIUS_WIDGET)
    cmds.floatSliderGrp(RADIUS_WIDGET, label='Sample radius', field=True, minValue=0.0,
                        maxValue=100.0, fieldMinValue=0.0, fieldMaxValue=100.0, value=radius,
                        step=0.01, precision=2)
    cmds.textFieldButtonGrp(BIND_FILE_WIDGET, label='Binding file ', text='', buttonLabel='Browse',
                            bc=display_bind_file_dialog)
    use_new_bind_mesh = cmds.optionVar(q=NEW_BIND_MESH_WIDGET)
    cmds.checkBoxGrp(NEW_BIND_MESH_WIDGET, numberOfCheckBoxes=1, label='Create new bind mesh',
                     v1=use_new_bind_mesh)
    mel.eval('setOptionBoxTitle("cvWrap Options");')
    mel.eval('setOptionBoxCommandName("cvWrap");')
    apply_close_button = mel.eval('getOptionBoxApplyAndCloseBtn;')
    cmds.button(apply_close_button, e=True, command=apply_and_close)
    apply_button = mel.eval('getOptionBoxApplyBtn;')
    cmds.button(apply_button, e=True, command=create_cvwrap)
    reset_button = mel.eval('getOptionBoxResetBtn;')
    # For some reason, the buttons in the menu only accept MEL.
    cmds.button(reset_button, e=True,
                command='python("import cvwrap.menu; cvwrap.menu.reset_to_defaults()");')
    close_button = mel.eval('getOptionBoxCloseBtn;')
    cmds.button(close_button, e=True, command=close_option_box)
    save_button = mel.eval('getOptionBoxSaveBtn;')
    # Saving just re-reads the widgets, which stores their values in the option vars.
    cmds.button(save_button, e=True,
                command='python("import cvwrap.menu; cvwrap.menu.get_create_command_kwargs()");')
    mel.eval('showOptionBox')


def apply_and_close(*args, **kwargs):
    """Option box "Apply and Close" handler.

    Creates the cvWrap deformer, stores the option box size and then hides the box.
    Callback arguments are ignored.
    """
    create_cvwrap()
    mel.eval('saveOptionBoxSize')
    close_option_box()


def close_option_box(*args, **kwargs):
    """Hide Maya's option box.  Callback arguments are ignored."""
    mel.eval('hideOptionBox')


def display_bind_file_dialog(*args, **kwargs):
    """Let the user browse for a *.wrap binding file and put it in the binding file field.

    The dialog starts in the "data" folder of the current workspace.  The field is left
    untouched when the dialog returns nothing.
    """
    data_dir = os.path.join(cmds.workspace(q=True, rootDirectory=True), 'data')
    chosen = cmds.fileDialog2(fileFilter='*.wrap', dialogStyle=2, fileMode=1,
                              startingDirectory=data_dir)
    if not chosen:
        return
    cmds.textFieldButtonGrp(BIND_FILE_WIDGET, e=True, text=chosen[0])


def reset_to_defaults(*args, **kwargs):
    """Put every cvWrap option box widget back to its default value.

    Defaults: node name 'cvWrap#', radius 0, no binding file, new bind mesh unchecked.
    """
    # Node name and sample radius.
    cmds.textFieldGrp(NAME_WIDGET, e=True, text='cvWrap#')
    cmds.floatSliderGrp(RADIUS_WIDGET, e=True, value=0)
    # Binding file and bind mesh option.
    cmds.textFieldButtonGrp(BIND_FILE_WIDGET, e=True, text='')
    cmds.checkBoxGrp(NEW_BIND_MESH_WIDGET, e=True, v1=False)


def edit_binding(*args, **kwargs):
    """Menu callback that opens the rebind dialog.  Callback arguments are ignored."""
    cvwrap.bindui.show()


def export_binding(*args, **kwargs):
    """Ask for a *.wrap file and export the binding of the selected wrap node or mesh.

    Nothing is written when no wrap node is returned or the dialog returns nothing.
    """
    cmds.loadPlugin('cvwrap', qt=True)
    wrap_node = get_wrap_node_from_selected()
    if not wrap_node:
        return
    workspace_root = cmds.workspace(q=True, rd=True)
    data_dir = os.path.join(workspace_root, 'data')
    chosen = cmds.fileDialog2(fileFilter='*.wrap', dialogStyle=2, cap='Export Binding',
                              startingDirectory=data_dir, fm=0)
    if not chosen:
        return
    cmds.cvWrap(wrap_node, ex=chosen[0])


def import_binding(*args, **kwargs):
    """Ask for a *.wrap file and import it onto the selected wrap node or mesh.

    Nothing is imported when no wrap node is returned or the dialog returns nothing.
    """
    cmds.loadPlugin('cvwrap', qt=True)
    wrap_node = get_wrap_node_from_selected()
    if not wrap_node:
        return
    workspace_root = cmds.workspace(q=True, rd=True)
    data_dir = os.path.join(workspace_root, 'data')
    chosen = cmds.fileDialog2(fileFilter='*.wrap', dialogStyle=2, cap='Import Binding',
                              startingDirectory=data_dir, fm=1)
    if not chosen:
        return
    cmds.cvWrap(wrap_node, im=chosen[0])


def get_wrap_node_from_selected():
    """Get a wrap node from the selected geometry.

    Only the first selected object is inspected.  If it is a cvWrap node it is returned
    directly; otherwise its history is searched.  When several cvWrap nodes are found
    the user is asked to choose one.
    @return The name of the cvWrap node, or None if the user cancels the chooser.
    @raise RuntimeError if nothing is selected or no cvWrap node is found.
    """
    sel = cmds.ls(sl=True) or []
    if not sel:
        raise RuntimeError('No cvWrap found on selected.')
    if cmds.nodeType(sel[0]) == 'cvWrap':
        return sel[0]
    history = cmds.listHistory(sel[0], pdo=0) or []
    wrap_nodes = [node for node in history if cmds.nodeType(node) == 'cvWrap']
    if not wrap_nodes:
        raise RuntimeError('No cvWrap node found on {0}.'.format(sel[0]))
    if len(wrap_nodes) == 1:
        return wrap_nodes[0]

    # Multiple wrap nodes are deforming the mesh.  Let the user choose which one
    # to use.  QInputDialog lives in QtWidgets under PySide2 and in QtGui under PySide,
    # so it is imported here rather than taken from the module-level QtGui.
    if cmds.about(api=True) >= 201700:
        from PySide2 import QtWidgets as widgets
    else:
        from PySide import QtGui as widgets
    # getItem returns a (text, accepted) pair; hand callers just the node name.
    choice, accepted = widgets.QInputDialog.getItem(None, 'Select cvWrap node',
                                                    'cvWrap node:', wrap_nodes)
    return choice if accepted else None


def destroy_menuitems():
    """Remove the cvWrap items from the menus and forget them."""
    global MENU_ITEMS
    for item in MENU_ITEMS:
        # An item may already be gone, e.g. removed together with its parent submenu
        # (which is recorded ahead of its children) or with a rebuilt menu.  Skipping
        # it keeps deleteUI from raising before MENU_ITEMS is reset.
        if cmds.menuItem(item, exists=True):
            cmds.deleteUI(item, menuItem=True)
    MENU_ITEMS = []


def paint_cvwrap_weights(*args, **kwargs):
    """Switch to the attribute paint tool targeting the weights of the selected cvWrap.

    Does nothing when the selection is empty or no wrap node is returned.
    """
    if not cmds.ls(sl=True):
        return
    wrap_node = get_wrap_node_from_selected()
    if not wrap_node:
        return
    command = 'artSetToolAndSelectAttr("artAttrCtx", "cvWrap.{0}.weights");'.format(wrap_node)
    mel.eval(command)


================================================
FILE: src/CMakeLists.txt
================================================
# Builds the cvwrap plug-in library and installs it, together with its OpenCL kernel
# source, into the plug-ins directory of the install prefix.
set(SOURCE_FILES
    "pluginMain.cpp"
    "cvWrapCmd.cpp"
    "cvWrapCmd.h"
    "cvWrapDeformer.cpp"
    "cvWrapDeformer.h"
    "bindingio.cpp"
    "bindingio.h"
    "common.cpp"
    "common.h"
    "cvwrap.cl"
)

# The sources are compiled with AVX code generation enabled.
if (WIN32)
    set(COMPILE_FLAGS "/arch:AVX")
else()
    set(COMPILE_FLAGS "-mavx")
endif()

find_package(Maya REQUIRED)

add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES})
# Linking the imported target is what carries its usage requirements over to the
# plug-in.  The former target_include_directories(... PRIVATE Maya::Maya) call passed
# a target name to a command that expects directory paths, so it has been removed.
target_link_libraries(${PROJECT_NAME} PRIVATE Maya::Maya)
target_compile_options(${PROJECT_NAME} PRIVATE ${COMPILE_FLAGS})
# MAYA_PLUGIN and MAYA_TARGET_TYPE are not defined in this file; presumably they come
# from the Maya find module.
MAYA_PLUGIN(${PROJECT_NAME})

install(TARGETS ${PROJECT_NAME} ${MAYA_TARGET_TYPE} DESTINATION plug-ins)
install(FILES "cvwrap.cl" DESTINATION plug-ins)



================================================
FILE: src/bindingio.cpp
================================================
#include "bindingio.h"
#include "cvWrapDeformer.h"

#include <maya/MGlobal.h>
#include <maya/MObjectArray.h>
#include <maya/MFnDoubleArrayData.h>
#include <maya/MFnIntArrayData.h>
#include <maya/MFnMatrixData.h>
#include <maya/MFnWeightGeometryFilter.h>

const float BindingIO::kWrapFileVersion = 1.0f;

/**
  MMatrix specialization: writes the 16 matrix entries as raw doubles in row-major
  order.  Unlike the array version, no element count is written in front.
*/
template <>
void WriteAttribute<double, MMatrix>(std::ofstream &out, const MMatrix& attribute) {
  double flat[16];
  double* cursor = flat;
  for (int row = 0; row < 4; ++row) {
    for (int col = 0; col < 4; ++col) {
      *cursor++ = attribute[row][col];
    }
  }
  out.write(reinterpret_cast<const char*>(flat), sizeof(flat));
}

/**
  MMatrix specialization: reads 16 raw doubles and stores them into the matrix in
  row-major order.  No element count is expected in front of the values.
*/
template <>
void ReadAttribute<double, MMatrix>(std::ifstream &in, MMatrix &matrix) {
  double flat[16];
  in.read(reinterpret_cast<char*>(flat), sizeof(flat));
  const double* cursor = flat;
  for (int row = 0; row < 4; ++row) {
    for (int col = 0; col < 4; ++col) {
      matrix[row][col] = *cursor++;
    }
  }
}

/**
  Writes the binding stored on a cvWrap node to a binary stream.

  Layout: file version (float), geometry count, then per geometry the element count
  followed, for every element, by its logical index, sample vertex ids, sample
  weights, bind matrix, triangle vertex ids and barycentric weights.

  @param[in] out Output stream, expected to be open in binary mode.
  @param[in] oWrapNode The cvWrap node to export.
  @return MS::kSuccess if everything was written, a failure status otherwise.
*/
MStatus BindingIO::ExportBinding(std::ofstream& out, MObject& oWrapNode) {
  MStatus status;
  MFnWeightGeometryFilter fnWrapNode(oWrapNode, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  if (fnWrapNode.typeId() != CVWrap::id) {
    MGlobal::displayError(fnWrapNode.name() + " is not a cvWrap node.");
    CHECK_MSTATUS_AND_RETURN_IT(MS::kFailure);
  }

  out.write((char *)&kWrapFileVersion, sizeof(float));

  MPlug plugBindData = fnWrapNode.findPlug(CVWrap::aBindData, false, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  // Get the output geometry so we can get the geometry indices
  MObjectArray outputGeometry;
  status = fnWrapNode.getOutputGeometry(outputGeometry);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  // Write the number of geometry
  unsigned int geometryCount = outputGeometry.length();
  out.write((char *)(&geometryCount), sizeof(geometryCount));

  MIntArray triangleVerts(3);  /**< Storage for the triangle vertex ids. */
  MFloatArray baryCoords(3);  /**< Storage for the barycentric weights. */
  for (unsigned int i = 0; i < geometryCount; ++i) {
    unsigned int geomIndex = fnWrapNode.indexForOutputShape(outputGeometry[i], &status);
    // Without this check a failed lookup would silently export the wrong element.
    CHECK_MSTATUS_AND_RETURN_IT(status);
    // Get the plugs to the binding attributes for this geometry
    MPlug plugBind = plugBindData.elementByLogicalIndex(geomIndex, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MPlug plugSampleWeights = plugBind.child(CVWrap::aSampleWeights, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MPlug plugSampleVerts = plugBind.child(CVWrap::aSampleComponents, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MPlug plugSampleBindMatrix = plugBind.child(CVWrap::aBindMatrix, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MPlug plugTriangleVerts = plugBind.child(CVWrap::aTriangleVerts, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MPlug plugBarycentricWeights = plugBind.child(CVWrap::aBarycentricWeights, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);

    unsigned int numElements = plugSampleWeights.numElements();
    out.write((char *)(&numElements), sizeof(numElements));

    // The same physical index is used on all five array plugs.
    for (unsigned int j = 0; j < numElements; ++j) {
      // Write the logical index
      MPlug plugSampleVertElement = plugSampleVerts.elementByPhysicalIndex(j, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      unsigned int logicalIndex = plugSampleVertElement.logicalIndex();
      out.write((char *)(&logicalIndex), sizeof(logicalIndex));

      // Export sample vertex ids.  The function set constructor validates the data
      // object, so no separate status check follows asMObject().
      MObject oSampleIds = plugSampleVertElement.asMObject();
      MFnIntArrayData fnIntData(oSampleIds, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      MIntArray sampleIds = fnIntData.array();
      WriteAttribute<int, MIntArray>(out, sampleIds);

      // Export sample weights
      MObject oWeightData = plugSampleWeights.elementByPhysicalIndex(j, &status).asMObject();
      CHECK_MSTATUS_AND_RETURN_IT(status);
      MFnDoubleArrayData fnDoubleData(oWeightData, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      MDoubleArray weights = fnDoubleData.array();
      WriteAttribute<double, MDoubleArray>(out, weights);

      // Export bind matrix
      MObject oBindMatrix = plugSampleBindMatrix.elementByPhysicalIndex(j, &status).asMObject();
      CHECK_MSTATUS_AND_RETURN_IT(status);
      MFnMatrixData fnMatrixData(oBindMatrix, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      WriteAttribute<double, MMatrix>(out, fnMatrixData.matrix());

      // Export triangle vertices
      MObject oTriangleVerts = plugTriangleVerts.elementByPhysicalIndex(j, &status).asMObject();
      CHECK_MSTATUS_AND_RETURN_IT(status);
      MFnNumericData fnNumericData(oTriangleVerts, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = fnNumericData.getData3Int(triangleVerts[0], triangleVerts[1], triangleVerts[2]);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      WriteAttribute<int, MIntArray>(out, triangleVerts);

      // Export the barycentric weights
      MObject oBaryWeights = plugBarycentricWeights.elementByPhysicalIndex(j, &status).asMObject();
      CHECK_MSTATUS_AND_RETURN_IT(status);
      MFnNumericData fnBaryData(oBaryWeights, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = fnBaryData.getData3Float(baryCoords[0], baryCoords[1], baryCoords[2]);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      WriteAttribute<float, MFloatArray>(out, baryCoords);
    }
  }

  // A full disk or closed stream would otherwise be reported as a successful export.
  if (!out) {
    MGlobal::displayError("Failed to write the wrap binding.");
    return MS::kFailure;
  }

  MGlobal::displayInfo("Wrap binding exported.");

  return MS::kSuccess;
}



/**
  Reports a failed or truncated read on the binding stream.
  @param[in] in The stream being imported.
  @return MS::kSuccess while the stream is in a good state, MS::kFailure otherwise.
*/
static MStatus CheckBindingStream(std::ifstream& in) {
  if (!in) {
    MGlobal::displayError("Failed to read the wrap binding: unexpected end of file or read error.");
    return MS::kFailure;
  }
  return MS::kSuccess;
}

/**
  Stores a data object on one element of an array plug.
  @param[in] plugArray The array plug.
  @param[in] logicalIndex Logical index of the element to set.
  @param[in] oData The data object to store.
  @return The status of the element lookup or, if that succeeded, of the set call.
*/
static MStatus SetBindingElement(MPlug& plugArray, unsigned int logicalIndex, MObject& oData) {
  MStatus status;
  MPlug plugElement = plugArray.elementByLogicalIndex(logicalIndex, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  return plugElement.setMObject(oData);
}

/**
  Reads a binding from a binary stream, in the layout produced by ExportBinding, and
  stores it on the bind data plugs of a cvWrap node.

  @param[in] in Input stream, expected to be open in binary mode.
  @param[in] oWrapNode The cvWrap node receiving the binding.
  @return MS::kSuccess if the whole binding was read and applied, a failure status if
          the stream ends early, an element is malformed or a plug cannot be set.
*/
MStatus BindingIO::ImportBinding(std::ifstream& in, MObject& oWrapNode) {
  MStatus status;

  MFnWeightGeometryFilter fnWrapNode(oWrapNode, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MPlug plugBindData = fnWrapNode.findPlug(CVWrap::aBindData, false, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  float version = 0.0f;
  in.read((char *)(&version), sizeof(float));

  unsigned int geometryCount = 0;
  in.read((char *)(&geometryCount), sizeof(geometryCount));
  // Stop before looping over counts that were never actually read.
  status = CheckBindingStream(in);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  if (version != kWrapFileVersion) {
    // Only warn: the layout of other versions is not known here.
    MGlobal::displayWarning("Wrap binding file version differs from the version written by this plug-in.");
  }

  MFnMatrixData fnMatrixData;
  MFnIntArrayData fnIntData;
  MFnDoubleArrayData fnDoubleData;
  MFnNumericData fnNumericData;
  // We are assuming that the geometryIndices are compact and continuous.  It is possible
  // that the indices could be sparse, but we will ignore that corner case.
  for (unsigned int geomIndex = 0; geomIndex < geometryCount; ++geomIndex) {
    // Get the plugs to the binding attributes for this geometry
    MPlug plugBind = plugBindData.elementByLogicalIndex(geomIndex, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MPlug plugSampleWeights = plugBind.child(CVWrap::aSampleWeights, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MPlug plugSampleVerts = plugBind.child(CVWrap::aSampleComponents, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MPlug plugSampleBindMatrix = plugBind.child(CVWrap::aBindMatrix, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MPlug plugTriangleVerts = plugBind.child(CVWrap::aTriangleVerts, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MPlug plugBarycentricWeights = plugBind.child(CVWrap::aBarycentricWeights, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);

    // The element count comes from the file, not from the node.
    unsigned int numElements = 0;
    in.read((char *)(&numElements), sizeof(numElements));
    status = CheckBindingStream(in);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    for (unsigned int i = 0; i < numElements; ++i) {
      unsigned int logicalIndex = 0;
      in.read((char *)(&logicalIndex), sizeof(logicalIndex));
      status = CheckBindingStream(in);
      CHECK_MSTATUS_AND_RETURN_IT(status);

      // Sample vert ids.
      MIntArray sampleIds;
      ReadAttribute<int, MIntArray>(in, sampleIds);
      status = CheckBindingStream(in);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      MObject oIntData = fnIntData.create(sampleIds, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = SetBindingElement(plugSampleVerts, logicalIndex, oIntData);
      CHECK_MSTATUS_AND_RETURN_IT(status);

      // Sample weights
      MDoubleArray weights;
      ReadAttribute<double, MDoubleArray>(in, weights);
      status = CheckBindingStream(in);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      MObject oDoubleData = fnDoubleData.create(weights, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = SetBindingElement(plugSampleWeights, logicalIndex, oDoubleData);
      CHECK_MSTATUS_AND_RETURN_IT(status);

      // Bind matrix
      MMatrix bindMatrix;
      ReadAttribute<double, MMatrix>(in, bindMatrix);
      status = CheckBindingStream(in);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      MObject oMatrixData = fnMatrixData.create(bindMatrix, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = SetBindingElement(plugSampleBindMatrix, logicalIndex, oMatrixData);
      CHECK_MSTATUS_AND_RETURN_IT(status);

      // Triangle vertices
      MIntArray triangleVertices;
      ReadAttribute<int, MIntArray>(in, triangleVertices);
      status = CheckBindingStream(in);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      // The three ids are indexed below, so a shorter array must be rejected first.
      if (triangleVertices.length() < 3) {
        MGlobal::displayError("Wrap binding file is corrupt: expected 3 triangle vertices.");
        return MS::kFailure;
      }
      MObject oNumericData = fnNumericData.create(MFnNumericData::k3Int, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = fnNumericData.setData3Int(triangleVertices[0], triangleVertices[1],
                                         triangleVertices[2]);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = SetBindingElement(plugTriangleVerts, logicalIndex, oNumericData);
      CHECK_MSTATUS_AND_RETURN_IT(status);

      // Barycentric coordinates
      MFloatArray coords;
      ReadAttribute<float, MFloatArray>(in, coords);
      status = CheckBindingStream(in);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      if (coords.length() < 3) {
        MGlobal::displayError("Wrap binding file is corrupt: expected 3 barycentric weights.");
        return MS::kFailure;
      }
      oNumericData = fnNumericData.create(MFnNumericData::k3Float, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = fnNumericData.setData3Float(coords[0], coords[1], coords[2]);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = SetBindingElement(plugBarycentricWeights, logicalIndex, oNumericData);
      CHECK_MSTATUS_AND_RETURN_IT(status);
    }
  }
  MGlobal::displayInfo("Wrap binding imported.");

  return MS::kSuccess;
}


================================================
FILE: src/bindingio.h
================================================
#ifndef CVWRAP_BindingIO_H
#define CVWRAP_BindingIO_H

#include <maya/MMatrix.h>
#include <maya/MObject.h>
#include <maya/MString.h>
#include <fstream>

/**
  Imports and exports the binding information of a wrap node through binary file
  streams.
*/
class BindingIO {
 public:
  /** Version number written at the start of every exported binding. */
  static const float kWrapFileVersion;

  /**
    Writes the binding of a wrap node to a stream.
    @param[in] out Stream receiving the binary binding data.
    @param[in] oWrapNode The wrap node whose binding is exported.
    @return Status of the export.
  */
  MStatus ExportBinding(std::ofstream& out, MObject& oWrapNode);

  /**
    Reads a binding from a stream and applies it to a wrap node.
    @param[in] in Stream providing the binary binding data.
    @param[in] oWrapNode The wrap node receiving the binding.
    @return Status of the import.
  */
  MStatus ImportBinding(std::ifstream& in, MObject& oWrapNode);
};

/**
  Serializes a Maya array type to a binary stream as an unsigned element count
  followed by the raw elements.  Only the count is written for an empty array.
  @param[in] out Output stream.
  @param[in] attribute A Maya array type providing length() and get().
*/
template <typename dataType, typename container>
void WriteAttribute(std::ofstream &out, const container& attribute) {
  const unsigned int count = attribute.length();
  out.write(reinterpret_cast<const char*>(&count), sizeof(count));
  if (count == 0) {
    return;
  }
  // Copy the elements into a contiguous buffer so they can go out in one write.
  dataType* buffer = new dataType[count];
  attribute.get(buffer);
  out.write(reinterpret_cast<const char*>(buffer), count * sizeof(dataType));
  delete [] buffer;
}

/**
  Template specialization for MMatrix because matrices need to be written differently from
  normal array types.
*/
template <>
void WriteAttribute<double, MMatrix>(std::ofstream &out, const MMatrix& attribute);

/**
  Convenience function to read a Maya array type from a binary stream.  The expected layout
  is an unsigned int element count followed by that many raw element values.

  The array is always cleared first.  If the element count cannot be read (empty or failed
  stream) the array is left empty.  If the stream ends before all element values have been
  read, the missing values are zero instead of indeterminate memory.
  @param[in] in Input stream.
  @param[out] attribute A Maya array type.
*/
template <typename dataType, typename container>
void ReadAttribute(std::ifstream &in, container &attribute) {
  attribute.clear();
  // Start from a known value so a failed read never leaves the count indeterminate.
  unsigned int length = 0;
  in.read((char *)(&length), sizeof(length));
  if (!in || length == 0) {
    return;
  }
  attribute.setLength(length);
  // Value-initialize the scratch buffer so a short read yields zeros, not garbage.
  dataType* pValues = new dataType[length]();
  in.read((char *)pValues, length * sizeof(dataType));
  for (unsigned int i = 0; i < length; i++) {
    attribute[i] = pValues[i];
  }
  delete [] pValues;
}

template <>
void ReadAttribute<double, MMatrix>(std::ifstream &in, MMatrix &matrix);


#endif

================================================
FILE: src/common.cpp
================================================
#include "common.h"

#include <maya/MGlobal.h>
#include <maya/MFnDagNode.h>
#include <maya/MFnMesh.h>
#include <maya/MItMeshVertex.h>
#include <maya/MSelectionList.h>
#include <algorithm>
#include <cassert>
#include <complex>
#include <set>
#include <queue>
#include <utility>

#define NORMALIZATION_INDEX -1

void StartProgress(const MString& title, unsigned int count) {
  if (MGlobal::mayaState() == MGlobal::kInteractive) {
    MString message = "progressBar -e -bp -ii true -st \"";
    message += title;
    message += "\" -max ";
    message += count;
    message += " $gMainProgressBar;";
    MGlobal::executeCommand(message);
  }
}


void StepProgress(int step) {
  if (MGlobal::mayaState() == MGlobal::kInteractive) {
    MString message = "progressBar -e -s ";
    message += step;
    message += " $gMainProgressBar;";
    MGlobal::executeCommand(message);
  }
}


/**
  Queries Maya's main progress bar for a pending cancel request.
  @return true if the progress bar reports it was cancelled; always false when Maya is not
  running in its interactive state.
*/
bool ProgressCancelled() {
  if (MGlobal::mayaState() != MGlobal::kInteractive) {
    return false;
  }
  int isCancelled = 0;
  MGlobal::executeCommand("progressBar -query -isCancelled $gMainProgressBar", isCancelled);
  return isCancelled != 0;
}


/**
  Ends the current run of Maya's main progress bar.
  Nothing happens unless Maya is running in its interactive state.
*/
void EndProgress() {
  if (MGlobal::mayaState() != MGlobal::kInteractive) {
    return;
  }
  MGlobal::executeCommand("progressBar -e -ep $gMainProgressBar;");
}


/**
  Tests whether the node at the end of the path is one of the supported shape types:
  a mesh, a nurbs curve or a nurbs surface.
*/
bool IsShapeNode(MDagPath& path) {
  if (path.node().hasFn(MFn::kMesh)) {
    return true;
  }
  if (path.node().hasFn(MFn::kNurbsCurve)) {
    return true;
  }
  return path.node().hasFn(MFn::kNurbsSurface);
}


/**
  Moves the path to the first child shape whose intermediate-object state matches the
  requested one.
  @param[in,out] path A transform or shape path.  On success it points at the matching shape;
  on failure it is left at the transform (or unchanged if it was neither).
  @param[in] intermediate true to look for an intermediate shape, false for a regular one.
  @return MS::kSuccess if a matching shape was found, MS::kFailure otherwise.
*/
MStatus GetShapeNode(MDagPath& path, bool intermediate) {
  MStatus status;

  // A shape was passed in: back up to its parent so every sibling shape is considered.
  if (IsShapeNode(path)) {
    path.pop();
  }

  if (!path.hasFn(MFn::kTransform)) {
    // Nothing to search under.
    return MS::kFailure;
  }

  const unsigned int childCount = path.childCount();
  for (unsigned int childIndex = 0; childIndex < childCount; ++childIndex) {
    status = path.push(path.child(childIndex));
    CHECK_MSTATUS_AND_RETURN_IT(status);

    if (IsShapeNode(path)) {
      MFnDagNode fnShape(path, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      if (fnShape.isIntermediateObject() == intermediate) {
        return MS::kSuccess;
      }
    }

    // Not the shape we want; step back to the transform and try the next child.
    path.pop();
  }

  // No valid shape node found.
  return MS::kFailure;
}


/**
  Resolves a node name to a dag path by looking the name up as a selection list and taking
  the first entry.
  @param[in] name Name of a dag node.
  @param[out] path Receives the dag path of the first match.
  @return MStatus of the first failing lookup, or MS::kSuccess.
*/
MStatus GetDagPath(MString& name, MDagPath& path) {
  MSelectionList matches;
  MStatus status = MGlobal::getSelectionListByName(name, matches);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  status = matches.getDagPath(0, path);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  return MS::kSuccess;
}

/**
  Deletes intermediate shapes found from the given path, one at a time, until
  GetShapeNode no longer finds any.
  @param[in] path Path to search from; the search restarts from a fresh copy of it after
  every deletion.
  @return MS::kSuccess, or the status of a failed delete command.
*/
MStatus DeleteIntermediateObjects(MDagPath& path) {
  MStatus status;
  for (MDagPath pathShape(path);
       GetShapeNode(pathShape, true) == MS::kSuccess;
       pathShape = MDagPath(path)) {
    status = MGlobal::executeCommand("delete " + pathShape.partialPathName());
    CHECK_MSTATUS_AND_RETURN_IT(status);
  }
  return MS::kSuccess;
}

void GetBarycentricCoordinates(const MPoint& P, const MPoint& A, const MPoint& B, const MPoint& C,
                               BaryCoords& coords) {
  // Compute the normal of the triangle
  MVector N = (B - A) ^ (C - A);
  MVector unitN = N.normal();

  // Compute twice area of triangle ABC
  double areaABC = unitN * N;

  if (areaABC == 0.0) {
    // If the triangle is degenerate, just use one of the points.
    coords[0] = 1.0f;
    coords[1] = 0.0f;
    coords[2] = 0.0f;
    return;
  }

  // Compute a
  double areaPBC = unitN * ((B - P) ^ (C - P));
  coords[0] = (float)(areaPBC / areaABC);

  // Compute b
  double areaPCA = unitN * ((C - P) ^ (A - P));
  coords[1] = (float)(areaPCA / areaABC);

  // Compute c
  coords[2] = 1.0f - coords[0] - coords[1];
}


/**
  Builds the per-vertex adjacency of a mesh.  For every vertex the adjacency is the set of
  all other vertex ids that belong to a face connected to that vertex.
  @param[in] pathMesh Path to a mesh.
  @param[out] adjacency Resized to the vertex count; entry i holds the neighbors of vertex i.
  @return MS::kSuccess, or the status of the first Maya call that failed.
*/
MStatus GetAdjacency(MDagPath& pathMesh, std::vector<std::set<int> >& adjacency) {
  MStatus status;
  // Get mesh adjacency.  The adjacency will be all vertex ids on the connected faces.
  MItMeshVertex itVert(pathMesh, MObject::kNullObj, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MFnMesh fnMesh(pathMesh, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  adjacency.resize(itVert.count());
  for (; !itVert.isDone(); itVert.next()) {
    MIntArray faces;
    status = itVert.getConnectedFaces(faces);
    CHECK_MSTATUS_AND_RETURN_IT(status);

    const int vertexId = itVert.index();
    std::set<int>& neighbors = adjacency[vertexId];
    neighbors.clear();
    // Put the vertex ids in a set to avoid duplicates
    for (unsigned int j = 0; j < faces.length(); ++j) {
      MIntArray vertices;
      // Do not silently continue with a partial adjacency if a face cannot be queried.
      status = fnMesh.getPolygonVertices(faces[j], vertices);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      for (unsigned int k = 0; k < vertices.length(); ++k) {
        if (vertices[k] != vertexId) {
          neighbors.insert(vertices[k]);
        }
      }
    }
  }
  return MS::kSuccess;
}


/**
  Used in the CrawlSurface function to keep track of where we are crawling.
  One instance is queued per pending crawl step.  CrawlSurface fills instances with brace
  initializers, so the member order below must not change.
*/
struct CrawlData {
  MPoint sourcePosition;  /**< Where the crawl iteration came from. */
  double crawlDistance;  /**< How far this crawl iteration has traveled. */
  int nextVertex;  /**< Where this crawl iteration should go next. */
};


/**
  Crawls outward from startPoint across the mesh, following vertex adjacency, and records
  the accumulated crawl distance of every vertex reached within maxDistance.
  @param[in] startPoint The position from which to start the crawl.
  @param[in] vertexIndices The starting vertex indices we want to crawl to.
  @param[in] points The array of all the mesh points.
  @param[in] maxDistance The maximum crawl distance.
  @param[in] adjacency Vertex adjacency data from the GetAdjacency function.
  @param[out] distances Vertex id to crawl distance.  Always receives an entry for
  NORMALIZATION_INDEX with distance 0.0 that stands for the start point itself.
  @return MS::kSuccess.
*/
MStatus CrawlSurface(const MPoint& startPoint, const MIntArray& vertexIndices, MPointArray& points, double maxDistance,
                     std::vector<std::set<int> >& adjacency, std::map<int, double>& distances) {
  MStatus status;
  distances[NORMALIZATION_INDEX] = 0.0; // -1 will represent our hit point.
  double minStartDistance = 999999.0;
  unsigned int minStartIndex = 0;

  // Instead of a recursive function, which can get pretty slow, we'll use a queue to keep
  // track of where we are going and where we are coming from.
  std::queue<CrawlData> verticesToVisit;
  // Add the initial crawl paths to the queue.
  // NOTE(review): each root is queued with crawlDistance already set to the distance from
  // startPoint, and the loop below adds sourcePosition-to-vertex (the same distance) again,
  // so the starting vertices are stored at twice their straight-line distance.  Confirm
  // whether this is intended before changing it; stored bindings depend on it.
  for (unsigned int i = 0; i < vertexIndices.length(); ++i) {
    double distance = startPoint.distanceTo(points[vertexIndices[i]]);
    // Only crawl to the starting vertices if they are within the radius.
    if (distance <= maxDistance) {
      CrawlData root = {startPoint, distance, vertexIndices[i]};
      verticesToVisit.push(root);
    }
    // Track the minimum start distance in case we need to add the closest vertex below.
    // The minimum must be greater than 0 to make sure we do not use the vertex that is the
    // same as the startPoint which would create an invalid up vector.
    if (distance < minStartDistance && distance > 0.000001) {
      minStartDistance = distance;
      minStartIndex = vertexIndices[i];
    }
  }
  // If we didn't even reach a vertex in the hit face, or the startPoint is equal to a vertex
  // on the face, add the closest vertex so we can calculate a proper up vector
  if (verticesToVisit.size() <= 1) {
    CrawlData root = {startPoint, maxDistance - 0.001, (int)minStartIndex};
    verticesToVisit.push(root);
    // Record the fallback vertex directly, just inside the radius.
    distances[minStartIndex] = maxDistance - 0.001;
  }
  while (verticesToVisit.size()) {
    CrawlData next = verticesToVisit.front();
    verticesToVisit.pop();

    // Extract the data out of the crawl struct
    int idx = next.nextVertex;
    MPoint& pt = points[idx];
    MPoint sourcePoint = next.sourcePosition;
    double currentCrawlDistance = next.crawlDistance;

    currentCrawlDistance += sourcePoint.distanceTo(pt);
    if (currentCrawlDistance >= maxDistance) {
      // If this vertex is outside the radius, no need to crawl anymore from that vertex.
      continue;
    }
    // operator[] inserts 0.0 for a vertex that has not been seen yet, which is why 0.0 is
    // treated as "no distance saved" in the test below.
    double& savedDistance = distances[idx];
    if (currentCrawlDistance <= savedDistance || savedDistance == 0.0) {
      // If this current crawl distance is less than the distance we have saved for this
      // vertex, use this new crawl distance instead.
      savedDistance = currentCrawlDistance;
    } else {
      // A smaller distance is already stored so we don't want to crawl
      // from this vertex any further.
      continue;
    }
    // Crawl the adjacent vertices
    std::set<int>::iterator iter;
    for (iter = adjacency[idx].begin(); iter != adjacency[idx].end(); ++iter) {
      CrawlData data = {pt, currentCrawlDistance, *iter};
      verticesToVisit.push(data);
    }
  }
  assert(distances.size() > 0);
  
  return MS::kSuccess;
}

/**
  Ordering used with std::sort on (vertex id, weight) samples: ascending by weight, with the
  normalization sample (id NORMALIZATION_INDEX) always placed last.

  This is a strict weak ordering, as std::sort requires: a sample never compares less than
  itself, and two samples never compare less than each other in both directions.
  @param[in] lhs Left sample.
  @param[in] rhs Right sample.
  @return true if lhs must be ordered before rhs.
*/
bool SampleSort(std::pair<int, double> lhs, std::pair<int, double> rhs) {
  const bool lhsIsNormalization = lhs.first == NORMALIZATION_INDEX;
  const bool rhsIsNormalization = rhs.first == NORMALIZATION_INDEX;
  if (lhsIsNormalization != rhsIsNormalization) {
    // Ensure that the normalization sample comes last.
    return rhsIsNormalization;
  }
  return lhs.second < rhs.second;
}

void CalculateSampleWeights(const std::map<int, double>& distances, double radius,
                            MIntArray& vertexIds, MDoubleArray& weights) {
  
  std::map<int, double>::const_iterator itDistance;
  std::vector<std::pair<int, double> > samples;
  for (itDistance = distances.begin();
        itDistance != distances.end();
        itDistance++) {
    double x = itDistance->second;
    double w = 1.0 - (x/radius);
    samples.push_back(std::pair<int, double>(itDistance->first, w));
  }

  // Make the samples a multiple of 4 so we can use fast intrinsics!
  int remainder = 4 - ((samples.size()-1) % 4);
  if (remainder != 4) {
    for (int i = 0; i < remainder; ++i) {
      samples.push_back(std::pair<int, double>(0, 0.0));
    }
  }

  unsigned int length = (unsigned int)samples.size();
  weights.setLength(length);
  vertexIds.setLength(length);
  std::sort(samples.begin(), samples.end(), SampleSort);
  std::vector<std::pair<int, double> >::iterator iter;
  int ii = 0;
  double sum = 0.0;
  for (iter = samples.begin(); iter != samples.end(); ++iter, ++ii) {
    vertexIds[ii] = (*iter).first;
    weights[ii] = (*iter).second;
    sum += (*iter).second;
  }
  assert(sum > 0.0);
  // Normalize the weights
  for (unsigned int i = 0; i < weights.length(); ++i) {
    weights[i] /= sum;
  }
}


void CreateMatrix(const MPoint& origin, const MVector& normal, const MVector& up,
                  MMatrix& matrix) {
  const MPoint& t = origin;
  const MVector& y = normal;
  MVector x = y ^ up;
  MVector z = x ^ y;
  // Renormalize vectors
  x.normalize();
  z.normalize();
  matrix[0][0] = x.x; matrix[0][1] = x.y; matrix[0][2] = x.z; matrix[0][3] = 0.0;
  matrix[1][0] = y.x; matrix[1][1] = y.y; matrix[1][2] = y.z; matrix[1][3] = 0.0;
  matrix[2][0] = z.x; matrix[2][1] = z.y; matrix[2][2] = z.z; matrix[2][3] = 0.0;
  matrix[3][0] = t.x; matrix[3][1] = t.y; matrix[3][2] = t.z; matrix[3][3] = 1.0;
}


/**
  Calculates the origin, normal and up vector used to build a wrap basis matrix.

  The origin is the barycentric combination of the triangle points.  The normal is the
  barycentric normal scaled by the last weight plus the weighted normals of all other
  samples, normalized.  The up vector points from the origin to the midpoint of the first
  two triangle vertices and is then validated by GetValidUp.

  Both the AVX and the scalar path overwrite all three outputs; the incoming values of
  origin, up and normal are never used.
  @param[in] weights The sample weights; the last entry belongs to the hit point.
  @param[in] coords The barycentric coordinates of the closest point.
  @param[in] triangleVertices The vertex ids forming the triangle of the closest point.
  @param[in] points The driver point array.
  @param[in] normals The driver per-vertex normal array.
  @param[in] sampleIds The vertex ids on the driver of the current sample.
  @param[in] alignedStorage Scratch storage for 4 doubles, only used by the AVX path.
  @param[out] origin The origin of the coordinate system.
  @param[out] up The up vector of the coordinate system.
  @param[out] normal The normal vector of the coordinate system.
*/
void CalculateBasisComponents(const MDoubleArray& weights, const BaryCoords& coords,
                              const MIntArray& triangleVertices, const MPointArray& points,
                              const MFloatVectorArray& normals, const MIntArray& sampleIds,
                              double* alignedStorage,
                              MPoint& origin, MVector& up, MVector& normal) {
  // Start with the recreated point and normal using the barycentric coordinates of the hit point.
  unsigned int hitIndex = weights.length()-1;
#ifdef __AVX__
  __m256d originV = Dot4<MPoint>(coords[0], coords[1], coords[2], 0.0,
                                points[triangleVertices[0]], points[triangleVertices[1]],
                                points[triangleVertices[2]], MPoint::origin);
  __m256d hitNormalV = Dot4<MVector>(coords[0], coords[1], coords[2], 0.0,
                                normals[triangleVertices[0]], normals[triangleVertices[1]],
                                normals[triangleVertices[2]], MVector::zero);
  __m256d hitWeightV = _mm256_set1_pd(weights[hitIndex]);
  // Create the barycentric point and normal.
  __m256d normalV = _mm256_mul_pd(hitNormalV, hitWeightV);
  // Then use the weighted adjacent data.
  for (unsigned int j = 0; j < hitIndex; j += 4) {
    __m256d tempNormal = Dot4<MVector>(weights[j], weights[j+1], weights[j+2], weights[j+3],
                                       normals[sampleIds[j]], normals[sampleIds[j+1]],
                                       normals[sampleIds[j+2]], normals[sampleIds[j+3]]);
    normalV = _mm256_add_pd(tempNormal, normalV);
  }

  _mm256_store_pd(alignedStorage, originV);
  origin.x = alignedStorage[0];
  origin.y = alignedStorage[1];
  origin.z = alignedStorage[2];
  _mm256_store_pd(alignedStorage, normalV);
  normal.x = alignedStorage[0];
  normal.y = alignedStorage[1];
  normal.z = alignedStorage[2];

  // Calculate the up vector
  const MPoint& pt1 = points[triangleVertices[0]];
  const MPoint& pt2 = points[triangleVertices[1]];
  __m256d p1 = _mm256_set_pd(pt1.w, pt1.z, pt1.y, pt1.x);
  __m256d p2 = _mm256_set_pd(pt2.w, pt2.z, pt2.y, pt2.x);
  p1 = _mm256_add_pd(p1, p2);
  __m256d half = _mm256_set_pd(0.5, 0.5, 0.5, 0.5);
  p1 = _mm256_mul_pd(p1, half);
  __m256d upV = _mm256_sub_pd(p1, originV);
  _mm256_store_pd(alignedStorage, upV);
  up.x = alignedStorage[0];
  up.y = alignedStorage[1];
  up.z = alignedStorage[2];
#else
  // origin is an output: reset it so the accumulation below does not depend on whatever the
  // caller passed in, matching the AVX path which overwrites it.
  origin = MPoint::origin;
  MVector hitNormal;
  // Create the barycentric point and normal.
  for (int i = 0; i < 3; ++i) {
    origin += points[triangleVertices[i]] * coords[i];
    hitNormal += MVector(normals[triangleVertices[i]]) * coords[i];
  }
  // Use crawl data to calculate normal
  normal = hitNormal * weights[hitIndex];
  for (unsigned int j = 0; j < hitIndex; j++) {
    normal += MVector(normals[sampleIds[j]]) * weights[j];
  }

  // Calculate the up vector
  // The triangle vertices are sorted by decreasing barycentric coordinates so the first two are
  // the two closest vertices in the triangle.
  up = ((points[triangleVertices[0]] + points[triangleVertices[1]]) * 0.5) - origin;
#endif
  normal.normalize();
  GetValidUp(weights, points, sampleIds, origin, normal, up);
}


/**
  Makes sure the up vector can be used together with the normal.  If up points along the
  normal (dot product of the unit vectors within 0.001 of 1) or is shorter than 0.0001, the
  weighted offsets of the sample points are subtracted from it one at a time until it is
  usable or the samples run out.  In every case up is unit length on return.
  @param[in] weights The sample weights; the last entry is not used.
  @param[in] points The driver point array.
  @param[in] sampleIds The vertex ids on the driver of the current sample.
  @param[in] origin The origin of the coordinate system.
  @param[in] normal The normal vector of the coordinate system.
  @param[in,out] up The up vector of the coordinate system.
*/
void GetValidUp(const MDoubleArray& weights, const MPointArray& points,
                const MIntArray& sampleIds, const MPoint& origin, const MVector& normal,
                MVector& up) {
  MVector unitUp = up.normal();

  // Common case first: the up vector is already usable, so only normalize it.
  const bool alongNormal = std::abs((unitUp * normal) - 1.0) < 0.001;
  if (!alongNormal && !(up.length() < 0.0001)) {
    up = unitUp;
    return;
  }

  // Nudge the up vector with one sample at a time until it is no longer parallel to the
  // normal and has a usable length.
  for (unsigned int j = 0; j < weights.length()-1; ++j) {
    up -= (points[sampleIds[j]] - origin) * weights[j];
    unitUp = up.normal();
    const bool clearOfNormal = std::abs((unitUp * normal) - 1.0) > 0.001;
    if (clearOfNormal && up.length() > 0.0001) {
      break;
    }
  }
  up.normalize();
}


================================================
FILE: src/common.h
================================================
/**
  Contains various helper functions.
*/

#ifndef CVWRAP_COMMON_H
#define CVWRAP_COMMON_H

#include <maya/MDagPath.h>
#include <maya/MDoubleArray.h>
#include <maya/MFloatVectorArray.h>
#include <maya/MIntArray.h>
#include <maya/MMatrix.h>
#include <maya/MPoint.h>
#include <maya/MPointArray.h>
#include <maya/MString.h>
#include <map>
#include <vector>
#include <set>

#ifdef __AVX__
#include <xmmintrin.h>
#include <immintrin.h>
#endif

/**
  Helper function to start a new progress bar.
  @param[in] title Status title.
  @param[in] count Progress bar maximum count.
*/
void StartProgress(const MString& title, unsigned int count);


/**
  Helper function to increase the progress bar by the specified amount.
  @param[in] step Step amount.
*/
void StepProgress(int step);


/**
  Check if the progress has been cancelled.
  @return true if the progress has been cancelled.
*/
bool ProgressCancelled();


/**
  Ends any running progress bar.
*/
void EndProgress();


/**
  Checks if the path points to a shape node.
  @param[in] path A dag path.
  @return true if the path points to a shape node.
 */
bool IsShapeNode(MDagPath& path);


/**
  Ensures that the given dag path points to a non-intermediate shape node.
  @param[in,out] path Path to a dag node that could be a transform or a shape.
  On return, the path will be to a shape node if one exists.
  @param[in] intermediate true to get the intermediate shape.
  @return MStatus.
 */
MStatus GetShapeNode(MDagPath& path, bool intermediate=false);


/**
  Get the MDagPath of an object.
  @param[in] name Name of a dag node.
  @param[out] path Storage for the dag path.
 */
MStatus GetDagPath(MString& name, MDagPath& path);


/**
  Delete all intermediate shapes of the given dag path.
  @param[in] path MDagPath.
 */
MStatus DeleteIntermediateObjects(MDagPath& path);


/**
  Small value type holding the three barycentric weights of a point in a triangle.
  The weights are read and written with operator[] using indices 0 to 2.
*/
struct BaryCoords {
  float coords[3];

  /** Read-only access; returns a copy of the weight at the given index. */
  float operator[](int index) const {
    return coords[index];
  }

  /** Mutable access; returns a reference to the weight at the given index. */
  float& operator[](int index) {
    return coords[index];
  }
};


/**
  Get the barycentric coordinates of point P in the triangle specified by points A,B,C.
  @param[in] P The sample point.
  @param[in] A Triangle point.
  @param[in] B Triangle point.
  @param[in] C Triangle point.
  @param[out] coords Barycentric coordinates storage.
*/
void GetBarycentricCoordinates(const MPoint& P, const MPoint& A, const MPoint& B, const MPoint& C,
                               BaryCoords& coords);


/**
  Get the vertex adjacency of the specified mesh.  The vertex adjacency are the vertex ids
  of the connected faces of each vertex.
  @param[in] pathMesh Path to a mesh.
  @param[out] adjacency Storage for the adjacency, indexed by vertex id.
 */
MStatus GetAdjacency(MDagPath& pathMesh, std::vector<std::set<int> >& adjacency);


/**
  Crawls the surface to find all the points within the sample radius.
  @param[in] startPoint The position from which to start the crawl.
  @param[in] vertexIndices The starting vertex indices we want to crawl to.
  @param[in] points The array of all the mesh points.
  @param[in] maxDistance The maximum crawl distance.
  @param[in] adjacency Vertex adjacency data from the GetAdjacency function.
  @param[out] distances Storage for the distances to the crawled points.
  @return MStatus
 */
MStatus CrawlSurface(const MPoint& startPoint, const MIntArray& vertexIndices, MPointArray& points, double maxDistance,
                     std::vector<std::set<int> >& adjacency, std::map<int, double>& distances);


/**
  Calculates a weight for each vertex within the crawl sample radius.  Vertices that are further
  away from the origin should have a lesser effect than vertices closer to the origin.
  @param[in] distances Crawl distances calculated from CrawlSurface.
  @param[in] radius Sample radius.
  @param[out] vertexIds Storage for the vertex ids sampled during the crawl.
  @param[out] weights Storage for the calculated weights of each sampled vertex.
*/
void CalculateSampleWeights(const std::map<int, double>& distances, double radius,
                            MIntArray& vertexIds, MDoubleArray& weights);

 /**
   Creates an orthonormal basis using the given point and two axes.
   @param[in] origin Position.
   @param[in] normal Normal vector.
   @param[in] up Up vector.
   @param[out] matrix Generated matrix.
 */
void CreateMatrix(const MPoint& origin, const MVector& normal, const MVector& up,
                  MMatrix& matrix);

/**
  Calculates the components necessary to create a wrap basis matrix.
  @param[in] weights The sample weights array from the wrap binding.
  @param[in] coords The barycentric coordinates of the closest point.
  @param[in] triangleVertices The vertex ids forming the triangle of the closest point.
  @param[in] points The driver point array.
  @param[in] normals The driver per-vertex normal array.
  @param[in] sampleIds The vertex ids on the driver of the current sample.
  @param[in] alignedStorage double array that is 32 byte aligned for AVX.
  @param[out] origin The origin of the coordinate system.
  @param[out] up The up vector of the coordinate system.
  @param[out] normal The normal vector of the coordinate system.
*/
void CalculateBasisComponents(const MDoubleArray& weights, const BaryCoords& coords,
                              const MIntArray& triangleVertices, const MPointArray& points,
                              const MFloatVectorArray& normals, const MIntArray& sampleIds,
                              double* alignedStorage,
                              MPoint& origin, MVector& up, MVector& normal);

/**
  Ensures that the up vector can be used together with the normal.  If the up vector is
  parallel to the normal or has no length, it is adjusted using the sample points.  The up
  vector is normalized on return.
  @param[in] weights The sample weights array from the wrap binding.
  @param[in] points The driver point array.
  @param[in] sampleIds The vertex ids on the driver of the current sample.
  @param[in] origin The origin of the coordinate system.
  @param[in] normal The normal vector of the coordinate system.
  @param[in,out] up The up vector of the coordinate system.
*/
void GetValidUp(const MDoubleArray& weights, const MPointArray& points,
                const MIntArray& sampleIds, const MPoint& origin, const MVector& normal,
                MVector& up);


/**
  Work description handed to a single thread.
  start/end delimit the chunk of elements assigned to the task (CreateThreadData produces
  them as start <= end <= element count), numTasks is the total number of tasks and pData
  points at the data shared by all tasks.
  alignedStorage is only allocated when compiled with AVX; without AVX it is never set.
  NOTE(review): the AVX constructor/destructor pair owns alignedStorage but the struct is
  still copyable; copying an instance would free the buffer twice -- confirm it is never
  copied.
*/
template <typename T>
struct ThreadData {
  unsigned int start;
  unsigned int end;
  unsigned int numTasks;
  double* alignedStorage;
  T* pData;

#ifdef __AVX__
  ThreadData() {
    alignedStorage = (double*) _mm_malloc(4*sizeof(double), 256);
  }
  ~ThreadData() {
    _mm_free(alignedStorage);
  }
#endif
};


/**
  Creates the data structures that will be sent to each thread.  Divides the vertices into
  discrete, contiguous chunks to be evaluated in the threads.

  Every chunk satisfies start <= end <= elementCount.  When there are fewer elements than
  the chunks can hold, the trailing tasks receive empty chunks (start == end) instead of
  ranges that run past the last element.  A taskCount of zero or less does nothing.
  @param[in] taskCount The number of individual tasks we want to divide the calculation into.
  @param[in] elementCount The number of vertices or elements to be divided up.
  @param[in] taskData The TaskData or BindData object.
  @param[out] threadData The array of ThreadData objects.  It is assumed the array is of size taskCount.
*/
template <typename T>
void CreateThreadData(int taskCount, unsigned int elementCount, T* taskData, ThreadData<T>* threadData) {
  if (taskCount <= 0) {
    // Nothing to fill in, and the chunk length below would divide by zero.
    return;
  }
  // Chunk length rounded up so taskCount chunks always cover every element.
  const unsigned int taskLength = (elementCount + taskCount - 1) / taskCount;
  const int lastTask = taskCount - 1;
  unsigned int start = 0;
  for (int i = 0; i < taskCount; i++) {
    unsigned int end = (i == lastTask) ? elementCount : start + taskLength;
    if (end > elementCount) {
      // Rounding up can overshoot before the last task is reached; never hand out
      // indices beyond the last element.
      end = elementCount;
    }
    threadData[i].start = start;
    threadData[i].end = end;
    threadData[i].numTasks = taskCount;
    threadData[i].pData = taskData;

    start = end;
  }
}

#ifdef __AVX__
/**
  Calculates the weighted sum of 4 vectors at once.  From low to high, the lanes of the
  result hold
    w1*p1.x + w2*p2.x + w3*p3.x + w4*p4.x,
    w1*p1.y + w2*p2.y + w3*p3.y + w4*p4.y,
    w1*p1.z + w2*p2.z + w3*p3.z + w4*p4.z,
  and, in the last lane, the sum of the squared weights, which is only a by-product of the
  shuffle.
  @param[in] w1 Weight of the first vector.
  @param[in] w2 Weight of the second vector.
  @param[in] w3 Weight of the third vector.
  @param[in] w4 Weight of the fourth vector.
  @param[in] p1 First vector.
  @param[in] p2 Second vector.
  @param[in] p3 Third vector.
  @param[in] p4 Fourth vector.
  @return A __m256d vector holding the weighted x, y and z sums in its three low lanes.
 */
template <typename T>
__m256d Dot4(double w1, double w2, double w3, double w4,
             const T& p1, const T& p2, const T& p3, const T& p4) {
  // Gather one component of all four vectors per register; the weights use the same lane
  // order so the products below pair each vector with its own weight.
  __m256d xxx = _mm256_set_pd(p1.x, p2.x, p3.x, p4.x);
  __m256d yyy = _mm256_set_pd(p1.y, p2.y, p3.y, p4.y);
  __m256d zzz = _mm256_set_pd(p1.z, p2.z, p3.z, p4.z);
  __m256d www = _mm256_set_pd(w1, w2, w3, w4);
  __m256d xw = _mm256_mul_pd(xxx, www);
  __m256d yw = _mm256_mul_pd(yyy, www);
  __m256d zw = _mm256_mul_pd(zzz, www);
  __m256d ww = _mm256_mul_pd(www, www); // Dummy
  // low to high: xw0+xw1 yw0+yw1 xw2+xw3 yw2+yw3
  __m256d temp01 = _mm256_hadd_pd(xw, yw);   
  // low to high: zw0+zw1 ww0+ww1 zw2+zw3 ww2+ww3
  __m256d temp23 = _mm256_hadd_pd(zw, ww);
  // low to high: xw2+xw3 yw2+yw3 zw0+zw1 ww0+ww1
  __m256d swapped = _mm256_permute2f128_pd(temp01, temp23, 0x21);
  // low to high: xw0+xw1 yw0+yw1 zw2+zw3 ww2+ww3
  __m256d blended = _mm256_blend_pd(temp01, temp23, 0xC);
  // low to high: xw0+xw1+xw2+xw3 yw0+yw1+yw2+yw3 zw0+zw1+zw2+zw3 ww0+ww1+ww2+ww3
  __m256d dotproduct = _mm256_add_pd(swapped, blended);
  return dotproduct;
}
#endif

#endif


================================================
FILE: src/cvWrapCmd.cpp
================================================
#include "cvWrapCmd.h"
#include "cvWrapDeformer.h"
#include "bindingio.h"

#include <maya/MArgDatabase.h>
#include <maya/MFnDoubleArrayData.h>
#include <maya/MFnIntArrayData.h>
#include <maya/MFnMatrixData.h>
#include <maya/MFnMesh.h>
#include <maya/MGlobal.h>
#include <maya/MItDependencyGraph.h>
#include <maya/MItGeometry.h>
#include <maya/MItSelectionList.h>
#include <maya/MMeshIntersector.h>
#include <maya/MFnSingleIndexedComponent.h>
#include <maya/MFnWeightGeometryFilter.h>
#include <maya/MSyntax.h>
#include <algorithm>
#include <cassert>
#include <utility>

#define PROGRESS_STEP 100
#define TASK_COUNT 32

/**
  A version number used to support future updates to the binary wrap binding file.
*/
const float kWrapFileVersion = 1.0f;

// Command name followed by the short and long spelling of every flag.  The flag strings are
// registered with the command syntax in CVWrapCmd::newSyntax() and described in DisplayHelp().
const char* CVWrapCmd::kName = "cvWrap";
const char* CVWrapCmd::kNameFlagShort = "-n";
const char* CVWrapCmd::kNameFlagLong = "-name";
const char* CVWrapCmd::kRadiusFlagShort = "-r";
const char* CVWrapCmd::kRadiusFlagLong = "-radius";
const char* CVWrapCmd::kNewBindMeshFlagShort = "-nbm";
const char* CVWrapCmd::kNewBindMeshFlagLong = "-newBindMesh";
const char* CVWrapCmd::kExportFlagShort = "-ex";
const char* CVWrapCmd::kExportFlagLong = "-export";
const char* CVWrapCmd::kImportFlagShort = "-im";
const char* CVWrapCmd::kImportFlagLong = "-import";
const char* CVWrapCmd::kBindingFlagShort = "-b";
const char* CVWrapCmd::kBindingFlagLong = "-binding";
const char* CVWrapCmd::kRebindFlagShort = "-rb";
const char* CVWrapCmd::kRebindFlagLong = "-rebind";
const char* CVWrapCmd::kHelpFlagShort = "-h";
const char* CVWrapCmd::kHelpFlagLong = "-help";

/**
  Displays command instructions.
*/
void DisplayHelp() {
  MString help;
  help += "Flags:\n"; 
  help += "-name (-n):          String     Name of the wrap node to create.\n"; 
  help += "-radius (-r):        Double     Sample radius.  Default is 0.1.  The greater the radius,\n"; 
  help += "                                the smoother the deformation but slower performance.\n";
  help += "-newBindMesh (-nbm)  N/A        Creates a new bind mesh, otherwise the existing bind mesh will be used.\n";
  help += "-export (-ex):       String     Path to a file to export the binding to.\n"; 
  help += "-import (-im):       String     Path to a file to import the binding from.\n"; 
  help += "-binding (-b):       String     Path to a file to import the binding from on creation.\n"; 
  help += "-rebind (-rb):       String     The name of the wrap node we are rebinding.\n"; 
  help += "-help (-h)           N/A        Display this text.\n";
  MGlobal::displayInfo(help);
}


/**
  Sets up the defaults of a new command: a sample radius of 0.1 (the default stated in the
  help text), the node name "cvWrap#", create mode, and both the binding-file and
  new-bind-mesh options switched off.
*/
CVWrapCmd::CVWrapCmd()
    : radius_(0.1),
      name_("cvWrap#"),
      command_(kCommandCreate),
      useBinding_(false),
      newBindMesh_(false) {
}


/**
  Builds the syntax of the command: every flag with its argument type, plus a selection list
  of 0 to 255 objects that defaults to the current selection.
  @return The command syntax.
*/
MSyntax CVWrapCmd::newSyntax() {
  // One row per flag, in registration order.
  struct FlagSpec {
    const char* shortName;
    const char* longName;
    MSyntax::MArgType argType;
  };
  const FlagSpec flagSpecs[] = {
    {kNameFlagShort,        kNameFlagLong,        MSyntax::kString},
    {kRadiusFlagShort,      kRadiusFlagLong,      MSyntax::kDouble},
    {kNewBindMeshFlagShort, kNewBindMeshFlagLong, MSyntax::kNoArg},
    {kExportFlagShort,      kExportFlagLong,      MSyntax::kString},
    {kImportFlagShort,      kImportFlagLong,      MSyntax::kString},
    {kBindingFlagShort,     kBindingFlagLong,     MSyntax::kString},
    {kRebindFlagShort,      kRebindFlagLong,      MSyntax::kString},
    {kHelpFlagShort,        kHelpFlagLong,        MSyntax::kNoArg}
  };
  const unsigned int flagCount = sizeof(flagSpecs) / sizeof(flagSpecs[0]);

  MSyntax syntax;
  for (unsigned int i = 0; i < flagCount; ++i) {
    syntax.addFlag(flagSpecs[i].shortName, flagSpecs[i].longName, flagSpecs[i].argType);
  }
  syntax.setObjectType(MSyntax::kSelectionList, 0, 255);
  syntax.useSelectionAsDefault(true);
  return syntax;
}


void* CVWrapCmd::creator() {                                
  return new CVWrapCmd;                    
}    


/**
  Reports whether this invocation can be undone.
  @return true only when the command ran in creation mode.
*/
bool CVWrapCmd::isUndoable() const {
  const bool createdDeformer = (kCommandCreate == command_);
  return createdDeformer;
}


/**
  Entry point of the command.  Parses the arguments, prepares the data required by the
  selected mode and then delegates the actual work to redoIt().
  @param[in] args The raw command arguments.
  @return MS::kSuccess when the requested operation completed.
*/
MStatus CVWrapCmd::doIt(const MArgList& args) {
  MStatus status;
    
  status = GatherCommandArguments(args);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  if (command_ == kCommandHelp) {
    // The usage text was already printed while gathering the arguments.  Stop here so that
    // asking for help neither requires a selection nor queues a deformer creation command,
    // and so that it does not end in the failure redoIt() returns for unhandled modes.
    return MS::kSuccess;
  }

  if (command_ == kCommandImport || command_ == kCommandExport) {
    // In import/export mode, get the selected wrap deformer node so we can read/write
    // data from it.
    status = selectionList_.getDependNode(0, oWrapNode_);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MFnDependencyNode fnNode(oWrapNode_);
    if (fnNode.typeId() != CVWrap::id) {
      MGlobal::displayError("No wrap node specified.");
      return MS::kFailure;
    }
  } else if (command_ == kCommandRebind) {
    // Rebinding queues the new binding values on dgMod_; redoIt() executes them.
    status = GetGeometryPaths();
    CHECK_MSTATUS_AND_RETURN_IT(status);
    status = Rebind();
    CHECK_MSTATUS_AND_RETURN_IT(status);
  } else {
    // Otherwise get the driver and driven geometry paths.
    status = GetGeometryPaths();
    CHECK_MSTATUS_AND_RETURN_IT(status);

    // Add the cvWrap creation command to the modifier.
    MString command = "deformer -type cvWrap -n \"" + name_ + "\"";
    for (unsigned int i = 0; i < pathDriven_.length(); ++i) {
      MFnDagNode fnDriven(pathDriven_[i]);
      command += " " + fnDriven.partialPathName();
    }
    status = dgMod_.commandToExecute(command);
    CHECK_MSTATUS_AND_RETURN_IT(status);
  }

  return redoIt();
}


/**
  Parses the command flags and objects into the member variables that drive the command.
  @param[in] args The raw command arguments.
  @return MS::kSuccess when the arguments were parsed, otherwise the failing status.
*/
MStatus CVWrapCmd::GatherCommandArguments(const MArgList& args) {
  MStatus status;
  MArgDatabase argData(syntax(), args, &status);
  if (MFAIL(status)) {
    // The arguments do not match the command syntax; do not query a database that failed
    // to parse.
    return status;
  }
  argData.getObjects(selectionList_);
  if (argData.isFlagSet(kHelpFlagShort)) {
    // Help takes precedence over every other flag.
    command_ = kCommandHelp;
    DisplayHelp();
    return MS::kSuccess;
  } else if (argData.isFlagSet(kExportFlagShort)) {
    command_ = kCommandExport;
    filePath_ = argData.flagArgumentString(kExportFlagShort, 0, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
  } else if (argData.isFlagSet(kImportFlagShort)) {
    command_ = kCommandImport;
    filePath_ = argData.flagArgumentString(kImportFlagShort, 0, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
  }
  newBindMesh_ = argData.isFlagSet(kNewBindMeshFlagShort);
  if (argData.isFlagSet(kRadiusFlagShort)) {
    radius_ = argData.flagArgumentDouble(kRadiusFlagShort, 0, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    // Make sure radius is positive
    if (radius_ <= 0.0) {
      radius_ = 0.001;
    }
  }
  if (argData.isFlagSet(kNameFlagShort)) {
    name_ = argData.flagArgumentString(kNameFlagShort, 0, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
  }
  if (argData.isFlagSet(kBindingFlagShort)) {
    useBinding_ = true;
    filePath_ = argData.flagArgumentString(kBindingFlagShort, 0, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
  }
  if (argData.isFlagSet(kRebindFlagShort)) {
    command_ = kCommandRebind;
    // Get the specified wrap node to rebind.
    MString wrapNode = argData.flagArgumentString(kRebindFlagShort, 0, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MSelectionList slist;
    status = slist.add(wrapNode);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    status = slist.getDependNode(0, oWrapNode_);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MFnDependencyNode fnNode(oWrapNode_, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    if (fnNode.typeId() != CVWrap::id) {
      MGlobal::displayError(fnNode.name() + " is not a cvWrap node.");
      return MS::kFailure;
    }
  }
  return MS::kSuccess;
}


/**
  Splits the gathered selection into the driver (the last item) and the driven geometry
  (every item before it), storing shape paths and selected components for each.
  @return MS::kSuccess when a mesh driver and at least one driven geometry were found.
*/
MStatus CVWrapCmd::GetGeometryPaths() {
  MStatus status;
  const unsigned int selectionCount = selectionList_.length();
  if (selectionCount < 2) {
    // With fewer than two items there is no driven geometry, and later steps index the
    // first driven path unconditionally.
    MGlobal::displayError("cvWrap requires the driven geometry followed by the driver mesh.");
    return MS::kFailure;
  }

  // The driver is selected last
  status = selectionList_.getDagPath(selectionCount - 1, pathDriver_, driverComponents_);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MStatus shapeStatus = GetShapeNode(pathDriver_);
  // The driver must be a mesh for this specific algorithm.  This is reported before the
  // shape lookup status so the user gets the descriptive error message.
  if (!pathDriver_.hasFn(MFn::kMesh)) {
    MGlobal::displayError("cvWrap driver must be a mesh.");
    return MS::kFailure;
  }
  CHECK_MSTATUS_AND_RETURN_IT(shapeStatus);

  MItSelectionList iter(selectionList_, MFn::kInvalid, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  pathDriven_.clear();
  drivenComponents_.clear();
  // Walk every item except the last one, which is the driver.
  for (unsigned int i = 0; i < selectionCount - 1; ++i, iter.next()) {
    MDagPath path;
    MObject component;
    status = iter.getDagPath(path, component);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    status = GetShapeNode(path);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    pathDriven_.append(path);
    drivenComponents_.append(component);
  }
  
  return MS::kSuccess;
}


/**
  Performs the work of the mode selected in doIt(): imports or exports a binding file,
  executes the queued rebind values, or creates the wrap deformer.
  @return MS::kSuccess when the operation completed, MS::kFailure for an unhandled mode or
  when a binding file could not be opened, otherwise the failing status.
*/
MStatus CVWrapCmd::redoIt() {
  MStatus status;
  if (command_ == kCommandImport) {
    std::ifstream in(filePath_.asChar(), std::ios::binary);
    if (!in.is_open()) {
      // Report the failure as an error, matching the export branch below.
      MGlobal::displayError("Unable to open file for importing.");
      return MS::kFailure;
    }
    BindingIO importer;
    status = importer.ImportBinding(in, oWrapNode_);
    in.close();
    CHECK_MSTATUS_AND_RETURN_IT(status);
    return MS::kSuccess;
  } else if (command_ == kCommandExport) {
    std::ofstream out(filePath_.asChar(), std::ios::binary);
    if (!out.is_open()) {
      MGlobal::displayError("Unable to open file for writing.");
      return MS::kFailure;
    }
    BindingIO exporter;
    status = exporter.ExportBinding(out, oWrapNode_);
    out.close();
    CHECK_MSTATUS_AND_RETURN_IT(status);
    return MS::kSuccess;
  } else if (command_ == kCommandRebind) {
    // The new binding values were queued on the modifier by Rebind().
    status = dgMod_.doIt();
    CHECK_MSTATUS_AND_RETURN_IT(status);
    return MS::kSuccess;
  } else if (command_ == kCommandCreate) {
    status = CreateWrapDeformer();
    CHECK_MSTATUS_AND_RETURN_IT(status);
    return MS::kSuccess;
  }
  return MS::kFailure;
}

   
/**
  Creates the cvWrap deformer on the driven geometry, attaches a bind mesh, stores the
  binding (calculated or imported from disk) and connects the driver mesh to the deformer.
  The name of the created node is set as the command result.
  @return MS::kSuccess when the deformer was created and bound, otherwise the failing status.
*/
MStatus CVWrapCmd::CreateWrapDeformer() {
  MStatus status;
  // Create the deformer
  status = dgMod_.doIt();
  CHECK_MSTATUS_AND_RETURN_IT(status);
  // Reacquire the paths because on referenced geo, a new driven path is created (the ShapeDeformed).
  status = GetGeometryPaths();
  CHECK_MSTATUS_AND_RETURN_IT(status);
  // Get the created wrap deformer node.
  status = GetLatestWrapNode();
  CHECK_MSTATUS_AND_RETURN_IT(status);

  // Validate the function set before reading the node name from it.
  MFnDependencyNode fnNode(oWrapNode_, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  setResult(fnNode.name());

  // Create a bind mesh so we can run rebind commands.  We need a mesh at the state of the 
  // initial binding in order to properly calculate rebinding information.  We can't use
  // the intermediate mesh for rebinding because we may not be binding at the rest pose.
  // Check if this driver already has a bind mesh.
  MDagPath pathBindMesh;
  status = GetExistingBindMesh(pathBindMesh);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  if (newBindMesh_ || !pathBindMesh.isValid()) {
    // No bind mesh exists or the user wants to force create a new one.
    status = CreateBindMesh(pathBindMesh);
    CHECK_MSTATUS_AND_RETURN_IT(status);
  }
  status = ConnectBindMesh(pathBindMesh);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  if (useBinding_) {
    // Import a pre-existing binding.
    std::ifstream in(filePath_.asChar(), std::ios::binary);
    if (!in.is_open()) {
      MGlobal::displayError("Unable to open file for importing.");
      return MS::kFailure;
    }
    BindingIO importer;
    status = importer.ImportBinding(in, oWrapNode_);
    in.close();
    CHECK_MSTATUS_AND_RETURN_IT(status);
  } else {
    // Calculate the binding and write the queued plug values onto the wrap node.
    MDGModifier dgModBinding;
    BindData bindData;
    status = CalculateBinding(pathBindMesh, bindData, dgModBinding);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    status = dgModBinding.doIt();
    CHECK_MSTATUS_AND_RETURN_IT(status);
  }

  // Connect the driver mesh to the wrap deformer.
  MFnDagNode fnDriver(pathDriver_);
  MPlug plugDriverMesh = fnDriver.findPlug("worldMesh", false, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  status = plugDriverMesh.selectAncestorLogicalIndex(0, plugDriverMesh.attribute());
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MPlug plugDriverGeo(oWrapNode_, CVWrap::aDriverGeo);
  MDGModifier dgModConnect;
  status = dgModConnect.connect(plugDriverMesh, plugDriverGeo);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  status = dgModConnect.doIt();
  CHECK_MSTATUS_AND_RETURN_IT(status);

  return MS::kSuccess;
}


/**
  Finds the cvWrap node deforming the first driven geometry by walking upstream through the
  geometry filters of its history, and stores it in oWrapNode_.
  @return MS::kSuccess when a cvWrap node was found, MS::kFailure when there is no driven
  geometry or no cvWrap node upstream of it.
*/
MStatus CVWrapCmd::GetLatestWrapNode() {
  MStatus status;
  if (pathDriven_.length() == 0) {
    // Nothing to search from; avoid indexing an empty path array.
    return MS::kFailure;
  }
  MObject oDriven = pathDriven_[0].node();
  
  // Since we use MDGModifier to execute the deformer command, we can't get
  // the created deformer node, so we need to find it in the deformation chain.
  MItDependencyGraph itDG(oDriven,
                          MFn::kGeometryFilt,
                          MItDependencyGraph::kUpstream, 
                          MItDependencyGraph::kDepthFirst,
                          MItDependencyGraph::kNodeLevel, 
                          &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MObject oDeformerNode;
  for (; !itDG.isDone(); itDG.next()) {
    oDeformerNode = itDG.currentItem();
    MFnDependencyNode fnNode(oDeformerNode, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    if (fnNode.typeId() == CVWrap::id) {
      // The first cvWrap encountered going upstream is taken as the new node.
      oWrapNode_ = oDeformerNode;
      return MS::kSuccess;
    }
  }
  return MS::kFailure;
}


/**
  Duplicates the driver to create a hidden bind mesh named after the wrap node, and records
  it so undoIt() can delete it.
  @param[out] pathBindMesh Path to the duplicated bind mesh.
  @return MS::kSuccess when the bind mesh was created, otherwise the failing status.
*/
MStatus CVWrapCmd::CreateBindMesh(MDagPath& pathBindMesh) {
  MStatus status;
  MStringArray duplicate;
  MFnDependencyNode fnWrap(oWrapNode_, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MFnDagNode fnDriver(pathDriver_);

  // Calling mesh.duplicate() can give incorrect results due to tweaks and such.
  // We are doing the duplicate here rather than the MDGModifier because we need the name
  // of the duplicated geometry and it would not be reliable to do it from the modifier.
  status = MGlobal::executeCommand(
      "duplicate -rr -n " + fnWrap.name() + "Base " + fnDriver.partialPathName(), duplicate);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  if (duplicate.length() == 0) {
    // The command returned no node name, so there is nothing to look up.
    MGlobal::displayError("Unable to duplicate the driver to create a bind mesh.");
    return MS::kFailure;
  }
  status = GetDagPath(duplicate[0], pathBindMesh);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  status = DeleteIntermediateObjects(pathBindMesh);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  bindMeshes_.append(duplicate[0]);

  // Hide the duplicate
  MFnDagNode fnBindMesh(pathBindMesh, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MPlug plug = fnBindMesh.findPlug("visibility", false, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  status = plug.setBool(false);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  
  return MS::kSuccess;
}


/**
  Connects the message plug of the bind mesh shape to the bindDriverGeo plug of the wrap node.
  @param[in,out] pathBindMesh Path to the bind mesh; it is passed through GetShapeNode first.
  @return MS::kSuccess when the connection was made, otherwise the failing status.
*/
MStatus CVWrapCmd::ConnectBindMesh(MDagPath& pathBindMesh) {
  MStatus result = GetShapeNode(pathBindMesh);
  CHECK_MSTATUS_AND_RETURN_IT(result);

  // Source: the message plug on the bind mesh shape.
  MFnDagNode shapeFn(pathBindMesh, &result);
  CHECK_MSTATUS_AND_RETURN_IT(result);
  MPlug sourceMessage = shapeFn.findPlug("message", false, &result);
  CHECK_MSTATUS_AND_RETURN_IT(result);

  // Destination: the bind driver plug on the wrap node.
  MPlug destination(oWrapNode_, CVWrap::aBindDriverGeo);

  MDGModifier connector;
  connector.connect(sourceMessage, destination);
  result = connector.doIt();
  CHECK_MSTATUS_AND_RETURN_IT(result);

  return MS::kSuccess;
}


/**
  Calculates the binding of every driven geometry against the bind mesh and queues the
  resulting values on the given modifier.  The modifier is not executed here; the caller
  is responsible for calling doIt() on it.
  @param[in] pathBindMesh Path to the bind mesh the binding is calculated against.
  @param[in,out] bindData Storage for the driver data gathered here and for the per-component
  results written by the binding tasks.  The per-component arrays are resized for each driven
  geometry in turn.
  @param[in,out] dgMod Modifier that receives one newPlugValue per stored binding value.
  @return MS::kSuccess when the binding was calculated, otherwise the failing status.
*/
MStatus CVWrapCmd::CalculateBinding(MDagPath& pathBindMesh, BindData& bindData,
                                    MDGModifier& dgMod) {
  MStatus status;
  bindData.radius = radius_;

  // Store the bind mesh information.
  // Pre-gather the data from Maya so we can multithread the binding process
  bindData.driverMatrix = pathBindMesh.inclusiveMatrix();
  MObject oBindMesh = pathBindMesh.node();
  status = bindData.intersector.create(oBindMesh, bindData.driverMatrix);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  // We need the adjacency of each vertex in order to crawl the mesh.
  status = GetAdjacency(pathBindMesh, bindData.adjacency);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MFnMesh fnBindMesh(pathBindMesh, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  // NOTE(review): the statuses returned by getPoints and getVertexNormals are not checked.
  fnBindMesh.getPoints(bindData.driverPoints, MSpace::kWorld);
  fnBindMesh.getVertexNormals(false, bindData.driverNormals, MSpace::kWorld);
  bindData.perFaceVertices.resize(fnBindMesh.numPolygons());
  bindData.perFaceTriangleVertices.resize(fnBindMesh.numPolygons());
  MIntArray vertexCount, vertexList, triangleCounts, triangleVertices;
  status = fnBindMesh.getVertices(vertexCount, vertexList);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  status = fnBindMesh.getTriangles(triangleCounts, triangleVertices);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  // Unpack the flat vertex and triangle lists into per-face arrays.  iter and triIter are
  // running offsets into vertexList and triangleVertices that carry over from face to face.
  for (unsigned int faceId = 0, iter = 0, triIter = 0; faceId < vertexCount.length(); ++faceId) {
    bindData.perFaceVertices[faceId].clear();
    for (int i = 0; i < vertexCount[faceId]; ++i, ++iter) {
      bindData.perFaceVertices[faceId].append(vertexList[iter]);
    }
    bindData.perFaceTriangleVertices[faceId].resize(triangleCounts[faceId]);
    for (int triId = 0; triId < triangleCounts[faceId]; ++triId) {
      // Each triangle consumes three consecutive entries of the flat triangle vertex list.
      bindData.perFaceTriangleVertices[faceId][triId].setLength(3);
      bindData.perFaceTriangleVertices[faceId][triId][0] = triangleVertices[triIter++];
      bindData.perFaceTriangleVertices[faceId][triId][1] = triangleVertices[triIter++];
      bindData.perFaceTriangleVertices[faceId][triId][2] = triangleVertices[triIter++];
    }
  }

  // Calculate the binding for each deformed geometry
  MPlug plugBindData(oWrapNode_, CVWrap::aBindData);
  MFnMatrixData fnMatrixData;
  for (unsigned int geomIndex = 0; geomIndex < pathDriven_.length(); ++geomIndex) {
    // Get the plugs to the binding attributes for this geometry
    MPlug plugBind = plugBindData.elementByLogicalIndex(geomIndex, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MPlug plugSampleWeights = plugBind.child(CVWrap::aSampleWeights, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MPlug plugSampleVerts = plugBind.child(CVWrap::aSampleComponents, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MPlug plugSampleBindMatrix = plugBind.child(CVWrap::aBindMatrix, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MPlug plugTriangleVerts = plugBind.child(CVWrap::aTriangleVerts, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MPlug plugBarycentricWeights = plugBind.child(CVWrap::aBarycentricWeights, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);

    // Use the intermediate object for the binding.  This assumes the intermediate object
    // has the same component count as the displayed shape.
    MDagPath pathDriven(pathDriven_[geomIndex]);
    status = GetShapeNode(pathDriven, true);
    if (MFAIL(status)) {
      // No intermediate object could be resolved, so bind against the stored driven path.
      pathDriven = pathDriven_[geomIndex];
    }
    MItGeometry itGeo(pathDriven, drivenComponents_[geomIndex], &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    // NOTE(review): geoCount is not used below; itGeo.count() is called directly instead.
    int geoCount = itGeo.count();

    status = itGeo.allPositions(bindData.inputPoints, MSpace::kWorld);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    // Size the output arrays to one entry per component of this geometry.
    bindData.sampleIds.resize(itGeo.count());
    bindData.weights.resize(itGeo.count());
    bindData.bindMatrices.setLength(itGeo.count());
    bindData.coords.resize(itGeo.count());
    bindData.triangleVertices.resize(itGeo.count());

    // Send off the threads to calculate the binding.
    ThreadData<BindData> threadData[TASK_COUNT];
    CreateThreadData<BindData>(TASK_COUNT, itGeo.count(), &bindData, threadData);
    MThreadPool::init();
    MThreadPool::newParallelRegion(CreateTasks, (void *)threadData);
    MThreadPool::release();

    // ii is the position within the iteration and indexes the calculated arrays, while
    // logicalIndex is the component index used to address the array plugs.
    for (int ii = 0; !itGeo.isDone(); itGeo.next(), ++ii) {
      // Store all the binding data for this component
      // Note for nurbs surfaces the indices may not be continuous.
      int logicalIndex = itGeo.index();
      // Store sample vert ids.
      MFnIntArrayData fnIntData;
      MObject oIntData = fnIntData.create(bindData.sampleIds[ii], &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      MPlug plugSampleVertsElement = plugSampleVerts.elementByLogicalIndex(logicalIndex, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = dgMod.newPlugValue(plugSampleVertsElement, oIntData);
      CHECK_MSTATUS_AND_RETURN_IT(status);

      // Store sample weights
      MFnDoubleArrayData fnDoubleData;
      MObject oDoubleData = fnDoubleData.create(bindData.weights[ii], &status);
      // Every component is expected to have at least one sample weight.
      assert(bindData.weights[ii].length() > 0);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      MPlug plugSampleWeightsElement = plugSampleWeights.elementByLogicalIndex(logicalIndex,
                                                                               &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = dgMod.newPlugValue(plugSampleWeightsElement, oDoubleData);
      CHECK_MSTATUS_AND_RETURN_IT(status);

      // Store bind matrix
      MObject oMatrixData = fnMatrixData.create(bindData.bindMatrices[ii], &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      MPlug plugSampleBindMatrixElement = plugSampleBindMatrix.elementByLogicalIndex(logicalIndex,
                                                                                     &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = dgMod.newPlugValue(plugSampleBindMatrixElement, oMatrixData);
      CHECK_MSTATUS_AND_RETURN_IT(status);

      // Store triangle vertices
      MFnNumericData fnNumericData;
      MObject oNumericData = fnNumericData.create(MFnNumericData::k3Int, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = fnNumericData.setData3Int(bindData.triangleVertices[ii][0],
                                         bindData.triangleVertices[ii][1],
                                         bindData.triangleVertices[ii][2]);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      MPlug plugTriangleVertsElement = plugTriangleVerts.elementByLogicalIndex(logicalIndex,
                                                                               &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = dgMod.newPlugValue(plugTriangleVertsElement, oNumericData);
      CHECK_MSTATUS_AND_RETURN_IT(status);

      // Store barycentric coordinates
      oNumericData = fnNumericData.create(MFnNumericData::k3Float, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = fnNumericData.setData3Float(bindData.coords[ii][0], bindData.coords[ii][1],
                                           bindData.coords[ii][2]);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      MPlug plugBarycentricWeightsElement = plugBarycentricWeights.elementByLogicalIndex(
        logicalIndex, &status);
      CHECK_MSTATUS_AND_RETURN_IT(status);
      status = dgMod.newPlugValue(plugBarycentricWeightsElement, oNumericData);
      CHECK_MSTATUS_AND_RETURN_IT(status);
    }
  }
  return MS::kSuccess;
}


void CVWrapCmd::CreateTasks(void *data, MThreadRootTask *pRoot) {
  ThreadData<BindData>* threadData = static_cast<ThreadData<BindData>*>(data);

  if (threadData) {
    int numTasks = threadData[0].numTasks;
    for(int i = 0; i < numTasks; i++) {
      MThreadPool::createTask(CalculateBindingTask, (void *)&threadData[i], pRoot);
    }
    MThreadPool::executeAndJoin(pRoot);
  }
}

/**
  Comparator that orders (vertex id, weight) pairs by descending weight.
  @param[in] lhs The first pair.
  @param[in] rhs The second pair.
  @return true when lhs has a strictly greater weight than rhs.
*/
bool SortCoords(std::pair<int, float> lhs, std::pair<int, float> rhs) {
  const float leftWeight = lhs.second;
  const float rightWeight = rhs.second;
  return rightWeight < leftWeight;
}


/**
  Task body that calculates the binding for the components in the range [start, end) of the
  given ThreadData.  For each component it finds the closest point on the driver, stores the
  closest triangle and its barycentric coordinates, samples the driver vertices within the
  radius, and stores the inverse of the resulting bind matrix.
  @param[in] pParam Pointer to the ThreadData<BindData> describing this task's range and the
  shared BindData it reads from and writes to.
  @return Always 0.
*/
MThreadRetVal CVWrapCmd::CalculateBindingTask(void *pParam) {
  ThreadData<BindData>* pThreadData = static_cast<ThreadData<BindData>*>(pParam);
  double*& alignedStorage = pThreadData->alignedStorage;
  BindData* pData = pThreadData->pData;
  // Inputs gathered by CalculateBinding.
  MMeshIntersector& intersector = pData->intersector;
  MMeshIntersector& subsetIntersector = pData->subsetIntersector;
  MPointArray& inputPoints = pData->inputPoints;
  MPointArray& driverPoints = pData->driverPoints;
  MFloatVectorArray& driverNormals = pData->driverNormals;
  std::vector<std::set<int> >& adjacency = pData->adjacency;
  // Outputs, written at index i for each component handled by this task.
  std::vector<MIntArray>& sampleIds = pData->sampleIds;
  std::vector<MDoubleArray>& weights = pData->weights;
  std::vector<BaryCoords>& coords = pData->coords;
  std::vector<MIntArray>& triangleVertices = pData->triangleVertices;
  MMatrixArray& bindMatrices = pData->bindMatrices;

  double radius = pData->radius;

  MMatrix& driverMatrix = pData->driverMatrix;
  std::vector<MIntArray>& perFaceVertices = pData->perFaceVertices;
  std::vector<std::vector<MIntArray> >& perFaceTriangleVertices  = pData->perFaceTriangleVertices;

  unsigned int taskStart = pThreadData->start;
  unsigned int taskEnd = pThreadData->end;

  // Allocate the scratch buffer used to sort the barycentric coordinates once, outside the
  // loop, so we are not dynamically allocating memory per component.
  std::vector<std::pair<int, float> > sortedCoords(3);
  for (unsigned int i = taskStart; i < taskEnd; ++i) {
    // Stop as soon as the task range runs past the number of input points.
    if (i >= inputPoints.length()) {
      break;
    }
    // We need to calculate a bind matrix for each component.
    // The closest point will be the origin of the coordinate system.
    // The weighted normal of the vertices in the sample radius will be one axis.
    // The weight vector from the closest point to the sample vertices will be the other axis.

    MPoint inputPoint = inputPoints[i];
    MPointOnMesh pointOnMesh;
    if (subsetIntersector.isCreated()) {
      // If we are rebinding, limit the closest point to the subset.
      subsetIntersector.getClosestPoint(inputPoint, pointOnMesh);
      inputPoint = MPoint(pointOnMesh.getPoint()) * driverMatrix;
    }

    intersector.getClosestPoint(inputPoint, pointOnMesh);
    int faceId = pointOnMesh.faceIndex();
    int triangleId = pointOnMesh.triangleIndex();

    // Put point in world space so we can calculate the proper bind matrix.
    MPoint closestPoint = MPoint(pointOnMesh.getPoint()) * driverMatrix;

    // Get barycentric coordinates of closestPoint
    triangleVertices[i] = perFaceTriangleVertices[faceId][triangleId];
    GetBarycentricCoordinates(closestPoint, driverPoints[triangleVertices[i][0]],
                              driverPoints[triangleVertices[i][1]],
                              driverPoints[triangleVertices[i][2]],
                              coords[i]);

    // Sort coords highest to lowest so we can easily calculate the up vector
    for (int j = 0; j < 3; ++j) {
      sortedCoords[j] = std::pair<int, float>(triangleVertices[i][j], coords[i][j]);
    }
    std::sort(sortedCoords.begin(), sortedCoords.end(), SortCoords);
    // Write the sorted order back so the vertex ids and the coordinates stay paired.
    for (int j = 0; j < 3; ++j) {
      triangleVertices[i][j] = sortedCoords[j].first;
      coords[i][j] = sortedCoords[j].second;
    }

    // Get vertices of closest face so we can crawl out from them.
    MIntArray& vertexList = perFaceVertices[faceId];

    // Crawl the surface to find all the vertices within the sample radius.
    std::map<int, double> distances;
    CrawlSurface(closestPoint, vertexList, driverPoints, radius, adjacency, distances);

    // Calculate the weight values per sampled vertex
    CalculateSampleWeights(distances, radius, sampleIds[i], weights[i]);

    // Get the components that form the orthonormal basis.
    MPoint origin;
    MVector up;
    MVector normal;
    CalculateBasisComponents(weights[i], coords[i], triangleVertices[i], driverPoints,
                             driverNormals, sampleIds[i], alignedStorage, origin, up, normal);
    CreateMatrix(origin, normal, up, bindMatrices[i]);
    // The inverse of the basis matrix is what gets stored as the bind matrix.
    bindMatrices[i] = bindMatrices[i].inverse();
  }
  return 0;
}


/**
  Looks for a bind mesh that is already associated with the driver through another cvWrap
  node connected to the driver's worldMesh output.
  @param[out] pathBindMesh Path to the existing bind mesh.  It is left untouched when no
  connected cvWrap node has a bind mesh, so the caller can detect that with isValid().
  @return MS::kSuccess whether or not a bind mesh was found, otherwise the failing status.
*/
MStatus CVWrapCmd::GetExistingBindMesh(MDagPath &pathBindMesh) {
  MStatus status;
  MObject oDriver = pathDriver_.node();
  MFnDependencyNode fnDriver(oDriver, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  
  // We'll find the bind mesh associated with the driver mesh by traversing the mesh connections
  // through the cvWrap node.
  MPlug plugOutGeom = fnDriver.findPlug("worldMesh", false, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  status = plugOutGeom.selectAncestorLogicalIndex(0, plugOutGeom.attribute());
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MPlugArray geomPlugs;
  plugOutGeom.connectedTo(geomPlugs, false, true);
  for (unsigned int i = 0; i < geomPlugs.length(); i++) {
    // First iterate through the outMesh connections to find a cvWrap node.
    MObject oThisNode = geomPlugs[i].node();
    MFnDependencyNode fnNode(oThisNode);
    if (fnNode.typeId() != CVWrap::id) {
      continue;
    }

    // A wrap whose bind mesh has been deleted has nothing to reuse.  Skip it rather than
    // failing the whole command with a rebind error, so the caller can create a new one.
    MPlug plugBindMesh(oThisNode, CVWrap::aBindDriverGeo);
    MPlugArray bindMeshPlugs;
    plugBindMesh.connectedTo(bindMeshPlugs, true, false, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    if (bindMeshPlugs.length() == 0) {
      continue;
    }

    status = GetBindMesh(oThisNode, pathBindMesh);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    return MS::kSuccess;
  }
  return MS::kSuccess;
}


/**
  Recalculates the binding of the selected driven components against the selected driver
  faces.  A temporary subset of the bind mesh restricts the closest point search, and the
  new binding values are queued on dgMod_ for redoIt() to execute.
  @return MS::kSuccess when the binding was recalculated, otherwise the failing status.
*/
MStatus CVWrapCmd::Rebind() {
  MStatus status;

  // Create bind mesh based off of specified faces
  MDagPath pathDriverSubset;
  status = CreateRebindSubsetMesh(pathDriverSubset);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  // Initialize the subset intersector to enable the rebind during the threaded calculation.
  BindData bindData;
  MObject oBindSubsetMesh = pathDriverSubset.node();
  status = bindData.subsetIntersector.create(oBindSubsetMesh, pathDriverSubset.inclusiveMatrix());
  CHECK_MSTATUS_AND_RETURN_IT(status);

  MDagPath pathBindMesh;
  status = GetBindMesh(oWrapNode_, pathBindMesh);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  // Hold on to the result so the temporary mesh is removed even when the binding fails.
  MStatus bindStatus = CalculateBinding(pathBindMesh, bindData, dgMod_);

  // Delete the subset mesh since we don't need it anymore.  The path points at the shape,
  // so step up to its transform before deleting.
  pathDriverSubset.pop();
  status = MGlobal::executeCommand("delete " + pathDriverSubset.partialPathName());

  CHECK_MSTATUS_AND_RETURN_IT(bindStatus);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  return MS::kSuccess;
}


/**
  Retrieves the bind mesh feeding the bindDriverGeo plug of the given wrap node.
  @param[in] oWrapNode The cvWrap node to query.
  @param[out] pathBindMesh A path to the connected bind mesh node.
  @return MS::kSuccess when a bind mesh is connected, MS::kFailure (with an error message)
  when nothing is connected, otherwise the failing status.
*/
MStatus CVWrapCmd::GetBindMesh(MObject& oWrapNode, MDagPath& pathBindMesh) {
  MStatus result;

  // Collect the incoming connections of the bind driver plug.
  MPlugArray sources;
  MPlug bindDriverPlug(oWrapNode, CVWrap::aBindDriverGeo);
  bindDriverPlug.connectedTo(sources, true, false, &result);
  CHECK_MSTATUS_AND_RETURN_IT(result);

  const bool hasBindMesh = sources.length() != 0;
  if (!hasBindMesh) {
    MGlobal::displayError("Unable to rebind.  No bind mesh is connected.");
    return MS::kFailure;
  }

  // The first connected node is the bind mesh.
  MObject bindMeshNode = sources[0].node();
  result = MDagPath::getAPathTo(bindMeshNode, pathBindMesh);
  CHECK_MSTATUS_AND_RETURN_IT(result);

  return MS::kSuccess;
}



/**
  Creates a temporary copy of the bind mesh that only contains the selected driver faces.
  Note that deleting the unwanted faces replaces the active selection.
  @param[out] pathDriverSubset Path to the shape of the subset mesh.
  @return MS::kSuccess when the subset mesh was created, otherwise the failing status.
*/
MStatus CVWrapCmd::CreateRebindSubsetMesh(MDagPath& pathDriverSubset) {
  // We will create the mesh subset by deleting all the non-selected faces.
  MStatus status;

  MDagPath pathBindMesh;
  status = GetBindMesh(oWrapNode_, pathBindMesh);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MFnMesh fnBindMesh(pathBindMesh, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  // Duplicate the bind mesh to create subset
  MStringArray duplicate;
  // Calling mesh.duplicate() gave jacked results.
  status = MGlobal::executeCommand("duplicate -rr " + fnBindMesh.partialPathName(), duplicate);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  if (duplicate.length() == 0) {
    // The command returned no node name, so there is nothing to look up.
    MGlobal::displayError("Unable to duplicate the bind mesh for rebinding.");
    return MS::kFailure;
  }
  status = GetDagPath(duplicate[0], pathDriverSubset);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  status = DeleteIntermediateObjects(pathDriverSubset);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  // Get selected driver faces
  MFnSingleIndexedComponent fnDriverComp(driverComponents_, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MIntArray driverFaces;
  status = fnDriverComp.getElements(driverFaces);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  // Mark the selected faces.  A lookup table avoids relying on the ids being sorted and
  // never reads past the end of the selected face list.
  const int numPolygons = fnBindMesh.numPolygons();
  std::vector<bool> isSelected(numPolygons > 0 ? numPolygons : 0, false);
  for (unsigned int i = 0; i < driverFaces.length(); ++i) {
    const int faceId = driverFaces[i];
    if (faceId >= 0 && faceId < numPolygons) {
      isSelected[faceId] = true;
    }
  }

  // Get all the face ids to delete.
  MIntArray facesToDelete;
  for (int i = 0; i < numPolygons; ++i) {
    if (!isSelected[i]) {
      facesToDelete.append(i);
    }
  }

  if (facesToDelete.length() > 0) {
    MFnSingleIndexedComponent fnDeleteComp;
    MObject oFacesToDelete = fnDeleteComp.create(MFn::kMeshPolygonComponent, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    status = fnDeleteComp.addElements(facesToDelete);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MSelectionList deleteList;
    status = deleteList.add(pathDriverSubset, oFacesToDelete);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    status = MGlobal::setActiveSelectionList(deleteList);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    status = MGlobal::executeCommand("delete;");
    CHECK_MSTATUS_AND_RETURN_IT(status);
    // Reacquire the the dag path since it is invalid now after deleting the faces.
    status = GetDagPath(duplicate[0], pathDriverSubset);
    CHECK_MSTATUS_AND_RETURN_IT(status);
  }

  // Always hand back the shape path.  Rebind() builds the intersector from this node and
  // pops the path to reach the transform, including when every face was selected and
  // nothing had to be deleted.
  status = GetShapeNode(pathDriverSubset);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  return MS::kSuccess;
}


/**
  Reverts the command: undoes the operations recorded on dgMod_ and deletes every bind mesh
  this command created.
  @return MS::kSuccess when everything was reverted, otherwise the failing status.
*/
MStatus CVWrapCmd::undoIt() {
  MStatus result = dgMod_.undoIt();
  CHECK_MSTATUS_AND_RETURN_IT(result);

  const unsigned int meshCount = bindMeshes_.length();
  if (meshCount == 0) {
    // No bind meshes were created, so there is nothing else to clean up.
    return MS::kSuccess;
  }

  // Queue one delete command per created bind mesh and run them together.
  MDGModifier cleanup;
  for (unsigned int meshIndex = 0; meshIndex < meshCount; ++meshIndex) {
    result = cleanup.commandToExecute("delete " + bindMeshes_[meshIndex]);
    CHECK_MSTATUS_AND_RETURN_IT(result);
  }
  result = cleanup.doIt();
  CHECK_MSTATUS_AND_RETURN_IT(result);
  bindMeshes_.clear();

  return MS::kSuccess;
}


================================================
FILE: src/cvWrapCmd.h
================================================
#ifndef CVWRAPCMD_H
#define CVWRAPCMD_H

#include <maya/MArgList.h>
#include <maya/MDagPath.h>
#include <maya/MDagPathArray.h>
#include <maya/MDGModifier.h>
#include <maya/MFloatArray.h>
#include <maya/MFloatVectorArray.h>
#include <maya/MMatrixArray.h>
#include <maya/MMeshIntersector.h>
#include <maya/MObjectArray.h>
#include <maya/MPlug.h>
#include <maya/MPointArray.h>
#include <maya/MSelectionList.h>
#include <maya/MString.h>
#include <maya/MStringArray.h>
#include <maya/MThreadPool.h>

#include <maya/MPxCommand.h>

#include <stdio.h>
#include <math.h>
#include <fstream>
#include <iostream>
#include <vector>
#include <map>

#include "common.h"

/**
  Data shared between the command and the binding tasks.  The first group of members is
  gathered before the tasks run; the second group receives one entry per driven component.
*/
struct BindData {
  MPointArray inputPoints;  /**< The world space points of the geometry to be wrapped. */
  MPointArray driverPoints;  /**< The world space points of the driver geometry. */
  MFloatVectorArray driverNormals;  /**< The world space normals of the driver geometry. */
  std::vector<MIntArray> perFaceVertices;  /**< The per-face vertex ids of the driver. */
  std::vector<std::vector<MIntArray> > perFaceTriangleVertices;  /**< The per-face per-triangle vertex ids of the driver. */
  MMeshIntersector intersector;  /**< Closest point intersector on the driver mesh. */
  MMeshIntersector subsetIntersector;  /**< Closest point intersector on a subset mesh if we are rebinding. */
  std::vector<std::set<int> > adjacency;  /**< Driver adjacency for surface crawling. */
  MMatrix driverMatrix;  /**< Driver matrix to convert closest points into world space. */
  double radius;  /**< Max crawl sample radius. */

  /**
    Elements calculated in the threads.
  */
  std::vector<MIntArray> sampleIds;  /**< Per component, the ids of the sampled driver vertices. */
  std::vector<MDoubleArray> weights;  /**< Per component, the weights of the sampled driver vertices. */
  MMatrixArray bindMatrices;  /**< Per component, the inverse of the calculated bind basis matrix. */
  std::vector<BaryCoords> coords;  /**< Per component, the barycentric coordinates of the closest point, sorted highest to lowest. */
  std::vector<MIntArray> triangleVertices;  /**< Per component, the vertex ids of the closest triangle, ordered to match coords. */
};


/**
  The cvWrap command is used to create new cvWrap deformers and to import and export
  wrap bindings.
*/
class CVWrapCmd : public MPxCommand {
 public:
  /** The operation the command was asked to perform. */
  enum CommandMode { kCommandCreate, kCommandExport, kCommandImport, kCommandHelp, kCommandRebind };
  CVWrapCmd();
  // MPxCommand overrides.
  virtual MStatus  doIt(const MArgList&);
  virtual MStatus  undoIt();
  virtual MStatus  redoIt();
  virtual bool isUndoable() const;
  // Static factory and syntax hooks.
  static void* creator();
  static MSyntax newSyntax();

  /**
    Distributes the ThreadData objects to the parallel threads.
    @param[in] data The user defined data.  In this case, the ThreadData array.
    @param[in] pRoot Maya's root task.
  */
  static void CreateTasks(void *data, MThreadRootTask *pRoot);

  /**
    Thread task that calculates part of the binding.
    @param[in] pParam The user defined task data (presumably one ThreadData element -- confirm
                      in cvWrapCmd.cpp).
  */
  static MThreadRetVal CalculateBindingTask(void *pParam);

  const static char* kName;  /**< The name of the command. */

  /**
    Specifies the name of the cvWrap node.
  */
  const static char* kNameFlagShort;
  const static char* kNameFlagLong;

  /**
    Specifies the sample radius of the binding.
  */
  const static char* kRadiusFlagShort;
  const static char* kRadiusFlagLong;

  /**
    Specifies that a new bind mesh should be created.  The bind mesh is only used for rebinding
    vertices and can be deleted at any time.  Sometimes, artists may want to wrap different
    geometry with the same mesh.  By default the command will reuse the same bind mesh for a driver,
    but if new geometry is being wrapped at a different pose, a new bind mesh should be created
    in order to correctly rebind.
  */
  const static char* kNewBindMeshFlagShort;
  const static char* kNewBindMeshFlagLong;

  /**
    Export file path.
  */
  const static char* kExportFlagShort;
  const static char* kExportFlagLong;

  /**
    Import file path.
  */
  const static char* kImportFlagShort;
  const static char* kImportFlagLong;

  /**
    Path of a binding on disk rather than calculating binding from scratch.
  */
  const static char* kBindingFlagShort;
  const static char* kBindingFlagLong;

  /**
    Specifies that the user wants to rebind the selected vertices.
  */
  const static char* kRebindFlagShort;
  const static char* kRebindFlagLong;

  /**
    Displays help.
  */
  const static char* kHelpFlagShort;
  const static char* kHelpFlagLong;

 private:
  /**
    Gathers all the command arguments and sets necessary command states.
    @param[in] args Maya MArgList.
  */
  MStatus GatherCommandArguments(const MArgList& args);

  /**
    Acquires the driver and driven dag paths from the input selection list.
  */
  MStatus GetGeometryPaths();

  /**
    Creates a new wrap deformer.
  */
  MStatus CreateWrapDeformer();

  /**
    Gets the latest cvWrap node in the history of the deformed shape.
  */
  MStatus GetLatestWrapNode();

  /**
    Create a new bind mesh and connect it to the wrap node.
    The bind mesh is the mesh at the time of binding and is used to calculate binding information.
    @param[out] pathBindMesh Presumably receives the path to the new bind mesh -- confirm in
                             cvWrapCmd.cpp.
  */
  MStatus CreateBindMesh(MDagPath& pathBindMesh);

  /**
    Connects the bind mesh message attribute to the wrap deformer.
    @param[in] pathBindMesh The path to the bind mesh to connect.
  */
  MStatus ConnectBindMesh(MDagPath& pathBindMesh);


  /**
    Calculates the binding data for the wrap deformer to work.
    @param[in] pathBindMesh The path to the mesh to bind to.
    @param[in] bindData The structure containing all the bind information.
    @param[in,out] dgMod The modifier to hold all the plug operations.
  */
  MStatus CalculateBinding(MDagPath& pathBindMesh, BindData& bindData, MDGModifier& dgMod);

  /**
    Gets the MDagPath of any existing bind wrap mesh so we don't have to duplicate it for each
    new wrap.
    @param[out] pathBindMesh Storage for path to an existing bind mesh
  */
  MStatus GetExistingBindMesh(MDagPath &pathBindMesh);

  /**
    Calculates new binding data for the selected components.
  */
  MStatus Rebind();

  /**
    Get the bind mesh connected to the wrap node.
    @param[in] oWrapNode MObject to a cvWrap node.
    @param[out] pathBindMesh The path to the bind mesh.
  */
  MStatus GetBindMesh(MObject& oWrapNode, MDagPath& pathBindMesh);


  /**
    Creates the mesh with the subset of faces used to calculate the rebind.
    @param[out] pathDriverSubset Path to the new driver subset mesh.
  */
  MStatus CreateRebindSubsetMesh(MDagPath& pathDriverSubset);


  MString name_;  /**< Name of cvWrap node to create. */
  double radius_;  /**< Binding sample radius. */
  CommandMode command_;  /**< The mode the command is running in. */
  MString filePath_;  /**< File path argument (presumably from the export/import/binding flags). */
  bool useBinding_;  /**< Presumably set when a binding file was given with the binding flag. */
  bool newBindMesh_;  /**< Presumably set when the new bind mesh flag was given. */
  MSelectionList selectionList_;  /**< Selected command input nodes. */
  MObject oWrapNode_;  /**< MObject to the cvWrap node in focus. */
  MDagPath pathDriver_;  /**< Path to the shape wrapping the other shape. */
  MObject driverComponents_;  /**< Selected driver components used for rebinding. */
  MDagPathArray pathDriven_;  /**< Paths to the shapes being wrapped. */
  MObjectArray drivenComponents_;  /**< Selected driven components used for rebinding. */
  MDGModifier dgMod_;  /**< Modifier holding the command's graph edits (presumably used for undo). */
  MStringArray bindMeshes_;  /**< Bind mesh names tracked by the command -- TODO confirm usage. */


};

#endif


================================================
FILE: src/cvWrapDeformer.cpp
================================================
#include "cvWrapDeformer.h"
#include <maya/MFnCompoundAttribute.h>
#include <maya/MFnDoubleArrayData.h>
#include <maya/MFnIntArrayData.h>
#include <maya/MFnMatrixAttribute.h>
#include <maya/MFnMesh.h>
#include <maya/MFnMessageAttribute.h>
#include <maya/MFnNumericAttribute.h>
#include <maya/MFnTypedAttribute.h>
#include <maya/MGlobal.h>
#include <maya/MItGeometry.h>
#include <maya/MNodeMessage.h>
#include <maya/MPlugArray.h>
#include <cassert>

// Type id of the cvWrap node.
MTypeId CVWrap::id(0x0011580B);

// Name string of the node.
const char* CVWrap::kName = "cvWrap";
// Storage for the static attribute handles.  All of them are created in CVWrap::initialize().
MObject CVWrap::aBindDriverGeo;
MObject CVWrap::aDriverGeo;
MObject CVWrap::aBindData;
MObject CVWrap::aSampleComponents;
MObject CVWrap::aSampleWeights;
MObject CVWrap::aTriangleVerts;
MObject CVWrap::aBarycentricWeights;
MObject CVWrap::aBindMatrix;
MObject CVWrap::aNumTasks;
MObject CVWrap::aScale;

/**
  Creates every cvWrap attribute, adds it to the node and declares which attributes affect
  outputGeom.
  @return MS::kSuccess, or the status of the first call that failed.
*/
MStatus CVWrap::initialize() {
  MStatus status;
  MFnTypedAttribute fnTyped;
  MFnNumericAttribute fnNumeric;
  MFnMatrixAttribute fnMatrix;
  MFnMessageAttribute fnMessage;
  MFnCompoundAttribute fnCompound;

  // Driver mesh input.
  aDriverGeo = fnTyped.create("driver", "driver", MFnData::kMesh);
  status = addAttribute(aDriverGeo);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  status = attributeAffects(aDriverGeo, outputGeom);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  // Message attribute for the bind mesh.  It does not affect the output.
  aBindDriverGeo = fnMessage.create("bindMesh", "bindMesh");
  status = addAttribute(aBindDriverGeo);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  // Layout of the per-geometry binding compound:
  //   bindData[]
  //     sampleComponents[]    (int array)
  //     sampleWeights[]       (double array)
  //     triangleVerts[]       (int3)
  //     barycentricWeights[]  (float3)
  //     bindMatrix[]          (matrix)
  aSampleComponents = fnTyped.create("sampleComponents", "sampleComponents", MFnData::kIntArray);
  fnTyped.setArray(true);

  aSampleWeights = fnTyped.create("sampleWeights", "sampleWeights", MFnData::kDoubleArray);
  fnTyped.setArray(true);

  aTriangleVerts = fnNumeric.create("triangleVerts", "triangleVerts", MFnNumericData::k3Int);
  fnNumeric.setArray(true);

  aBarycentricWeights = fnNumeric.create("barycentricWeights", "barycentricWeights",
                                         MFnNumericData::k3Float);
  fnNumeric.setArray(true);

  aBindMatrix = fnMatrix.create("bindMatrix", "bindMatrix");
  fnMatrix.setDefault(MMatrix::identity);
  fnMatrix.setArray(true);

  aBindData = fnCompound.create("bindData", "bindData");
  fnCompound.setArray(true);
  fnCompound.addChild(aSampleComponents);
  fnCompound.addChild(aSampleWeights);
  fnCompound.addChild(aTriangleVerts);
  fnCompound.addChild(aBarycentricWeights);
  fnCompound.addChild(aBindMatrix);
  status = addAttribute(aBindData);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  // Every child of the binding compound affects the output geometry.
  const MObject* bindInputs[] = {
    &aSampleComponents, &aSampleWeights, &aBindMatrix, &aTriangleVerts, &aBarycentricWeights
  };
  const unsigned int bindInputCount = sizeof(bindInputs) / sizeof(bindInputs[0]);
  for (unsigned int i = 0; i < bindInputCount; ++i) {
    status = attributeAffects(*bindInputs[i], outputGeom);
    CHECK_MSTATUS_AND_RETURN_IT(status);
  }

  // Keyable scale, defaulting to 1.
  aScale = fnNumeric.create("scale", "scale", MFnNumericData::kFloat, 1.0);
  fnNumeric.setKeyable(true);
  status = addAttribute(aScale);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  status = attributeAffects(aScale, outputGeom);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  // Number of tasks used by the threaded evaluation, limited to [1, 64].
  aNumTasks = fnNumeric.create("numTasks", "numTasks", MFnNumericData::kInt, 32);
  fnNumeric.setMin(1);
  fnNumeric.setMax(64);
  status = addAttribute(aNumTasks);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  status = attributeAffects(aNumTasks, outputGeom);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  // Make the deformer weights paintable.
  status = MGlobal::executeCommandOnIdle("makePaintable -attrType multiFloat -sm deformer cvWrap weights");
  CHECK_MSTATUS_AND_RETURN_IT(status);

  return MS::kSuccess;
}


/**
  Utility method used by both the MPxDeformer and MPxGPUDeformer to pull the bind data out
  of the datablock.
  @param[in] data The node datablock.
  @param[in] geomIndex The geometry logical index.
  @param[out] taskData Bind info storage.  The bind arrays are indexed by the logical index of
                       the sampleComponents elements.
  @return MS::kNotImplemented when no binding is stored for the geometry yet, MS::kFailure when
          a stored sample id array and sample weight array differ in length, the failing status
          when the datablock cannot be read, otherwise MS::kSuccess.
*/
MStatus GetBindInfo(MDataBlock& data, unsigned int geomIndex, TaskData& taskData) {
  MStatus status;
  MArrayDataHandle hBindDataArray = data.inputArrayValue(CVWrap::aBindData);
  status = hBindDataArray.jumpToElement(geomIndex);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MDataHandle hBindData = hBindDataArray.inputValue();

  MArrayDataHandle hSampleWeights = hBindData.child(CVWrap::aSampleWeights);
  unsigned int numVerts = hSampleWeights.elementCount();
  if (numVerts == 0) {
    // No binding information yet.
    return MS::kNotImplemented;
  }
  MArrayDataHandle hComponents = hBindData.child(CVWrap::aSampleComponents);
  MArrayDataHandle hBindMatrix = hBindData.child(CVWrap::aBindMatrix);
  MArrayDataHandle hTriangleVerts = hBindData.child(CVWrap::aTriangleVerts);
  MArrayDataHandle hBarycentricWeights = hBindData.child(CVWrap::aBarycentricWeights);

  // Every child array must have a first element, otherwise the loop below would read from
  // handles that were never positioned.
  status = hSampleWeights.jumpToArrayElement(0);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  status = hComponents.jumpToArrayElement(0);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  status = hBindMatrix.jumpToArrayElement(0);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  status = hTriangleVerts.jumpToArrayElement(0);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  status = hBarycentricWeights.jumpToArrayElement(0);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  taskData.bindMatrices.setLength(numVerts);
  taskData.sampleIds.resize(numVerts);
  taskData.sampleWeights.resize(numVerts);
  taskData.triangleVerts.resize(numVerts);
  taskData.baryCoords.resize(numVerts);

  // Current length of all the bind arrays.  Kept up to date as they grow.
  unsigned int sampleLength = numVerts;
  for (unsigned int i = 0; i < numVerts; ++i) {
    unsigned int logicalIndex = hComponents.elementIndex();
    if (logicalIndex >= sampleLength) {
      // Nurbs surfaces may be sparse so make sure we have enough space.
      sampleLength = logicalIndex + 1;
      taskData.bindMatrices.setLength(sampleLength);
      taskData.sampleIds.resize(sampleLength);
      taskData.sampleWeights.resize(sampleLength);
      taskData.triangleVerts.resize(sampleLength);
      taskData.baryCoords.resize(sampleLength);
    }

    // Get sample ids
    MObject oIndexData = hComponents.inputValue().data();
    MFnIntArrayData fnIntData(oIndexData, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    taskData.sampleIds[logicalIndex] = fnIntData.array();

    // Get sample weights
    MObject oWeightData = hSampleWeights.inputValue().data();
    MFnDoubleArrayData fnDoubleData(oWeightData, &status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    taskData.sampleWeights[logicalIndex] = fnDoubleData.array();
    // The weights are indexed with the sample id count when the data is consumed, so a
    // mismatch has to be rejected in release builds as well.
    if (taskData.sampleWeights[logicalIndex].length() != taskData.sampleIds[logicalIndex].length()) {
      return MS::kFailure;
    }

    // Get bind matrix
    taskData.bindMatrices[logicalIndex] = hBindMatrix.inputValue().asMatrix();

    // Get triangle vertex binding
    int3& verts = hTriangleVerts.inputValue(&status).asInt3();
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MIntArray& triangleVerts = taskData.triangleVerts[logicalIndex];
    triangleVerts.setLength(3);
    triangleVerts[0] = verts[0];
    triangleVerts[1] = verts[1];
    triangleVerts[2] = verts[2];

    // Get barycentric weights
    float3& baryWeights = hBarycentricWeights.inputValue(&status).asFloat3();
    CHECK_MSTATUS_AND_RETURN_IT(status);
    BaryCoords& coords = taskData.baryCoords[logicalIndex];
    coords[0] = baryWeights[0];
    coords[1] = baryWeights[1];
    coords[2] = baryWeights[2];

    // Advance all child arrays in lock step.  The final next() is expected to fail at the
    // end of the arrays, so the statuses are intentionally not checked.
    hSampleWeights.next();
    hComponents.next();
    hBindMatrix.next();
    hTriangleVerts.next();
    hBarycentricWeights.next();
  }
  return MS::kSuccess;
}

/**
  Reads the driver mesh from the datablock and stores its world space points and vertex
  normals in the task data.
  @param[in] data The node datablock.
  @param[out] taskData Receives driverPoints and driverNormals.
  @return MS::kSuccess, or the status of the first checked call that failed.
*/
MStatus GetDriverData(MDataBlock& data, TaskData& taskData) {
  MStatus status;

  // Pull the driver mesh out of the datablock.
  MDataHandle driverHandle = data.inputValue(CVWrap::aDriverGeo, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MObject driverMesh = driverHandle.asMesh();
  MFnMesh driverFn(driverMesh, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  // World space point positions.
  status = driverFn.getPoints(taskData.driverPoints, MSpace::kWorld);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  // World space vertex normals, one per driver point.
  const unsigned int driverPointCount = taskData.driverPoints.length();
  taskData.driverNormals.setLength(driverPointCount);
  driverFn.getVertexNormals(false, taskData.driverNormals, MSpace::kWorld);

  return MS::kSuccess;
}



/**
  Starts with no delete callback registered and initializes Maya's thread pool.
*/
CVWrap::CVWrap() {
  // No callback registered yet; postConstructor() installs it.
  onDeleteCallbackId = 0;
  MThreadPool::init();
}

/**
  Removes the delete callback, releases the thread pool and frees the per-geometry ThreadData
  arrays.
*/
CVWrap::~CVWrap() {
  if (onDeleteCallbackId != 0) {
    MMessage::removeCallback(onDeleteCallbackId);
  }

  MThreadPool::release();

  // Each entry was allocated with new[] in deform().
  for (size_t geomIndex = 0; geomIndex < threadData_.size(); ++geomIndex) {
    delete [] threadData_[geomIndex];
  }
  threadData_.clear();
}


/** Allocates a new CVWrap node instance. */
void* CVWrap::creator() {
  return new CVWrap();
}

void CVWrap::postConstructor()
{
  MPxDeformerNode::postConstructor();

  MStatus status = MS::kSuccess;
  MObject obj = thisMObject();
  onDeleteCallbackId = MNodeMessage::addNodeAboutToDeleteCallback(obj, aboutToDeleteCB, NULL, &status);
}

/**
  About-to-delete callback.  Queues the deletion of every node connected as a source to the
  .bindMesh plug so that it is removed together with the deformer, for compatibility with wrap.
  @param[in] node The cvWrap node that is about to be deleted.
  @param[in,out] modifier The modifier that receives the extra delete operations.
  @param[in] clientData Unused.
*/
void CVWrap::aboutToDeleteCB(MObject &node, MDGModifier &modifier, void *clientData)
{
  MPlug bindMeshPlug(node, aBindDriverGeo);
  MPlugArray sourcePlugs;
  bindMeshPlug.connectedTo(sourcePlugs, true, false);

  const unsigned int sourceCount = sourcePlugs.length();
  for (unsigned int s = 0; s < sourceCount; ++s) {
    MObject bindMeshNode = sourcePlugs[s].node();
    modifier.deleteNode(bindMeshNode);
  }
}


/**
  Flags the binding of a geometry as dirty when one of the array elements below bindData is
  dirtied, so that deform() re-reads the bind data for that geometry.
  @param[in] plugBeingDirtied The plug that is being dirtied.
  @param[in] affectedPlugs Unused.
  @return Always MS::kSuccess.
*/
MStatus CVWrap::setDependentsDirty(const MPlug& plugBeingDirtied, MPlugArray& affectedPlugs) {
  if (!plugBeingDirtied.isElement()) {
    return MS::kSuccess;
  }

  // element -> owning array -> parent compound element, which carries the geometry index.
  MPlug bindDataPlug = plugBeingDirtied.array().parent();
  if (bindDataPlug == aBindData) {
    unsigned int dirtyGeomIndex = bindDataPlug.logicalIndex();
    dirty_[dirtyGeomIndex] = true;
  }
  return MS::kSuccess;
}


/**
  CPU deformation of one output geometry.  Gathers the binding, the driver points and normals,
  the membership and the paint weights into the TaskData of the geometry and evaluates the wrap
  in parallel tasks.
  @param[in] data The node datablock.
  @param[in,out] itGeo Iterator over the points to deform.
  @param[in] localToWorldMatrix Local to world matrix of the deformed geometry.
  @param[in] geomIndex The geometry logical index.
  @return MS::kSuccess when the geometry was deformed or when there is nothing to do (envelope
          of 0, no tasks, no driver mesh, no binding yet), otherwise the failing status.
*/
MStatus CVWrap::deform(MDataBlock& data, MItGeometry& itGeo, const MMatrix& localToWorldMatrix,
                       unsigned int geomIndex) {
  MStatus status;
  if (geomIndex >= taskData_.size()) {
    taskData_.resize(geomIndex+1);
  }
  TaskData& taskData = taskData_[geomIndex];

  // See if we even need to calculate anything.  This is checked first so that a disabled
  // deformer does not pull the driver mesh or gather any per-point data.
  taskData.scale = data.inputValue(aScale).asFloat();
  taskData.envelope = data.inputValue(envelope).asFloat();
  int taskCount = data.inputValue(aNumTasks).asInt();
  if (taskData.envelope == 0.0f || taskCount <= 0) {
    return MS::kSuccess;
  }

  // Get driver geo
  MDataHandle hDriverGeo = data.inputValue(aDriverGeo, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MObject oDriverGeo = hDriverGeo.asMesh();
  if (oDriverGeo.isNull()) {
    // Without a driver mesh, we can't do anything
    return MS::kSuccess;
  }

  // Only pull bind information from the data block if it is dirty
  if (dirty_[geomIndex] || taskData.sampleIds.size() == 0) {
    dirty_[geomIndex] = false;
    status = GetBindInfo(data, geomIndex, taskData);
    if (status == MS::kNotImplemented) {
      // If no bind information is stored yet, don't do anything.
      return MS::kSuccess;
    } else if (MFAIL(status)) {
      CHECK_MSTATUS_AND_RETURN_IT(status);
    }
  }

  // Get driver geo information
  MFnMesh fnDriver(oDriverGeo, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  // Get the driver point positions
  status = fnDriver.getPoints(taskData.driverPoints, MSpace::kWorld);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  unsigned int numDriverPoints = taskData.driverPoints.length();
  // Get the driver normals
  taskData.driverNormals.setLength(numDriverPoints);
  fnDriver.getVertexNormals(false, taskData.driverNormals, MSpace::kWorld);

  // Get the deformer membership and paint weights
  unsigned int membershipCount = itGeo.count();
  taskData.membership.setLength(membershipCount);
  taskData.paintWeights.setLength(membershipCount);
  status = itGeo.allPositions(taskData.points);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  for (int i = 0; !itGeo.isDone(); itGeo.next(), i++) {
    taskData.membership[i] = itGeo.index();
    taskData.paintWeights[i] = weightValue(data, geomIndex, itGeo.index());
  }

  taskData.drivenMatrix = localToWorldMatrix;
  taskData.drivenInverseMatrix = localToWorldMatrix.inverse();

  if (geomIndex >= threadData_.size()) {
    // Make sure a ThreadData objects exist for this geomIndex.
    size_t currentSize = threadData_.size();
    threadData_.resize(geomIndex+1);
    for (size_t i = currentSize; i < geomIndex+1; ++i) {
      threadData_[i] = new ThreadData<TaskData>[taskCount];
    }
  } else {
    // Make sure the number of ThreadData instances is correct for this geomIndex
    if (threadData_[geomIndex][0].numTasks != taskCount) {
      delete [] threadData_[geomIndex];
      threadData_[geomIndex] = new ThreadData<TaskData>[taskCount];
    }
  }

  // Set up the per-task data and run the tasks.  newParallelRegion() calls CreateTasks(),
  // which joins the tasks before returning.
  CreateThreadData<TaskData>(taskCount, taskData.points.length(),
                             &taskData, threadData_[geomIndex]);
  MThreadPool::newParallelRegion(CreateTasks, (void *)threadData_[geomIndex]);

  status = itGeo.setAllPositions(taskData.points);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  return MS::kSuccess;
}


void CVWrap::CreateTasks(void *data, MThreadRootTask *pRoot) {
  ThreadData<TaskData>* threadData = static_cast<ThreadData<TaskData>*>(data);

  if (threadData) {
    int numTasks = threadData[0].numTasks;
    for(int i = 0; i < numTasks; i++) {
      MThreadPool::createTask(EvaluateWrap, (void *)&threadData[i], pRoot);
    }
    MThreadPool::executeAndJoin(pRoot);
  }
}


/**
  Thread task that deforms the points in [start, end) of one ThreadData element.
  Points whose vertex index has no entry in the bind arrays are left untouched.
  @param[in] pParam Pointer to the ThreadData<TaskData> element of this task.
  @return Always 0.
*/
MThreadRetVal CVWrap::EvaluateWrap(void *pParam) {
  ThreadData<TaskData>* pThreadData = static_cast<ThreadData<TaskData>*>(pParam);
  double*& alignedStorage = pThreadData->alignedStorage;
  TaskData* pData = pThreadData->pData;
  // Get the data out of the struct so it is easier to work with.
  MMatrix& drivenMatrix = pData->drivenMatrix;
  MMatrix& drivenInverseMatrix = pData->drivenInverseMatrix;
  float env = pData->envelope;
  float scale = pData->scale;
  MIntArray& membership = pData->membership;
  MFloatArray& paintWeights = pData->paintWeights;
  MPointArray& points = pData->points;
  MPointArray& driverPoints = pData->driverPoints;
  MFloatVectorArray& driverNormals = pData->driverNormals;
  MMatrixArray& bindMatrices = pData->bindMatrices;
  std::vector <MIntArray>& sampleIds = pData->sampleIds;
  std::vector <MDoubleArray>& sampleWeights = pData->sampleWeights;
  std::vector <MIntArray>& triangleVerts = pData->triangleVerts;
  std::vector <BaryCoords>& baryCoords = pData->baryCoords;

  unsigned int taskStart = pThreadData->start;
  unsigned int taskEnd = pThreadData->end;

  // GetBindInfo() sizes all the bind arrays together, so one size bounds all of them.
  const size_t boundCount = sampleIds.size();

  MMatrix scaleMatrix, matrix;
  scaleMatrix[0][0] = scale;
  scaleMatrix[1][1] = scale;
  scaleMatrix[2][2] = scale;
  for (unsigned int i = taskStart; i < taskEnd; ++i) {
    if (i >= points.length()) {
      break;
    }
    int index = membership[i];
    if (index < 0 || static_cast<size_t>(index) >= boundCount) {
      // The geometry has a vertex the binding knows nothing about (for example when the
      // point count changed after binding).  Leave the point alone instead of reading past
      // the end of the bind arrays.
      continue;
    }

    MPoint origin;
    MVector normal, up;
    CalculateBasisComponents(sampleWeights[index], baryCoords[index], triangleVerts[index],
                             driverPoints, driverNormals, sampleIds[index], alignedStorage,
                             origin, up, normal);

    CreateMatrix(origin, normal, up, matrix);
    matrix = scaleMatrix * matrix;
    // Driven point to world space, through the bind matrix and the current driver matrix,
    // then back to the local space of the driven geometry.
    MPoint newPt = ((points[i]  * drivenMatrix) * (bindMatrices[index] * matrix)) * drivenInverseMatrix;
    // Blend between the original and the wrapped position with paint weight and envelope.
    points[i] = points[i] + ((newPt - points[i]) * paintWeights[i] * env);
  }
  return 0;
}


#if MAYA_API_VERSION >= 201600
// Directory that CVWrapGPU::evaluate() appends the OpenCL kernel file name to.
MString CVWrapGPU::pluginLoadPath;

// The MOpenCLInfo function that returns the command queue depends on the Maya API version.
// Bind the right one to a single name so that the rest of the file does not need to care.
#if MAYA_API_VERSION >= 201650
cl_command_queue (*getMayaDefaultOpenCLCommandQueue)() = MOpenCLInfo::getMayaDefaultOpenCLCommandQueue;
#else
cl_command_queue (*getMayaDefaultOpenCLCommandQueue)() = MOpenCLInfo::getOpenCLCommandQueue;
#endif
/**
  Convenience function to copy array data to the gpu.
  A new device buffer is created when none exists yet or when the existing one is too small
  for the data; otherwise the existing buffer is overwritten with a blocking write.
  @param[in,out] mclMem The device buffer handle.
  @param[in] bufferSize The size of the data in bytes.
  @param[in] data The host data to copy.
  @return CL_SUCCESS or the OpenCL error code of the failing call.
*/
cl_int EnqueueBuffer(MAutoCLMem& mclMem, size_t bufferSize, void* data) {
  cl_int err = CL_SUCCESS;
  if (mclMem.get()) {
    // Writing more bytes than the buffer holds is an error, so drop a buffer that is too
    // small (or whose size cannot be queried) and allocate a new one below.
    size_t existingSize = 0;
    err = clGetMemObjectInfo(mclMem.get(), CL_MEM_SIZE, sizeof(size_t), &existingSize, NULL);
    if (err != CL_SUCCESS || existingSize < bufferSize) {
      mclMem.reset();
    }
  }

  if (!mclMem.get()) {
    // The buffer doesn't exist yet so create it and copy the data over.
    mclMem.attach(clCreateBuffer(MOpenCLInfo::getOpenCLContext(),
                                 CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY,
                                 bufferSize, data, &err));
  } else {
    // The buffer already exists and is large enough so just copy the data over.
    err = clEnqueueWriteBuffer(getMayaDefaultOpenCLCommandQueue(),
                               mclMem.get(), CL_TRUE, 0, bufferSize,
                               data, 0, NULL, NULL);
  }
  return err;
}

/**
  Returns the registration info object of the GPU deformer.  The same function-local static
  instance is returned on every call.
*/
MGPUDeformerRegistrationInfo* CVWrapGPU::GetGPUDeformerInfo() {
  static CVWrapGPUDeformerInfo registrationInfo;
  MGPUDeformerRegistrationInfo* info = &registrationInfo;
  return info;
}

/**
  Intentionally empty.  Construction has to be cheap because Maya may allocate an instance
  and then never use it; the GPU resources are created lazily during evaluation.
*/
CVWrapGPU::CVWrapGPU() {
}

/** Runs terminate() when the deformer is destroyed. */
CVWrapGPU::~CVWrapGPU() {
  this->terminate();
}

/**
  GPU evaluation of the wrap.  Uploads the bind, driver and paint map data, makes sure the
  kernel is loaded, sets the kernel arguments and enqueues the kernel.
  The signature depends on the Maya API version: up to 2017 the buffers and events are passed
  directly, afterwards they are carried by MGPUDeformerData.
  @return kDeformerSuccess when the kernel was enqueued.  kDeformerFailure when the bind or
          driver data could not be uploaded, the kernel could not be loaded, or the kernel
          could not be enqueued.
*/
#if MAYA_API_VERSION <= 201700
MPxGPUDeformer::DeformerStatus CVWrapGPU::evaluate(MDataBlock& block,
                                                   const MEvaluationNode& evaluationNode,
                                                   const MPlug& plug,
                                                   unsigned int numElements,
                                                   const MAutoCLMem inputBuffer,
                                                   const MAutoCLEvent inputEvent,
                                                   MAutoCLMem outputBuffer,
                                                   MAutoCLEvent& outputEvent) {
#else
MPxGPUDeformer::DeformerStatus  CVWrapGPU::evaluate(MDataBlock& block,
                                                    const MEvaluationNode& evaluationNode,
                                                    const MPlug& plug,
                                                    const MGPUDeformerData& inputData,
                                                    MGPUDeformerData& outputData) {
  // get the input GPU data and event
  MGPUDeformerBuffer inputDeformerBuffer = inputData.getBuffer(sPositionsName());
  const MAutoCLMem inputBuffer = inputDeformerBuffer.buffer();
  unsigned int numElements = inputDeformerBuffer.elementCount();
  const MAutoCLEvent inputEvent = inputDeformerBuffer.bufferReadyEvent();

  // create the output buffer
  MGPUDeformerBuffer outputDeformerBuffer = createOutputBuffer(inputDeformerBuffer);
  MAutoCLEvent outputEvent;
  MAutoCLMem outputBuffer = outputDeformerBuffer.buffer();
#endif

  MStatus status;
  numElements_ = numElements;
  // Copy all necessary data to the gpu.  The kernel reads the bind and driver buffers for
  // every point, so running it without them is not an option.
  status = EnqueueBindData(block, evaluationNode, plug);
  CHECK_MSTATUS(status);
  if (MFAIL(status)) {
    return MPxGPUDeformer::kDeformerFailure;
  }
  status = EnqueueDriverData(block, evaluationNode, plug);
  CHECK_MSTATUS(status);
  if (MFAIL(status)) {
    return MPxGPUDeformer::kDeformerFailure;
  }
  status = EnqueuePaintMapData(block, evaluationNode, numElements, plug);
  CHECK_MSTATUS(status);

  if (!kernel_.get())  {
    // Load the OpenCL kernel if we haven't yet.
    MString openCLKernelFile(pluginLoadPath);
#if MAYA_API_VERSION > 201700
    openCLKernelFile += "/cvwrap.cl";
#else
    openCLKernelFile += "/cvwrap_pre2018.cl";
#endif
    kernel_ = MOpenCLInfo::getOpenCLKernel(openCLKernelFile, "cvwrap");
    if (kernel_.isNull())  {
      std::cerr << "Could not compile kernel " << openCLKernelFile.asChar() << "\n";
      return MPxGPUDeformer::kDeformerFailure;
    }
  }
  float envelope = block.inputValue(MPxDeformerNode::envelope, &status).asFloat();
  CHECK_MSTATUS(status);
  cl_int err = CL_SUCCESS;

  // Set all of our kernel parameters.  Input buffer and output buffer may be changing every frame
  // so always set them.
  unsigned int parameterId = 0;
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_mem), (void*)outputBuffer.getReadOnlyRef());
  MOpenCLInfo::checkCLErrorStatus(err);
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_mem), (void*)inputBuffer.getReadOnlyRef());
  MOpenCLInfo::checkCLErrorStatus(err);
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_mem), (void*)driverPoints_.getReadOnlyRef());
  MOpenCLInfo::checkCLErrorStatus(err);
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_mem), (void*)driverNormals_.getReadOnlyRef());
  MOpenCLInfo::checkCLErrorStatus(err);
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_mem), (void*)paintWeights_.getReadOnlyRef());
  MOpenCLInfo::checkCLErrorStatus(err);
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_mem), (void*)sampleCounts_.getReadOnlyRef());
  MOpenCLInfo::checkCLErrorStatus(err);
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_mem), (void*)sampleOffsets_.getReadOnlyRef());
  MOpenCLInfo::checkCLErrorStatus(err);
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_mem), (void*)sampleIds_.getReadOnlyRef());
  MOpenCLInfo::checkCLErrorStatus(err);
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_mem), (void*)sampleWeights_.getReadOnlyRef());
  MOpenCLInfo::checkCLErrorStatus(err);
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_mem), (void*)triangleVerts_.getReadOnlyRef());
  MOpenCLInfo::checkCLErrorStatus(err);
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_mem), (void*)baryCoords_.getReadOnlyRef());
  MOpenCLInfo::checkCLErrorStatus(err);
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_mem), (void*)bindMatrices_.getReadOnlyRef());
  MOpenCLInfo::checkCLErrorStatus(err);
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_mem), (void*)drivenMatrices_.getReadOnlyRef());
  MOpenCLInfo::checkCLErrorStatus(err);
#if MAYA_API_VERSION > 201700
  // get the world space and inverse world space matrix mem handles
  MGPUDeformerBuffer inputWorldSpaceMatrixDeformerBuffer = inputData.getBuffer(sGeometryMatrixName());
  const MAutoCLMem deformerWorldSpaceMatrix = inputWorldSpaceMatrixDeformerBuffer.buffer();
  MGPUDeformerBuffer inputInvWorldSpaceMatrixDeformerBuffer = inputData.getBuffer(sInverseGeometryMatrixName());
  const MAutoCLMem deformerInvWorldSpaceMatrix = inputInvWorldSpaceMatrixDeformerBuffer.buffer();
  // Note: these matrices are in row major order
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_mem), (void*)deformerWorldSpaceMatrix.getReadOnlyRef());
  MOpenCLInfo::checkCLErrorStatus(err);
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_mem), (void*)deformerInvWorldSpaceMatrix.getReadOnlyRef());
  MOpenCLInfo::checkCLErrorStatus(err);
#endif
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_float), (void*)&envelope);
  MOpenCLInfo::checkCLErrorStatus(err);
  err = clSetKernelArg(kernel_.get(), parameterId++, sizeof(cl_uint), (void*)&numElements_);
  MOpenCLInfo::checkCLErrorStatus(err);

  // Figure out a good work group size for our kernel.  Both outputs are initialized so that
  // a failed query falls back to the default below instead of reading garbage.
  size_t workGroupSize = 0;
  size_t retSize = 0;
  err = clGetKernelWorkGroupInfo(
    kernel_.get(),
    MOpenCLInfo::getOpenCLDeviceId(),
    CL_KERNEL_WORK_GROUP_SIZE,
    sizeof(size_t),
    &workGroupSize,
    &retSize);
  MOpenCLInfo::checkCLErrorStatus(err);

  size_t localWorkSize = 256;
  if (err == CL_SUCCESS && retSize > 0 && workGroupSize > 0) {
    localWorkSize = workGroupSize;
  }
  // global work size must be a multiple of localWorkSize
  size_t globalWorkSize = (localWorkSize - numElements_ % localWorkSize) + numElements_;

  // set up our input events.  The input event could be NULL, in that case we need to pass
  // slightly different parameters into clEnqueueNDRangeKernel
  unsigned int numInputEvents = 0;
  if (inputEvent.get()) {
    numInputEvents = 1;
  }

  // run the kernel
  err = clEnqueueNDRangeKernel(
    getMayaDefaultOpenCLCommandQueue(),
    kernel_.get(),
    1,
    NULL,
    &globalWorkSize,
    &localWorkSize,
    numInputEvents,
    numInputEvents ? inputEvent.getReadOnlyRef() : 0,
    outputEvent.getReferenceForAssignment() );
  MOpenCLInfo::checkCLErrorStatus(err);
  if (err != CL_SUCCESS) {
    // Nothing was enqueued, so the output buffer will never be written.
    return MPxGPUDeformer::kDeformerFailure;
  }

#if MAYA_API_VERSION > 201700
  // set the buffer into the output data
  outputDeformerBuffer.setBufferReadyEvent(outputEvent);
  outputData.setBuffer(outputDeformerBuffer);
#endif

  return MPxGPUDeformer::kDeformerSuccess;
}

/**
  Flattens the bind data of the evaluated geometry and uploads it to the gpu.  Nothing is
  uploaded when the bind matrices are already on the gpu and none of the bind plugs is dirty.
  @param[in] data The node datablock.
  @param[in] evaluationNode Used to query which plugs are dirty.
  @param[in] plug The plug being evaluated; its logical index selects the geometry.
  @return MS::kSuccess when the data is up to date on the gpu, the status of GetBindInfo() when
          the bind data cannot be read, or MS::kFailure when an OpenCL upload fails.
*/
MStatus CVWrapGPU::EnqueueBindData(MDataBlock& data, const MEvaluationNode& evaluationNode,
                                   const MPlug& plug) {
  MStatus status;
  if ((bindMatrices_.get() && (
        !evaluationNode.dirtyPlugExists(CVWrap::aBindData, &status) &&
        !evaluationNode.dirtyPlugExists(CVWrap::aSampleComponents, &status) &&
        !evaluationNode.dirtyPlugExists(CVWrap::aSampleWeights, &status) &&
        !evaluationNode.dirtyPlugExists(CVWrap::aTriangleVerts, &status) &&
        !evaluationNode.dirtyPlugExists(CVWrap::aBarycentricWeights, &status) &&
        !evaluationNode.dirtyPlugExists(CVWrap::aBindMatrix, &status)
      )) || !status) {
    // No bind data has changed, nothing to do.
    return MS::kSuccess;
  }

  TaskData taskData;
  unsigned int geomIndex = plug.logicalIndex();
  status = GetBindInfo(data, geomIndex, taskData);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  // Flatten out bind matrices to float array, row by row.
  const unsigned int matrixCount = taskData.bindMatrices.length();
  std::vector<float> bindMatrices(static_cast<size_t>(matrixCount) * 16);
  for (unsigned int i = 0, idx = 0; i < matrixCount; ++i) {
    for (unsigned int row = 0; row < 4; row++) {
      for (unsigned int column = 0; column < 4; column++) {
        bindMatrices[idx++] = (float)taskData.bindMatrices[i](row, column);
      }
    }
  }

  // Samples per vertex and the offset of the first sample of each vertex in the flat arrays.
  const size_t vertexCount = taskData.sampleIds.size();
  std::vector<int> samplesPerVertex(vertexCount);
  std::vector<int> sampleOffsets(vertexCount);
  int totalSamples = 0;
  for (size_t i = 0; i < vertexCount; ++i) {
    samplesPerVertex[i] = (int)taskData.sampleIds[i].length();
    sampleOffsets[i] = totalSamples;
    totalSamples += samplesPerVertex[i];
  }

  // Flat sampleIds and sampleWeights.
  std::vector<int> sampleIds(totalSamples);
  std::vector<float> sampleWeights(totalSamples);
  size_t iter = 0;
  for (size_t i = 0; i < vertexCount; ++i) {
    for (unsigned int j = 0; j < taskData.sampleIds[i].length(); ++j) {
      sampleIds[iter] = taskData.sampleIds[i][j];
      sampleWeights[iter] = (float)taskData.sampleWeights[i][j];
      iter++;
    }
  }

  // Triangle verts and bary coords, three values per vertex.
  const size_t triangleEntryCount = taskData.triangleVerts.size();
  std::vector<int> triangleVerts(triangleEntryCount * 3);
  std::vector<float> baryCoords(triangleEntryCount * 3);
  iter = 0;
  for (size_t i = 0; i < triangleEntryCount; ++i) {
    const MIntArray& triangle = taskData.triangleVerts[i];
    // Sparse entries that GetBindInfo() never filled have an empty triangle array.  Write
    // zeros for those instead of indexing past the end of the array.
    const bool hasTriangle = triangle.length() >= 3;
    for (unsigned int j = 0; j < 3; ++j) {
      triangleVerts[iter] = hasTriangle ? triangle[j] : 0;
      baryCoords[iter] = (float)taskData.baryCoords[i][j];
      iter++;
    }
  }

  // Upload everything.  The uploads stop at the first OpenCL error.
  cl_int err = EnqueueBuffer(bindMatrices_, bindMatrices.size() * sizeof(float),
                             bindMatrices.empty() ? NULL : (void*)&bindMatrices[0]);
  if (err == CL_SUCCESS) {
    err = EnqueueBuffer(sampleCounts_, samplesPerVertex.size() * sizeof(int),
                        samplesPerVertex.empty() ? NULL : (void*)&samplesPerVertex[0]);
  }
  if (err == CL_SUCCESS) {
    err = EnqueueBuffer(sampleOffsets_, sampleOffsets.size() * sizeof(int),
                        sampleOffsets.empty() ? NULL : (void*)&sampleOffsets[0]);
  }
  if (err == CL_SUCCESS) {
    err = EnqueueBuffer(sampleIds_, sampleIds.size() * sizeof(int),
                        sampleIds.empty() ? NULL : (void*)&sampleIds[0]);
  }
  if (err == CL_SUCCESS) {
    err = EnqueueBuffer(sampleWeights_, sampleWeights.size() * sizeof(float),
                        sampleWeights.empty() ? NULL : (void*)&sampleWeights[0]);
  }
  if (err == CL_SUCCESS) {
    err = EnqueueBuffer(triangleVerts_, triangleVerts.size() * sizeof(int),
                        triangleVerts.empty() ? NULL : (void*)&triangleVerts[0]);
  }
  if (err == CL_SUCCESS) {
    err = EnqueueBuffer(baryCoords_, baryCoords.size() * sizeof(float),
                        baryCoords.empty() ? NULL : (void*)&baryCoords[0]);
  }

  if (err != CL_SUCCESS) {
    MOpenCLInfo::checkCLErrorStatus(err);
    // bindMatrices_ doubles as the "bind data is on the gpu" marker in the check at the top.
    // Clear it so that the next evaluation retries instead of using a partial upload.
    bindMatrices_.reset();
    return MS::kFailure;
  }
  return MS::kSuccess;
}


/**
  Uploads the world space driver points and normals and the matrix buffer to the gpu.
  Up to Maya 2017 the matrix buffer holds the driven localToWorld and worldToLocal matrices
  followed by the scale matrix.  Later versions only upload the scale matrix.
  @param[in] data The node datablock.
  @param[in] evaluationNode Unused.
  @param[in] plug The plug being evaluated; up to Maya 2017 its logical index selects the
                  input geometry.
  @return MS::kSuccess, the failing status when the datablock cannot be read, or MS::kFailure
          when an OpenCL upload fails.
*/
MStatus CVWrapGPU::EnqueueDriverData(MDataBlock& data, const MEvaluationNode& evaluationNode, const MPlug& plug) {
  MStatus status;
  TaskData taskData;
  status = GetDriverData(data, taskData);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  cl_int err = CL_SUCCESS;
  // Store world space driver points and normals into float arrays.
  // Reuse the same array for points and normals so we're not dynamically allocating double
  // the memory.
  const unsigned int pointCount = taskData.driverPoints.length();
  const size_t floatCount = static_cast<size_t>(pointCount) * 3;
  std::vector<float> driverData(floatCount);
  void* pDriverData = driverData.empty() ? NULL : (void*)&driverData[0];

  // Store the driver points on the gpu.
  for (unsigned int i = 0, iter = 0; i < pointCount; ++i) {
    driverData[iter++] = (float)taskData.driverPoints[i].x;
    driverData[iter++] = (float)taskData.driverPoints[i].y;
    driverData[iter++] = (float)taskData.driverPoints[i].z;
  }
  err = EnqueueBuffer(driverPoints_, floatCount * sizeof(float), pDriverData);
  if (err != CL_SUCCESS) {
    MOpenCLInfo::checkCLErrorStatus(err);
    return MS::kFailure;
  }

  // Store the driver normals on the gpu.
  for (unsigned int i = 0, iter = 0; i < pointCount; ++i) {
    driverData[iter++] = taskData.driverNormals[i].x;
    driverData[iter++] = taskData.driverNormals[i].y;
    driverData[iter++] = taskData.driverNormals[i].z;
  }
  err = EnqueueBuffer(driverNormals_, floatCount * sizeof(float), pDriverData);
  if (err != CL_SUCCESS) {
    MOpenCLInfo::checkCLErrorStatus(err);
    return MS::kFailure;
  }

  int idx = 0;
#if MAYA_API_VERSION <= 201700
  // Store the driven matrices on the gpu.
  MArrayDataHandle hInputs = data.inputValue(CVWrap::input, &status);
  unsigned int geomIndex = plug.logicalIndex();
  status = hInputs.jumpToElement(geomIndex);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MDataHandle hInput = hInputs.inputValue(&status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MDataHandle hGeom = hInput.child(CVWrap::inputGeom);
  MMatrix localToWorldMatrix = hGeom.geometryTransformMatrix();
  MMatrix worldToLocalMatrix = localToWorldMatrix.inverse();
  float drivenMatrices[48]; // 0-15: localToWorld, 16-31: worldToLocal, 32-47: scale

  // Store in column order so we can dot in the cl kernel.
  for(unsigned int column = 0; column < 4; column++) {
    for(unsigned int row = 0; row < 4; row++) {
      drivenMatrices[idx++] = (float)localToWorldMatrix(row, column);
    }
  }
  for(unsigned int column = 0; column < 4; column++) {
    for(unsigned int row = 0; row < 4; row++) {
      drivenMatrices[idx++] = (float)worldToLocalMatrix(row, column);
    }
  }
#else
  float drivenMatrices[16]; // 0-15: scale
#endif
  // Scale matrix is stored row major
  float scale = data.inputValue(CVWrap::aScale, &status).asFloat();
  CHECK_MSTATUS_AND_RETURN_IT(status);
  MMatrix scaleMatrix;
  scaleMatrix[0][0] = scale;
  scaleMatrix[1][1] = scale;
  scaleMatrix[2][2] = scale;
  for(unsigned int row = 0; row < 4; row++) {
    for(unsigned int column = 0; column < 4; column++) {
      drivenMatrices[idx++] = (float)scaleMatrix(row, column);
    }
  }
#if MAYA_API_VERSION <= 201700
  err = EnqueueBuffer(drivenMatrices_, 48 * sizeof(float), (void*)drivenMatrices);
#else
  err = EnqueueBuffer(drivenMatrices_, 16 * sizeof(float), (void*)drivenMatrices);
#endif
  if (err != CL_SUCCESS) {
    MOpenCLInfo::checkCLErrorStatus(err);
    return MS::kFailure;
  }
  return MS::kSuccess;
}


/**
  Uploads the painted deformer weights of one geometry to the GPU.

  Nothing is uploaded when a weight buffer already exists and the weightList plug is not
  dirty.  Otherwise every element starts at a weight of 1 and the values stored in the
  weight list override the elements they are stored for.

  @param[in] data The data block the weight list is read from.
  @param[in] evaluationNode Used to query whether the weightList plug is dirty.
  @param[in] numElements The number of weights to upload.
  @param[in] plug Its logical index selects the weight list element to read.
  @return The failing status of a data block query, MS::kFailure when the OpenCL upload
          reports an error, otherwise MS::kSuccess.
*/
MStatus CVWrapGPU::EnqueuePaintMapData(MDataBlock& data,
                                       const MEvaluationNode& evaluationNode,
                                       unsigned int numElements,
                                       const MPlug& plug) {
  MStatus status;
  if ((paintWeights_.get() &&
       !evaluationNode.dirtyPlugExists(MPxDeformerNode::weightList, &status)) || !status) {
    // The paint weights are not dirty so no need to get them.
    return MS::kSuccess;
  }

  // Store the paint weights on the gpu.
  // Since we can't call MPxDeformerNode::weightValue, get the paint weights from the data block.
  // All weights default to 1.0f.  The vector owns the memory, so the early returns below
  // can no longer leak it.
  std::vector<float> paintWeights(numElements, 1.0f);
  MArrayDataHandle weightList = data.outputArrayValue(MPxDeformerNode::weightList, &status);
  CHECK_MSTATUS_AND_RETURN_IT(status);
  unsigned int geomIndex = plug.logicalIndex();
  status = weightList.jumpToElement(geomIndex);
  // It is possible that the jumpToElement fails.  In that case all weights stay at 1.
  if (status) {
    MDataHandle weightsStructure = weightList.inputValue(&status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    MArrayDataHandle weights = weightsStructure.child(MPxDeformerNode::weights);
    // Gather all the stored weights
    unsigned int numWeights = weights.elementCount(&status);
    CHECK_MSTATUS_AND_RETURN_IT(status);
    for (unsigned int i = 0; i < numWeights; i++, weights.next()) {
      unsigned int weightsElementIndex = weights.elementIndex(&status);
      if (!status) {
        continue;
      }
      MDataHandle value = weights.inputValue(&status);
      if (!status) {
        continue;
      }
      // NOTE: The weightsElementIndex may be sparse for nurbs surfaces, so it is not
      // guaranteed to line up with the point order.  Indices past the end of the buffer
      // are skipped instead of being written out of bounds.
      if (weightsElementIndex < numElements) {
        paintWeights[weightsElementIndex] = value.asFloat();
      }
    }
  }

  void* hostData = NULL;
  if (!paintWeights.empty()) {
    hostData = (void*)&paintWeights[0];
  }
  cl_int err = EnqueueBuffer(paintWeights_, paintWeights.size() * sizeof(float), hostData);
  if (err != CL_SUCCESS) {
    // Report the failed upload instead of pretending the weights are on the GPU.
    return MS::kFailure;
  }
  return MS::kSuccess;
}


void CVWrapGPU::terminate() {
  driverPoints_.reset();
  driverNormals_.reset();
  paintWeights_.reset();
  bindMatrices_.reset();
  sampleCounts_.reset();
  sampleIds_.reset();
  sampleWeights_.reset();
  triangleVerts_.reset();
  baryCoords_.reset();
  drivenMatrices_.reset();
	MOpenCLInfo::releaseOpenCLKernel(kernel_);
	kernel_.reset();
}

#endif



================================================
FILE: src/cvWrapDeformer.h
================================================
#ifndef CVWRAPDEFORMER_H
#define CVWRAPDEFORMER_H

#include <maya/MDGModifier.h>
#include <maya/MFloatArray.h>
#include <maya/MIntArray.h>
#include <maya/MMatrix.h> 
#include <maya/MMatrixArray.h> 
#include <maya/MMessage.h>
#include <maya/MPoint.h> 
#include <maya/MThreadPool.h>
#include <maya/MPxDeformerNode.h>

#if MAYA_API_VERSION >= 201600
#include <maya/MPxGPUDeformer.h>
#include <maya/MGPUDeformerRegistry.h>
#include <maya/MOpenCLInfo.h>
#include <clew/clew_cl.h>
#endif

#include <map>
#include <vector>
#include "common.h"

/**
  Bundle of the values used to evaluate the wrap for one geometry.  CVWrap stores one
  TaskData per geometry (see CVWrap::taskData_).
*/
struct TaskData {
  // NOTE(review): presumably the transform of the driven geometry and its inverse, as
  // used in the "points[i] * drivenMatrix ... * drivenInverseMatrix" formula quoted in
  // the cl kernels - confirm.
  MMatrix drivenMatrix;
  MMatrix drivenInverseMatrix;
  float envelope;
  float scale;

  // Data of the deformed geometry.
  MIntArray membership;
  MFloatArray paintWeights;
  MPointArray points;

  // Data of the driver.  The GPU path uploads driverPoints and driverNormals as floats.
  MPointArray driverPoints;
  MFloatVectorArray driverNormals;
  // Bind data.
  MMatrixArray bindMatrices;
  std::vector<MIntArray> sampleIds;
  std::vector<MDoubleArray> sampleWeights;
  // The GPU upload reads three values ([i][0..2]) from each entry of the two arrays below.
  std::vector<MIntArray> triangleVerts;
  std::vector<BaryCoords> baryCoords;
};
 

/**
  The cvWrap deformer node.
*/
class CVWrap : public MPxDeformerNode {
 public:
  CVWrap();
  virtual ~CVWrap(); 
  // Virtual methods of the deformer node.
  virtual void postConstructor();
  virtual MStatus deform(MDataBlock& data, MItGeometry& iter, const MMatrix& mat,
                         unsigned int mIndex);
  virtual MStatus setDependentsDirty(const MPlug& plugBeingDirtied, MPlugArray& affectedPlugs);

  // Passed to MFnPlugin::registerNode when the plug-in is initialized.
  static void* creator();
  static MStatus initialize();


  /**
    Distributes the ThreadData objects to the parallel threads.
    @param[in] data The user defined data.  In this case, the ThreadData array.
    @param[in] pRoot Maya's root task.
  */
  static void CreateTasks(void *data, MThreadRootTask *pRoot);
  // NOTE(review): presumably the per-thread function scheduled by CreateTasks - confirm.
  static MThreadRetVal EvaluateWrap(void *pParam);
    
  const static char* kName;  /**< The name of the node. */
  static MObject aBindDriverGeo;
  static MObject aDriverGeo;
  static MObject aBindData;
  static MObject aSampleComponents;
  static MObject aSampleWeights;
  /** The vertex indices of the triangle containing the origin of each coordinate system. */
  static MObject aTriangleVerts;
  /**
    NOTE(review): presumably the barycentric weights of each origin inside its
    aTriangleVerts triangle.  The previous comment here described tangent indices and
    did not match the attribute name - confirm.
  */
  static MObject aBarycentricWeights;

  static MObject aBindMatrix;
  static MObject aNumTasks;
  static MObject aScale;
  static MTypeId id;  /**< The type id the node is registered with. */

private:
  static void aboutToDeleteCB(MObject &node, MDGModifier &modifier, void *clientData);

  // NOTE(review): presumably a dirty flag per geometry index - confirm.
  std::map<unsigned int, bool> dirty_;
  std::vector<TaskData> taskData_;  /**< Per geometry evaluation data. */
  std::vector<ThreadData<TaskData>*> threadData_;
  // NOTE(review): presumably the id of the callback that invokes aboutToDeleteCB - confirm.
  MCallbackId onDeleteCallbackId;
};



#if MAYA_API_VERSION >= 201600
// The GPU override implementation of the cvWrap deformer node.
//

/**
  GPU deformer for the cvWrap node.  It keeps the wrap data in OpenCL buffers and holds
  the kernel that is run on them.
*/
class CVWrapGPU : public MPxGPUDeformer {
 public:
	// Virtual methods from MPxGPUDeformer
	CVWrapGPU();
	virtual ~CVWrapGPU();

	// The signature of evaluate differs between Maya versions.
#if MAYA_API_VERSION <= 201700
	virtual MPxGPUDeformer::DeformerStatus evaluate(MDataBlock& block, const MEvaluationNode&,
                                                  const MPlug& plug, unsigned int numElements,
                                                  const MAutoCLMem, const MAutoCLEvent,
                                                  MAutoCLMem, MAutoCLEvent&);
#else
	virtual MPxGPUDeformer::DeformerStatus evaluate(MDataBlock& block, const MEvaluationNode& evaluationNode,
													const MPlug& plug, const MGPUDeformerData& inputData,
													MGPUDeformerData& outputData);
#endif
	// Releases the OpenCL buffers and the kernel.
	virtual void terminate();

	// Returns the registration info handed to MGPUDeformerRegistry when the plug-in loads.
	static MGPUDeformerRegistrationInfo* GetGPUDeformerInfo();
	static bool ValidateNode(MDataBlock& block, const MEvaluationNode&, const MPlug& plug, MStringArray* messages);
  /** The path of where the plug-in is loaded from.  Used to find the cl kernel. */
  static MString pluginLoadPath;

private:
	// helper methods
	// Each helper copies one group of data into the GPU buffers below.
	MStatus EnqueueBindData(MDataBlock& data, const MEvaluationNode& evaluationNode, const MPlug& plug);
	MStatus EnqueueDriverData(MDataBlock& data, const MEvaluationNode& evaluationNode, const MPlug& plug);
	MStatus EnqueuePaintMapData(MDataBlock& data, const MEvaluationNode& evaluationNode, unsigned int numElements, const MPlug& plug);

	// Storage for data on the GPU
	MAutoCLMem driverPoints_;
	MAutoCLMem driverNormals_;
	MAutoCLMem paintWeights_;
	MAutoCLMem bindMatrices_;
	MAutoCLMem sampleCounts_;
	MAutoCLMem sampleOffsets_;
	MAutoCLMem sampleIds_;
	MAutoCLMem sampleWeights_;
	MAutoCLMem triangleVerts_;
	MAutoCLMem baryCoords_;
	MAutoCLMem drivenMatrices_;

	// NOTE(review): presumably the number of points handled by the last evaluate - confirm.
	unsigned int numElements_;

	// Kernel
	MAutoCLKernel kernel_;
};


/**
  The registration info of the GPU deformer.  It creates the CVWrapGPU instances and
  accepts every node it is asked to validate.
*/
class CVWrapGPUDeformerInfo : public MGPUDeformerRegistrationInfo {
 public:
	CVWrapGPUDeformerInfo(){}
	virtual ~CVWrapGPUDeformerInfo(){}

	// Returns a newly allocated CVWrapGPU.
	virtual MPxGPUDeformer* createGPUDeformer()	{
		return new CVWrapGPU();
	}
	


	// The validation interface depends on the Maya API version.  All variants return true.
#if MAYA_API_VERSION >= 201650
	virtual bool validateNodeInGraph(MDataBlock& block, const MEvaluationNode& evaluationNode,
                                   const MPlug& plug, MStringArray* messages)	{
		return true;
	}

	virtual bool validateNodeValues(MDataBlock& block, const MEvaluationNode& evaluationNode,
                                  const MPlug& plug, MStringArray* messages) {
		return true;
	}
#else
  virtual bool validateNode(MDataBlock& block, const MEvaluationNode& evaluationNode,
                            const MPlug& plug, MStringArray* messages) {
		return true;
	}
#endif
};

#endif // End Maya 2016

#endif


================================================
FILE: src/cvwrap.cl
================================================
/*
  cvwrap kernel

  Each work-item deforms one point.  It rebuilds a coordinate system on the driver from
  the bind data of the point, transforms the point with it and blends the result with the
  initial position.

  Arguments:
    finalPos           Output positions, 3 floats per point.
    initialPos         Input positions, 3 floats per point.
    driverPoints       Driver points, 3 floats per driver vertex.
    driverNormals      Driver normals, 3 floats per driver vertex.
    paintWeights       One weight per point.
    sampleCounts       Per point, the number of entries it owns in sampleIds/sampleWeights.
    sampleOffsets      Per point, the index of its first entry in sampleIds/sampleWeights.
    sampleIds          Driver vertex ids of the samples.
    sampleWeights      Weights of the samples.  The last weight of a point is applied to
                       the normal recreated from the barycentric coordinates.
    triangleVerts      3 driver vertex ids per point.
    baryCoords         3 barycentric coordinates per point, matching triangleVerts.
    bindMatrices       4 float4 per point, read as the rows of the bind matrix.
    scaleMatrix        4 float4, read as the rows of the scale matrix.
    drivenWorldMatrix  16 floats, element [r*4+c] is read as row r, column c.
    drivenInvMatrix    16 floats, same layout as drivenWorldMatrix.
    envelope           Multiplied with the paint weight of each point.
    positionCount      The number of points.  Work-items past it return immediately.
*/

__kernel void cvwrap(__global float* finalPos,
                     __global const float* initialPos,
                     __global const float* driverPoints,
                     __global const float* driverNormals,
                     __global const float* paintWeights,
                     __global const int* sampleCounts,
                     __global const int* sampleOffsets,
                     __global const int* sampleIds,
                     __global const float* sampleWeights,
                     __global const int* triangleVerts,
                     __global const float* baryCoords,
                     __global const float4* bindMatrices,
                     __global const float4* scaleMatrix,
                     __global const float* drivenWorldMatrix,
                     __global const float* drivenInvMatrix,
                     const float envelope,
                     const uint positionCount) {
  unsigned int positionId = get_global_id(0);
  if (positionId >= positionCount) {
    return;          
  }
  // Index of the first of the 3 values stored per point.
  unsigned int positionOffset = positionId * 3;

  // Start with the recreated point and normal using the barycentric coordinates of the hit point.
  /*
    Equivalent CPU code:
    ====================
    MVector hitNormal;
    for (int i = 0; i < 3; ++i) {
      origin += points[triangleVertices[i]] * coords[i];
      hitNormal += MVector(normals[triangleVertices[i]]) * coords[i];
    }
  */
  float baryA = baryCoords[positionOffset];
  float baryB = baryCoords[positionOffset+1];
  float baryC = baryCoords[positionOffset+2];
  // Multiply the vertex ids by 3 to index straight into the float arrays.
  int triVertA = triangleVerts[positionOffset] * 3;
  int triVertB = triangleVerts[positionOffset+1] * 3;
  int triVertC = triangleVerts[positionOffset+2] * 3;
  float originX = driverPoints[triVertA] * baryA +
                  driverPoints[triVertB] * baryB +
                  driverPoints[triVertC] * baryC;
  float originY = driverPoints[triVertA+1] * baryA +
                  driverPoints[triVertB+1] * baryB +
                  driverPoints[triVertC+1] * baryC;
  float originZ = driverPoints[triVertA+2] * baryA +
                  driverPoints[triVertB+2] * baryB +
                  driverPoints[triVertC+2] * baryC;
  float hitNormalX = driverNormals[triVertA] * baryA +
                     driverNormals[triVertB] * baryB +
                     driverNormals[triVertC] * baryC;
  float hitNormalY = driverNormals[triVertA+1] * baryA +
                     driverNormals[triVertB+1] * baryB +
                     driverNormals[triVertC+1] * baryC;
  float hitNormalZ = driverNormals[triVertA+2] * baryA +
                     driverNormals[triVertB+2] * baryB +
                     driverNormals[triVertC+2] * baryC;

  /*
    Equivalent CPU code:
    ====================
    unsigned int hitIndex = weights.length()-1;
    normal = hitNormal * weights[hitIndex];
  */
  // NOTE(review): assumes sampleCounts[positionId] is at least 1, otherwise hitIndex
  // points before the entries of this point - confirm.
  int offset = sampleOffsets[positionId];
  int hitIndex = offset + sampleCounts[positionId] - 1;
  float hitWeight = sampleWeights[hitIndex];
  float normalX = hitNormalX * hitWeight;
  float normalY = hitNormalY * hitWeight;
  float normalZ = hitNormalZ * hitWeight;

  // Use crawl data to calculate normal
  /*
    Equivalent CPU code:
    ====================
    for (unsigned int j = 0; j < hitIndex; j++) {
      normal += MVector(normals[sampleIds[j]]) * weights[j];
    }
  */
  for (int j = offset; j < hitIndex; j++) {
    float sw = sampleWeights[j];
    int sampleId = sampleIds[j] * 3;
    normalX += driverNormals[sampleId]   * sw;
    normalY += driverNormals[sampleId+1] * sw;
    normalZ += driverNormals[sampleId+2] * sw;
  }

  // Calculate the up vector
  /*
    Equivalent CPU code:
    ====================
    up = ((points[triangleVertices[0]] + points[triangleVertices[1]]) * 0.5) - origin;
  */
  float upX = ((driverPoints[triVertA] + driverPoints[triVertB]) * 0.5f) - originX;
  float upY = ((driverPoints[triVertA+1] + driverPoints[triVertB+1]) * 0.5f) - originY;
  float upZ = ((driverPoints[triVertA+2] + driverPoints[triVertB+2]) * 0.5f) - originZ;

  // Use float3 so we can use the built-in functions.  We are mostly using single floats
  // because the preferred vector width of most gpu's these days is 1.
  /*
    Equivalent CPU code:
    ====================
    MVector unitUp = up.normal();
    // Adjust up if it's parallel to normal or if it's zero length
    if (abs((unitUp * normal) - 1.0) < 0.001 || up.length() < 0.0001) {
      for (unsigned int j = 0; j < weights.length()-1; ++j) {
        up -= (points[sampleIds[j]] - origin) * weights[j];
        unitUp = up.normal();
        if (abs((unitUp * normal) - 1.0) > 0.001 && up.length() > 0.0001) {
          // If the up and normal vectors are no longer parallel and the up vector has a length,
          // then we are good to go.
          break;
        }
      }
      up.normalize();
    } else {
      up = unitUp;
    }
  */
  float3 up = (float3)(upX, upY, upZ);
  float3 normal = (float3)(normalX, normalY, normalZ);
  normal = normalize(normal);
  float3 unitUp = normalize(up);
  float upLength = length(up);
  if (fabs(dot(unitUp, normal) - 1.0f) < 0.001f || upLength < 0.0001f) {
    for (int j = offset; j < hitIndex; j++) {
      float sw = sampleWeights[j];
      int sampleId = sampleIds[j] * 3;
      up.x -= (driverPoints[sampleId] - originX) * sw;
      up.y -= (driverPoints[sampleId+1] - originY) * sw;
      up.z -= (driverPoints[sampleId+2] - originZ) * sw;
      unitUp = normalize(up);
      upLength = length(up);
      if (fabs(dot(unitUp, normal) - 1.0f) > 0.001f && upLength > 0.0001f) {
        // If the up and normal vectors are no longer parallel and the up vector has a length,
        // then we are good to go.
        break;
      }
    }
    up = normalize(up);
  } else {
    up = unitUp;
  }

  // Create the transform matrix
  // Store by columns so we can use dot to multiply with the scale matrix
  float3 x = cross(normal, up);
  float3 z = cross(x, normal);
  x = normalize(x);
  z = normalize(z);

  float4 matrix0 = (float4)(x.x, normal.x, z.x, originX);
  float4 matrix1 = (float4)(x.y, normal.y, z.y, originY);
  float4 matrix2 = (float4)(x.z, normal.z, z.z, originZ);
  float4 matrix3 = (float4)(0.0f, 0.0f, 0.0f, 1.0f);

  // Scale matrix mult
  /*
    Equivalent CPU code:
    ====================
    matrix = scaleMatrix * matrix;
  */
  // scaleMatrixN is row N of the product.
  float4 scaleMatrix0 = (float4)(dot(scaleMatrix[0], matrix0),
                        dot(scaleMatrix[0], matrix1),
                        dot(scaleMatrix[0], matrix2),
                        dot(scaleMatrix[0], matrix3));
  float4 scaleMatrix1 = (float4)(dot(scaleMatrix[1], matrix0),
                        dot(scaleMatrix[1], matrix1),
                        dot(scaleMatrix[1], matrix2),
                        dot(scaleMatrix[1], matrix3));
  float4 scaleMatrix2 = (float4)(dot(scaleMatrix[2], matrix0),
                        dot(scaleMatrix[2], matrix1),
                        dot(scaleMatrix[2], matrix2),
                        dot(scaleMatrix[2], matrix3));
  float4 scaleMatrix3 = (float4)(dot(scaleMatrix[3], matrix0),
                        dot(scaleMatrix[3], matrix1),
                        dot(scaleMatrix[3], matrix2),
                        dot(scaleMatrix[3], matrix3));
  // Transpose so we can dot with bindMatrices
  float4 smX = (float4)(scaleMatrix0.x, scaleMatrix1.x, scaleMatrix2.x, scaleMatrix3.x);
  float4 smY = (float4)(scaleMatrix0.y, scaleMatrix1.y, scaleMatrix2.y, scaleMatrix3.y);
  float4 smZ = (float4)(scaleMatrix0.z, scaleMatrix1.z, scaleMatrix2.z, scaleMatrix3.z);
  float4 smW = (float4)(scaleMatrix0.w, scaleMatrix1.w, scaleMatrix2.w, scaleMatrix3.w);

  // Multiply bindMatrix with matrix
  /*
    Equivalent CPU code:
    ====================
    MPoint newPt = ((points[i]  * drivenMatrix) * (bindMatrices[index] * matrix)) * drivenInverseMatrix;
  */
  float4 bm0 = bindMatrices[positionId*4];
  float4 bm1 = bindMatrices[positionId*4+1];
  float4 bm2 = bindMatrices[positionId*4+2];
  float4 bm3 = bindMatrices[positionId*4+3];
  float4 m0 = (float4)(dot(bm0, smX), dot(bm0, smY), dot(bm0, smZ), dot(bm0, smW));
  float4 m1 = (float4)(dot(bm1, smX), dot(bm1, smY), dot(bm1, smZ), dot(bm1, smW));
  float4 m2 = (float4)(dot(bm2, smX), dot(bm2, smY), dot(bm2, smZ), dot(bm2, smW));
  float4 m3 = (float4)(dot(bm3, smX), dot(bm3, smY), dot(bm3, smZ), dot(bm3, smW));

  float4 initialPosition = (float4)(initialPos[positionOffset],
                                    initialPos[positionOffset+1],
                                    initialPos[positionOffset+2],
                                    1.0f);

  // Gather every 4th float so that entry i holds column i of the driven matrices and a
  // point can be multiplied with them using dot.
  float4 drivenMatrixTransposed[4];
  float4 drivenInvMatrixTransposed[4];
  for (uint i=0; i < 4; i++)
  {
    drivenMatrixTransposed[i] = (float4)(drivenWorldMatrix[i], drivenWorldMatrix[i+4], drivenWorldMatrix[i+8], drivenWorldMatrix[i+12]);
    drivenInvMatrixTransposed[i] = (float4)(drivenInvMatrix[i], drivenInvMatrix[i+4], drivenInvMatrix[i+8], drivenInvMatrix[i+12]);
  }

  float4 worldPt = (float4)(dot(initialPosition, drivenMatrixTransposed[0]),
                            dot(initialPosition, drivenMatrixTransposed[1]),
                            dot(initialPosition, drivenMatrixTransposed[2]),
                            dot(initialPosition, drivenMatrixTransposed[3]));
  worldPt = (float4)(dot(worldPt, (float4)(m0.x, m1.x, m2.x, m3.x)),
                     dot(worldPt, (float4)(m0.y, m1.y, m2.y, m3.y)),
                     dot(worldPt, (float4)(m0.z, m1.z, m2.z, m3.z)),
                     dot(worldPt, (float4)(m0.w, m1.w, m2.w, m3.w)));
  // Only x, y and z of the result are needed.
  float3 newPt = (float3)(dot(worldPt, drivenInvMatrixTransposed[0]),
                          dot(worldPt, drivenInvMatrixTransposed[1]),
                          dot(worldPt, drivenInvMatrixTransposed[2]));
  /*
    Equivalent CPU code:
    ====================
    points[i] = points[i] + ((newPt - points[i]) * paintWeights[i] * env);
  */
  float weight = paintWeights[positionId] * envelope;
  finalPos[positionOffset] = initialPosition.x + ((newPt.x - initialPosition.x) * weight);
  finalPos[positionOffset+1] = initialPosition.y + ((newPt.y - initialPosition.y) * weight);
  finalPos[positionOffset+2] = initialPosition.z + ((newPt.z - initialPosition.z) * weight);
}

================================================
FILE: src/cvwrap_pre2018.cl
================================================
/*
  cvwrap kernel

  Variant of the kernel that receives the driven matrices and the scale matrix packed
  into the single drivenMatrices buffer.

  Each work-item deforms one point.  It rebuilds a coordinate system on the driver from
  the bind data of the point, transforms the point with it and blends the result with the
  initial position.

  Arguments:
    finalPos        Output positions, 3 floats per point.
    initialPos      Input positions, 3 floats per point.
    driverPoints    Driver points, 3 floats per driver vertex.
    driverNormals   Driver normals, 3 floats per driver vertex.
    paintWeights    One weight per point.
    sampleCounts    Per point, the number of entries it owns in sampleIds/sampleWeights.
    sampleOffsets   Per point, the index of its first entry in sampleIds/sampleWeights.
    sampleIds       Driver vertex ids of the samples.
    sampleWeights   Weights of the samples.  The last weight of a point is applied to the
                    normal recreated from the barycentric coordinates.
    triangleVerts   3 driver vertex ids per point.
    baryCoords      3 barycentric coordinates per point, matching triangleVerts.
    bindMatrices    4 float4 per point, read as the rows of the bind matrix.
    drivenMatrices  12 float4: [0-3] driven matrix, [4-7] driven inverse matrix,
                    [8-11] scale matrix.
    envelope        Multiplied with the paint weight of each point.
    positionCount   The number of points.  Work-items past it return immediately.
*/

__kernel void cvwrap(__global float* finalPos,
                     __global const float* initialPos,
                     __global const float* driverPoints,
                     __global const float* driverNormals,
                     __global const float* paintWeights,
                     __global const int* sampleCounts,
                     __global const int* sampleOffsets,
                     __global const int* sampleIds,
                     __global const float* sampleWeights,
                     __global const int* triangleVerts,
                     __global const float* baryCoords,
                     __global const float4* bindMatrices,
                     __global const float4* drivenMatrices,
                     const float envelope,
                     const uint positionCount) {
  unsigned int positionId = get_global_id(0);
  if (positionId >= positionCount) {
    return;          
  }
  // Index of the first of the 3 values stored per point.
  unsigned int positionOffset = positionId * 3;

  // Start with the recreated point and normal using the barycentric coordinates of the hit point.
  /*
    Equivalent CPU code:
    ====================
    MVector hitNormal;
    for (int i = 0; i < 3; ++i) {
      origin += points[triangleVertices[i]] * coords[i];
      hitNormal += MVector(normals[triangleVertices[i]]) * coords[i];
    }
  */
  float baryA = baryCoords[positionOffset];
  float baryB = baryCoords[positionOffset+1];
  float baryC = baryCoords[positionOffset+2];
  // Multiply the vertex ids by 3 to index straight into the float arrays.
  int triVertA = triangleVerts[positionOffset] * 3;
  int triVertB = triangleVerts[positionOffset+1] * 3;
  int triVertC = triangleVerts[positionOffset+2] * 3;
  float originX = driverPoints[triVertA] * baryA +
                  driverPoints[triVertB] * baryB +
                  driverPoints[triVertC] * baryC;
  float originY = driverPoints[triVertA+1] * baryA +
                  driverPoints[triVertB+1] * baryB +
                  driverPoints[triVertC+1] * baryC;
  float originZ = driverPoints[triVertA+2] * baryA +
                  driverPoints[triVertB+2] * baryB +
                  driverPoints[triVertC+2] * baryC;
  float hitNormalX = driverNormals[triVertA] * baryA +
                     driverNormals[triVertB] * baryB +
                     driverNormals[triVertC] * baryC;
  float hitNormalY = driverNormals[triVertA+1] * baryA +
                     driverNormals[triVertB+1] * baryB +
                     driverNormals[triVertC+1] * baryC;
  float hitNormalZ = driverNormals[triVertA+2] * baryA +
                     driverNormals[triVertB+2] * baryB +
                     driverNormals[triVertC+2] * baryC;

  /*
    Equivalent CPU code:
    ====================
    unsigned int hitIndex = weights.length()-1;
    normal = hitNormal * weights[hitIndex];
  */
  // NOTE(review): assumes sampleCounts[positionId] is at least 1, otherwise hitIndex
  // points before the entries of this point - confirm.
  int offset = sampleOffsets[positionId];
  int hitIndex = offset + sampleCounts[positionId] - 1;
  float hitWeight = sampleWeights[hitIndex];
  float normalX = hitNormalX * hitWeight;
  float normalY = hitNormalY * hitWeight;
  float normalZ = hitNormalZ * hitWeight;

  // Use crawl data to calculate normal
  /*
    Equivalent CPU code:
    ====================
    for (unsigned int j = 0; j < hitIndex; j++) {
      normal += MVector(normals[sampleIds[j]]) * weights[j];
    }
  */
  for (int j = offset; j < hitIndex; j++) {
    float sw = sampleWeights[j];
    int sampleId = sampleIds[j] * 3;
    normalX += driverNormals[sampleId]   * sw;
    normalY += driverNormals[sampleId+1] * sw;
    normalZ += driverNormals[sampleId+2] * sw;
  }

  // Calculate the up vector
  /*
    Equivalent CPU code:
    ====================
    up = ((points[triangleVertices[0]] + points[triangleVertices[1]]) * 0.5) - origin;
  */
  float upX = ((driverPoints[triVertA] + driverPoints[triVertB]) * 0.5f) - originX;
  float upY = ((driverPoints[triVertA+1] + driverPoints[triVertB+1]) * 0.5f) - originY;
  float upZ = ((driverPoints[triVertA+2] + driverPoints[triVertB+2]) * 0.5f) - originZ;

  // Use float3 so we can use the built-in functions.  We are mostly using single floats
  // because the preferred vector width of most gpu's these days is 1.
  /*
    Equivalent CPU code:
    ====================
    MVector unitUp = up.normal();
    // Adjust up if it's parallel to normal or if it's zero length
    if (abs((unitUp * normal) - 1.0) < 0.001 || up.length() < 0.0001) {
      for (unsigned int j = 0; j < weights.length()-1; ++j) {
        up -= (points[sampleIds[j]] - origin) * weights[j];
        unitUp = up.normal();
        if (abs((unitUp * normal) - 1.0) > 0.001 && up.length() > 0.0001) {
          // If the up and normal vectors are no longer parallel and the up vector has a length,
          // then we are good to go.
          break;
        }
      }
      up.normalize();
    } else {
      up = unitUp;
    }
  */
  float3 up = (float3)(upX, upY, upZ);
  float3 normal = (float3)(normalX, normalY, normalZ);
  normal = normalize(normal);
  float3 unitUp = normalize(up);
  float upLength = length(up);
  if (fabs(dot(unitUp, normal) - 1.0f) < 0.001f || upLength < 0.0001f) {
    for (int j = offset; j < hitIndex; j++) {
      float sw = sampleWeights[j];
      int sampleId = sampleIds[j] * 3;
      up.x -= (driverPoints[sampleId] - originX) * sw;
      up.y -= (driverPoints[sampleId+1] - originY) * sw;
      up.z -= (driverPoints[sampleId+2] - originZ) * sw;
      unitUp = normalize(up);
      upLength = length(up);
      if (fabs(dot(unitUp, normal) - 1.0f) > 0.001f && upLength > 0.0001f) {
        // If the up and normal vectors are no longer parallel and the up vector has a length,
        // then we are good to go.
        break;
      }
    }
    up = normalize(up);
  } else {
    up = unitUp;
  }

  // Create the transform matrix
  // Store by columns so we can use dot to multiply with the scale matrix
  float3 x = cross(normal, up);
  float3 z = cross(x, normal);
  x = normalize(x);
  z = normalize(z);

  float4 matrix0 = (float4)(x.x, normal.x, z.x, originX);
  float4 matrix1 = (float4)(x.y, normal.y, z.y, originY);
  float4 matrix2 = (float4)(x.z, normal.z, z.z, originZ);
  float4 matrix3 = (float4)(0.0f, 0.0f, 0.0f, 1.0f);

  // Scale matrix mult
  /*
    Equivalent CPU code:
    ====================
    matrix = scaleMatrix * matrix;
  */
  // The scale matrix is the third matrix in the buffer.  scaleMatrixN is row N of the product.
  __global const float4* scaleMatrix = &(drivenMatrices[8]);
  float4 scaleMatrix0 = (float4)(dot(scaleMatrix[0], matrix0),
                        dot(scaleMatrix[0], matrix1),
                        dot(scaleMatrix[0], matrix2),
                        dot(scaleMatrix[0], matrix3));
  float4 scaleMatrix1 = (float4)(dot(scaleMatrix[1], matrix0),
                        dot(scaleMatrix[1], matrix1),
                        dot(scaleMatrix[1], matrix2),
                        dot(scaleMatrix[1], matrix3));
  float4 scaleMatrix2 = (float4)(dot(scaleMatrix[2], matrix0),
                        dot(scaleMatrix[2], matrix1),
                        dot(scaleMatrix[2], matrix2),
                        dot(scaleMatrix[2], matrix3));
  float4 scaleMatrix3 = (float4)(dot(scaleMatrix[3], matrix0),
                        dot(scaleMatrix[3], matrix1),
                        dot(scaleMatrix[3], matrix2),
                        dot(scaleMatrix[3], matrix3));
  // Transpose so we can dot with bindMatrices
  float4 smX = (float4)(scaleMatrix0.x, scaleMatrix1.x, scaleMatrix2.x, scaleMatrix3.x);
  float4 smY = (float4)(scaleMatrix0.y, scaleMatrix1.y, scaleMatrix2.y, scaleMatrix3.y);
  float4 smZ = (float4)(scaleMatrix0.z, scaleMatrix1.z, scaleMatrix2.z, scaleMatrix3.z);
  float4 smW = (float4)(scaleMatrix0.w, scaleMatrix1.w, scaleMatrix2.w, scaleMatrix3.w);

  // Multiply bindMatrix with matrix
  /*
    Equivalent CPU code:
    ====================
    MPoint newPt = ((points[i]  * drivenMatrix) * (bindMatrices[index] * matrix)) * drivenInverseMatrix;
  */
  float4 bm0 = bindMatrices[positionId*4];
  float4 bm1 = bindMatrices[positionId*4+1];
  float4 bm2 = bindMatrices[positionId*4+2];
  float4 bm3 = bindMatrices[positionId*4+3];
  float4 m0 = (float4)(dot(bm0, smX), dot(bm0, smY), dot(bm0, smZ), dot(bm0, smW));
  float4 m1 = (float4)(dot(bm1, smX), dot(bm1, smY), dot(bm1, smZ), dot(bm1, smW));
  float4 m2 = (float4)(dot(bm2, smX), dot(bm2, smY), dot(bm2, smZ), dot(bm2, smW));
  float4 m3 = (float4)(dot(bm3, smX), dot(bm3, smY), dot(bm3, smZ), dot(bm3, smW));

  float4 initialPosition = (float4)(initialPos[positionOffset],
                                    initialPos[positionOffset+1],
                                    initialPos[positionOffset+2],
                                    1.0f);
  // The driven matrices are the first two matrices in the buffer.  Each float4 is dotted
  // with the point directly, so no transpose is done in this kernel.
  __global const float4* drivenInverseMatrix = &(drivenMatrices[4]);
	__global const float4* drivenMatrix = drivenMatrices;
  float4 worldPt = (float4)(dot(initialPosition, drivenMatrix[0]),
                            dot(initialPosition, drivenMatrix[1]),
                            dot(initialPosition, drivenMatrix[2]),
                            dot(initialPosition, drivenMatrix[3]));
  worldPt = (float4)(dot(worldPt, (float4)(m0.x, m1.x, m2.x, m3.x)),
                     dot(worldPt, (float4)(m0.y, m1.y, m2.y, m3.y)),
                     dot(worldPt, (float4)(m0.z, m1.z, m2.z, m3.z)),
                     dot(worldPt, (float4)(m0.w, m1.w, m2.w, m3.w)));
  // Only x, y and z of the result are needed.
  float3 newPt = (float3)(dot(worldPt, drivenInverseMatrix[0]),
                          dot(worldPt, drivenInverseMatrix[1]),
                          dot(worldPt, drivenInverseMatrix[2]));
  /*
    Equivalent CPU code:
    ====================
    points[i] = points[i] + ((newPt - points[i]) * paintWeights[i] * env);
  */
  float weight = paintWeights[positionId] * envelope;
  finalPos[positionOffset] = initialPosition.x + ((newPt.x - initialPosition.x) * weight);
  finalPos[positionOffset+1] = initialPosition.y + ((newPt.y - initialPosition.y) * weight);
  finalPos[positionOffset+2] = initialPosition.z + ((newPt.z - initialPosition.z) * weight);
}

================================================
FILE: src/pluginMain.cpp
================================================
#include "cvWrapDeformer.h"
#include "cvWrapCmd.h"

#include <maya/MFnPlugin.h>
#include <maya/MGlobal.h>

/**
  Plug-in entry point.  Registers the cvWrap node, its command and, from Maya 2016 on,
  the GPU override.  In an interactive session the menu items are created on idle.

  @param[in] obj The plug-in object handed over by Maya.
  @return The status of the first registration that failed, otherwise the status of the
          last registration.
*/
MStatus initializePlugin(MObject obj) {
  MFnPlugin fnPlugin(obj, "Chad Vernon", "1.0", "Any");

  // Node first, then the command that creates it.
  MStatus status = fnPlugin.registerNode(CVWrap::kName, CVWrap::id,
                                         CVWrap::creator, CVWrap::initialize,
                                         MPxNode::kDeformerNode);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  status = fnPlugin.registerCommand(CVWrapCmd::kName, CVWrapCmd::creator,
                                    CVWrapCmd::newSyntax);
  CHECK_MSTATUS_AND_RETURN_IT(status);

#if MAYA_API_VERSION >= 201600
  status = MGPUDeformerRegistry::registerGPUDeformerCreator(
      CVWrap::kName, "cvWrapOverride", CVWrapGPU::GetGPUDeformerInfo());
  CHECK_MSTATUS_AND_RETURN_IT(status);
  // Remember where the plug-in was loaded from so the cl kernel can be found.
  CVWrapGPU::pluginLoadPath = fnPlugin.loadPath();
#endif

  // The menu only exists in an interactive session.
  const bool isInteractive = (MGlobal::kInteractive == MGlobal::mayaState());
  if (!isInteractive) {
    return status;
  }
  MGlobal::executePythonCommandOnIdle("import cvwrap.menu");
  MGlobal::executePythonCommandOnIdle("cvwrap.menu.create_menuitems()");
  return status;
}

/**
  Plug-in exit point.  Deregisters the GPU override (from Maya 2016 on), the command and
  the node.  In an interactive session the menu items are destroyed on idle.

  @param[in] obj The plug-in object handed over by Maya.
  @return The status of the first deregistration that failed, otherwise the status of the
          last deregistration.
*/
MStatus uninitializePlugin( MObject obj) {
  MFnPlugin fnPlugin(obj);
  MStatus status;

#if MAYA_API_VERSION >= 201600
  status = MGPUDeformerRegistry::deregisterGPUDeformerCreator(CVWrap::kName,
                                                              "cvWrapOverride");
  CHECK_MSTATUS_AND_RETURN_IT(status);
#endif

  status = fnPlugin.deregisterCommand(CVWrapCmd::kName);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  status = fnPlugin.deregisterNode(CVWrap::id);
  CHECK_MSTATUS_AND_RETURN_IT(status);

  // The menu only exists in an interactive session.
  const bool isInteractive = (MGlobal::kInteractive == MGlobal::mayaState());
  if (!isInteractive) {
    return status;
  }
  MGlobal::executePythonCommandOnIdle("import cvwrap.menu");
  MGlobal::executePythonCommandOnIdle("cvwrap.menu.destroy_menuitems()");
  return status;
}
Download .txt
gitextract_0vtif6cx/

├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── LICENSE
├── README.md
├── build.bat
├── module.txt
├── scripts/
│   ├── AEcvWrapTemplate.mel
│   └── cvwrap/
│       ├── __init__.py
│       ├── bindui.py
│       └── menu.py
└── src/
    ├── CMakeLists.txt
    ├── bindingio.cpp
    ├── bindingio.h
    ├── common.cpp
    ├── common.h
    ├── cvWrapCmd.cpp
    ├── cvWrapCmd.h
    ├── cvWrapDeformer.cpp
    ├── cvWrapDeformer.h
    ├── cvwrap.cl
    ├── cvwrap_pre2018.cl
    └── pluginMain.cpp
Download .txt
SYMBOL INDEX (78 symbols across 11 files)

FILE: scripts/cvwrap/bindui.py
  function show (line 10) | def show():
  class BindingDialog (line 17) | class BindingDialog(MayaQWidgetBaseMixin, QtGui.QDialog):
    method __init__ (line 19) | def __init__(self, parent=None):
    method set_selected_text (line 86) | def set_selected_text(self, widget):
    method populate_cvwrap_dropdown (line 91) | def populate_cvwrap_dropdown(self, text):
    method rebind (line 104) | def rebind(self):

FILE: scripts/cvwrap/menu.py
  function create_menuitems (line 18) | def create_menuitems():
  function create_cvwrap (line 62) | def create_cvwrap(*args, **kwargs):
  function get_create_command_kwargs (line 73) | def get_create_command_kwargs():
  function display_cvwrap_options (line 112) | def display_cvwrap_options(*args, **kwargs):
  function apply_and_close (line 153) | def apply_and_close(*args, **kwargs):
  function close_option_box (line 160) | def close_option_box(*args, **kwargs):
  function display_bind_file_dialog (line 164) | def display_bind_file_dialog(*args, **kwargs):
  function reset_to_defaults (line 174) | def reset_to_defaults(*args, **kwargs):
  function edit_binding (line 182) | def edit_binding(*args, **kwargs):
  function export_binding (line 186) | def export_binding(*args, **kwargs):
  function import_binding (line 198) | def import_binding(*args, **kwargs):
  function get_wrap_node_from_selected (line 210) | def get_wrap_node_from_selected():
  function destroy_menuitems (line 229) | def destroy_menuitems():
  function paint_cvwrap_weights (line 237) | def paint_cvwrap_weights(*args, **kwargs):

FILE: src/bindingio.cpp
  function MStatus (line 35) | MStatus BindingIO::ExportBinding(std::ofstream& out, MObject& oWrapNode) {
  function MStatus (line 132) | MStatus BindingIO::ImportBinding(std::ifstream& in, MObject& oWrapNode) {

FILE: src/bindingio.h
  function class (line 12) | class BindingIO {

FILE: src/common.cpp
  function StartProgress (line 17) | void StartProgress(const MString& title, unsigned int count) {
  function StepProgress (line 29) | void StepProgress(int step) {
  function ProgressCancelled (line 39) | bool ProgressCancelled() {
  function EndProgress (line 49) | void EndProgress() {
  function IsShapeNode (line 56) | bool IsShapeNode(MDagPath& path) {
  function MStatus (line 63) | MStatus GetShapeNode(MDagPath& path, bool intermediate) {
  function MStatus (line 98) | MStatus GetDagPath(MString& name, MDagPath& path) {
  function MStatus (line 108) | MStatus DeleteIntermediateObjects(MDagPath& path) {
  function GetBarycentricCoordinates (line 119) | void GetBarycentricCoordinates(const MPoint& P, const MPoint& A, const M...
  function MStatus (line 149) | MStatus GetAdjacency(MDagPath& pathMesh, std::vector<std::set<int> >& ad...
  type CrawlData (line 180) | struct CrawlData {
  function MStatus (line 187) | MStatus CrawlSurface(const MPoint& startPoint, const MIntArray& vertexIn...
  function SampleSort (line 257) | bool SampleSort(std::pair<int, double> lhs, std::pair<int, double> rhs) {
  function CalculateSampleWeights (line 262) | void CalculateSampleWeights(const std::map<int, double>& distances, doub...
  function CreateMatrix (line 303) | void CreateMatrix(const MPoint& origin, const MVector& normal, const MVe...
  function CalculateBasisComponents (line 319) | void CalculateBasisComponents(const MDoubleArray& weights, const BaryCoo...
  function GetValidUp (line 389) | void GetValidUp(const MDoubleArray& weights, const MPointArray& points,

FILE: src/common.h
  type BaryCoords (line 89) | struct BaryCoords {

FILE: src/cvWrapCmd.cpp
  function DisplayHelp (line 51) | void DisplayHelp() {
  function MSyntax (line 76) | MSyntax CVWrapCmd::newSyntax() {
  function MStatus (line 102) | MStatus CVWrapCmd::doIt(const MArgList& args) {
  function MStatus (line 142) | MStatus CVWrapCmd::GatherCommandArguments(const MArgList& args) {
  function MStatus (line 197) | MStatus CVWrapCmd::GetGeometryPaths() {
  function MStatus (line 227) | MStatus CVWrapCmd::redoIt() {
  function MStatus (line 264) | MStatus CVWrapCmd::CreateWrapDeformer() {
  function MStatus (line 331) | MStatus CVWrapCmd::GetLatestWrapNode() {
  function MStatus (line 358) | MStatus CVWrapCmd::CreateBindMesh(MDagPath& pathBindMesh) {
  function MStatus (line 387) | MStatus CVWrapCmd::ConnectBindMesh(MDagPath& pathBindMesh) {
  function MStatus (line 405) | MStatus CVWrapCmd::CalculateBinding(MDagPath& pathBindMesh, BindData& bi...
  function SortCoords (line 564) | bool SortCoords(std::pair<int, float> lhs, std::pair<int, float> rhs) {
  function MThreadRetVal (line 569) | MThreadRetVal CVWrapCmd::CalculateBindingTask(void *pParam) {
  function MStatus (line 661) | MStatus CVWrapCmd::GetExistingBindMesh(MDagPath &pathBindMesh) {
  function MStatus (line 689) | MStatus CVWrapCmd::Rebind() {
  function MStatus (line 719) | MStatus CVWrapCmd::GetBindMesh(MObject& oWrapNode, MDagPath& pathBindMes...
  function MStatus (line 738) | MStatus CVWrapCmd::CreateRebindSubsetMesh(MDagPath& pathDriverSubset) {
  function MStatus (line 799) | MStatus CVWrapCmd::undoIt() {

FILE: src/cvWrapCmd.h
  type BindData (line 31) | struct BindData {
  function class (line 58) | class CVWrapCmd : public MPxCommand {

FILE: src/cvWrapDeformer.cpp
  function MStatus (line 30) | MStatus CVWrap::initialize() {
  function MStatus (line 123) | MStatus GetBindInfo(MDataBlock& data, unsigned int geomIndex, TaskData& ...
  function MStatus (line 208) | MStatus GetDriverData(MDataBlock& data, TaskData& taskData) {
  function MStatus (line 271) | MStatus CVWrap::setDependentsDirty(const MPlug& plugBeingDirtied, MPlugA...
  function MStatus (line 285) | MStatus CVWrap::deform(MDataBlock& data, MItGeometry& itGeo, const MMatr...
  function MThreadRetVal (line 387) | MThreadRetVal CVWrap::EvaluateWrap(void *pParam) {
  function cl_int (line 447) | cl_int EnqueueBuffer(MAutoCLMem& mclMem, size_t bufferSize, void* data) {
  function MGPUDeformerRegistrationInfo (line 463) | MGPUDeformerRegistrationInfo* CVWrapGPU::GetGPUDeformerInfo() {
  function MStatus (line 626) | MStatus CVWrapGPU::EnqueueBindData(MDataBlock& data, const MEvaluationNo...
  function MStatus (line 710) | MStatus CVWrapGPU::EnqueueDriverData(MDataBlock& data, const MEvaluation...
  function MStatus (line 788) | MStatus CVWrapGPU::EnqueuePaintMapData(MDataBlock& data,

FILE: src/cvWrapDeformer.h
  type TaskData (line 25) | struct TaskData {
  function class (line 45) | class CVWrap : public MPxDeformerNode {
  function class (line 97) | class CVWrapGPU : public MPxGPUDeformer {
  function class (line 149) | class CVWrapGPUDeformerInfo : public MGPUDeformerRegistrationInfo {

FILE: src/pluginMain.cpp
  function MStatus (line 7) | MStatus initializePlugin(MObject obj) {
  function MStatus (line 32) | MStatus uninitializePlugin( MObject obj) {
Condensed preview — 23 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (159K chars).
[
  {
    "path": ".gitignore",
    "chars": 290,
    "preview": "\n#ignore thumbnails created by windows\nThumbs.db\n#Ignore files build by Visual Studio\n*.obj\n*.exe\n*.pdb\n*.user\n*.aps\n*.p"
  },
  {
    "path": ".gitmodules",
    "chars": 79,
    "preview": "[submodule \"cgcmake\"]\n\tpath = cgcmake\n\turl = git@github.com:chadmv/cgcmake.git\n"
  },
  {
    "path": "CMakeLists.txt",
    "chars": 380,
    "preview": "cmake_minimum_required(VERSION 2.6)\nproject(cvwrap)\n\nset(PROJECT_PATH ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT})\nset(CMAKE_"
  },
  {
    "path": "LICENSE",
    "chars": 1079,
    "preview": "The MIT License (MIT)\n\nCopyright (c) 2015 Chad Vernon\n\nPermission is hereby granted, free of charge, to any person obtai"
  },
  {
    "path": "README.md",
    "chars": 1208,
    "preview": "# cvwrap\nA Maya wrap deformer that is faster than Maya's wrap deformer, can be rebounded, has a GPU implementation, and "
  },
  {
    "path": "build.bat",
    "chars": 409,
    "preview": "@echo off\nFOR %%G IN (2019, 2020, 2022, 2023) DO (call :subroutine \"%%G\")\nGOTO :eof\n\n:subroutine\nset builddir=build.%1\ni"
  },
  {
    "path": "module.txt",
    "chars": 40,
    "preview": "+ ${PROJECT_NAME} 1.0.0 ${PROJECT_PATH}\n"
  },
  {
    "path": "scripts/AEcvWrapTemplate.mel",
    "chars": 656,
    "preview": "global proc AEcvWrapTemplate(string $nodeName) {\n    editorTemplate -beginScrollLayout;\n        editorTemplate -beginLay"
  },
  {
    "path": "scripts/cvwrap/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "scripts/cvwrap/bindui.py",
    "chars": 4238,
    "preview": "import maya.cmds as cmds\nif cmds.about(api=True) >= 201700:\n    from PySide2 import QtWidgets as QtGui\nelse:\n    from Py"
  },
  {
    "path": "scripts/cvwrap/menu.py",
    "chars": 10379,
    "preview": "import maya.cmds as cmds\nimport maya.mel as mel\nimport maya.OpenMayaUI as OpenMayaUI\nimport os\nif cmds.about(api=True) >"
  },
  {
    "path": "src/CMakeLists.txt",
    "chars": 726,
    "preview": "set(SOURCE_FILES\n    \"pluginMain.cpp\"\n    \"cvWrapCmd.cpp\"\n    \"cvWrapCmd.h\"\n    \"cvWrapDeformer.cpp\"\n    \"cvWrapDeformer"
  },
  {
    "path": "src/bindingio.cpp",
    "chars": 9369,
    "preview": "#include \"bindingio.h\"\n#include \"cvWrapDeformer.h\"\n\n#include <maya/MGlobal.h>\n#include <maya/MObjectArray.h>\n#include <m"
  },
  {
    "path": "src/bindingio.h",
    "chars": 2024,
    "preview": "#ifndef CVWRAP_BindingIO_H\n#define CVWRAP_BindingIO_H\n\n#include <maya/MMatrix.h>\n#include <maya/MObject.h>\n#include <may"
  },
  {
    "path": "src/common.cpp",
    "chars": 14233,
    "preview": "#include \"common.h\"\n\n#include <maya/MGlobal.h>\n#include <maya/MFnDagNode.h>\n#include <maya/MFnMesh.h>\n#include <maya/MIt"
  },
  {
    "path": "src/common.h",
    "chars": 9187,
    "preview": "/**\n  Contains various helper functions.\n*/\n\n#ifndef CVWRAP_COMMON_H\n#define CVWRAP_COMMON_H\n\n#include <maya/MDagPath.h>"
  },
  {
    "path": "src/cvWrapCmd.cpp",
    "chars": 31769,
    "preview": "#include \"cvWrapCmd.h\"\n#include \"cvWrapDeformer.h\"\n#include \"bindingio.h\"\n\n#include <maya/MArgDatabase.h>\n#include <maya"
  },
  {
    "path": "src/cvWrapCmd.h",
    "chars": 6875,
    "preview": "#ifndef CVWRAPCMD_H\n#define CVWRAPCMD_H\n\n#include <maya/MArgList.h>\n#include <maya/MDagPath.h>\n#include <maya/MDagPathAr"
  },
  {
    "path": "src/cvWrapDeformer.cpp",
    "chars": 32708,
    "preview": "#include \"cvWrapDeformer.h\"\n#include <maya/MFnCompoundAttribute.h>\n#include <maya/MFnDoubleArrayData.h>\n#include <maya/M"
  },
  {
    "path": "src/cvWrapDeformer.h",
    "chars": 5340,
    "preview": "#ifndef CVWRAPDEFORMER_H\n#define CVWRAPDEFORMER_H\n\n#include <maya/MDGModifier.h>\n#include <maya/MFloatArray.h>\n#include "
  },
  {
    "path": "src/cvwrap.cl",
    "chars": 10290,
    "preview": "/*\n  cvwrap kernel\n*/\n\n__kernel void cvwrap(__global float* finalPos,\n                     __global const float* initial"
  },
  {
    "path": "src/cvwrap_pre2018.cl",
    "chars": 9915,
    "preview": "/*\n  cvwrap kernel\n*/\n\n__kernel void cvwrap(__global float* finalPos,\n                     __global const float* initial"
  },
  {
    "path": "src/pluginMain.cpp",
    "chars": 1785,
    "preview": "#include \"cvWrapDeformer.h\"\n#include \"cvWrapCmd.h\"\n\n#include <maya/MFnPlugin.h>\n#include <maya/MGlobal.h>\n\nMStatus initi"
  }
]

About this extraction

This page contains the full source code of the chadmv/cvwrap GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 23 files (149.4 KB), approximately 40.2k tokens, and a symbol index with 78 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!